{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9998916321996858, "eval_steps": 500, "global_step": 27682, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.224520020951108e-05, "grad_norm": 439.81265542842215, "learning_rate": 6.01684717208183e-09, "loss": 1.5944, "step": 1 }, { "epoch": 0.00014449040041902215, "grad_norm": 422.5603507192789, "learning_rate": 1.203369434416366e-08, "loss": 1.5676, "step": 2 }, { "epoch": 0.00021673560062853323, "grad_norm": 389.93893752740314, "learning_rate": 1.8050541516245488e-08, "loss": 1.5645, "step": 3 }, { "epoch": 0.0002889808008380443, "grad_norm": 276.58958200734895, "learning_rate": 2.406738868832732e-08, "loss": 1.5481, "step": 4 }, { "epoch": 0.0003612260010475554, "grad_norm": 656.1307748840927, "learning_rate": 3.008423586040915e-08, "loss": 1.8228, "step": 5 }, { "epoch": 0.00043347120125706646, "grad_norm": 375.9519999281823, "learning_rate": 3.6101083032490976e-08, "loss": 1.625, "step": 6 }, { "epoch": 0.0005057164014665776, "grad_norm": 558.7571587908293, "learning_rate": 4.21179302045728e-08, "loss": 1.7485, "step": 7 }, { "epoch": 0.0005779616016760886, "grad_norm": 391.6773424963461, "learning_rate": 4.813477737665464e-08, "loss": 1.6608, "step": 8 }, { "epoch": 0.0006502068018855998, "grad_norm": 403.6424005230372, "learning_rate": 5.4151624548736464e-08, "loss": 1.6779, "step": 9 }, { "epoch": 0.0007224520020951108, "grad_norm": 438.26738057377713, "learning_rate": 6.01684717208183e-08, "loss": 1.7037, "step": 10 }, { "epoch": 0.0007946972023046219, "grad_norm": 227.02279965347094, "learning_rate": 6.618531889290012e-08, "loss": 1.4411, "step": 11 }, { "epoch": 0.0008669424025141329, "grad_norm": 277.32529692132306, "learning_rate": 7.220216606498195e-08, "loss": 1.54, "step": 12 }, { "epoch": 0.0009391876027236441, "grad_norm": 413.53476274673693, "learning_rate": 7.82190132370638e-08, "loss": 1.6152, "step": 13 }, { "epoch": 0.0010114328029331551, "grad_norm": 248.7297180801884, "learning_rate": 8.42358604091456e-08, "loss": 1.4512, "step": 14 }, { "epoch": 0.0010836780031426663, "grad_norm": 55.94775787579092, "learning_rate": 9.025270758122745e-08, "loss": 1.2864, "step": 15 }, { "epoch": 0.0011559232033521772, "grad_norm": 38.385114200527006, "learning_rate": 9.626955475330928e-08, "loss": 1.3397, "step": 16 }, { "epoch": 0.0012281684035616884, "grad_norm": 52.4331885146469, "learning_rate": 1.0228640192539111e-07, "loss": 1.418, "step": 17 }, { "epoch": 0.0013004136037711995, "grad_norm": 53.048610059990025, "learning_rate": 1.0830324909747293e-07, "loss": 1.3503, "step": 18 }, { "epoch": 0.0013726588039807105, "grad_norm": 39.173630981283985, "learning_rate": 1.1432009626955476e-07, "loss": 1.3592, "step": 19 }, { "epoch": 0.0014449040041902216, "grad_norm": 49.42661710572174, "learning_rate": 1.203369434416366e-07, "loss": 1.3006, "step": 20 }, { "epoch": 0.0015171492043997328, "grad_norm": 28.176378851713938, "learning_rate": 1.263537906137184e-07, "loss": 1.2994, "step": 21 }, { "epoch": 0.0015893944046092437, "grad_norm": 53.309088218790144, "learning_rate": 1.3237063778580024e-07, "loss": 1.3753, "step": 22 }, { "epoch": 0.0016616396048187549, "grad_norm": 95.72017322644167, "learning_rate": 1.3838748495788207e-07, "loss": 1.3621, "step": 23 }, { "epoch": 0.0017338848050282658, "grad_norm": 150.485243774099, "learning_rate": 1.444043321299639e-07, "loss": 1.2712, "step": 24 }, { "epoch": 0.001806130005237777, "grad_norm": 99.59996646742658, "learning_rate": 1.5042117930204574e-07, "loss": 1.4777, "step": 25 }, { "epoch": 0.0018783752054472881, "grad_norm": 52.70700215504459, "learning_rate": 1.564380264741276e-07, "loss": 1.4399, "step": 26 }, { "epoch": 0.001950620405656799, "grad_norm": 30.04132387456849, "learning_rate": 1.6245487364620937e-07, "loss": 1.2957, "step": 27 }, { "epoch": 0.0020228656058663102, "grad_norm": 41.00552405383557, "learning_rate": 1.684717208182912e-07, "loss": 1.2942, "step": 28 }, { "epoch": 0.0020951108060758214, "grad_norm": 41.8872489664404, "learning_rate": 1.7448856799037306e-07, "loss": 1.4321, "step": 29 }, { "epoch": 0.0021673560062853326, "grad_norm": 45.29876138707465, "learning_rate": 1.805054151624549e-07, "loss": 1.4258, "step": 30 }, { "epoch": 0.0022396012064948437, "grad_norm": 26.342687691733712, "learning_rate": 1.8652226233453673e-07, "loss": 1.3671, "step": 31 }, { "epoch": 0.0023118464067043544, "grad_norm": 27.243494779717597, "learning_rate": 1.9253910950661856e-07, "loss": 1.2738, "step": 32 }, { "epoch": 0.0023840916069138656, "grad_norm": 24.128446816999666, "learning_rate": 1.985559566787004e-07, "loss": 1.349, "step": 33 }, { "epoch": 0.0024563368071233768, "grad_norm": 21.115521172232523, "learning_rate": 2.0457280385078222e-07, "loss": 1.2512, "step": 34 }, { "epoch": 0.002528582007332888, "grad_norm": 23.99035705122607, "learning_rate": 2.1058965102286403e-07, "loss": 1.4412, "step": 35 }, { "epoch": 0.002600827207542399, "grad_norm": 19.671560853081395, "learning_rate": 2.1660649819494586e-07, "loss": 1.2348, "step": 36 }, { "epoch": 0.00267307240775191, "grad_norm": 21.67147930199318, "learning_rate": 2.226233453670277e-07, "loss": 1.3018, "step": 37 }, { "epoch": 0.002745317607961421, "grad_norm": 18.773669955385653, "learning_rate": 2.2864019253910952e-07, "loss": 1.2628, "step": 38 }, { "epoch": 0.002817562808170932, "grad_norm": 21.818626439737148, "learning_rate": 2.3465703971119135e-07, "loss": 1.3498, "step": 39 }, { "epoch": 0.0028898080083804433, "grad_norm": 20.088418446061464, "learning_rate": 2.406738868832732e-07, "loss": 1.2909, "step": 40 }, { "epoch": 0.0029620532085899544, "grad_norm": 21.95666536176643, "learning_rate": 2.4669073405535504e-07, "loss": 1.349, "step": 41 }, { "epoch": 0.0030342984087994656, "grad_norm": 41.95591383887005, "learning_rate": 2.527075812274368e-07, "loss": 1.1927, "step": 42 }, { "epoch": 0.0031065436090089763, "grad_norm": 16.640534220703092, "learning_rate": 2.5872442839951865e-07, "loss": 1.3773, "step": 43 }, { "epoch": 0.0031787888092184875, "grad_norm": 17.762022433213193, "learning_rate": 2.647412755716005e-07, "loss": 1.2191, "step": 44 }, { "epoch": 0.0032510340094279986, "grad_norm": 18.721809048091565, "learning_rate": 2.707581227436823e-07, "loss": 1.3404, "step": 45 }, { "epoch": 0.0033232792096375098, "grad_norm": 16.502429205354417, "learning_rate": 2.7677496991576415e-07, "loss": 1.2293, "step": 46 }, { "epoch": 0.003395524409847021, "grad_norm": 18.47724876500363, "learning_rate": 2.82791817087846e-07, "loss": 1.353, "step": 47 }, { "epoch": 0.0034677696100565317, "grad_norm": 16.392851671917853, "learning_rate": 2.888086642599278e-07, "loss": 1.3076, "step": 48 }, { "epoch": 0.003540014810266043, "grad_norm": 21.164947840470663, "learning_rate": 2.9482551143200964e-07, "loss": 1.2769, "step": 49 }, { "epoch": 0.003612260010475554, "grad_norm": 15.670350689983627, "learning_rate": 3.008423586040915e-07, "loss": 1.2156, "step": 50 }, { "epoch": 0.003684505210685065, "grad_norm": 14.258624227749467, "learning_rate": 3.068592057761733e-07, "loss": 1.2634, "step": 51 }, { "epoch": 0.0037567504108945763, "grad_norm": 14.204949088725424, "learning_rate": 3.128760529482552e-07, "loss": 1.1854, "step": 52 }, { "epoch": 0.0038289956111040874, "grad_norm": 18.905718936239737, "learning_rate": 3.1889290012033697e-07, "loss": 1.3068, "step": 53 }, { "epoch": 0.003901240811313598, "grad_norm": 14.362272516080868, "learning_rate": 3.2490974729241875e-07, "loss": 1.2616, "step": 54 }, { "epoch": 0.00397348601152311, "grad_norm": 13.230859138579092, "learning_rate": 3.3092659446450063e-07, "loss": 1.2445, "step": 55 }, { "epoch": 0.0040457312117326205, "grad_norm": 12.393833202057776, "learning_rate": 3.369434416365824e-07, "loss": 1.1561, "step": 56 }, { "epoch": 0.004117976411942131, "grad_norm": 15.986154287578048, "learning_rate": 3.429602888086643e-07, "loss": 1.3318, "step": 57 }, { "epoch": 0.004190221612151643, "grad_norm": 15.518121770094613, "learning_rate": 3.489771359807461e-07, "loss": 1.1257, "step": 58 }, { "epoch": 0.0042624668123611535, "grad_norm": 13.727282814054325, "learning_rate": 3.5499398315282796e-07, "loss": 1.2271, "step": 59 }, { "epoch": 0.004334712012570665, "grad_norm": 14.4713762507517, "learning_rate": 3.610108303249098e-07, "loss": 1.2817, "step": 60 }, { "epoch": 0.004406957212780176, "grad_norm": 12.615817214677003, "learning_rate": 3.6702767749699157e-07, "loss": 1.1911, "step": 61 }, { "epoch": 0.004479202412989687, "grad_norm": 14.598803411072986, "learning_rate": 3.7304452466907345e-07, "loss": 1.2859, "step": 62 }, { "epoch": 0.004551447613199198, "grad_norm": 14.833731199493542, "learning_rate": 3.7906137184115523e-07, "loss": 1.3429, "step": 63 }, { "epoch": 0.004623692813408709, "grad_norm": 12.157282376029102, "learning_rate": 3.850782190132371e-07, "loss": 1.1658, "step": 64 }, { "epoch": 0.0046959380136182205, "grad_norm": 14.035400864136093, "learning_rate": 3.910950661853189e-07, "loss": 1.2471, "step": 65 }, { "epoch": 0.004768183213827731, "grad_norm": 12.133264615489608, "learning_rate": 3.971119133574008e-07, "loss": 1.2198, "step": 66 }, { "epoch": 0.004840428414037243, "grad_norm": 11.55833698282732, "learning_rate": 4.0312876052948256e-07, "loss": 1.1482, "step": 67 }, { "epoch": 0.0049126736142467535, "grad_norm": 15.894465969698826, "learning_rate": 4.0914560770156444e-07, "loss": 1.205, "step": 68 }, { "epoch": 0.004984918814456264, "grad_norm": 14.196320736759137, "learning_rate": 4.151624548736462e-07, "loss": 1.3537, "step": 69 }, { "epoch": 0.005057164014665776, "grad_norm": 14.127240838742019, "learning_rate": 4.2117930204572805e-07, "loss": 1.2591, "step": 70 }, { "epoch": 0.0051294092148752865, "grad_norm": 11.741090034167598, "learning_rate": 4.2719614921780994e-07, "loss": 1.1881, "step": 71 }, { "epoch": 0.005201654415084798, "grad_norm": 12.687507817308244, "learning_rate": 4.332129963898917e-07, "loss": 1.17, "step": 72 }, { "epoch": 0.005273899615294309, "grad_norm": 12.379379576387867, "learning_rate": 4.392298435619736e-07, "loss": 1.155, "step": 73 }, { "epoch": 0.00534614481550382, "grad_norm": 11.901085401770478, "learning_rate": 4.452466907340554e-07, "loss": 1.2085, "step": 74 }, { "epoch": 0.005418390015713331, "grad_norm": 10.750920810740288, "learning_rate": 4.5126353790613726e-07, "loss": 1.2441, "step": 75 }, { "epoch": 0.005490635215922842, "grad_norm": 13.472293254931582, "learning_rate": 4.5728038507821904e-07, "loss": 1.0728, "step": 76 }, { "epoch": 0.0055628804161323535, "grad_norm": 11.629979533524352, "learning_rate": 4.6329723225030087e-07, "loss": 1.2, "step": 77 }, { "epoch": 0.005635125616341864, "grad_norm": 13.67171205260039, "learning_rate": 4.693140794223827e-07, "loss": 1.2063, "step": 78 }, { "epoch": 0.005707370816551376, "grad_norm": 11.579249771403788, "learning_rate": 4.7533092659446454e-07, "loss": 1.1468, "step": 79 }, { "epoch": 0.0057796160167608865, "grad_norm": 13.504701149044358, "learning_rate": 4.813477737665464e-07, "loss": 1.1438, "step": 80 }, { "epoch": 0.005851861216970397, "grad_norm": 13.233993362634939, "learning_rate": 4.873646209386282e-07, "loss": 1.1567, "step": 81 }, { "epoch": 0.005924106417179909, "grad_norm": 11.612598654468586, "learning_rate": 4.933814681107101e-07, "loss": 1.3001, "step": 82 }, { "epoch": 0.0059963516173894196, "grad_norm": 12.684947090564851, "learning_rate": 4.993983152827919e-07, "loss": 1.1483, "step": 83 }, { "epoch": 0.006068596817598931, "grad_norm": 12.587611062886706, "learning_rate": 5.054151624548736e-07, "loss": 1.0058, "step": 84 }, { "epoch": 0.006140842017808442, "grad_norm": 11.971761538755285, "learning_rate": 5.114320096269555e-07, "loss": 1.2153, "step": 85 }, { "epoch": 0.006213087218017953, "grad_norm": 11.024803979908675, "learning_rate": 5.174488567990373e-07, "loss": 1.238, "step": 86 }, { "epoch": 0.006285332418227464, "grad_norm": 11.349717433285715, "learning_rate": 5.234657039711192e-07, "loss": 1.0858, "step": 87 }, { "epoch": 0.006357577618436975, "grad_norm": 12.009360794984119, "learning_rate": 5.29482551143201e-07, "loss": 1.1351, "step": 88 }, { "epoch": 0.0064298228186464865, "grad_norm": 15.091476237575199, "learning_rate": 5.354993983152829e-07, "loss": 1.1764, "step": 89 }, { "epoch": 0.006502068018855997, "grad_norm": 15.227783529961002, "learning_rate": 5.415162454873646e-07, "loss": 1.2352, "step": 90 }, { "epoch": 0.006574313219065508, "grad_norm": 14.614404736617365, "learning_rate": 5.475330926594465e-07, "loss": 1.1896, "step": 91 }, { "epoch": 0.0066465584192750195, "grad_norm": 13.523810969352006, "learning_rate": 5.535499398315283e-07, "loss": 1.1908, "step": 92 }, { "epoch": 0.00671880361948453, "grad_norm": 12.550243262862725, "learning_rate": 5.595667870036102e-07, "loss": 1.1479, "step": 93 }, { "epoch": 0.006791048819694042, "grad_norm": 12.483295723451425, "learning_rate": 5.65583634175692e-07, "loss": 1.1595, "step": 94 }, { "epoch": 0.006863294019903553, "grad_norm": 15.051566433418463, "learning_rate": 5.716004813477738e-07, "loss": 1.3276, "step": 95 }, { "epoch": 0.006935539220113063, "grad_norm": 14.577747663089484, "learning_rate": 5.776173285198556e-07, "loss": 1.1423, "step": 96 }, { "epoch": 0.007007784420322575, "grad_norm": 13.349669811723107, "learning_rate": 5.836341756919375e-07, "loss": 1.1783, "step": 97 }, { "epoch": 0.007080029620532086, "grad_norm": 14.144732979782118, "learning_rate": 5.896510228640193e-07, "loss": 1.2443, "step": 98 }, { "epoch": 0.007152274820741597, "grad_norm": 11.519474954296149, "learning_rate": 5.956678700361012e-07, "loss": 1.1997, "step": 99 }, { "epoch": 0.007224520020951108, "grad_norm": 12.861968621342779, "learning_rate": 6.01684717208183e-07, "loss": 1.1582, "step": 100 }, { "epoch": 0.0072967652211606195, "grad_norm": 12.144631764218978, "learning_rate": 6.077015643802648e-07, "loss": 1.1442, "step": 101 }, { "epoch": 0.00736901042137013, "grad_norm": 11.99403042130002, "learning_rate": 6.137184115523466e-07, "loss": 1.208, "step": 102 }, { "epoch": 0.007441255621579641, "grad_norm": 11.332706508879784, "learning_rate": 6.197352587244285e-07, "loss": 1.1922, "step": 103 }, { "epoch": 0.0075135008217891526, "grad_norm": 9.198543632339561, "learning_rate": 6.257521058965104e-07, "loss": 1.1416, "step": 104 }, { "epoch": 0.007585746021998663, "grad_norm": 9.811663355057018, "learning_rate": 6.31768953068592e-07, "loss": 1.1644, "step": 105 }, { "epoch": 0.007657991222208175, "grad_norm": 10.664680613954468, "learning_rate": 6.377858002406739e-07, "loss": 1.1329, "step": 106 }, { "epoch": 0.007730236422417686, "grad_norm": 11.87829736806828, "learning_rate": 6.438026474127558e-07, "loss": 1.2457, "step": 107 }, { "epoch": 0.007802481622627196, "grad_norm": 13.50912336847104, "learning_rate": 6.498194945848375e-07, "loss": 1.1398, "step": 108 }, { "epoch": 0.007874726822836707, "grad_norm": 9.760445892279373, "learning_rate": 6.558363417569194e-07, "loss": 1.093, "step": 109 }, { "epoch": 0.00794697202304622, "grad_norm": 14.395685588043547, "learning_rate": 6.618531889290013e-07, "loss": 1.1612, "step": 110 }, { "epoch": 0.00801921722325573, "grad_norm": 11.775174706979206, "learning_rate": 6.678700361010831e-07, "loss": 1.1608, "step": 111 }, { "epoch": 0.008091462423465241, "grad_norm": 10.692869388031994, "learning_rate": 6.738868832731648e-07, "loss": 1.217, "step": 112 }, { "epoch": 0.008163707623674752, "grad_norm": 11.047801797667574, "learning_rate": 6.799037304452467e-07, "loss": 1.2239, "step": 113 }, { "epoch": 0.008235952823884262, "grad_norm": 9.404574526989055, "learning_rate": 6.859205776173286e-07, "loss": 1.0854, "step": 114 }, { "epoch": 0.008308198024093775, "grad_norm": 10.736669037076572, "learning_rate": 6.919374247894104e-07, "loss": 1.1454, "step": 115 }, { "epoch": 0.008380443224303286, "grad_norm": 12.147185174599736, "learning_rate": 6.979542719614923e-07, "loss": 1.1893, "step": 116 }, { "epoch": 0.008452688424512796, "grad_norm": 11.513351650816176, "learning_rate": 7.03971119133574e-07, "loss": 1.2205, "step": 117 }, { "epoch": 0.008524933624722307, "grad_norm": 9.57022241666787, "learning_rate": 7.099879663056559e-07, "loss": 1.1085, "step": 118 }, { "epoch": 0.008597178824931818, "grad_norm": 12.53828791902745, "learning_rate": 7.160048134777377e-07, "loss": 1.1605, "step": 119 }, { "epoch": 0.00866942402514133, "grad_norm": 12.027640616261584, "learning_rate": 7.220216606498196e-07, "loss": 1.1451, "step": 120 }, { "epoch": 0.008741669225350841, "grad_norm": 10.007801446007752, "learning_rate": 7.280385078219015e-07, "loss": 1.175, "step": 121 }, { "epoch": 0.008813914425560352, "grad_norm": 13.065425855409503, "learning_rate": 7.340553549939831e-07, "loss": 1.1639, "step": 122 }, { "epoch": 0.008886159625769862, "grad_norm": 10.05231827904405, "learning_rate": 7.40072202166065e-07, "loss": 1.0774, "step": 123 }, { "epoch": 0.008958404825979375, "grad_norm": 12.37034812344877, "learning_rate": 7.460890493381469e-07, "loss": 1.1093, "step": 124 }, { "epoch": 0.009030650026188886, "grad_norm": 12.996364965610036, "learning_rate": 7.521058965102288e-07, "loss": 1.2248, "step": 125 }, { "epoch": 0.009102895226398396, "grad_norm": 13.880306294604281, "learning_rate": 7.581227436823105e-07, "loss": 1.153, "step": 126 }, { "epoch": 0.009175140426607907, "grad_norm": 11.014619995156226, "learning_rate": 7.641395908543923e-07, "loss": 1.1961, "step": 127 }, { "epoch": 0.009247385626817418, "grad_norm": 12.162729610459506, "learning_rate": 7.701564380264742e-07, "loss": 1.2476, "step": 128 }, { "epoch": 0.00931963082702693, "grad_norm": 10.541900230102057, "learning_rate": 7.761732851985561e-07, "loss": 1.1275, "step": 129 }, { "epoch": 0.009391876027236441, "grad_norm": 14.901209204846483, "learning_rate": 7.821901323706378e-07, "loss": 1.2554, "step": 130 }, { "epoch": 0.009464121227445952, "grad_norm": 11.032671871890892, "learning_rate": 7.882069795427197e-07, "loss": 1.0733, "step": 131 }, { "epoch": 0.009536366427655462, "grad_norm": 12.693517902004771, "learning_rate": 7.942238267148016e-07, "loss": 1.1384, "step": 132 }, { "epoch": 0.009608611627864973, "grad_norm": 19.153922358076393, "learning_rate": 8.002406738868833e-07, "loss": 1.1624, "step": 133 }, { "epoch": 0.009680856828074486, "grad_norm": 9.881047968092954, "learning_rate": 8.062575210589651e-07, "loss": 1.0, "step": 134 }, { "epoch": 0.009753102028283996, "grad_norm": 13.282074882472006, "learning_rate": 8.12274368231047e-07, "loss": 1.0783, "step": 135 }, { "epoch": 0.009825347228493507, "grad_norm": 13.795196050873335, "learning_rate": 8.182912154031289e-07, "loss": 1.1097, "step": 136 }, { "epoch": 0.009897592428703018, "grad_norm": 13.90635094766731, "learning_rate": 8.243080625752107e-07, "loss": 1.166, "step": 137 }, { "epoch": 0.009969837628912528, "grad_norm": 13.661942820866475, "learning_rate": 8.303249097472924e-07, "loss": 1.1634, "step": 138 }, { "epoch": 0.010042082829122041, "grad_norm": 13.504986442550639, "learning_rate": 8.363417569193743e-07, "loss": 1.1762, "step": 139 }, { "epoch": 0.010114328029331552, "grad_norm": 12.29584007319015, "learning_rate": 8.423586040914561e-07, "loss": 1.1161, "step": 140 }, { "epoch": 0.010186573229541062, "grad_norm": 16.046031539724336, "learning_rate": 8.48375451263538e-07, "loss": 1.203, "step": 141 }, { "epoch": 0.010258818429750573, "grad_norm": 10.544058507036306, "learning_rate": 8.543922984356199e-07, "loss": 1.0966, "step": 142 }, { "epoch": 0.010331063629960084, "grad_norm": 8.90186981104602, "learning_rate": 8.604091456077017e-07, "loss": 0.9942, "step": 143 }, { "epoch": 0.010403308830169596, "grad_norm": 14.73982689682081, "learning_rate": 8.664259927797834e-07, "loss": 1.2175, "step": 144 }, { "epoch": 0.010475554030379107, "grad_norm": 9.33751171678168, "learning_rate": 8.724428399518653e-07, "loss": 1.0561, "step": 145 }, { "epoch": 0.010547799230588618, "grad_norm": 9.760289948956194, "learning_rate": 8.784596871239472e-07, "loss": 1.0878, "step": 146 }, { "epoch": 0.010620044430798128, "grad_norm": 11.446466985289064, "learning_rate": 8.844765342960289e-07, "loss": 1.1571, "step": 147 }, { "epoch": 0.01069228963100764, "grad_norm": 10.79624007438017, "learning_rate": 8.904933814681108e-07, "loss": 1.1228, "step": 148 }, { "epoch": 0.010764534831217152, "grad_norm": 14.034980668780662, "learning_rate": 8.965102286401926e-07, "loss": 1.2084, "step": 149 }, { "epoch": 0.010836780031426662, "grad_norm": 11.926459354697764, "learning_rate": 9.025270758122745e-07, "loss": 1.1137, "step": 150 }, { "epoch": 0.010909025231636173, "grad_norm": 10.113783663650176, "learning_rate": 9.085439229843562e-07, "loss": 1.1611, "step": 151 }, { "epoch": 0.010981270431845684, "grad_norm": 12.167729231189963, "learning_rate": 9.145607701564381e-07, "loss": 1.171, "step": 152 }, { "epoch": 0.011053515632055195, "grad_norm": 10.827798769661776, "learning_rate": 9.2057761732852e-07, "loss": 1.0988, "step": 153 }, { "epoch": 0.011125760832264707, "grad_norm": 10.677373003247956, "learning_rate": 9.265944645006017e-07, "loss": 1.1548, "step": 154 }, { "epoch": 0.011198006032474218, "grad_norm": 9.93242088411182, "learning_rate": 9.326113116726835e-07, "loss": 1.0931, "step": 155 }, { "epoch": 0.011270251232683728, "grad_norm": 10.392494985631082, "learning_rate": 9.386281588447654e-07, "loss": 1.0731, "step": 156 }, { "epoch": 0.01134249643289324, "grad_norm": 11.149917301162098, "learning_rate": 9.446450060168473e-07, "loss": 1.0914, "step": 157 }, { "epoch": 0.011414741633102752, "grad_norm": 13.20041172657037, "learning_rate": 9.506618531889291e-07, "loss": 1.1831, "step": 158 }, { "epoch": 0.011486986833312262, "grad_norm": 9.90419846720733, "learning_rate": 9.566787003610109e-07, "loss": 0.9957, "step": 159 }, { "epoch": 0.011559232033521773, "grad_norm": 12.589103201148987, "learning_rate": 9.626955475330928e-07, "loss": 1.0357, "step": 160 }, { "epoch": 0.011631477233731284, "grad_norm": 10.48329868846641, "learning_rate": 9.687123947051744e-07, "loss": 1.1274, "step": 161 }, { "epoch": 0.011703722433940794, "grad_norm": 10.02032389043092, "learning_rate": 9.747292418772564e-07, "loss": 1.2132, "step": 162 }, { "epoch": 0.011775967634150307, "grad_norm": 10.711235949297516, "learning_rate": 9.807460890493382e-07, "loss": 1.135, "step": 163 }, { "epoch": 0.011848212834359818, "grad_norm": 11.269197825981816, "learning_rate": 9.867629362214202e-07, "loss": 1.1939, "step": 164 }, { "epoch": 0.011920458034569328, "grad_norm": 10.387497326498227, "learning_rate": 9.927797833935017e-07, "loss": 1.0865, "step": 165 }, { "epoch": 0.011992703234778839, "grad_norm": 11.241369370096395, "learning_rate": 9.987966305655837e-07, "loss": 1.2574, "step": 166 }, { "epoch": 0.01206494843498835, "grad_norm": 10.675636176063321, "learning_rate": 1.0048134777376655e-06, "loss": 1.1302, "step": 167 }, { "epoch": 0.012137193635197862, "grad_norm": 11.535807052839933, "learning_rate": 1.0108303249097473e-06, "loss": 1.1281, "step": 168 }, { "epoch": 0.012209438835407373, "grad_norm": 12.772828851133626, "learning_rate": 1.0168471720818293e-06, "loss": 1.0783, "step": 169 }, { "epoch": 0.012281684035616884, "grad_norm": 11.6023950143218, "learning_rate": 1.022864019253911e-06, "loss": 1.1214, "step": 170 }, { "epoch": 0.012353929235826394, "grad_norm": 13.689752262791375, "learning_rate": 1.0288808664259928e-06, "loss": 1.1544, "step": 171 }, { "epoch": 0.012426174436035905, "grad_norm": 12.957979219919562, "learning_rate": 1.0348977135980746e-06, "loss": 1.1406, "step": 172 }, { "epoch": 0.012498419636245418, "grad_norm": 9.604726867446164, "learning_rate": 1.0409145607701566e-06, "loss": 1.0778, "step": 173 }, { "epoch": 0.012570664836454928, "grad_norm": 9.908488986303574, "learning_rate": 1.0469314079422384e-06, "loss": 1.1605, "step": 174 }, { "epoch": 0.012642910036664439, "grad_norm": 12.47768149138255, "learning_rate": 1.0529482551143202e-06, "loss": 1.1357, "step": 175 }, { "epoch": 0.01271515523687395, "grad_norm": 11.991141546405725, "learning_rate": 1.058965102286402e-06, "loss": 1.141, "step": 176 }, { "epoch": 0.01278740043708346, "grad_norm": 11.353230531697108, "learning_rate": 1.064981949458484e-06, "loss": 1.1, "step": 177 }, { "epoch": 0.012859645637292973, "grad_norm": 11.594469238345726, "learning_rate": 1.0709987966305657e-06, "loss": 1.1866, "step": 178 }, { "epoch": 0.012931890837502484, "grad_norm": 14.480259052298743, "learning_rate": 1.0770156438026475e-06, "loss": 1.1539, "step": 179 }, { "epoch": 0.013004136037711994, "grad_norm": 20.17765638974984, "learning_rate": 1.0830324909747293e-06, "loss": 1.0533, "step": 180 }, { "epoch": 0.013076381237921505, "grad_norm": 11.643746304473044, "learning_rate": 1.0890493381468113e-06, "loss": 1.1756, "step": 181 }, { "epoch": 0.013148626438131016, "grad_norm": 11.022244589622254, "learning_rate": 1.095066185318893e-06, "loss": 1.1598, "step": 182 }, { "epoch": 0.013220871638340528, "grad_norm": 13.4074621119436, "learning_rate": 1.1010830324909748e-06, "loss": 1.1307, "step": 183 }, { "epoch": 0.013293116838550039, "grad_norm": 10.636186164021513, "learning_rate": 1.1070998796630566e-06, "loss": 1.0827, "step": 184 }, { "epoch": 0.01336536203875955, "grad_norm": 13.803722509778046, "learning_rate": 1.1131167268351386e-06, "loss": 1.17, "step": 185 }, { "epoch": 0.01343760723896906, "grad_norm": 10.707146327706097, "learning_rate": 1.1191335740072204e-06, "loss": 1.0632, "step": 186 }, { "epoch": 0.013509852439178571, "grad_norm": 10.11691828189103, "learning_rate": 1.1251504211793021e-06, "loss": 1.1458, "step": 187 }, { "epoch": 0.013582097639388084, "grad_norm": 11.39812960087915, "learning_rate": 1.131167268351384e-06, "loss": 1.1053, "step": 188 }, { "epoch": 0.013654342839597594, "grad_norm": 10.996532934169098, "learning_rate": 1.137184115523466e-06, "loss": 1.0956, "step": 189 }, { "epoch": 0.013726588039807105, "grad_norm": 10.490245466096818, "learning_rate": 1.1432009626955477e-06, "loss": 1.2017, "step": 190 }, { "epoch": 0.013798833240016616, "grad_norm": 11.287797966590752, "learning_rate": 1.1492178098676295e-06, "loss": 1.1151, "step": 191 }, { "epoch": 0.013871078440226127, "grad_norm": 11.017031922010823, "learning_rate": 1.1552346570397112e-06, "loss": 1.2638, "step": 192 }, { "epoch": 0.013943323640435639, "grad_norm": 9.12169430571075, "learning_rate": 1.161251504211793e-06, "loss": 1.1557, "step": 193 }, { "epoch": 0.01401556884064515, "grad_norm": 11.956386146228901, "learning_rate": 1.167268351383875e-06, "loss": 1.0114, "step": 194 }, { "epoch": 0.01408781404085466, "grad_norm": 12.756233636227869, "learning_rate": 1.1732851985559568e-06, "loss": 1.1383, "step": 195 }, { "epoch": 0.014160059241064171, "grad_norm": 10.935226465713699, "learning_rate": 1.1793020457280386e-06, "loss": 1.0492, "step": 196 }, { "epoch": 0.014232304441273684, "grad_norm": 9.423847757427737, "learning_rate": 1.1853188929001203e-06, "loss": 1.1243, "step": 197 }, { "epoch": 0.014304549641483194, "grad_norm": 10.118177964909979, "learning_rate": 1.1913357400722023e-06, "loss": 1.2084, "step": 198 }, { "epoch": 0.014376794841692705, "grad_norm": 11.67271099645819, "learning_rate": 1.1973525872442841e-06, "loss": 1.1143, "step": 199 }, { "epoch": 0.014449040041902216, "grad_norm": 12.3057526820697, "learning_rate": 1.203369434416366e-06, "loss": 1.1492, "step": 200 }, { "epoch": 0.014521285242111727, "grad_norm": 10.097984535886937, "learning_rate": 1.2093862815884477e-06, "loss": 1.0993, "step": 201 }, { "epoch": 0.014593530442321239, "grad_norm": 10.159854753912846, "learning_rate": 1.2154031287605297e-06, "loss": 1.2186, "step": 202 }, { "epoch": 0.01466577564253075, "grad_norm": 11.935269292151387, "learning_rate": 1.2214199759326114e-06, "loss": 1.146, "step": 203 }, { "epoch": 0.01473802084274026, "grad_norm": 11.739961922111041, "learning_rate": 1.2274368231046932e-06, "loss": 1.1623, "step": 204 }, { "epoch": 0.014810266042949771, "grad_norm": 10.215743342916356, "learning_rate": 1.233453670276775e-06, "loss": 1.052, "step": 205 }, { "epoch": 0.014882511243159282, "grad_norm": 11.659717197554713, "learning_rate": 1.239470517448857e-06, "loss": 1.2221, "step": 206 }, { "epoch": 0.014954756443368794, "grad_norm": 12.596165730621411, "learning_rate": 1.2454873646209388e-06, "loss": 1.1632, "step": 207 }, { "epoch": 0.015027001643578305, "grad_norm": 13.682347551552853, "learning_rate": 1.2515042117930208e-06, "loss": 1.1354, "step": 208 }, { "epoch": 0.015099246843787816, "grad_norm": 10.808429282494636, "learning_rate": 1.2575210589651023e-06, "loss": 1.0641, "step": 209 }, { "epoch": 0.015171492043997327, "grad_norm": 12.103723479587284, "learning_rate": 1.263537906137184e-06, "loss": 1.2332, "step": 210 }, { "epoch": 0.015243737244206837, "grad_norm": 11.90557651166861, "learning_rate": 1.269554753309266e-06, "loss": 1.2182, "step": 211 }, { "epoch": 0.01531598244441635, "grad_norm": 9.68608506236907, "learning_rate": 1.2755716004813479e-06, "loss": 1.0941, "step": 212 }, { "epoch": 0.01538822764462586, "grad_norm": 11.66182855470178, "learning_rate": 1.2815884476534297e-06, "loss": 1.0942, "step": 213 }, { "epoch": 0.015460472844835371, "grad_norm": 10.496131184223346, "learning_rate": 1.2876052948255116e-06, "loss": 1.0282, "step": 214 }, { "epoch": 0.015532718045044882, "grad_norm": 14.432495835272437, "learning_rate": 1.2936221419975934e-06, "loss": 1.1463, "step": 215 }, { "epoch": 0.015604963245254393, "grad_norm": 12.577956981602961, "learning_rate": 1.299638989169675e-06, "loss": 1.0951, "step": 216 }, { "epoch": 0.015677208445463903, "grad_norm": 10.014215664786702, "learning_rate": 1.305655836341757e-06, "loss": 1.0738, "step": 217 }, { "epoch": 0.015749453645673414, "grad_norm": 11.135665410222147, "learning_rate": 1.3116726835138388e-06, "loss": 1.1182, "step": 218 }, { "epoch": 0.01582169884588293, "grad_norm": 14.361085210826415, "learning_rate": 1.3176895306859207e-06, "loss": 1.1189, "step": 219 }, { "epoch": 0.01589394404609244, "grad_norm": 14.946329866868917, "learning_rate": 1.3237063778580025e-06, "loss": 1.16, "step": 220 }, { "epoch": 0.01596618924630195, "grad_norm": 10.290108340094331, "learning_rate": 1.3297232250300843e-06, "loss": 1.0985, "step": 221 }, { "epoch": 0.01603843444651146, "grad_norm": 10.29996797130753, "learning_rate": 1.3357400722021663e-06, "loss": 1.3023, "step": 222 }, { "epoch": 0.01611067964672097, "grad_norm": 12.157260411494468, "learning_rate": 1.341756919374248e-06, "loss": 1.1905, "step": 223 }, { "epoch": 0.016182924846930482, "grad_norm": 12.682608427234308, "learning_rate": 1.3477737665463296e-06, "loss": 1.1394, "step": 224 }, { "epoch": 0.016255170047139993, "grad_norm": 12.043683966558078, "learning_rate": 1.3537906137184118e-06, "loss": 1.1029, "step": 225 }, { "epoch": 0.016327415247349503, "grad_norm": 11.722002803763754, "learning_rate": 1.3598074608904934e-06, "loss": 1.054, "step": 226 }, { "epoch": 0.016399660447559014, "grad_norm": 13.735112679279336, "learning_rate": 1.3658243080625752e-06, "loss": 1.1728, "step": 227 }, { "epoch": 0.016471905647768525, "grad_norm": 10.985303598062348, "learning_rate": 1.3718411552346572e-06, "loss": 1.1576, "step": 228 }, { "epoch": 0.01654415084797804, "grad_norm": 10.382125062480624, "learning_rate": 1.377858002406739e-06, "loss": 1.1832, "step": 229 }, { "epoch": 0.01661639604818755, "grad_norm": 10.274778607807232, "learning_rate": 1.3838748495788207e-06, "loss": 1.1424, "step": 230 }, { "epoch": 0.01668864124839706, "grad_norm": 9.6794116309559, "learning_rate": 1.3898916967509027e-06, "loss": 1.16, "step": 231 }, { "epoch": 0.01676088644860657, "grad_norm": 13.630306225567141, "learning_rate": 1.3959085439229845e-06, "loss": 1.1613, "step": 232 }, { "epoch": 0.016833131648816082, "grad_norm": 10.784633116726889, "learning_rate": 1.4019253910950665e-06, "loss": 1.095, "step": 233 }, { "epoch": 0.016905376849025593, "grad_norm": 10.188486624842271, "learning_rate": 1.407942238267148e-06, "loss": 1.1912, "step": 234 }, { "epoch": 0.016977622049235103, "grad_norm": 11.15979733956474, "learning_rate": 1.4139590854392298e-06, "loss": 1.068, "step": 235 }, { "epoch": 0.017049867249444614, "grad_norm": 11.525446064357851, "learning_rate": 1.4199759326113118e-06, "loss": 1.1771, "step": 236 }, { "epoch": 0.017122112449654125, "grad_norm": 11.185853293504639, "learning_rate": 1.4259927797833936e-06, "loss": 1.1719, "step": 237 }, { "epoch": 0.017194357649863636, "grad_norm": 13.826341692919895, "learning_rate": 1.4320096269554754e-06, "loss": 1.2402, "step": 238 }, { "epoch": 0.01726660285007315, "grad_norm": 11.298129738841872, "learning_rate": 1.4380264741275574e-06, "loss": 1.097, "step": 239 }, { "epoch": 0.01733884805028266, "grad_norm": 8.83049392649357, "learning_rate": 1.4440433212996392e-06, "loss": 1.1333, "step": 240 }, { "epoch": 0.01741109325049217, "grad_norm": 9.92760276969872, "learning_rate": 1.4500601684717207e-06, "loss": 1.0828, "step": 241 }, { "epoch": 0.017483338450701682, "grad_norm": 13.054416911655622, "learning_rate": 1.456077015643803e-06, "loss": 1.2752, "step": 242 }, { "epoch": 0.017555583650911193, "grad_norm": 13.562701386393655, "learning_rate": 1.4620938628158845e-06, "loss": 1.0531, "step": 243 }, { "epoch": 0.017627828851120703, "grad_norm": 9.694739608376223, "learning_rate": 1.4681107099879663e-06, "loss": 1.1271, "step": 244 }, { "epoch": 0.017700074051330214, "grad_norm": 10.627002482482787, "learning_rate": 1.4741275571600483e-06, "loss": 1.0768, "step": 245 }, { "epoch": 0.017772319251539725, "grad_norm": 11.91704816585486, "learning_rate": 1.48014440433213e-06, "loss": 1.1204, "step": 246 }, { "epoch": 0.017844564451749236, "grad_norm": 10.889418591268285, "learning_rate": 1.486161251504212e-06, "loss": 1.0212, "step": 247 }, { "epoch": 0.01791680965195875, "grad_norm": 10.429936941071436, "learning_rate": 1.4921780986762938e-06, "loss": 1.1512, "step": 248 }, { "epoch": 0.01798905485216826, "grad_norm": 10.33457627563293, "learning_rate": 1.4981949458483756e-06, "loss": 1.1099, "step": 249 }, { "epoch": 0.01806130005237777, "grad_norm": 12.305452605181143, "learning_rate": 1.5042117930204576e-06, "loss": 1.2576, "step": 250 }, { "epoch": 0.018133545252587282, "grad_norm": 12.585249462299915, "learning_rate": 1.5102286401925391e-06, "loss": 1.0889, "step": 251 }, { "epoch": 0.018205790452796793, "grad_norm": 14.356691061908045, "learning_rate": 1.516245487364621e-06, "loss": 1.1968, "step": 252 }, { "epoch": 0.018278035653006303, "grad_norm": 8.859725487721018, "learning_rate": 1.522262334536703e-06, "loss": 1.1676, "step": 253 }, { "epoch": 0.018350280853215814, "grad_norm": 10.017819739908909, "learning_rate": 1.5282791817087847e-06, "loss": 1.1416, "step": 254 }, { "epoch": 0.018422526053425325, "grad_norm": 10.902995938482702, "learning_rate": 1.5342960288808665e-06, "loss": 1.0688, "step": 255 }, { "epoch": 0.018494771253634835, "grad_norm": 10.37440250009063, "learning_rate": 1.5403128760529485e-06, "loss": 1.1257, "step": 256 }, { "epoch": 0.018567016453844346, "grad_norm": 10.678388671477576, "learning_rate": 1.5463297232250302e-06, "loss": 1.087, "step": 257 }, { "epoch": 0.01863926165405386, "grad_norm": 13.712926093325802, "learning_rate": 1.5523465703971122e-06, "loss": 1.1182, "step": 258 }, { "epoch": 0.01871150685426337, "grad_norm": 10.187886798279159, "learning_rate": 1.5583634175691938e-06, "loss": 1.1414, "step": 259 }, { "epoch": 0.018783752054472882, "grad_norm": 11.245450943877366, "learning_rate": 1.5643802647412756e-06, "loss": 1.0821, "step": 260 }, { "epoch": 0.018855997254682393, "grad_norm": 16.62574341911607, "learning_rate": 1.5703971119133576e-06, "loss": 1.1864, "step": 261 }, { "epoch": 0.018928242454891903, "grad_norm": 9.484227388292208, "learning_rate": 1.5764139590854393e-06, "loss": 1.0936, "step": 262 }, { "epoch": 0.019000487655101414, "grad_norm": 9.648556505465477, "learning_rate": 1.5824308062575211e-06, "loss": 1.0329, "step": 263 }, { "epoch": 0.019072732855310925, "grad_norm": 10.341220511854715, "learning_rate": 1.5884476534296031e-06, "loss": 1.0442, "step": 264 }, { "epoch": 0.019144978055520435, "grad_norm": 14.998587986926053, "learning_rate": 1.594464500601685e-06, "loss": 1.0755, "step": 265 }, { "epoch": 0.019217223255729946, "grad_norm": 9.098306988856583, "learning_rate": 1.6004813477737667e-06, "loss": 1.1011, "step": 266 }, { "epoch": 0.019289468455939457, "grad_norm": 8.35152732905948, "learning_rate": 1.6064981949458487e-06, "loss": 1.1264, "step": 267 }, { "epoch": 0.01936171365614897, "grad_norm": 11.454058891410094, "learning_rate": 1.6125150421179302e-06, "loss": 1.1711, "step": 268 }, { "epoch": 0.019433958856358482, "grad_norm": 7.0999631853895355, "learning_rate": 1.618531889290012e-06, "loss": 0.9992, "step": 269 }, { "epoch": 0.019506204056567993, "grad_norm": 10.017404288131482, "learning_rate": 1.624548736462094e-06, "loss": 1.1489, "step": 270 }, { "epoch": 0.019578449256777503, "grad_norm": 10.443169247673183, "learning_rate": 1.6305655836341758e-06, "loss": 1.0537, "step": 271 }, { "epoch": 0.019650694456987014, "grad_norm": 12.33313737318369, "learning_rate": 1.6365824308062578e-06, "loss": 1.1564, "step": 272 }, { "epoch": 0.019722939657196525, "grad_norm": 11.396936078505444, "learning_rate": 1.6425992779783395e-06, "loss": 1.1079, "step": 273 }, { "epoch": 0.019795184857406035, "grad_norm": 10.81275824971579, "learning_rate": 1.6486161251504213e-06, "loss": 1.2034, "step": 274 }, { "epoch": 0.019867430057615546, "grad_norm": 12.459984779409846, "learning_rate": 1.6546329723225033e-06, "loss": 1.1988, "step": 275 }, { "epoch": 0.019939675257825057, "grad_norm": 9.819196809136809, "learning_rate": 1.6606498194945849e-06, "loss": 1.0506, "step": 276 }, { "epoch": 0.020011920458034568, "grad_norm": 9.247241459450217, "learning_rate": 1.6666666666666667e-06, "loss": 1.0462, "step": 277 }, { "epoch": 0.020084165658244082, "grad_norm": 10.628192736065758, "learning_rate": 1.6726835138387487e-06, "loss": 1.0516, "step": 278 }, { "epoch": 0.020156410858453593, "grad_norm": 11.321217586963304, "learning_rate": 1.6787003610108304e-06, "loss": 0.9755, "step": 279 }, { "epoch": 0.020228656058663103, "grad_norm": 9.903170422840764, "learning_rate": 1.6847172081829122e-06, "loss": 1.1079, "step": 280 }, { "epoch": 0.020300901258872614, "grad_norm": 11.07275604868417, "learning_rate": 1.6907340553549942e-06, "loss": 1.1733, "step": 281 }, { "epoch": 0.020373146459082125, "grad_norm": 12.883596568772052, "learning_rate": 1.696750902527076e-06, "loss": 1.0936, "step": 282 }, { "epoch": 0.020445391659291635, "grad_norm": 8.663004859645536, "learning_rate": 1.7027677496991575e-06, "loss": 1.047, "step": 283 }, { "epoch": 0.020517636859501146, "grad_norm": 10.416227489114304, "learning_rate": 1.7087845968712397e-06, "loss": 1.0967, "step": 284 }, { "epoch": 0.020589882059710657, "grad_norm": 11.612976419225028, "learning_rate": 1.7148014440433213e-06, "loss": 1.0525, "step": 285 }, { "epoch": 0.020662127259920168, "grad_norm": 12.172128853458434, "learning_rate": 1.7208182912154033e-06, "loss": 1.068, "step": 286 }, { "epoch": 0.020734372460129682, "grad_norm": 11.264666343248123, "learning_rate": 1.726835138387485e-06, "loss": 1.1458, "step": 287 }, { "epoch": 0.020806617660339193, "grad_norm": 8.901229996285757, "learning_rate": 1.7328519855595669e-06, "loss": 1.0478, "step": 288 }, { "epoch": 0.020878862860548703, "grad_norm": 9.099535383163973, "learning_rate": 1.7388688327316489e-06, "loss": 1.0209, "step": 289 }, { "epoch": 0.020951108060758214, "grad_norm": 12.417428695200574, "learning_rate": 1.7448856799037306e-06, "loss": 1.1668, "step": 290 }, { "epoch": 0.021023353260967725, "grad_norm": 15.537298302787544, "learning_rate": 1.7509025270758124e-06, "loss": 1.0354, "step": 291 }, { "epoch": 0.021095598461177235, "grad_norm": 10.707671109118673, "learning_rate": 1.7569193742478944e-06, "loss": 1.0992, "step": 292 }, { "epoch": 0.021167843661386746, "grad_norm": 13.50910924949694, "learning_rate": 1.762936221419976e-06, "loss": 1.1489, "step": 293 }, { "epoch": 0.021240088861596257, "grad_norm": 12.202008966852485, "learning_rate": 1.7689530685920577e-06, "loss": 1.1295, "step": 294 }, { "epoch": 0.021312334061805768, "grad_norm": 10.53160606257774, "learning_rate": 1.7749699157641397e-06, "loss": 1.0336, "step": 295 }, { "epoch": 0.02138457926201528, "grad_norm": 10.627453330042272, "learning_rate": 1.7809867629362215e-06, "loss": 1.0403, "step": 296 }, { "epoch": 0.021456824462224793, "grad_norm": 11.97797024709255, "learning_rate": 1.7870036101083035e-06, "loss": 1.1627, "step": 297 }, { "epoch": 0.021529069662434303, "grad_norm": 9.3455371997418, "learning_rate": 1.7930204572803853e-06, "loss": 1.0893, "step": 298 }, { "epoch": 0.021601314862643814, "grad_norm": 10.206880261034021, "learning_rate": 1.799037304452467e-06, "loss": 1.1021, "step": 299 }, { "epoch": 0.021673560062853325, "grad_norm": 9.740916789496888, "learning_rate": 1.805054151624549e-06, "loss": 1.1006, "step": 300 }, { "epoch": 0.021745805263062835, "grad_norm": 12.41213995496823, "learning_rate": 1.8110709987966308e-06, "loss": 1.1011, "step": 301 }, { "epoch": 0.021818050463272346, "grad_norm": 8.81461390038671, "learning_rate": 1.8170878459687124e-06, "loss": 1.1155, "step": 302 }, { "epoch": 0.021890295663481857, "grad_norm": 10.183773927371355, "learning_rate": 1.8231046931407944e-06, "loss": 1.1249, "step": 303 }, { "epoch": 0.021962540863691368, "grad_norm": 12.040730335478772, "learning_rate": 1.8291215403128762e-06, "loss": 1.0297, "step": 304 }, { "epoch": 0.02203478606390088, "grad_norm": 11.163451875467503, "learning_rate": 1.835138387484958e-06, "loss": 1.1126, "step": 305 }, { "epoch": 0.02210703126411039, "grad_norm": 9.601199361267565, "learning_rate": 1.84115523465704e-06, "loss": 1.0662, "step": 306 }, { "epoch": 0.022179276464319903, "grad_norm": 12.933495238034675, "learning_rate": 1.8471720818291217e-06, "loss": 1.1517, "step": 307 }, { "epoch": 0.022251521664529414, "grad_norm": 10.032866825989812, "learning_rate": 1.8531889290012035e-06, "loss": 1.0646, "step": 308 }, { "epoch": 0.022323766864738925, "grad_norm": 10.211609565081694, "learning_rate": 1.8592057761732855e-06, "loss": 1.0995, "step": 309 }, { "epoch": 0.022396012064948435, "grad_norm": 9.586876211551157, "learning_rate": 1.865222623345367e-06, "loss": 0.9966, "step": 310 }, { "epoch": 0.022468257265157946, "grad_norm": 11.034204881556445, "learning_rate": 1.871239470517449e-06, "loss": 1.0381, "step": 311 }, { "epoch": 0.022540502465367457, "grad_norm": 9.479844244470163, "learning_rate": 1.8772563176895308e-06, "loss": 1.0201, "step": 312 }, { "epoch": 0.022612747665576968, "grad_norm": 16.549538892332563, "learning_rate": 1.8832731648616126e-06, "loss": 1.2135, "step": 313 }, { "epoch": 0.02268499286578648, "grad_norm": 11.510586054089242, "learning_rate": 1.8892900120336946e-06, "loss": 1.0322, "step": 314 }, { "epoch": 0.02275723806599599, "grad_norm": 9.149192199069152, "learning_rate": 1.8953068592057764e-06, "loss": 1.1051, "step": 315 }, { "epoch": 0.022829483266205503, "grad_norm": 12.852171575612596, "learning_rate": 1.9013237063778581e-06, "loss": 1.0755, "step": 316 }, { "epoch": 0.022901728466415014, "grad_norm": 11.913961961665962, "learning_rate": 1.9073405535499401e-06, "loss": 1.1087, "step": 317 }, { "epoch": 0.022973973666624525, "grad_norm": 11.986186660247574, "learning_rate": 1.9133574007220217e-06, "loss": 1.1368, "step": 318 }, { "epoch": 0.023046218866834035, "grad_norm": 10.301501148052177, "learning_rate": 1.9193742478941035e-06, "loss": 1.1992, "step": 319 }, { "epoch": 0.023118464067043546, "grad_norm": 8.44141212473766, "learning_rate": 1.9253910950661857e-06, "loss": 1.1152, "step": 320 }, { "epoch": 0.023190709267253057, "grad_norm": 13.143937463371302, "learning_rate": 1.9314079422382675e-06, "loss": 1.0774, "step": 321 }, { "epoch": 0.023262954467462568, "grad_norm": 10.804216861473398, "learning_rate": 1.937424789410349e-06, "loss": 1.1333, "step": 322 }, { "epoch": 0.023335199667672078, "grad_norm": 10.786404380988822, "learning_rate": 1.943441636582431e-06, "loss": 1.1205, "step": 323 }, { "epoch": 0.02340744486788159, "grad_norm": 9.999180950955003, "learning_rate": 1.949458483754513e-06, "loss": 1.1007, "step": 324 }, { "epoch": 0.0234796900680911, "grad_norm": 8.705604624601488, "learning_rate": 1.9554753309265946e-06, "loss": 0.9751, "step": 325 }, { "epoch": 0.023551935268300614, "grad_norm": 12.249093158398502, "learning_rate": 1.9614921780986764e-06, "loss": 1.1575, "step": 326 }, { "epoch": 0.023624180468510125, "grad_norm": 10.70542965322077, "learning_rate": 1.967509025270758e-06, "loss": 1.0538, "step": 327 }, { "epoch": 0.023696425668719635, "grad_norm": 9.46332497874401, "learning_rate": 1.9735258724428403e-06, "loss": 1.148, "step": 328 }, { "epoch": 0.023768670868929146, "grad_norm": 10.474817140571856, "learning_rate": 1.979542719614922e-06, "loss": 0.9958, "step": 329 }, { "epoch": 0.023840916069138657, "grad_norm": 9.848370923714958, "learning_rate": 1.9855595667870035e-06, "loss": 1.1308, "step": 330 }, { "epoch": 0.023913161269348168, "grad_norm": 10.580111115706826, "learning_rate": 1.9915764139590857e-06, "loss": 1.0433, "step": 331 }, { "epoch": 0.023985406469557678, "grad_norm": 9.759139252526081, "learning_rate": 1.9975932611311675e-06, "loss": 1.0922, "step": 332 }, { "epoch": 0.02405765166976719, "grad_norm": 10.660828979780739, "learning_rate": 2.0036101083032492e-06, "loss": 1.0714, "step": 333 }, { "epoch": 0.0241298968699767, "grad_norm": 8.59699201986083, "learning_rate": 2.009626955475331e-06, "loss": 1.1281, "step": 334 }, { "epoch": 0.02420214207018621, "grad_norm": 8.528106445650161, "learning_rate": 2.0156438026474128e-06, "loss": 1.0635, "step": 335 }, { "epoch": 0.024274387270395725, "grad_norm": 10.32076861335956, "learning_rate": 2.0216606498194946e-06, "loss": 1.1233, "step": 336 }, { "epoch": 0.024346632470605235, "grad_norm": 13.160367851333863, "learning_rate": 2.0276774969915768e-06, "loss": 1.2112, "step": 337 }, { "epoch": 0.024418877670814746, "grad_norm": 11.033995721608408, "learning_rate": 2.0336943441636585e-06, "loss": 1.145, "step": 338 }, { "epoch": 0.024491122871024257, "grad_norm": 11.3396546929211, "learning_rate": 2.0397111913357403e-06, "loss": 1.0089, "step": 339 }, { "epoch": 0.024563368071233768, "grad_norm": 9.205099881618146, "learning_rate": 2.045728038507822e-06, "loss": 1.0592, "step": 340 }, { "epoch": 0.024635613271443278, "grad_norm": 9.698783357751912, "learning_rate": 2.051744885679904e-06, "loss": 1.0776, "step": 341 }, { "epoch": 0.02470785847165279, "grad_norm": 11.728104339426913, "learning_rate": 2.0577617328519857e-06, "loss": 1.0467, "step": 342 }, { "epoch": 0.0247801036718623, "grad_norm": 13.038134469296331, "learning_rate": 2.0637785800240674e-06, "loss": 1.0519, "step": 343 }, { "epoch": 0.02485234887207181, "grad_norm": 8.364704237302062, "learning_rate": 2.0697954271961492e-06, "loss": 1.0623, "step": 344 }, { "epoch": 0.02492459407228132, "grad_norm": 12.850877403851774, "learning_rate": 2.0758122743682314e-06, "loss": 1.115, "step": 345 }, { "epoch": 0.024996839272490835, "grad_norm": 14.002649873906401, "learning_rate": 2.081829121540313e-06, "loss": 1.1828, "step": 346 }, { "epoch": 0.025069084472700346, "grad_norm": 12.924697512364954, "learning_rate": 2.0878459687123946e-06, "loss": 1.1307, "step": 347 }, { "epoch": 0.025141329672909857, "grad_norm": 10.840020057687818, "learning_rate": 2.0938628158844768e-06, "loss": 1.137, "step": 348 }, { "epoch": 0.025213574873119367, "grad_norm": 9.880538739176481, "learning_rate": 2.0998796630565585e-06, "loss": 1.1478, "step": 349 }, { "epoch": 0.025285820073328878, "grad_norm": 8.775597324310038, "learning_rate": 2.1058965102286403e-06, "loss": 1.0279, "step": 350 }, { "epoch": 0.02535806527353839, "grad_norm": 10.060805657890418, "learning_rate": 2.111913357400722e-06, "loss": 1.1281, "step": 351 }, { "epoch": 0.0254303104737479, "grad_norm": 9.857083448779914, "learning_rate": 2.117930204572804e-06, "loss": 1.0271, "step": 352 }, { "epoch": 0.02550255567395741, "grad_norm": 11.331902656759175, "learning_rate": 2.123947051744886e-06, "loss": 1.0896, "step": 353 }, { "epoch": 0.02557480087416692, "grad_norm": 9.37714412330213, "learning_rate": 2.129963898916968e-06, "loss": 1.0402, "step": 354 }, { "epoch": 0.025647046074376435, "grad_norm": 11.160461827501809, "learning_rate": 2.1359807460890496e-06, "loss": 1.0929, "step": 355 }, { "epoch": 0.025719291274585946, "grad_norm": 9.370985468580333, "learning_rate": 2.1419975932611314e-06, "loss": 1.1004, "step": 356 }, { "epoch": 0.025791536474795457, "grad_norm": 9.34606945631299, "learning_rate": 2.148014440433213e-06, "loss": 1.0352, "step": 357 }, { "epoch": 0.025863781675004967, "grad_norm": 9.277090045873129, "learning_rate": 2.154031287605295e-06, "loss": 1.1027, "step": 358 }, { "epoch": 0.025936026875214478, "grad_norm": 9.65066317320303, "learning_rate": 2.1600481347773767e-06, "loss": 1.0032, "step": 359 }, { "epoch": 0.02600827207542399, "grad_norm": 8.921992152525497, "learning_rate": 2.1660649819494585e-06, "loss": 1.1018, "step": 360 }, { "epoch": 0.0260805172756335, "grad_norm": 10.125241500423313, "learning_rate": 2.1720818291215403e-06, "loss": 1.0425, "step": 361 }, { "epoch": 0.02615276247584301, "grad_norm": 7.744306380240593, "learning_rate": 2.1780986762936225e-06, "loss": 1.0782, "step": 362 }, { "epoch": 0.02622500767605252, "grad_norm": 10.056177652785127, "learning_rate": 2.1841155234657043e-06, "loss": 1.2477, "step": 363 }, { "epoch": 0.026297252876262032, "grad_norm": 10.84038392516702, "learning_rate": 2.190132370637786e-06, "loss": 1.1179, "step": 364 }, { "epoch": 0.026369498076471546, "grad_norm": 10.366640330911856, "learning_rate": 2.196149217809868e-06, "loss": 1.1271, "step": 365 }, { "epoch": 0.026441743276681057, "grad_norm": 10.670515160892098, "learning_rate": 2.2021660649819496e-06, "loss": 1.0915, "step": 366 }, { "epoch": 0.026513988476890567, "grad_norm": 9.395176043360879, "learning_rate": 2.2081829121540314e-06, "loss": 1.0953, "step": 367 }, { "epoch": 0.026586233677100078, "grad_norm": 9.330218567409073, "learning_rate": 2.214199759326113e-06, "loss": 1.1226, "step": 368 }, { "epoch": 0.02665847887730959, "grad_norm": 10.931472599823119, "learning_rate": 2.220216606498195e-06, "loss": 1.0355, "step": 369 }, { "epoch": 0.0267307240775191, "grad_norm": 10.37166937256151, "learning_rate": 2.226233453670277e-06, "loss": 0.9653, "step": 370 }, { "epoch": 0.02680296927772861, "grad_norm": 12.315403939552123, "learning_rate": 2.232250300842359e-06, "loss": 1.0271, "step": 371 }, { "epoch": 0.02687521447793812, "grad_norm": 15.387028648662666, "learning_rate": 2.2382671480144407e-06, "loss": 1.0553, "step": 372 }, { "epoch": 0.026947459678147632, "grad_norm": 10.342419311120162, "learning_rate": 2.2442839951865225e-06, "loss": 1.019, "step": 373 }, { "epoch": 0.027019704878357143, "grad_norm": 10.222178965859412, "learning_rate": 2.2503008423586043e-06, "loss": 1.146, "step": 374 }, { "epoch": 0.027091950078566657, "grad_norm": 10.443765370182437, "learning_rate": 2.256317689530686e-06, "loss": 1.1014, "step": 375 }, { "epoch": 0.027164195278776167, "grad_norm": 9.164957014233831, "learning_rate": 2.262334536702768e-06, "loss": 1.0655, "step": 376 }, { "epoch": 0.027236440478985678, "grad_norm": 11.90494015514804, "learning_rate": 2.2683513838748496e-06, "loss": 1.0783, "step": 377 }, { "epoch": 0.02730868567919519, "grad_norm": 9.157866768122137, "learning_rate": 2.274368231046932e-06, "loss": 1.09, "step": 378 }, { "epoch": 0.0273809308794047, "grad_norm": 9.497426939399865, "learning_rate": 2.2803850782190136e-06, "loss": 1.0842, "step": 379 }, { "epoch": 0.02745317607961421, "grad_norm": 9.414518353200757, "learning_rate": 2.2864019253910954e-06, "loss": 1.0478, "step": 380 }, { "epoch": 0.02752542127982372, "grad_norm": 8.512817591989869, "learning_rate": 2.292418772563177e-06, "loss": 1.0765, "step": 381 }, { "epoch": 0.027597666480033232, "grad_norm": 9.590347332755504, "learning_rate": 2.298435619735259e-06, "loss": 1.1014, "step": 382 }, { "epoch": 0.027669911680242743, "grad_norm": 8.366016181289961, "learning_rate": 2.3044524669073407e-06, "loss": 1.0468, "step": 383 }, { "epoch": 0.027742156880452253, "grad_norm": 10.417895150541485, "learning_rate": 2.3104693140794225e-06, "loss": 1.0493, "step": 384 }, { "epoch": 0.027814402080661767, "grad_norm": 8.741293799270005, "learning_rate": 2.3164861612515043e-06, "loss": 1.0352, "step": 385 }, { "epoch": 0.027886647280871278, "grad_norm": 10.453010045261971, "learning_rate": 2.322503008423586e-06, "loss": 0.9831, "step": 386 }, { "epoch": 0.02795889248108079, "grad_norm": 9.535075836280134, "learning_rate": 2.3285198555956682e-06, "loss": 1.1267, "step": 387 }, { "epoch": 0.0280311376812903, "grad_norm": 9.095356068574784, "learning_rate": 2.33453670276775e-06, "loss": 1.0254, "step": 388 }, { "epoch": 0.02810338288149981, "grad_norm": 10.460332145012623, "learning_rate": 2.3405535499398314e-06, "loss": 1.1144, "step": 389 }, { "epoch": 0.02817562808170932, "grad_norm": 9.757665506298753, "learning_rate": 2.3465703971119136e-06, "loss": 1.0902, "step": 390 }, { "epoch": 0.028247873281918832, "grad_norm": 8.245921195938566, "learning_rate": 2.3525872442839954e-06, "loss": 1.0891, "step": 391 }, { "epoch": 0.028320118482128342, "grad_norm": 9.810306765613868, "learning_rate": 2.358604091456077e-06, "loss": 1.0331, "step": 392 }, { "epoch": 0.028392363682337853, "grad_norm": 11.104256482265068, "learning_rate": 2.364620938628159e-06, "loss": 1.0768, "step": 393 }, { "epoch": 0.028464608882547367, "grad_norm": 9.780329554335388, "learning_rate": 2.3706377858002407e-06, "loss": 1.0037, "step": 394 }, { "epoch": 0.028536854082756878, "grad_norm": 8.909707987545936, "learning_rate": 2.376654632972323e-06, "loss": 1.1503, "step": 395 }, { "epoch": 0.02860909928296639, "grad_norm": 10.491712296689416, "learning_rate": 2.3826714801444047e-06, "loss": 1.1613, "step": 396 }, { "epoch": 0.0286813444831759, "grad_norm": 9.294043358160074, "learning_rate": 2.3886883273164865e-06, "loss": 1.1194, "step": 397 }, { "epoch": 0.02875358968338541, "grad_norm": 8.821884461200868, "learning_rate": 2.3947051744885682e-06, "loss": 1.1121, "step": 398 }, { "epoch": 0.02882583488359492, "grad_norm": 7.349237218188632, "learning_rate": 2.40072202166065e-06, "loss": 0.9874, "step": 399 }, { "epoch": 0.028898080083804432, "grad_norm": 8.312203997167499, "learning_rate": 2.406738868832732e-06, "loss": 1.1367, "step": 400 }, { "epoch": 0.028970325284013942, "grad_norm": 8.94462333589815, "learning_rate": 2.4127557160048136e-06, "loss": 1.0105, "step": 401 }, { "epoch": 0.029042570484223453, "grad_norm": 8.92928892032051, "learning_rate": 2.4187725631768953e-06, "loss": 1.0623, "step": 402 }, { "epoch": 0.029114815684432964, "grad_norm": 9.060557611076307, "learning_rate": 2.4247894103489775e-06, "loss": 1.0363, "step": 403 }, { "epoch": 0.029187060884642478, "grad_norm": 10.222328609336799, "learning_rate": 2.4308062575210593e-06, "loss": 1.1599, "step": 404 }, { "epoch": 0.02925930608485199, "grad_norm": 8.74786176440956, "learning_rate": 2.436823104693141e-06, "loss": 1.0862, "step": 405 }, { "epoch": 0.0293315512850615, "grad_norm": 12.380408752299878, "learning_rate": 2.442839951865223e-06, "loss": 1.176, "step": 406 }, { "epoch": 0.02940379648527101, "grad_norm": 10.31678784555519, "learning_rate": 2.4488567990373047e-06, "loss": 1.0881, "step": 407 }, { "epoch": 0.02947604168548052, "grad_norm": 12.7537680649099, "learning_rate": 2.4548736462093864e-06, "loss": 1.017, "step": 408 }, { "epoch": 0.02954828688569003, "grad_norm": 8.41908843699027, "learning_rate": 2.4608904933814682e-06, "loss": 1.1455, "step": 409 }, { "epoch": 0.029620532085899542, "grad_norm": 9.908354622893137, "learning_rate": 2.46690734055355e-06, "loss": 0.9987, "step": 410 }, { "epoch": 0.029692777286109053, "grad_norm": 10.084737716691155, "learning_rate": 2.4729241877256318e-06, "loss": 1.0418, "step": 411 }, { "epoch": 0.029765022486318564, "grad_norm": 11.297239010171012, "learning_rate": 2.478941034897714e-06, "loss": 1.0339, "step": 412 }, { "epoch": 0.029837267686528075, "grad_norm": 8.469949897698998, "learning_rate": 2.4849578820697958e-06, "loss": 1.017, "step": 413 }, { "epoch": 0.02990951288673759, "grad_norm": 8.642065883478036, "learning_rate": 2.4909747292418775e-06, "loss": 1.0904, "step": 414 }, { "epoch": 0.0299817580869471, "grad_norm": 9.870359078690358, "learning_rate": 2.4969915764139593e-06, "loss": 1.1015, "step": 415 }, { "epoch": 0.03005400328715661, "grad_norm": 9.999998092651186, "learning_rate": 2.5030084235860415e-06, "loss": 1.0231, "step": 416 }, { "epoch": 0.03012624848736612, "grad_norm": 9.760599488154611, "learning_rate": 2.509025270758123e-06, "loss": 1.0936, "step": 417 }, { "epoch": 0.03019849368757563, "grad_norm": 10.392621621490916, "learning_rate": 2.5150421179302047e-06, "loss": 1.0915, "step": 418 }, { "epoch": 0.030270738887785142, "grad_norm": 10.619366756891996, "learning_rate": 2.5210589651022864e-06, "loss": 1.1712, "step": 419 }, { "epoch": 0.030342984087994653, "grad_norm": 8.807769154862743, "learning_rate": 2.527075812274368e-06, "loss": 1.1117, "step": 420 }, { "epoch": 0.030415229288204164, "grad_norm": 10.388965078290965, "learning_rate": 2.53309265944645e-06, "loss": 1.1632, "step": 421 }, { "epoch": 0.030487474488413675, "grad_norm": 12.376365509218505, "learning_rate": 2.539109506618532e-06, "loss": 1.1279, "step": 422 }, { "epoch": 0.03055971968862319, "grad_norm": 9.406090212253691, "learning_rate": 2.545126353790614e-06, "loss": 1.0389, "step": 423 }, { "epoch": 0.0306319648888327, "grad_norm": 9.277291940644808, "learning_rate": 2.5511432009626957e-06, "loss": 1.1604, "step": 424 }, { "epoch": 0.03070421008904221, "grad_norm": 9.24068992780284, "learning_rate": 2.5571600481347775e-06, "loss": 0.9744, "step": 425 }, { "epoch": 0.03077645528925172, "grad_norm": 10.441184484293847, "learning_rate": 2.5631768953068593e-06, "loss": 1.026, "step": 426 }, { "epoch": 0.03084870048946123, "grad_norm": 9.098360236690018, "learning_rate": 2.5691937424789415e-06, "loss": 1.1384, "step": 427 }, { "epoch": 0.030920945689670742, "grad_norm": 10.829586222203535, "learning_rate": 2.5752105896510233e-06, "loss": 1.068, "step": 428 }, { "epoch": 0.030993190889880253, "grad_norm": 11.025156212908866, "learning_rate": 2.581227436823105e-06, "loss": 1.1689, "step": 429 }, { "epoch": 0.031065436090089764, "grad_norm": 11.750407110425671, "learning_rate": 2.587244283995187e-06, "loss": 1.0467, "step": 430 }, { "epoch": 0.031137681290299275, "grad_norm": 7.423379524725597, "learning_rate": 2.593261131167268e-06, "loss": 1.0174, "step": 431 }, { "epoch": 0.031209926490508785, "grad_norm": 9.104103722489311, "learning_rate": 2.59927797833935e-06, "loss": 0.983, "step": 432 }, { "epoch": 0.0312821716907183, "grad_norm": 14.239122488866007, "learning_rate": 2.6052948255114326e-06, "loss": 1.1598, "step": 433 }, { "epoch": 0.03135441689092781, "grad_norm": 10.232854878758252, "learning_rate": 2.611311672683514e-06, "loss": 1.0722, "step": 434 }, { "epoch": 0.03142666209113732, "grad_norm": 9.995888436979039, "learning_rate": 2.6173285198555957e-06, "loss": 1.1113, "step": 435 }, { "epoch": 0.03149890729134683, "grad_norm": 8.871714668934759, "learning_rate": 2.6233453670276775e-06, "loss": 1.1306, "step": 436 }, { "epoch": 0.03157115249155634, "grad_norm": 11.392358300050027, "learning_rate": 2.6293622141997593e-06, "loss": 1.0081, "step": 437 }, { "epoch": 0.03164339769176586, "grad_norm": 10.469489319159932, "learning_rate": 2.6353790613718415e-06, "loss": 1.0125, "step": 438 }, { "epoch": 0.031715642891975364, "grad_norm": 11.619148996939748, "learning_rate": 2.6413959085439233e-06, "loss": 1.1026, "step": 439 }, { "epoch": 0.03178788809218488, "grad_norm": 9.133740563143048, "learning_rate": 2.647412755716005e-06, "loss": 1.1344, "step": 440 }, { "epoch": 0.031860133292394385, "grad_norm": 8.463504306808568, "learning_rate": 2.653429602888087e-06, "loss": 1.0907, "step": 441 }, { "epoch": 0.0319323784926039, "grad_norm": 7.73217544876021, "learning_rate": 2.6594464500601686e-06, "loss": 0.9804, "step": 442 }, { "epoch": 0.03200462369281341, "grad_norm": 7.860276521278803, "learning_rate": 2.6654632972322504e-06, "loss": 1.0049, "step": 443 }, { "epoch": 0.03207686889302292, "grad_norm": 7.986570052813416, "learning_rate": 2.6714801444043326e-06, "loss": 1.0567, "step": 444 }, { "epoch": 0.03214911409323243, "grad_norm": 9.545335311475238, "learning_rate": 2.6774969915764144e-06, "loss": 1.0697, "step": 445 }, { "epoch": 0.03222135929344194, "grad_norm": 10.01886114961933, "learning_rate": 2.683513838748496e-06, "loss": 1.0478, "step": 446 }, { "epoch": 0.03229360449365145, "grad_norm": 10.421787152928115, "learning_rate": 2.689530685920578e-06, "loss": 1.1086, "step": 447 }, { "epoch": 0.032365849693860964, "grad_norm": 8.10485737528616, "learning_rate": 2.6955475330926593e-06, "loss": 1.0466, "step": 448 }, { "epoch": 0.03243809489407048, "grad_norm": 8.721289480217585, "learning_rate": 2.701564380264742e-06, "loss": 1.081, "step": 449 }, { "epoch": 0.032510340094279985, "grad_norm": 8.00891331991538, "learning_rate": 2.7075812274368237e-06, "loss": 1.0242, "step": 450 }, { "epoch": 0.0325825852944895, "grad_norm": 8.377394604317342, "learning_rate": 2.713598074608905e-06, "loss": 1.0905, "step": 451 }, { "epoch": 0.03265483049469901, "grad_norm": 8.142159520527391, "learning_rate": 2.719614921780987e-06, "loss": 1.0712, "step": 452 }, { "epoch": 0.03272707569490852, "grad_norm": 8.965125438112606, "learning_rate": 2.7256317689530686e-06, "loss": 1.0832, "step": 453 }, { "epoch": 0.03279932089511803, "grad_norm": 9.526045181721491, "learning_rate": 2.7316486161251504e-06, "loss": 1.0289, "step": 454 }, { "epoch": 0.03287156609532754, "grad_norm": 7.6873600644657065, "learning_rate": 2.7376654632972326e-06, "loss": 0.9663, "step": 455 }, { "epoch": 0.03294381129553705, "grad_norm": 8.388511121495233, "learning_rate": 2.7436823104693144e-06, "loss": 0.9849, "step": 456 }, { "epoch": 0.033016056495746564, "grad_norm": 8.68785215254116, "learning_rate": 2.749699157641396e-06, "loss": 1.0437, "step": 457 }, { "epoch": 0.03308830169595608, "grad_norm": 9.280414376708508, "learning_rate": 2.755716004813478e-06, "loss": 0.9701, "step": 458 }, { "epoch": 0.033160546896165585, "grad_norm": 10.845254843124005, "learning_rate": 2.7617328519855597e-06, "loss": 1.0812, "step": 459 }, { "epoch": 0.0332327920963751, "grad_norm": 10.008213484843493, "learning_rate": 2.7677496991576415e-06, "loss": 1.0272, "step": 460 }, { "epoch": 0.03330503729658461, "grad_norm": 10.046880315657614, "learning_rate": 2.7737665463297237e-06, "loss": 1.1313, "step": 461 }, { "epoch": 0.03337728249679412, "grad_norm": 10.195912263848186, "learning_rate": 2.7797833935018055e-06, "loss": 1.1248, "step": 462 }, { "epoch": 0.03344952769700363, "grad_norm": 9.327780661305757, "learning_rate": 2.7858002406738872e-06, "loss": 1.0535, "step": 463 }, { "epoch": 0.03352177289721314, "grad_norm": 9.840942369941795, "learning_rate": 2.791817087845969e-06, "loss": 1.1426, "step": 464 }, { "epoch": 0.03359401809742265, "grad_norm": 9.774066663901502, "learning_rate": 2.7978339350180504e-06, "loss": 1.1201, "step": 465 }, { "epoch": 0.033666263297632164, "grad_norm": 9.012696106109106, "learning_rate": 2.803850782190133e-06, "loss": 0.9304, "step": 466 }, { "epoch": 0.03373850849784168, "grad_norm": 9.190551666205854, "learning_rate": 2.8098676293622148e-06, "loss": 1.0771, "step": 467 }, { "epoch": 0.033810753698051185, "grad_norm": 8.384643468493458, "learning_rate": 2.815884476534296e-06, "loss": 0.9346, "step": 468 }, { "epoch": 0.0338829988982607, "grad_norm": 8.745417239495843, "learning_rate": 2.821901323706378e-06, "loss": 1.148, "step": 469 }, { "epoch": 0.03395524409847021, "grad_norm": 10.349236914474133, "learning_rate": 2.8279181708784597e-06, "loss": 1.0953, "step": 470 }, { "epoch": 0.03402748929867972, "grad_norm": 7.8680883685425975, "learning_rate": 2.8339350180505415e-06, "loss": 1.0755, "step": 471 }, { "epoch": 0.03409973449888923, "grad_norm": 11.026558805197235, "learning_rate": 2.8399518652226237e-06, "loss": 1.1494, "step": 472 }, { "epoch": 0.03417197969909874, "grad_norm": 10.258892017049428, "learning_rate": 2.8459687123947054e-06, "loss": 1.0863, "step": 473 }, { "epoch": 0.03424422489930825, "grad_norm": 10.883603255550009, "learning_rate": 2.8519855595667872e-06, "loss": 1.0061, "step": 474 }, { "epoch": 0.034316470099517764, "grad_norm": 9.997832254057549, "learning_rate": 2.858002406738869e-06, "loss": 0.9922, "step": 475 }, { "epoch": 0.03438871529972727, "grad_norm": 11.943448007898843, "learning_rate": 2.8640192539109508e-06, "loss": 1.0969, "step": 476 }, { "epoch": 0.034460960499936785, "grad_norm": 13.66734020775411, "learning_rate": 2.870036101083033e-06, "loss": 1.1059, "step": 477 }, { "epoch": 0.0345332057001463, "grad_norm": 8.955125621806975, "learning_rate": 2.8760529482551148e-06, "loss": 1.0787, "step": 478 }, { "epoch": 0.03460545090035581, "grad_norm": 9.84640806126703, "learning_rate": 2.8820697954271965e-06, "loss": 1.0476, "step": 479 }, { "epoch": 0.03467769610056532, "grad_norm": 9.279861911535535, "learning_rate": 2.8880866425992783e-06, "loss": 1.094, "step": 480 }, { "epoch": 0.03474994130077483, "grad_norm": 9.545532331934139, "learning_rate": 2.89410348977136e-06, "loss": 1.0989, "step": 481 }, { "epoch": 0.03482218650098434, "grad_norm": 8.98934517755278, "learning_rate": 2.9001203369434414e-06, "loss": 1.087, "step": 482 }, { "epoch": 0.03489443170119385, "grad_norm": 10.423128936230668, "learning_rate": 2.906137184115524e-06, "loss": 1.0825, "step": 483 }, { "epoch": 0.034966676901403364, "grad_norm": 9.521680887395183, "learning_rate": 2.912154031287606e-06, "loss": 1.0214, "step": 484 }, { "epoch": 0.03503892210161287, "grad_norm": 9.065612679347884, "learning_rate": 2.918170878459687e-06, "loss": 1.1667, "step": 485 }, { "epoch": 0.035111167301822385, "grad_norm": 9.340734499164759, "learning_rate": 2.924187725631769e-06, "loss": 1.0433, "step": 486 }, { "epoch": 0.0351834125020319, "grad_norm": 8.747763211590417, "learning_rate": 2.9302045728038508e-06, "loss": 1.0742, "step": 487 }, { "epoch": 0.03525565770224141, "grad_norm": 10.418027702425066, "learning_rate": 2.9362214199759325e-06, "loss": 1.1099, "step": 488 }, { "epoch": 0.03532790290245092, "grad_norm": 9.01613738456787, "learning_rate": 2.9422382671480147e-06, "loss": 1.006, "step": 489 }, { "epoch": 0.03540014810266043, "grad_norm": 9.322399637956186, "learning_rate": 2.9482551143200965e-06, "loss": 1.1485, "step": 490 }, { "epoch": 0.03547239330286994, "grad_norm": 13.51847084381507, "learning_rate": 2.9542719614921783e-06, "loss": 1.2012, "step": 491 }, { "epoch": 0.03554463850307945, "grad_norm": 7.790163871011124, "learning_rate": 2.96028880866426e-06, "loss": 1.0569, "step": 492 }, { "epoch": 0.035616883703288964, "grad_norm": 9.594763090340773, "learning_rate": 2.966305655836342e-06, "loss": 1.0769, "step": 493 }, { "epoch": 0.03568912890349847, "grad_norm": 11.04312316117284, "learning_rate": 2.972322503008424e-06, "loss": 1.0505, "step": 494 }, { "epoch": 0.035761374103707985, "grad_norm": 10.322735509941303, "learning_rate": 2.978339350180506e-06, "loss": 1.1057, "step": 495 }, { "epoch": 0.0358336193039175, "grad_norm": 10.162039365569433, "learning_rate": 2.9843561973525876e-06, "loss": 1.0628, "step": 496 }, { "epoch": 0.03590586450412701, "grad_norm": 10.845328708027075, "learning_rate": 2.9903730445246694e-06, "loss": 1.1351, "step": 497 }, { "epoch": 0.03597810970433652, "grad_norm": 9.989272276192603, "learning_rate": 2.996389891696751e-06, "loss": 1.0864, "step": 498 }, { "epoch": 0.03605035490454603, "grad_norm": 9.661646054372953, "learning_rate": 3.0024067388688325e-06, "loss": 1.0662, "step": 499 }, { "epoch": 0.03612260010475554, "grad_norm": 11.237636639164247, "learning_rate": 3.008423586040915e-06, "loss": 1.0626, "step": 500 }, { "epoch": 0.03619484530496505, "grad_norm": 12.558644172721483, "learning_rate": 3.014440433212997e-06, "loss": 1.0685, "step": 501 }, { "epoch": 0.036267090505174564, "grad_norm": 8.579426539892859, "learning_rate": 3.0204572803850783e-06, "loss": 1.0309, "step": 502 }, { "epoch": 0.03633933570538407, "grad_norm": 12.175236092222901, "learning_rate": 3.02647412755716e-06, "loss": 1.1219, "step": 503 }, { "epoch": 0.036411580905593585, "grad_norm": 9.056489621948502, "learning_rate": 3.032490974729242e-06, "loss": 0.9755, "step": 504 }, { "epoch": 0.03648382610580309, "grad_norm": 9.780459045995265, "learning_rate": 3.038507821901324e-06, "loss": 0.9961, "step": 505 }, { "epoch": 0.03655607130601261, "grad_norm": 12.212619345949603, "learning_rate": 3.044524669073406e-06, "loss": 1.1087, "step": 506 }, { "epoch": 0.03662831650622212, "grad_norm": 9.651508241105377, "learning_rate": 3.0505415162454876e-06, "loss": 1.1164, "step": 507 }, { "epoch": 0.03670056170643163, "grad_norm": 12.172205948645875, "learning_rate": 3.0565583634175694e-06, "loss": 1.084, "step": 508 }, { "epoch": 0.03677280690664114, "grad_norm": 7.802986150506115, "learning_rate": 3.062575210589651e-06, "loss": 1.0624, "step": 509 }, { "epoch": 0.03684505210685065, "grad_norm": 7.410376063391041, "learning_rate": 3.068592057761733e-06, "loss": 0.9564, "step": 510 }, { "epoch": 0.036917297307060164, "grad_norm": 8.526415450645818, "learning_rate": 3.074608904933815e-06, "loss": 1.0601, "step": 511 }, { "epoch": 0.03698954250726967, "grad_norm": 10.47825077571861, "learning_rate": 3.080625752105897e-06, "loss": 1.0709, "step": 512 }, { "epoch": 0.037061787707479185, "grad_norm": 7.593186596978452, "learning_rate": 3.0866425992779787e-06, "loss": 0.9573, "step": 513 }, { "epoch": 0.03713403290768869, "grad_norm": 8.634608127734069, "learning_rate": 3.0926594464500605e-06, "loss": 1.1084, "step": 514 }, { "epoch": 0.03720627810789821, "grad_norm": 8.336651815253504, "learning_rate": 3.0986762936221423e-06, "loss": 1.0965, "step": 515 }, { "epoch": 0.03727852330810772, "grad_norm": 7.932318250111056, "learning_rate": 3.1046931407942245e-06, "loss": 1.1669, "step": 516 }, { "epoch": 0.03735076850831723, "grad_norm": 8.089437270992843, "learning_rate": 3.1107099879663062e-06, "loss": 1.0383, "step": 517 }, { "epoch": 0.03742301370852674, "grad_norm": 9.777907900474224, "learning_rate": 3.1167268351383876e-06, "loss": 1.083, "step": 518 }, { "epoch": 0.03749525890873625, "grad_norm": 9.51587675416147, "learning_rate": 3.1227436823104694e-06, "loss": 1.1154, "step": 519 }, { "epoch": 0.037567504108945764, "grad_norm": 8.490864949912714, "learning_rate": 3.128760529482551e-06, "loss": 1.0578, "step": 520 }, { "epoch": 0.03763974930915527, "grad_norm": 8.439652684012406, "learning_rate": 3.134777376654633e-06, "loss": 1.0896, "step": 521 }, { "epoch": 0.037711994509364785, "grad_norm": 10.309381556790694, "learning_rate": 3.140794223826715e-06, "loss": 1.0449, "step": 522 }, { "epoch": 0.03778423970957429, "grad_norm": 10.36264147690666, "learning_rate": 3.146811070998797e-06, "loss": 1.0611, "step": 523 }, { "epoch": 0.03785648490978381, "grad_norm": 7.713245235897261, "learning_rate": 3.1528279181708787e-06, "loss": 1.0356, "step": 524 }, { "epoch": 0.03792873010999332, "grad_norm": 9.886589878407424, "learning_rate": 3.1588447653429605e-06, "loss": 1.1255, "step": 525 }, { "epoch": 0.03800097531020283, "grad_norm": 12.601115077495685, "learning_rate": 3.1648616125150423e-06, "loss": 1.0573, "step": 526 }, { "epoch": 0.03807322051041234, "grad_norm": 8.103976707553057, "learning_rate": 3.170878459687124e-06, "loss": 1.0055, "step": 527 }, { "epoch": 0.03814546571062185, "grad_norm": 10.369962641355343, "learning_rate": 3.1768953068592062e-06, "loss": 0.9683, "step": 528 }, { "epoch": 0.038217710910831364, "grad_norm": 8.627537450850058, "learning_rate": 3.182912154031288e-06, "loss": 0.9004, "step": 529 }, { "epoch": 0.03828995611104087, "grad_norm": 14.343653726358555, "learning_rate": 3.18892900120337e-06, "loss": 1.1437, "step": 530 }, { "epoch": 0.038362201311250385, "grad_norm": 9.479305414435, "learning_rate": 3.1949458483754516e-06, "loss": 0.982, "step": 531 }, { "epoch": 0.03843444651145989, "grad_norm": 12.015610396000552, "learning_rate": 3.2009626955475333e-06, "loss": 1.0415, "step": 532 }, { "epoch": 0.03850669171166941, "grad_norm": 9.538462627022732, "learning_rate": 3.2069795427196155e-06, "loss": 1.0587, "step": 533 }, { "epoch": 0.038578936911878914, "grad_norm": 9.252553638878947, "learning_rate": 3.2129963898916973e-06, "loss": 1.1353, "step": 534 }, { "epoch": 0.03865118211208843, "grad_norm": 9.644106837436764, "learning_rate": 3.2190132370637787e-06, "loss": 1.0296, "step": 535 }, { "epoch": 0.03872342731229794, "grad_norm": 8.590170152819706, "learning_rate": 3.2250300842358605e-06, "loss": 1.0104, "step": 536 }, { "epoch": 0.03879567251250745, "grad_norm": 9.532777557534098, "learning_rate": 3.2310469314079422e-06, "loss": 0.9937, "step": 537 }, { "epoch": 0.038867917712716964, "grad_norm": 10.668652429117762, "learning_rate": 3.237063778580024e-06, "loss": 1.0501, "step": 538 }, { "epoch": 0.03894016291292647, "grad_norm": 8.333879325782155, "learning_rate": 3.2430806257521062e-06, "loss": 1.0834, "step": 539 }, { "epoch": 0.039012408113135985, "grad_norm": 8.019768608731654, "learning_rate": 3.249097472924188e-06, "loss": 1.0044, "step": 540 }, { "epoch": 0.03908465331334549, "grad_norm": 9.239572369476889, "learning_rate": 3.2551143200962698e-06, "loss": 1.0995, "step": 541 }, { "epoch": 0.03915689851355501, "grad_norm": 9.333154086253687, "learning_rate": 3.2611311672683516e-06, "loss": 1.0933, "step": 542 }, { "epoch": 0.039229143713764514, "grad_norm": 11.329264490103439, "learning_rate": 3.2671480144404333e-06, "loss": 1.0653, "step": 543 }, { "epoch": 0.03930138891397403, "grad_norm": 8.404628441272097, "learning_rate": 3.2731648616125155e-06, "loss": 1.0941, "step": 544 }, { "epoch": 0.03937363411418354, "grad_norm": 8.7066071425424, "learning_rate": 3.2791817087845973e-06, "loss": 1.1225, "step": 545 }, { "epoch": 0.03944587931439305, "grad_norm": 10.735012873947035, "learning_rate": 3.285198555956679e-06, "loss": 1.0526, "step": 546 }, { "epoch": 0.039518124514602564, "grad_norm": 10.680055044082353, "learning_rate": 3.291215403128761e-06, "loss": 1.059, "step": 547 }, { "epoch": 0.03959036971481207, "grad_norm": 11.485824472729826, "learning_rate": 3.2972322503008427e-06, "loss": 1.0177, "step": 548 }, { "epoch": 0.039662614915021585, "grad_norm": 9.723938761961982, "learning_rate": 3.303249097472924e-06, "loss": 0.9915, "step": 549 }, { "epoch": 0.03973486011523109, "grad_norm": 9.372364131403618, "learning_rate": 3.3092659446450066e-06, "loss": 1.1305, "step": 550 }, { "epoch": 0.03980710531544061, "grad_norm": 10.337824020012626, "learning_rate": 3.3152827918170884e-06, "loss": 1.1532, "step": 551 }, { "epoch": 0.039879350515650114, "grad_norm": 11.044871276685605, "learning_rate": 3.3212996389891698e-06, "loss": 1.0793, "step": 552 }, { "epoch": 0.03995159571585963, "grad_norm": 9.361309583945232, "learning_rate": 3.3273164861612515e-06, "loss": 0.9762, "step": 553 }, { "epoch": 0.040023840916069135, "grad_norm": 8.463945553263754, "learning_rate": 3.3333333333333333e-06, "loss": 0.9919, "step": 554 }, { "epoch": 0.04009608611627865, "grad_norm": 9.680252218822583, "learning_rate": 3.3393501805054155e-06, "loss": 1.0204, "step": 555 }, { "epoch": 0.040168331316488164, "grad_norm": 9.045501973745363, "learning_rate": 3.3453670276774973e-06, "loss": 1.0564, "step": 556 }, { "epoch": 0.04024057651669767, "grad_norm": 7.536111489315596, "learning_rate": 3.351383874849579e-06, "loss": 1.0104, "step": 557 }, { "epoch": 0.040312821716907185, "grad_norm": 9.705546447054772, "learning_rate": 3.357400722021661e-06, "loss": 1.0274, "step": 558 }, { "epoch": 0.04038506691711669, "grad_norm": 8.586961096855292, "learning_rate": 3.3634175691937426e-06, "loss": 0.9909, "step": 559 }, { "epoch": 0.040457312117326207, "grad_norm": 9.384842245719998, "learning_rate": 3.3694344163658244e-06, "loss": 1.0409, "step": 560 }, { "epoch": 0.040529557317535714, "grad_norm": 9.451379536040088, "learning_rate": 3.3754512635379066e-06, "loss": 1.017, "step": 561 }, { "epoch": 0.04060180251774523, "grad_norm": 7.459659678696653, "learning_rate": 3.3814681107099884e-06, "loss": 0.9764, "step": 562 }, { "epoch": 0.040674047717954735, "grad_norm": 7.835199444927798, "learning_rate": 3.38748495788207e-06, "loss": 1.0364, "step": 563 }, { "epoch": 0.04074629291816425, "grad_norm": 9.837032290516687, "learning_rate": 3.393501805054152e-06, "loss": 1.0801, "step": 564 }, { "epoch": 0.040818538118373764, "grad_norm": 9.424817993673257, "learning_rate": 3.3995186522262337e-06, "loss": 1.0211, "step": 565 }, { "epoch": 0.04089078331858327, "grad_norm": 8.160910257918106, "learning_rate": 3.405535499398315e-06, "loss": 1.0756, "step": 566 }, { "epoch": 0.040963028518792785, "grad_norm": 9.8175603368045, "learning_rate": 3.4115523465703977e-06, "loss": 1.0529, "step": 567 }, { "epoch": 0.04103527371900229, "grad_norm": 11.361564330802723, "learning_rate": 3.4175691937424795e-06, "loss": 1.1245, "step": 568 }, { "epoch": 0.041107518919211807, "grad_norm": 8.4553404776374, "learning_rate": 3.423586040914561e-06, "loss": 1.0693, "step": 569 }, { "epoch": 0.041179764119421314, "grad_norm": 9.041298948155728, "learning_rate": 3.4296028880866426e-06, "loss": 1.0315, "step": 570 }, { "epoch": 0.04125200931963083, "grad_norm": 8.874253792353121, "learning_rate": 3.4356197352587244e-06, "loss": 1.0034, "step": 571 }, { "epoch": 0.041324254519840335, "grad_norm": 9.009223450316592, "learning_rate": 3.4416365824308066e-06, "loss": 0.9879, "step": 572 }, { "epoch": 0.04139649972004985, "grad_norm": 7.835488638377854, "learning_rate": 3.4476534296028884e-06, "loss": 1.0548, "step": 573 }, { "epoch": 0.041468744920259364, "grad_norm": 8.227234380949776, "learning_rate": 3.45367027677497e-06, "loss": 1.0987, "step": 574 }, { "epoch": 0.04154099012046887, "grad_norm": 8.114263865328258, "learning_rate": 3.459687123947052e-06, "loss": 1.1154, "step": 575 }, { "epoch": 0.041613235320678385, "grad_norm": 8.905921903221982, "learning_rate": 3.4657039711191337e-06, "loss": 1.0347, "step": 576 }, { "epoch": 0.04168548052088789, "grad_norm": 7.503703156820226, "learning_rate": 3.4717208182912155e-06, "loss": 1.054, "step": 577 }, { "epoch": 0.041757725721097406, "grad_norm": 8.587977020474566, "learning_rate": 3.4777376654632977e-06, "loss": 1.0095, "step": 578 }, { "epoch": 0.041829970921306914, "grad_norm": 8.220405904953083, "learning_rate": 3.4837545126353795e-06, "loss": 0.969, "step": 579 }, { "epoch": 0.04190221612151643, "grad_norm": 8.743029188285204, "learning_rate": 3.4897713598074613e-06, "loss": 1.0897, "step": 580 }, { "epoch": 0.041974461321725935, "grad_norm": 8.300673252973924, "learning_rate": 3.495788206979543e-06, "loss": 1.0031, "step": 581 }, { "epoch": 0.04204670652193545, "grad_norm": 11.241633716381319, "learning_rate": 3.501805054151625e-06, "loss": 1.0231, "step": 582 }, { "epoch": 0.04211895172214496, "grad_norm": 7.758115149795744, "learning_rate": 3.507821901323707e-06, "loss": 1.0426, "step": 583 }, { "epoch": 0.04219119692235447, "grad_norm": 7.237382103152441, "learning_rate": 3.513838748495789e-06, "loss": 1.0369, "step": 584 }, { "epoch": 0.042263442122563985, "grad_norm": 8.788628027455735, "learning_rate": 3.5198555956678706e-06, "loss": 1.0763, "step": 585 }, { "epoch": 0.04233568732277349, "grad_norm": 10.622272814635066, "learning_rate": 3.525872442839952e-06, "loss": 0.938, "step": 586 }, { "epoch": 0.042407932522983006, "grad_norm": 8.101474448804298, "learning_rate": 3.5318892900120337e-06, "loss": 1.0448, "step": 587 }, { "epoch": 0.042480177723192514, "grad_norm": 7.24538669478266, "learning_rate": 3.5379061371841155e-06, "loss": 1.1258, "step": 588 }, { "epoch": 0.04255242292340203, "grad_norm": 12.49571215520048, "learning_rate": 3.5439229843561977e-06, "loss": 1.1251, "step": 589 }, { "epoch": 0.042624668123611535, "grad_norm": 10.488261609618482, "learning_rate": 3.5499398315282795e-06, "loss": 1.0829, "step": 590 }, { "epoch": 0.04269691332382105, "grad_norm": 8.272417072192903, "learning_rate": 3.5559566787003613e-06, "loss": 1.0034, "step": 591 }, { "epoch": 0.04276915852403056, "grad_norm": 7.179006317998835, "learning_rate": 3.561973525872443e-06, "loss": 1.0391, "step": 592 }, { "epoch": 0.04284140372424007, "grad_norm": 8.11638478044855, "learning_rate": 3.567990373044525e-06, "loss": 1.0203, "step": 593 }, { "epoch": 0.042913648924449585, "grad_norm": 10.154309084371867, "learning_rate": 3.574007220216607e-06, "loss": 1.066, "step": 594 }, { "epoch": 0.04298589412465909, "grad_norm": 10.090871678063522, "learning_rate": 3.580024067388689e-06, "loss": 1.0613, "step": 595 }, { "epoch": 0.043058139324868606, "grad_norm": 13.232484000084792, "learning_rate": 3.5860409145607706e-06, "loss": 1.027, "step": 596 }, { "epoch": 0.043130384525078114, "grad_norm": 9.74692115100087, "learning_rate": 3.5920577617328523e-06, "loss": 1.0504, "step": 597 }, { "epoch": 0.04320262972528763, "grad_norm": 11.077087784219138, "learning_rate": 3.598074608904934e-06, "loss": 1.1067, "step": 598 }, { "epoch": 0.043274874925497135, "grad_norm": 11.64737639784707, "learning_rate": 3.604091456077016e-06, "loss": 0.95, "step": 599 }, { "epoch": 0.04334712012570665, "grad_norm": 8.835397826965245, "learning_rate": 3.610108303249098e-06, "loss": 1.0275, "step": 600 }, { "epoch": 0.04341936532591616, "grad_norm": 8.43253434609352, "learning_rate": 3.61612515042118e-06, "loss": 0.9687, "step": 601 }, { "epoch": 0.04349161052612567, "grad_norm": 8.676439008721857, "learning_rate": 3.6221419975932617e-06, "loss": 1.0203, "step": 602 }, { "epoch": 0.043563855726335185, "grad_norm": 9.12145592830186, "learning_rate": 3.628158844765343e-06, "loss": 1.0453, "step": 603 }, { "epoch": 0.04363610092654469, "grad_norm": 9.0454176287292, "learning_rate": 3.634175691937425e-06, "loss": 1.1023, "step": 604 }, { "epoch": 0.043708346126754206, "grad_norm": 7.999159053472067, "learning_rate": 3.6401925391095066e-06, "loss": 1.023, "step": 605 }, { "epoch": 0.043780591326963714, "grad_norm": 10.031564107475106, "learning_rate": 3.6462093862815888e-06, "loss": 1.0968, "step": 606 }, { "epoch": 0.04385283652717323, "grad_norm": 8.612792290543123, "learning_rate": 3.6522262334536706e-06, "loss": 0.9911, "step": 607 }, { "epoch": 0.043925081727382735, "grad_norm": 9.522545013040927, "learning_rate": 3.6582430806257523e-06, "loss": 1.0611, "step": 608 }, { "epoch": 0.04399732692759225, "grad_norm": 7.1788121002131, "learning_rate": 3.664259927797834e-06, "loss": 1.0506, "step": 609 }, { "epoch": 0.04406957212780176, "grad_norm": 9.009161207167862, "learning_rate": 3.670276774969916e-06, "loss": 0.9861, "step": 610 }, { "epoch": 0.04414181732801127, "grad_norm": 8.659785350932133, "learning_rate": 3.676293622141998e-06, "loss": 1.0845, "step": 611 }, { "epoch": 0.04421406252822078, "grad_norm": 7.344269949179822, "learning_rate": 3.68231046931408e-06, "loss": 0.9944, "step": 612 }, { "epoch": 0.04428630772843029, "grad_norm": 8.724648507369706, "learning_rate": 3.6883273164861617e-06, "loss": 1.056, "step": 613 }, { "epoch": 0.044358552928639806, "grad_norm": 8.504232811253923, "learning_rate": 3.6943441636582434e-06, "loss": 1.0937, "step": 614 }, { "epoch": 0.044430798128849314, "grad_norm": 8.222775477906385, "learning_rate": 3.700361010830325e-06, "loss": 1.0394, "step": 615 }, { "epoch": 0.04450304332905883, "grad_norm": 8.32840669595702, "learning_rate": 3.706377858002407e-06, "loss": 1.0236, "step": 616 }, { "epoch": 0.044575288529268335, "grad_norm": 9.118325915028251, "learning_rate": 3.712394705174489e-06, "loss": 1.0857, "step": 617 }, { "epoch": 0.04464753372947785, "grad_norm": 7.372740059841404, "learning_rate": 3.718411552346571e-06, "loss": 0.9527, "step": 618 }, { "epoch": 0.04471977892968736, "grad_norm": 7.552328283787955, "learning_rate": 3.7244283995186527e-06, "loss": 0.9574, "step": 619 }, { "epoch": 0.04479202412989687, "grad_norm": 8.481147503497048, "learning_rate": 3.730445246690734e-06, "loss": 1.0112, "step": 620 }, { "epoch": 0.04486426933010638, "grad_norm": 7.733386043938958, "learning_rate": 3.736462093862816e-06, "loss": 0.9635, "step": 621 }, { "epoch": 0.04493651453031589, "grad_norm": 7.728511923094262, "learning_rate": 3.742478941034898e-06, "loss": 1.0985, "step": 622 }, { "epoch": 0.045008759730525406, "grad_norm": 8.538161834934453, "learning_rate": 3.74849578820698e-06, "loss": 1.0623, "step": 623 }, { "epoch": 0.045081004930734914, "grad_norm": 9.040837356810744, "learning_rate": 3.7545126353790616e-06, "loss": 1.0252, "step": 624 }, { "epoch": 0.04515325013094443, "grad_norm": 7.536187417293589, "learning_rate": 3.7605294825511434e-06, "loss": 0.9809, "step": 625 }, { "epoch": 0.045225495331153935, "grad_norm": 9.474546364264294, "learning_rate": 3.766546329723225e-06, "loss": 1.0552, "step": 626 }, { "epoch": 0.04529774053136345, "grad_norm": 8.120282490894857, "learning_rate": 3.772563176895307e-06, "loss": 0.9457, "step": 627 }, { "epoch": 0.04536998573157296, "grad_norm": 7.870775573206195, "learning_rate": 3.778580024067389e-06, "loss": 0.9694, "step": 628 }, { "epoch": 0.04544223093178247, "grad_norm": 12.397411051556288, "learning_rate": 3.784596871239471e-06, "loss": 1.0433, "step": 629 }, { "epoch": 0.04551447613199198, "grad_norm": 9.316041004370751, "learning_rate": 3.7906137184115527e-06, "loss": 0.9675, "step": 630 }, { "epoch": 0.04558672133220149, "grad_norm": 10.524785171454733, "learning_rate": 3.7966305655836345e-06, "loss": 1.0, "step": 631 }, { "epoch": 0.045658966532411006, "grad_norm": 10.571250766746454, "learning_rate": 3.8026474127557163e-06, "loss": 1.0872, "step": 632 }, { "epoch": 0.045731211732620514, "grad_norm": 8.915720191230786, "learning_rate": 3.808664259927798e-06, "loss": 1.073, "step": 633 }, { "epoch": 0.04580345693283003, "grad_norm": 8.911145173364714, "learning_rate": 3.8146811070998803e-06, "loss": 1.0442, "step": 634 }, { "epoch": 0.045875702133039535, "grad_norm": 11.350593289232863, "learning_rate": 3.820697954271962e-06, "loss": 1.1023, "step": 635 }, { "epoch": 0.04594794733324905, "grad_norm": 11.686436569058046, "learning_rate": 3.826714801444043e-06, "loss": 1.155, "step": 636 }, { "epoch": 0.04602019253345856, "grad_norm": 9.667877603737018, "learning_rate": 3.832731648616125e-06, "loss": 1.1304, "step": 637 }, { "epoch": 0.04609243773366807, "grad_norm": 7.94553311172028, "learning_rate": 3.838748495788207e-06, "loss": 1.0988, "step": 638 }, { "epoch": 0.04616468293387758, "grad_norm": 10.215769855245936, "learning_rate": 3.84476534296029e-06, "loss": 0.9747, "step": 639 }, { "epoch": 0.04623692813408709, "grad_norm": 12.920653943970628, "learning_rate": 3.850782190132371e-06, "loss": 1.0739, "step": 640 }, { "epoch": 0.0463091733342966, "grad_norm": 11.418520663191979, "learning_rate": 3.856799037304453e-06, "loss": 1.0497, "step": 641 }, { "epoch": 0.046381418534506114, "grad_norm": 11.39704251659175, "learning_rate": 3.862815884476535e-06, "loss": 1.084, "step": 642 }, { "epoch": 0.04645366373471563, "grad_norm": 7.935770161684499, "learning_rate": 3.868832731648616e-06, "loss": 1.017, "step": 643 }, { "epoch": 0.046525908934925135, "grad_norm": 11.6081966248783, "learning_rate": 3.874849578820698e-06, "loss": 1.0752, "step": 644 }, { "epoch": 0.04659815413513465, "grad_norm": 10.590210089684357, "learning_rate": 3.88086642599278e-06, "loss": 1.0977, "step": 645 }, { "epoch": 0.046670399335344157, "grad_norm": 8.352572801964037, "learning_rate": 3.886883273164862e-06, "loss": 1.0354, "step": 646 }, { "epoch": 0.04674264453555367, "grad_norm": 7.378044146717218, "learning_rate": 3.892900120336944e-06, "loss": 0.9732, "step": 647 }, { "epoch": 0.04681488973576318, "grad_norm": 9.867861714891365, "learning_rate": 3.898916967509026e-06, "loss": 1.1215, "step": 648 }, { "epoch": 0.04688713493597269, "grad_norm": 8.979486186869256, "learning_rate": 3.904933814681107e-06, "loss": 1.0034, "step": 649 }, { "epoch": 0.0469593801361822, "grad_norm": 11.300395074200186, "learning_rate": 3.910950661853189e-06, "loss": 0.9374, "step": 650 }, { "epoch": 0.047031625336391714, "grad_norm": 9.275614559244048, "learning_rate": 3.916967509025271e-06, "loss": 1.0823, "step": 651 }, { "epoch": 0.04710387053660123, "grad_norm": 8.035515628357288, "learning_rate": 3.922984356197353e-06, "loss": 1.0493, "step": 652 }, { "epoch": 0.047176115736810735, "grad_norm": 10.184799491695218, "learning_rate": 3.9290012033694345e-06, "loss": 1.0815, "step": 653 }, { "epoch": 0.04724836093702025, "grad_norm": 11.137502096156684, "learning_rate": 3.935018050541516e-06, "loss": 1.1539, "step": 654 }, { "epoch": 0.047320606137229757, "grad_norm": 10.596198685127938, "learning_rate": 3.941034897713598e-06, "loss": 1.027, "step": 655 }, { "epoch": 0.04739285133743927, "grad_norm": 9.829333984537804, "learning_rate": 3.947051744885681e-06, "loss": 1.0118, "step": 656 }, { "epoch": 0.04746509653764878, "grad_norm": 7.066739860491497, "learning_rate": 3.9530685920577625e-06, "loss": 0.9597, "step": 657 }, { "epoch": 0.04753734173785829, "grad_norm": 9.281716074945324, "learning_rate": 3.959085439229844e-06, "loss": 0.928, "step": 658 }, { "epoch": 0.0476095869380678, "grad_norm": 10.62385427244043, "learning_rate": 3.965102286401926e-06, "loss": 1.1307, "step": 659 }, { "epoch": 0.047681832138277314, "grad_norm": 9.647105012999285, "learning_rate": 3.971119133574007e-06, "loss": 1.0724, "step": 660 }, { "epoch": 0.04775407733848682, "grad_norm": 9.89964252173307, "learning_rate": 3.9771359807460896e-06, "loss": 0.9843, "step": 661 }, { "epoch": 0.047826322538696335, "grad_norm": 10.13564842615186, "learning_rate": 3.983152827918171e-06, "loss": 1.0537, "step": 662 }, { "epoch": 0.04789856773890585, "grad_norm": 8.59291011520817, "learning_rate": 3.989169675090253e-06, "loss": 1.0089, "step": 663 }, { "epoch": 0.047970812939115356, "grad_norm": 8.620632669194732, "learning_rate": 3.995186522262335e-06, "loss": 1.0187, "step": 664 }, { "epoch": 0.04804305813932487, "grad_norm": 10.517062900855299, "learning_rate": 4.001203369434417e-06, "loss": 1.0481, "step": 665 }, { "epoch": 0.04811530333953438, "grad_norm": 8.61822863157469, "learning_rate": 4.0072202166064985e-06, "loss": 0.9345, "step": 666 }, { "epoch": 0.04818754853974389, "grad_norm": 7.318502978057996, "learning_rate": 4.01323706377858e-06, "loss": 0.9345, "step": 667 }, { "epoch": 0.0482597937399534, "grad_norm": 8.572488346887358, "learning_rate": 4.019253910950662e-06, "loss": 0.9736, "step": 668 }, { "epoch": 0.048332038940162914, "grad_norm": 9.343124113522393, "learning_rate": 4.025270758122744e-06, "loss": 1.0342, "step": 669 }, { "epoch": 0.04840428414037242, "grad_norm": 8.825257502357093, "learning_rate": 4.0312876052948256e-06, "loss": 1.0257, "step": 670 }, { "epoch": 0.048476529340581935, "grad_norm": 8.460411781047233, "learning_rate": 4.037304452466907e-06, "loss": 0.9583, "step": 671 }, { "epoch": 0.04854877454079145, "grad_norm": 8.341902281805945, "learning_rate": 4.043321299638989e-06, "loss": 0.9806, "step": 672 }, { "epoch": 0.048621019741000956, "grad_norm": 10.69444290315251, "learning_rate": 4.049338146811072e-06, "loss": 1.0581, "step": 673 }, { "epoch": 0.04869326494121047, "grad_norm": 6.555182746301002, "learning_rate": 4.0553549939831535e-06, "loss": 0.9151, "step": 674 }, { "epoch": 0.04876551014141998, "grad_norm": 10.067904519611597, "learning_rate": 4.061371841155235e-06, "loss": 0.9953, "step": 675 }, { "epoch": 0.04883775534162949, "grad_norm": 8.090316692326013, "learning_rate": 4.067388688327317e-06, "loss": 1.0519, "step": 676 }, { "epoch": 0.048910000541839, "grad_norm": 8.752238831882531, "learning_rate": 4.073405535499398e-06, "loss": 1.0546, "step": 677 }, { "epoch": 0.048982245742048514, "grad_norm": 10.187004217709966, "learning_rate": 4.079422382671481e-06, "loss": 1.0505, "step": 678 }, { "epoch": 0.04905449094225802, "grad_norm": 8.751259849312857, "learning_rate": 4.0854392298435624e-06, "loss": 1.0362, "step": 679 }, { "epoch": 0.049126736142467535, "grad_norm": 7.489728124188994, "learning_rate": 4.091456077015644e-06, "loss": 0.9456, "step": 680 }, { "epoch": 0.04919898134267705, "grad_norm": 11.319482829543066, "learning_rate": 4.097472924187726e-06, "loss": 1.0564, "step": 681 }, { "epoch": 0.049271226542886556, "grad_norm": 8.557561410811184, "learning_rate": 4.103489771359808e-06, "loss": 0.939, "step": 682 }, { "epoch": 0.04934347174309607, "grad_norm": 9.10430652036699, "learning_rate": 4.1095066185318895e-06, "loss": 1.0206, "step": 683 }, { "epoch": 0.04941571694330558, "grad_norm": 9.476339490110933, "learning_rate": 4.115523465703971e-06, "loss": 1.0651, "step": 684 }, { "epoch": 0.04948796214351509, "grad_norm": 9.268610609965236, "learning_rate": 4.121540312876053e-06, "loss": 1.0398, "step": 685 }, { "epoch": 0.0495602073437246, "grad_norm": 10.377117630271906, "learning_rate": 4.127557160048135e-06, "loss": 1.0125, "step": 686 }, { "epoch": 0.049632452543934114, "grad_norm": 8.268358553777656, "learning_rate": 4.133574007220217e-06, "loss": 1.0555, "step": 687 }, { "epoch": 0.04970469774414362, "grad_norm": 7.603793061018621, "learning_rate": 4.1395908543922984e-06, "loss": 1.0179, "step": 688 }, { "epoch": 0.049776942944353135, "grad_norm": 8.988618223529096, "learning_rate": 4.145607701564381e-06, "loss": 0.9787, "step": 689 }, { "epoch": 0.04984918814456264, "grad_norm": 8.073609733774651, "learning_rate": 4.151624548736463e-06, "loss": 1.0686, "step": 690 }, { "epoch": 0.049921433344772156, "grad_norm": 10.265759889596087, "learning_rate": 4.157641395908545e-06, "loss": 1.0691, "step": 691 }, { "epoch": 0.04999367854498167, "grad_norm": 8.664649826519877, "learning_rate": 4.163658243080626e-06, "loss": 1.0221, "step": 692 }, { "epoch": 0.05006592374519118, "grad_norm": 9.14329429841572, "learning_rate": 4.169675090252708e-06, "loss": 0.9806, "step": 693 }, { "epoch": 0.05013816894540069, "grad_norm": 12.533034352192695, "learning_rate": 4.175691937424789e-06, "loss": 1.1317, "step": 694 }, { "epoch": 0.0502104141456102, "grad_norm": 9.170971177651175, "learning_rate": 4.181708784596872e-06, "loss": 1.0516, "step": 695 }, { "epoch": 0.050282659345819714, "grad_norm": 10.558912401545703, "learning_rate": 4.1877256317689535e-06, "loss": 0.9761, "step": 696 }, { "epoch": 0.05035490454602922, "grad_norm": 18.30068995107963, "learning_rate": 4.193742478941035e-06, "loss": 1.0548, "step": 697 }, { "epoch": 0.050427149746238735, "grad_norm": 10.310398696901226, "learning_rate": 4.199759326113117e-06, "loss": 1.1167, "step": 698 }, { "epoch": 0.05049939494644824, "grad_norm": 10.680904026131389, "learning_rate": 4.205776173285199e-06, "loss": 0.9672, "step": 699 }, { "epoch": 0.050571640146657756, "grad_norm": 17.033068638565798, "learning_rate": 4.211793020457281e-06, "loss": 1.1109, "step": 700 }, { "epoch": 0.05064388534686727, "grad_norm": 11.808606025671754, "learning_rate": 4.217809867629362e-06, "loss": 1.0477, "step": 701 }, { "epoch": 0.05071613054707678, "grad_norm": 9.057366541764498, "learning_rate": 4.223826714801444e-06, "loss": 0.9823, "step": 702 }, { "epoch": 0.05078837574728629, "grad_norm": 11.42381526889557, "learning_rate": 4.229843561973526e-06, "loss": 1.0306, "step": 703 }, { "epoch": 0.0508606209474958, "grad_norm": 9.695814932715695, "learning_rate": 4.235860409145608e-06, "loss": 1.0017, "step": 704 }, { "epoch": 0.050932866147705314, "grad_norm": 9.89725699908601, "learning_rate": 4.2418772563176895e-06, "loss": 1.071, "step": 705 }, { "epoch": 0.05100511134791482, "grad_norm": 8.384669856269406, "learning_rate": 4.247894103489772e-06, "loss": 1.0553, "step": 706 }, { "epoch": 0.051077356548124335, "grad_norm": 9.038569563049993, "learning_rate": 4.253910950661854e-06, "loss": 1.0472, "step": 707 }, { "epoch": 0.05114960174833384, "grad_norm": 8.653222832035436, "learning_rate": 4.259927797833936e-06, "loss": 1.0746, "step": 708 }, { "epoch": 0.051221846948543356, "grad_norm": 10.650640448713563, "learning_rate": 4.2659446450060175e-06, "loss": 1.0389, "step": 709 }, { "epoch": 0.05129409214875287, "grad_norm": 8.656856364141374, "learning_rate": 4.271961492178099e-06, "loss": 1.0477, "step": 710 }, { "epoch": 0.05136633734896238, "grad_norm": 14.016353320332312, "learning_rate": 4.27797833935018e-06, "loss": 1.0607, "step": 711 }, { "epoch": 0.05143858254917189, "grad_norm": 9.354404052803442, "learning_rate": 4.283995186522263e-06, "loss": 0.9832, "step": 712 }, { "epoch": 0.0515108277493814, "grad_norm": 9.322096827313718, "learning_rate": 4.290012033694345e-06, "loss": 1.0535, "step": 713 }, { "epoch": 0.051583072949590913, "grad_norm": 10.596536365566692, "learning_rate": 4.296028880866426e-06, "loss": 1.0285, "step": 714 }, { "epoch": 0.05165531814980042, "grad_norm": 8.123883918988454, "learning_rate": 4.302045728038508e-06, "loss": 0.983, "step": 715 }, { "epoch": 0.051727563350009935, "grad_norm": 8.730024806945442, "learning_rate": 4.30806257521059e-06, "loss": 1.0249, "step": 716 }, { "epoch": 0.05179980855021944, "grad_norm": 9.186812809786144, "learning_rate": 4.314079422382672e-06, "loss": 1.0687, "step": 717 }, { "epoch": 0.051872053750428956, "grad_norm": 7.7636981622982635, "learning_rate": 4.3200962695547535e-06, "loss": 0.9607, "step": 718 }, { "epoch": 0.051944298950638464, "grad_norm": 10.352491114008503, "learning_rate": 4.326113116726835e-06, "loss": 1.1309, "step": 719 }, { "epoch": 0.05201654415084798, "grad_norm": 12.768190459970054, "learning_rate": 4.332129963898917e-06, "loss": 1.0337, "step": 720 }, { "epoch": 0.05208878935105749, "grad_norm": 9.804038511887748, "learning_rate": 4.338146811070999e-06, "loss": 0.9905, "step": 721 }, { "epoch": 0.052161034551267, "grad_norm": 9.605447701659534, "learning_rate": 4.344163658243081e-06, "loss": 0.9696, "step": 722 }, { "epoch": 0.052233279751476513, "grad_norm": 8.46824774009344, "learning_rate": 4.350180505415163e-06, "loss": 1.0271, "step": 723 }, { "epoch": 0.05230552495168602, "grad_norm": 13.514060151462393, "learning_rate": 4.356197352587245e-06, "loss": 1.0756, "step": 724 }, { "epoch": 0.052377770151895535, "grad_norm": 8.83597290139902, "learning_rate": 4.362214199759327e-06, "loss": 1.1201, "step": 725 }, { "epoch": 0.05245001535210504, "grad_norm": 9.391846842308226, "learning_rate": 4.3682310469314086e-06, "loss": 0.9494, "step": 726 }, { "epoch": 0.052522260552314556, "grad_norm": 8.72173824168915, "learning_rate": 4.37424789410349e-06, "loss": 1.0673, "step": 727 }, { "epoch": 0.052594505752524064, "grad_norm": 12.666775619305955, "learning_rate": 4.380264741275572e-06, "loss": 0.9918, "step": 728 }, { "epoch": 0.05266675095273358, "grad_norm": 8.8445785727413, "learning_rate": 4.386281588447654e-06, "loss": 1.0334, "step": 729 }, { "epoch": 0.05273899615294309, "grad_norm": 8.323701352849332, "learning_rate": 4.392298435619736e-06, "loss": 1.0555, "step": 730 }, { "epoch": 0.0528112413531526, "grad_norm": 9.019573120707275, "learning_rate": 4.3983152827918175e-06, "loss": 0.9901, "step": 731 }, { "epoch": 0.05288348655336211, "grad_norm": 9.345970680927156, "learning_rate": 4.404332129963899e-06, "loss": 1.0324, "step": 732 }, { "epoch": 0.05295573175357162, "grad_norm": 7.162694702697633, "learning_rate": 4.410348977135981e-06, "loss": 0.9915, "step": 733 }, { "epoch": 0.053027976953781135, "grad_norm": 7.914300611854764, "learning_rate": 4.416365824308063e-06, "loss": 1.0857, "step": 734 }, { "epoch": 0.05310022215399064, "grad_norm": 7.523730057428349, "learning_rate": 4.422382671480145e-06, "loss": 0.9789, "step": 735 }, { "epoch": 0.053172467354200156, "grad_norm": 12.149142111800915, "learning_rate": 4.428399518652226e-06, "loss": 1.0978, "step": 736 }, { "epoch": 0.053244712554409664, "grad_norm": 8.104935976466265, "learning_rate": 4.434416365824308e-06, "loss": 1.0795, "step": 737 }, { "epoch": 0.05331695775461918, "grad_norm": 7.664666730450006, "learning_rate": 4.44043321299639e-06, "loss": 0.9951, "step": 738 }, { "epoch": 0.05338920295482869, "grad_norm": 7.917474672007784, "learning_rate": 4.4464500601684725e-06, "loss": 0.947, "step": 739 }, { "epoch": 0.0534614481550382, "grad_norm": 9.76549882730992, "learning_rate": 4.452466907340554e-06, "loss": 1.086, "step": 740 }, { "epoch": 0.05353369335524771, "grad_norm": 10.1628137594025, "learning_rate": 4.458483754512636e-06, "loss": 1.089, "step": 741 }, { "epoch": 0.05360593855545722, "grad_norm": 8.146132936033112, "learning_rate": 4.464500601684718e-06, "loss": 1.0311, "step": 742 }, { "epoch": 0.053678183755666735, "grad_norm": 8.603981391691535, "learning_rate": 4.4705174488568e-06, "loss": 1.1101, "step": 743 }, { "epoch": 0.05375042895587624, "grad_norm": 9.636689061187546, "learning_rate": 4.4765342960288814e-06, "loss": 1.012, "step": 744 }, { "epoch": 0.053822674156085756, "grad_norm": 7.843362623884979, "learning_rate": 4.482551143200963e-06, "loss": 1.0967, "step": 745 }, { "epoch": 0.053894919356295264, "grad_norm": 7.373402826672686, "learning_rate": 4.488567990373045e-06, "loss": 0.9351, "step": 746 }, { "epoch": 0.05396716455650478, "grad_norm": 9.904050564810946, "learning_rate": 4.494584837545127e-06, "loss": 1.0814, "step": 747 }, { "epoch": 0.054039409756714285, "grad_norm": 8.581422710417153, "learning_rate": 4.5006016847172085e-06, "loss": 0.9902, "step": 748 }, { "epoch": 0.0541116549569238, "grad_norm": 8.557377751931828, "learning_rate": 4.50661853188929e-06, "loss": 0.9985, "step": 749 }, { "epoch": 0.05418390015713331, "grad_norm": 7.242884597231849, "learning_rate": 4.512635379061372e-06, "loss": 0.9048, "step": 750 }, { "epoch": 0.05425614535734282, "grad_norm": 7.61640921504712, "learning_rate": 4.518652226233454e-06, "loss": 1.1279, "step": 751 }, { "epoch": 0.054328390557552335, "grad_norm": 7.8026801077587224, "learning_rate": 4.524669073405536e-06, "loss": 0.9339, "step": 752 }, { "epoch": 0.05440063575776184, "grad_norm": 8.192818581303527, "learning_rate": 4.5306859205776174e-06, "loss": 1.0654, "step": 753 }, { "epoch": 0.054472880957971356, "grad_norm": 8.035804258781395, "learning_rate": 4.536702767749699e-06, "loss": 0.9956, "step": 754 }, { "epoch": 0.054545126158180864, "grad_norm": 10.417067578547965, "learning_rate": 4.542719614921781e-06, "loss": 1.0459, "step": 755 }, { "epoch": 0.05461737135839038, "grad_norm": 8.39069819640443, "learning_rate": 4.548736462093864e-06, "loss": 1.0279, "step": 756 }, { "epoch": 0.054689616558599885, "grad_norm": 9.670424640724605, "learning_rate": 4.554753309265945e-06, "loss": 1.0886, "step": 757 }, { "epoch": 0.0547618617588094, "grad_norm": 7.956883110274173, "learning_rate": 4.560770156438027e-06, "loss": 0.9665, "step": 758 }, { "epoch": 0.05483410695901891, "grad_norm": 7.88150603372345, "learning_rate": 4.566787003610109e-06, "loss": 1.0568, "step": 759 }, { "epoch": 0.05490635215922842, "grad_norm": 8.384917806314641, "learning_rate": 4.572803850782191e-06, "loss": 1.0313, "step": 760 }, { "epoch": 0.054978597359437935, "grad_norm": 9.944393046534557, "learning_rate": 4.578820697954272e-06, "loss": 0.9444, "step": 761 }, { "epoch": 0.05505084255964744, "grad_norm": 8.834773060291672, "learning_rate": 4.584837545126354e-06, "loss": 0.9721, "step": 762 }, { "epoch": 0.055123087759856956, "grad_norm": 8.06190997189993, "learning_rate": 4.590854392298436e-06, "loss": 0.9341, "step": 763 }, { "epoch": 0.055195332960066464, "grad_norm": 9.465605865287898, "learning_rate": 4.596871239470518e-06, "loss": 1.093, "step": 764 }, { "epoch": 0.05526757816027598, "grad_norm": 8.302863249926055, "learning_rate": 4.6028880866426e-06, "loss": 1.0282, "step": 765 }, { "epoch": 0.055339823360485485, "grad_norm": 9.383668274488636, "learning_rate": 4.608904933814681e-06, "loss": 0.9146, "step": 766 }, { "epoch": 0.055412068560695, "grad_norm": 8.615425434644646, "learning_rate": 4.614921780986763e-06, "loss": 1.0583, "step": 767 }, { "epoch": 0.055484313760904506, "grad_norm": 9.367758840869229, "learning_rate": 4.620938628158845e-06, "loss": 0.9924, "step": 768 }, { "epoch": 0.05555655896111402, "grad_norm": 8.10584007056096, "learning_rate": 4.626955475330927e-06, "loss": 0.9683, "step": 769 }, { "epoch": 0.055628804161323535, "grad_norm": 8.034202418927517, "learning_rate": 4.6329723225030085e-06, "loss": 1.0607, "step": 770 }, { "epoch": 0.05570104936153304, "grad_norm": 8.792364397128598, "learning_rate": 4.63898916967509e-06, "loss": 1.0136, "step": 771 }, { "epoch": 0.055773294561742556, "grad_norm": 10.012163013211731, "learning_rate": 4.645006016847172e-06, "loss": 0.9864, "step": 772 }, { "epoch": 0.055845539761952064, "grad_norm": 6.289747876085858, "learning_rate": 4.651022864019255e-06, "loss": 1.0429, "step": 773 }, { "epoch": 0.05591778496216158, "grad_norm": 6.846085937941309, "learning_rate": 4.6570397111913365e-06, "loss": 1.0789, "step": 774 }, { "epoch": 0.055990030162371085, "grad_norm": 9.248106891634718, "learning_rate": 4.663056558363418e-06, "loss": 1.0348, "step": 775 }, { "epoch": 0.0560622753625806, "grad_norm": 8.56508447109953, "learning_rate": 4.6690734055355e-06, "loss": 1.0918, "step": 776 }, { "epoch": 0.056134520562790106, "grad_norm": 8.079466481488012, "learning_rate": 4.675090252707582e-06, "loss": 1.0892, "step": 777 }, { "epoch": 0.05620676576299962, "grad_norm": 8.794290979550757, "learning_rate": 4.681107099879663e-06, "loss": 1.0612, "step": 778 }, { "epoch": 0.056279010963209135, "grad_norm": 8.284525903502312, "learning_rate": 4.687123947051745e-06, "loss": 1.0156, "step": 779 }, { "epoch": 0.05635125616341864, "grad_norm": 9.529624925499594, "learning_rate": 4.693140794223827e-06, "loss": 1.0012, "step": 780 }, { "epoch": 0.056423501363628156, "grad_norm": 7.985110971232673, "learning_rate": 4.699157641395909e-06, "loss": 0.9738, "step": 781 }, { "epoch": 0.056495746563837664, "grad_norm": 9.213232425886066, "learning_rate": 4.705174488567991e-06, "loss": 0.9615, "step": 782 }, { "epoch": 0.05656799176404718, "grad_norm": 7.61476148265618, "learning_rate": 4.7111913357400725e-06, "loss": 1.1128, "step": 783 }, { "epoch": 0.056640236964256685, "grad_norm": 9.783868896105052, "learning_rate": 4.717208182912154e-06, "loss": 1.1889, "step": 784 }, { "epoch": 0.0567124821644662, "grad_norm": 6.837120154393605, "learning_rate": 4.723225030084236e-06, "loss": 0.9801, "step": 785 }, { "epoch": 0.056784727364675706, "grad_norm": 7.592437253911938, "learning_rate": 4.729241877256318e-06, "loss": 1.0858, "step": 786 }, { "epoch": 0.05685697256488522, "grad_norm": 9.652594704725326, "learning_rate": 4.7352587244284e-06, "loss": 1.0016, "step": 787 }, { "epoch": 0.056929217765094735, "grad_norm": 8.70441310808215, "learning_rate": 4.741275571600481e-06, "loss": 1.0535, "step": 788 }, { "epoch": 0.05700146296530424, "grad_norm": 7.119597679086969, "learning_rate": 4.747292418772563e-06, "loss": 0.946, "step": 789 }, { "epoch": 0.057073708165513756, "grad_norm": 10.529328213353011, "learning_rate": 4.753309265944646e-06, "loss": 1.0428, "step": 790 }, { "epoch": 0.057145953365723264, "grad_norm": 9.155871699202956, "learning_rate": 4.7593261131167276e-06, "loss": 1.1032, "step": 791 }, { "epoch": 0.05721819856593278, "grad_norm": 7.266973161212394, "learning_rate": 4.765342960288809e-06, "loss": 1.0256, "step": 792 }, { "epoch": 0.057290443766142285, "grad_norm": 9.224638115625881, "learning_rate": 4.771359807460891e-06, "loss": 1.0262, "step": 793 }, { "epoch": 0.0573626889663518, "grad_norm": 8.7826213376779, "learning_rate": 4.777376654632973e-06, "loss": 1.1238, "step": 794 }, { "epoch": 0.057434934166561306, "grad_norm": 6.738954619887011, "learning_rate": 4.783393501805055e-06, "loss": 0.9191, "step": 795 }, { "epoch": 0.05750717936677082, "grad_norm": 9.934591383495913, "learning_rate": 4.7894103489771365e-06, "loss": 1.0798, "step": 796 }, { "epoch": 0.05757942456698033, "grad_norm": 9.931799447169132, "learning_rate": 4.795427196149218e-06, "loss": 0.9937, "step": 797 }, { "epoch": 0.05765166976718984, "grad_norm": 9.732978242989944, "learning_rate": 4.8014440433213e-06, "loss": 1.1232, "step": 798 }, { "epoch": 0.057723914967399356, "grad_norm": 7.319465384633341, "learning_rate": 4.807460890493382e-06, "loss": 1.0508, "step": 799 }, { "epoch": 0.057796160167608863, "grad_norm": 8.430251787618014, "learning_rate": 4.813477737665464e-06, "loss": 1.0848, "step": 800 }, { "epoch": 0.05786840536781838, "grad_norm": 9.230553057168944, "learning_rate": 4.819494584837545e-06, "loss": 1.0206, "step": 801 }, { "epoch": 0.057940650568027885, "grad_norm": 9.489118668659916, "learning_rate": 4.825511432009627e-06, "loss": 0.9759, "step": 802 }, { "epoch": 0.0580128957682374, "grad_norm": 7.600672702129881, "learning_rate": 4.831528279181709e-06, "loss": 0.9895, "step": 803 }, { "epoch": 0.058085140968446906, "grad_norm": 7.38613878027911, "learning_rate": 4.837545126353791e-06, "loss": 0.9876, "step": 804 }, { "epoch": 0.05815738616865642, "grad_norm": 7.417195790593376, "learning_rate": 4.8435619735258725e-06, "loss": 0.9801, "step": 805 }, { "epoch": 0.05822963136886593, "grad_norm": 9.911650428106531, "learning_rate": 4.849578820697955e-06, "loss": 1.0259, "step": 806 }, { "epoch": 0.05830187656907544, "grad_norm": 7.952223211398574, "learning_rate": 4.855595667870037e-06, "loss": 0.9565, "step": 807 }, { "epoch": 0.058374121769284956, "grad_norm": 8.41290768319938, "learning_rate": 4.861612515042119e-06, "loss": 1.0988, "step": 808 }, { "epoch": 0.058446366969494463, "grad_norm": 7.655991904131875, "learning_rate": 4.8676293622142004e-06, "loss": 0.9828, "step": 809 }, { "epoch": 0.05851861216970398, "grad_norm": 8.30525982123631, "learning_rate": 4.873646209386282e-06, "loss": 1.0781, "step": 810 }, { "epoch": 0.058590857369913485, "grad_norm": 9.406545235192288, "learning_rate": 4.879663056558364e-06, "loss": 1.1418, "step": 811 }, { "epoch": 0.058663102570123, "grad_norm": 9.137862663534465, "learning_rate": 4.885679903730446e-06, "loss": 1.1007, "step": 812 }, { "epoch": 0.058735347770332506, "grad_norm": 7.419520339885498, "learning_rate": 4.8916967509025275e-06, "loss": 1.1, "step": 813 }, { "epoch": 0.05880759297054202, "grad_norm": 11.07541536193804, "learning_rate": 4.897713598074609e-06, "loss": 1.0524, "step": 814 }, { "epoch": 0.05887983817075153, "grad_norm": 7.934490684696599, "learning_rate": 4.903730445246691e-06, "loss": 0.973, "step": 815 }, { "epoch": 0.05895208337096104, "grad_norm": 8.844085147829071, "learning_rate": 4.909747292418773e-06, "loss": 1.0354, "step": 816 }, { "epoch": 0.059024328571170556, "grad_norm": 8.15805419610593, "learning_rate": 4.915764139590855e-06, "loss": 0.9903, "step": 817 }, { "epoch": 0.05909657377138006, "grad_norm": 8.858260580812585, "learning_rate": 4.9217809867629364e-06, "loss": 0.9559, "step": 818 }, { "epoch": 0.05916881897158958, "grad_norm": 9.136047782251898, "learning_rate": 4.927797833935018e-06, "loss": 1.0515, "step": 819 }, { "epoch": 0.059241064171799085, "grad_norm": 8.056687263122587, "learning_rate": 4.9338146811071e-06, "loss": 1.0371, "step": 820 }, { "epoch": 0.0593133093720086, "grad_norm": 7.852273755097946, "learning_rate": 4.939831528279182e-06, "loss": 0.9309, "step": 821 }, { "epoch": 0.059385554572218106, "grad_norm": 7.340701505311865, "learning_rate": 4.9458483754512636e-06, "loss": 1.0132, "step": 822 }, { "epoch": 0.05945779977242762, "grad_norm": 8.835611109687921, "learning_rate": 4.951865222623346e-06, "loss": 1.0406, "step": 823 }, { "epoch": 0.05953004497263713, "grad_norm": 9.530680855189548, "learning_rate": 4.957882069795428e-06, "loss": 0.8972, "step": 824 }, { "epoch": 0.05960229017284664, "grad_norm": 9.131000374914004, "learning_rate": 4.96389891696751e-06, "loss": 1.0486, "step": 825 }, { "epoch": 0.05967453537305615, "grad_norm": 10.041179555472274, "learning_rate": 4.9699157641395915e-06, "loss": 1.0356, "step": 826 }, { "epoch": 0.05974678057326566, "grad_norm": 10.84899669877898, "learning_rate": 4.975932611311673e-06, "loss": 1.0789, "step": 827 }, { "epoch": 0.05981902577347518, "grad_norm": 9.552454639957077, "learning_rate": 4.981949458483755e-06, "loss": 1.0654, "step": 828 }, { "epoch": 0.059891270973684685, "grad_norm": 9.346932267983203, "learning_rate": 4.987966305655837e-06, "loss": 0.9542, "step": 829 }, { "epoch": 0.0599635161738942, "grad_norm": 8.001770777227318, "learning_rate": 4.993983152827919e-06, "loss": 0.9286, "step": 830 }, { "epoch": 0.060035761374103706, "grad_norm": 8.383849976573263, "learning_rate": 5e-06, "loss": 1.0398, "step": 831 }, { "epoch": 0.06010800657431322, "grad_norm": 9.356566350018829, "learning_rate": 4.999999982888471e-06, "loss": 1.0202, "step": 832 }, { "epoch": 0.06018025177452273, "grad_norm": 8.695614325399543, "learning_rate": 4.999999931553883e-06, "loss": 1.1094, "step": 833 }, { "epoch": 0.06025249697473224, "grad_norm": 7.5701159755838, "learning_rate": 4.999999845996237e-06, "loss": 1.0123, "step": 834 }, { "epoch": 0.06032474217494175, "grad_norm": 7.429380456654953, "learning_rate": 4.999999726215534e-06, "loss": 1.0539, "step": 835 }, { "epoch": 0.06039698737515126, "grad_norm": 10.391040214330799, "learning_rate": 4.999999572211776e-06, "loss": 1.0087, "step": 836 }, { "epoch": 0.06046923257536078, "grad_norm": 8.873596510257578, "learning_rate": 4.999999383984965e-06, "loss": 1.0038, "step": 837 }, { "epoch": 0.060541477775570285, "grad_norm": 6.673910719327221, "learning_rate": 4.999999161535104e-06, "loss": 0.9166, "step": 838 }, { "epoch": 0.0606137229757798, "grad_norm": 8.057874668747715, "learning_rate": 4.999998904862195e-06, "loss": 0.9842, "step": 839 }, { "epoch": 0.060685968175989306, "grad_norm": 9.993227763603405, "learning_rate": 4.999998613966243e-06, "loss": 1.0604, "step": 840 }, { "epoch": 0.06075821337619882, "grad_norm": 10.262995599449335, "learning_rate": 4.99999828884725e-06, "loss": 1.1527, "step": 841 }, { "epoch": 0.06083045857640833, "grad_norm": 8.512931859829354, "learning_rate": 4.999997929505222e-06, "loss": 1.0186, "step": 842 }, { "epoch": 0.06090270377661784, "grad_norm": 7.9787178676937724, "learning_rate": 4.999997535940163e-06, "loss": 1.0261, "step": 843 }, { "epoch": 0.06097494897682735, "grad_norm": 9.634398392492077, "learning_rate": 4.999997108152079e-06, "loss": 1.0695, "step": 844 }, { "epoch": 0.06104719417703686, "grad_norm": 9.71772942290115, "learning_rate": 4.999996646140976e-06, "loss": 1.0477, "step": 845 }, { "epoch": 0.06111943937724638, "grad_norm": 10.38466772952024, "learning_rate": 4.9999961499068605e-06, "loss": 1.0145, "step": 846 }, { "epoch": 0.061191684577455885, "grad_norm": 8.186069043160737, "learning_rate": 4.999995619449739e-06, "loss": 1.0377, "step": 847 }, { "epoch": 0.0612639297776654, "grad_norm": 11.46558172135127, "learning_rate": 4.999995054769617e-06, "loss": 1.1109, "step": 848 }, { "epoch": 0.061336174977874906, "grad_norm": 7.605505364735952, "learning_rate": 4.999994455866506e-06, "loss": 0.9259, "step": 849 }, { "epoch": 0.06140842017808442, "grad_norm": 10.440657269480212, "learning_rate": 4.9999938227404095e-06, "loss": 0.9584, "step": 850 }, { "epoch": 0.06148066537829393, "grad_norm": 11.054715105972598, "learning_rate": 4.9999931553913405e-06, "loss": 1.0384, "step": 851 }, { "epoch": 0.06155291057850344, "grad_norm": 7.986856153444878, "learning_rate": 4.999992453819306e-06, "loss": 1.0237, "step": 852 }, { "epoch": 0.06162515577871295, "grad_norm": 9.974267658320796, "learning_rate": 4.999991718024316e-06, "loss": 1.0684, "step": 853 }, { "epoch": 0.06169740097892246, "grad_norm": 8.93489047281669, "learning_rate": 4.99999094800638e-06, "loss": 1.0416, "step": 854 }, { "epoch": 0.06176964617913197, "grad_norm": 9.728469551374035, "learning_rate": 4.999990143765509e-06, "loss": 0.9856, "step": 855 }, { "epoch": 0.061841891379341485, "grad_norm": 7.297362742163957, "learning_rate": 4.999989305301715e-06, "loss": 1.0276, "step": 856 }, { "epoch": 0.061914136579551, "grad_norm": 9.242987784323441, "learning_rate": 4.999988432615008e-06, "loss": 1.0953, "step": 857 }, { "epoch": 0.061986381779760506, "grad_norm": 7.843438738844079, "learning_rate": 4.9999875257054e-06, "loss": 1.0238, "step": 858 }, { "epoch": 0.06205862697997002, "grad_norm": 7.737683014561781, "learning_rate": 4.999986584572904e-06, "loss": 1.0412, "step": 859 }, { "epoch": 0.06213087218017953, "grad_norm": 6.564655930741052, "learning_rate": 4.9999856092175335e-06, "loss": 0.9772, "step": 860 }, { "epoch": 0.06220311738038904, "grad_norm": 9.745523452126983, "learning_rate": 4.999984599639301e-06, "loss": 1.1154, "step": 861 }, { "epoch": 0.06227536258059855, "grad_norm": 8.975602884684386, "learning_rate": 4.999983555838219e-06, "loss": 1.0276, "step": 862 }, { "epoch": 0.06234760778080806, "grad_norm": 7.781949544201382, "learning_rate": 4.999982477814305e-06, "loss": 1.0596, "step": 863 }, { "epoch": 0.06241985298101757, "grad_norm": 9.069471321890475, "learning_rate": 4.999981365567571e-06, "loss": 0.9431, "step": 864 }, { "epoch": 0.062492098181227085, "grad_norm": 7.395780092146245, "learning_rate": 4.999980219098034e-06, "loss": 1.0289, "step": 865 }, { "epoch": 0.0625643433814366, "grad_norm": 8.524710695610008, "learning_rate": 4.9999790384057076e-06, "loss": 1.0276, "step": 866 }, { "epoch": 0.06263658858164611, "grad_norm": 11.062546896969804, "learning_rate": 4.99997782349061e-06, "loss": 1.0354, "step": 867 }, { "epoch": 0.06270883378185561, "grad_norm": 8.030277178435375, "learning_rate": 4.999976574352757e-06, "loss": 1.0569, "step": 868 }, { "epoch": 0.06278107898206513, "grad_norm": 7.404003720126462, "learning_rate": 4.9999752909921665e-06, "loss": 1.0232, "step": 869 }, { "epoch": 0.06285332418227464, "grad_norm": 7.328006832902546, "learning_rate": 4.999973973408855e-06, "loss": 0.8951, "step": 870 }, { "epoch": 0.06292556938248416, "grad_norm": 10.549071313561837, "learning_rate": 4.999972621602841e-06, "loss": 1.054, "step": 871 }, { "epoch": 0.06299781458269366, "grad_norm": 9.013728373671793, "learning_rate": 4.999971235574142e-06, "loss": 1.14, "step": 872 }, { "epoch": 0.06307005978290317, "grad_norm": 8.549061720377747, "learning_rate": 4.9999698153227796e-06, "loss": 1.0556, "step": 873 }, { "epoch": 0.06314230498311268, "grad_norm": 7.788930511121541, "learning_rate": 4.99996836084877e-06, "loss": 1.0282, "step": 874 }, { "epoch": 0.0632145501833222, "grad_norm": 7.3000538863192, "learning_rate": 4.999966872152135e-06, "loss": 1.0105, "step": 875 }, { "epoch": 0.06328679538353171, "grad_norm": 10.042705139237729, "learning_rate": 4.999965349232895e-06, "loss": 1.0716, "step": 876 }, { "epoch": 0.06335904058374121, "grad_norm": 8.150983374309, "learning_rate": 4.999963792091071e-06, "loss": 1.0397, "step": 877 }, { "epoch": 0.06343128578395073, "grad_norm": 7.242641792739681, "learning_rate": 4.999962200726683e-06, "loss": 1.051, "step": 878 }, { "epoch": 0.06350353098416024, "grad_norm": 9.301658105190675, "learning_rate": 4.999960575139753e-06, "loss": 1.0827, "step": 879 }, { "epoch": 0.06357577618436976, "grad_norm": 9.172482212119064, "learning_rate": 4.9999589153303044e-06, "loss": 1.0484, "step": 880 }, { "epoch": 0.06364802138457926, "grad_norm": 11.360718197752728, "learning_rate": 4.999957221298359e-06, "loss": 1.0212, "step": 881 }, { "epoch": 0.06372026658478877, "grad_norm": 9.504804901898014, "learning_rate": 4.999955493043941e-06, "loss": 1.111, "step": 882 }, { "epoch": 0.06379251178499828, "grad_norm": 9.679261476071817, "learning_rate": 4.999953730567073e-06, "loss": 1.0514, "step": 883 }, { "epoch": 0.0638647569852078, "grad_norm": 8.424025789980341, "learning_rate": 4.999951933867779e-06, "loss": 1.0785, "step": 884 }, { "epoch": 0.06393700218541731, "grad_norm": 9.488774543964642, "learning_rate": 4.999950102946085e-06, "loss": 0.9919, "step": 885 }, { "epoch": 0.06400924738562681, "grad_norm": 10.413038634144762, "learning_rate": 4.9999482378020146e-06, "loss": 1.1535, "step": 886 }, { "epoch": 0.06408149258583633, "grad_norm": 7.418302491340411, "learning_rate": 4.999946338435595e-06, "loss": 0.968, "step": 887 }, { "epoch": 0.06415373778604584, "grad_norm": 6.8370677079088, "learning_rate": 4.99994440484685e-06, "loss": 0.9832, "step": 888 }, { "epoch": 0.06422598298625536, "grad_norm": 7.108255352682228, "learning_rate": 4.999942437035807e-06, "loss": 1.0225, "step": 889 }, { "epoch": 0.06429822818646486, "grad_norm": 9.377986178058665, "learning_rate": 4.999940435002494e-06, "loss": 1.0764, "step": 890 }, { "epoch": 0.06437047338667437, "grad_norm": 10.18950834546327, "learning_rate": 4.999938398746937e-06, "loss": 1.0144, "step": 891 }, { "epoch": 0.06444271858688388, "grad_norm": 7.932930659912544, "learning_rate": 4.999936328269165e-06, "loss": 1.0333, "step": 892 }, { "epoch": 0.0645149637870934, "grad_norm": 10.74468472394116, "learning_rate": 4.999934223569205e-06, "loss": 1.0492, "step": 893 }, { "epoch": 0.0645872089873029, "grad_norm": 7.37181513924933, "learning_rate": 4.999932084647087e-06, "loss": 0.9474, "step": 894 }, { "epoch": 0.06465945418751241, "grad_norm": 7.410759048195411, "learning_rate": 4.999929911502839e-06, "loss": 1.0032, "step": 895 }, { "epoch": 0.06473169938772193, "grad_norm": 6.7098335184061435, "learning_rate": 4.9999277041364925e-06, "loss": 0.9842, "step": 896 }, { "epoch": 0.06480394458793144, "grad_norm": 7.760532973951913, "learning_rate": 4.9999254625480765e-06, "loss": 1.0158, "step": 897 }, { "epoch": 0.06487618978814096, "grad_norm": 8.460535323428676, "learning_rate": 4.999923186737622e-06, "loss": 1.066, "step": 898 }, { "epoch": 0.06494843498835046, "grad_norm": 8.289887152664157, "learning_rate": 4.99992087670516e-06, "loss": 1.0261, "step": 899 }, { "epoch": 0.06502068018855997, "grad_norm": 7.523724480183398, "learning_rate": 4.999918532450722e-06, "loss": 0.9096, "step": 900 }, { "epoch": 0.06509292538876948, "grad_norm": 7.164294123675224, "learning_rate": 4.999916153974341e-06, "loss": 0.9929, "step": 901 }, { "epoch": 0.065165170588979, "grad_norm": 7.574298576890454, "learning_rate": 4.999913741276049e-06, "loss": 1.0287, "step": 902 }, { "epoch": 0.0652374157891885, "grad_norm": 8.570323182536642, "learning_rate": 4.999911294355878e-06, "loss": 1.0936, "step": 903 }, { "epoch": 0.06530966098939801, "grad_norm": 8.307807687898269, "learning_rate": 4.999908813213864e-06, "loss": 1.0067, "step": 904 }, { "epoch": 0.06538190618960753, "grad_norm": 8.896167281327577, "learning_rate": 4.999906297850038e-06, "loss": 0.9323, "step": 905 }, { "epoch": 0.06545415138981704, "grad_norm": 8.070222689783431, "learning_rate": 4.999903748264437e-06, "loss": 1.0254, "step": 906 }, { "epoch": 0.06552639659002656, "grad_norm": 6.540582587281861, "learning_rate": 4.9999011644570935e-06, "loss": 0.925, "step": 907 }, { "epoch": 0.06559864179023606, "grad_norm": 9.915589175203168, "learning_rate": 4.9998985464280445e-06, "loss": 1.0844, "step": 908 }, { "epoch": 0.06567088699044557, "grad_norm": 7.974578521018564, "learning_rate": 4.999895894177325e-06, "loss": 0.9083, "step": 909 }, { "epoch": 0.06574313219065508, "grad_norm": 8.507272638404192, "learning_rate": 4.999893207704973e-06, "loss": 1.0302, "step": 910 }, { "epoch": 0.0658153773908646, "grad_norm": 8.456221092834461, "learning_rate": 4.999890487011023e-06, "loss": 1.0668, "step": 911 }, { "epoch": 0.0658876225910741, "grad_norm": 8.661530904796571, "learning_rate": 4.999887732095514e-06, "loss": 0.9864, "step": 912 }, { "epoch": 0.06595986779128361, "grad_norm": 7.278353790447794, "learning_rate": 4.999884942958483e-06, "loss": 0.9872, "step": 913 }, { "epoch": 0.06603211299149313, "grad_norm": 7.371029835854991, "learning_rate": 4.999882119599967e-06, "loss": 1.0154, "step": 914 }, { "epoch": 0.06610435819170264, "grad_norm": 7.624355851777973, "learning_rate": 4.999879262020007e-06, "loss": 1.0608, "step": 915 }, { "epoch": 0.06617660339191216, "grad_norm": 10.11638698872114, "learning_rate": 4.99987637021864e-06, "loss": 0.9501, "step": 916 }, { "epoch": 0.06624884859212166, "grad_norm": 9.02544282371023, "learning_rate": 4.999873444195908e-06, "loss": 1.0702, "step": 917 }, { "epoch": 0.06632109379233117, "grad_norm": 7.600722890906856, "learning_rate": 4.999870483951848e-06, "loss": 1.0862, "step": 918 }, { "epoch": 0.06639333899254068, "grad_norm": 10.66570214043302, "learning_rate": 4.999867489486503e-06, "loss": 0.8919, "step": 919 }, { "epoch": 0.0664655841927502, "grad_norm": 10.827562722143572, "learning_rate": 4.999864460799912e-06, "loss": 1.0554, "step": 920 }, { "epoch": 0.0665378293929597, "grad_norm": 6.8497385796553685, "learning_rate": 4.999861397892119e-06, "loss": 0.9239, "step": 921 }, { "epoch": 0.06661007459316921, "grad_norm": 9.821056741388785, "learning_rate": 4.999858300763164e-06, "loss": 1.043, "step": 922 }, { "epoch": 0.06668231979337873, "grad_norm": 9.443290096372042, "learning_rate": 4.999855169413089e-06, "loss": 0.9471, "step": 923 }, { "epoch": 0.06675456499358824, "grad_norm": 8.543147806908388, "learning_rate": 4.999852003841939e-06, "loss": 1.0847, "step": 924 }, { "epoch": 0.06682681019379776, "grad_norm": 8.453149820174975, "learning_rate": 4.999848804049755e-06, "loss": 0.967, "step": 925 }, { "epoch": 0.06689905539400726, "grad_norm": 7.731832066969407, "learning_rate": 4.999845570036582e-06, "loss": 1.026, "step": 926 }, { "epoch": 0.06697130059421677, "grad_norm": 7.241076535439316, "learning_rate": 4.9998423018024655e-06, "loss": 0.9694, "step": 927 }, { "epoch": 0.06704354579442628, "grad_norm": 7.5825891933106675, "learning_rate": 4.9998389993474475e-06, "loss": 0.8292, "step": 928 }, { "epoch": 0.0671157909946358, "grad_norm": 7.887007263559446, "learning_rate": 4.999835662671575e-06, "loss": 0.9163, "step": 929 }, { "epoch": 0.0671880361948453, "grad_norm": 8.403517722205498, "learning_rate": 4.999832291774894e-06, "loss": 0.9422, "step": 930 }, { "epoch": 0.06726028139505481, "grad_norm": 8.520187191764911, "learning_rate": 4.999828886657449e-06, "loss": 1.0747, "step": 931 }, { "epoch": 0.06733252659526433, "grad_norm": 9.599590499409215, "learning_rate": 4.999825447319288e-06, "loss": 1.1415, "step": 932 }, { "epoch": 0.06740477179547384, "grad_norm": 9.617166477307444, "learning_rate": 4.999821973760457e-06, "loss": 0.9436, "step": 933 }, { "epoch": 0.06747701699568336, "grad_norm": 7.967325958467295, "learning_rate": 4.999818465981004e-06, "loss": 1.0447, "step": 934 }, { "epoch": 0.06754926219589286, "grad_norm": 7.700809773062095, "learning_rate": 4.9998149239809785e-06, "loss": 0.9577, "step": 935 }, { "epoch": 0.06762150739610237, "grad_norm": 13.457300546108096, "learning_rate": 4.999811347760427e-06, "loss": 1.0158, "step": 936 }, { "epoch": 0.06769375259631188, "grad_norm": 17.125825959495263, "learning_rate": 4.999807737319399e-06, "loss": 1.0227, "step": 937 }, { "epoch": 0.0677659977965214, "grad_norm": 8.145747529835688, "learning_rate": 4.999804092657944e-06, "loss": 1.1189, "step": 938 }, { "epoch": 0.0678382429967309, "grad_norm": 168.07717721332662, "learning_rate": 4.999800413776112e-06, "loss": 1.291, "step": 939 }, { "epoch": 0.06791048819694041, "grad_norm": 1252.7281229380937, "learning_rate": 4.999796700673953e-06, "loss": 4.4331, "step": 940 }, { "epoch": 0.06798273339714993, "grad_norm": 579.5830128204242, "learning_rate": 4.999792953351519e-06, "loss": 4.2564, "step": 941 }, { "epoch": 0.06805497859735944, "grad_norm": 1366.9764994322325, "learning_rate": 4.9997891718088595e-06, "loss": 8.2551, "step": 942 }, { "epoch": 0.06812722379756894, "grad_norm": 167.11694773047407, "learning_rate": 4.999785356046028e-06, "loss": 2.9589, "step": 943 }, { "epoch": 0.06819946899777846, "grad_norm": 140.71637309229513, "learning_rate": 4.999781506063076e-06, "loss": 2.4998, "step": 944 }, { "epoch": 0.06827171419798797, "grad_norm": 87.14427972208647, "learning_rate": 4.999777621860055e-06, "loss": 1.9832, "step": 945 }, { "epoch": 0.06834395939819748, "grad_norm": 47.80717137381823, "learning_rate": 4.99977370343702e-06, "loss": 1.6568, "step": 946 }, { "epoch": 0.068416204598407, "grad_norm": 42.183343255865665, "learning_rate": 4.999769750794024e-06, "loss": 1.4574, "step": 947 }, { "epoch": 0.0684884497986165, "grad_norm": 32.07011552162406, "learning_rate": 4.999765763931122e-06, "loss": 1.6063, "step": 948 }, { "epoch": 0.06856069499882601, "grad_norm": 44.321087132971336, "learning_rate": 4.999761742848366e-06, "loss": 1.4548, "step": 949 }, { "epoch": 0.06863294019903553, "grad_norm": 31.97084051999083, "learning_rate": 4.999757687545813e-06, "loss": 1.3405, "step": 950 }, { "epoch": 0.06870518539924504, "grad_norm": 28.5199087107777, "learning_rate": 4.999753598023518e-06, "loss": 1.3621, "step": 951 }, { "epoch": 0.06877743059945454, "grad_norm": 24.756747472359052, "learning_rate": 4.999749474281538e-06, "loss": 1.3252, "step": 952 }, { "epoch": 0.06884967579966406, "grad_norm": 23.519081971813335, "learning_rate": 4.999745316319928e-06, "loss": 1.3094, "step": 953 }, { "epoch": 0.06892192099987357, "grad_norm": 18.22155132971197, "learning_rate": 4.999741124138746e-06, "loss": 1.3866, "step": 954 }, { "epoch": 0.06899416620008308, "grad_norm": 15.798736734228498, "learning_rate": 4.999736897738049e-06, "loss": 1.2714, "step": 955 }, { "epoch": 0.0690664114002926, "grad_norm": 14.441216060757368, "learning_rate": 4.999732637117895e-06, "loss": 1.4716, "step": 956 }, { "epoch": 0.0691386566005021, "grad_norm": 15.004367446862554, "learning_rate": 4.999728342278341e-06, "loss": 1.2103, "step": 957 }, { "epoch": 0.06921090180071161, "grad_norm": 14.339982612205615, "learning_rate": 4.999724013219448e-06, "loss": 1.1751, "step": 958 }, { "epoch": 0.06928314700092113, "grad_norm": 16.341533444938122, "learning_rate": 4.999719649941274e-06, "loss": 1.3727, "step": 959 }, { "epoch": 0.06935539220113064, "grad_norm": 12.866704249615374, "learning_rate": 4.999715252443879e-06, "loss": 1.1248, "step": 960 }, { "epoch": 0.06942763740134014, "grad_norm": 12.487614713796802, "learning_rate": 4.999710820727322e-06, "loss": 1.3028, "step": 961 }, { "epoch": 0.06949988260154966, "grad_norm": 19.961318183747597, "learning_rate": 4.9997063547916655e-06, "loss": 1.3402, "step": 962 }, { "epoch": 0.06957212780175917, "grad_norm": 9.612443139512013, "learning_rate": 4.99970185463697e-06, "loss": 1.1635, "step": 963 }, { "epoch": 0.06964437300196868, "grad_norm": 14.32953439684184, "learning_rate": 4.999697320263297e-06, "loss": 1.0558, "step": 964 }, { "epoch": 0.0697166182021782, "grad_norm": 12.044072915040514, "learning_rate": 4.999692751670708e-06, "loss": 1.1984, "step": 965 }, { "epoch": 0.0697888634023877, "grad_norm": 9.324575902381692, "learning_rate": 4.999688148859268e-06, "loss": 1.2023, "step": 966 }, { "epoch": 0.06986110860259721, "grad_norm": 13.371500002957841, "learning_rate": 4.999683511829036e-06, "loss": 1.2055, "step": 967 }, { "epoch": 0.06993335380280673, "grad_norm": 11.212923795075916, "learning_rate": 4.999678840580079e-06, "loss": 1.1462, "step": 968 }, { "epoch": 0.07000559900301624, "grad_norm": 9.304082765954023, "learning_rate": 4.9996741351124585e-06, "loss": 1.2165, "step": 969 }, { "epoch": 0.07007784420322574, "grad_norm": 15.671522537786293, "learning_rate": 4.9996693954262395e-06, "loss": 1.1787, "step": 970 }, { "epoch": 0.07015008940343526, "grad_norm": 7.554205010203367, "learning_rate": 4.999664621521489e-06, "loss": 1.1306, "step": 971 }, { "epoch": 0.07022233460364477, "grad_norm": 15.969241618074582, "learning_rate": 4.99965981339827e-06, "loss": 1.0527, "step": 972 }, { "epoch": 0.07029457980385428, "grad_norm": 9.581639104159752, "learning_rate": 4.999654971056649e-06, "loss": 1.1107, "step": 973 }, { "epoch": 0.0703668250040638, "grad_norm": 13.49053849578474, "learning_rate": 4.999650094496692e-06, "loss": 1.1928, "step": 974 }, { "epoch": 0.0704390702042733, "grad_norm": 9.491932706199693, "learning_rate": 4.9996451837184665e-06, "loss": 1.1282, "step": 975 }, { "epoch": 0.07051131540448281, "grad_norm": 9.09452811251664, "learning_rate": 4.999640238722039e-06, "loss": 1.2054, "step": 976 }, { "epoch": 0.07058356060469233, "grad_norm": 12.203298804380083, "learning_rate": 4.999635259507477e-06, "loss": 1.0912, "step": 977 }, { "epoch": 0.07065580580490184, "grad_norm": 10.605703625967278, "learning_rate": 4.99963024607485e-06, "loss": 1.0932, "step": 978 }, { "epoch": 0.07072805100511134, "grad_norm": 15.64453222534329, "learning_rate": 4.999625198424226e-06, "loss": 1.1285, "step": 979 }, { "epoch": 0.07080029620532086, "grad_norm": 7.5180000470225625, "learning_rate": 4.999620116555672e-06, "loss": 1.072, "step": 980 }, { "epoch": 0.07087254140553037, "grad_norm": 12.301884601397209, "learning_rate": 4.999615000469261e-06, "loss": 1.2232, "step": 981 }, { "epoch": 0.07094478660573988, "grad_norm": 14.741817985233897, "learning_rate": 4.999609850165062e-06, "loss": 1.1257, "step": 982 }, { "epoch": 0.0710170318059494, "grad_norm": 7.728657036471624, "learning_rate": 4.999604665643144e-06, "loss": 1.0595, "step": 983 }, { "epoch": 0.0710892770061589, "grad_norm": 11.948299935048945, "learning_rate": 4.9995994469035794e-06, "loss": 1.2485, "step": 984 }, { "epoch": 0.07116152220636841, "grad_norm": 9.995372846098315, "learning_rate": 4.999594193946439e-06, "loss": 1.0904, "step": 985 }, { "epoch": 0.07123376740657793, "grad_norm": 7.802999105725011, "learning_rate": 4.999588906771794e-06, "loss": 1.0469, "step": 986 }, { "epoch": 0.07130601260678744, "grad_norm": 11.389051540629588, "learning_rate": 4.999583585379719e-06, "loss": 1.1461, "step": 987 }, { "epoch": 0.07137825780699694, "grad_norm": 12.720010830856607, "learning_rate": 4.999578229770285e-06, "loss": 1.1334, "step": 988 }, { "epoch": 0.07145050300720646, "grad_norm": 10.948911457737477, "learning_rate": 4.9995728399435665e-06, "loss": 1.1993, "step": 989 }, { "epoch": 0.07152274820741597, "grad_norm": 10.926613234210294, "learning_rate": 4.999567415899636e-06, "loss": 1.1061, "step": 990 }, { "epoch": 0.07159499340762548, "grad_norm": 10.067142150293634, "learning_rate": 4.9995619576385675e-06, "loss": 1.0645, "step": 991 }, { "epoch": 0.071667238607835, "grad_norm": 9.571186981731044, "learning_rate": 4.999556465160438e-06, "loss": 1.1765, "step": 992 }, { "epoch": 0.0717394838080445, "grad_norm": 11.328238819471302, "learning_rate": 4.99955093846532e-06, "loss": 1.061, "step": 993 }, { "epoch": 0.07181172900825401, "grad_norm": 18.699339111399766, "learning_rate": 4.999545377553291e-06, "loss": 1.1961, "step": 994 }, { "epoch": 0.07188397420846353, "grad_norm": 12.101520259973086, "learning_rate": 4.999539782424427e-06, "loss": 1.0998, "step": 995 }, { "epoch": 0.07195621940867304, "grad_norm": 10.005480980852601, "learning_rate": 4.9995341530788036e-06, "loss": 1.157, "step": 996 }, { "epoch": 0.07202846460888254, "grad_norm": 9.770064615838335, "learning_rate": 4.999528489516498e-06, "loss": 1.097, "step": 997 }, { "epoch": 0.07210070980909206, "grad_norm": 10.67458665391469, "learning_rate": 4.999522791737589e-06, "loss": 1.2029, "step": 998 }, { "epoch": 0.07217295500930157, "grad_norm": 10.797893558933547, "learning_rate": 4.999517059742154e-06, "loss": 1.2861, "step": 999 }, { "epoch": 0.07224520020951108, "grad_norm": 9.363852014440416, "learning_rate": 4.99951129353027e-06, "loss": 1.1835, "step": 1000 }, { "epoch": 0.07231744540972058, "grad_norm": 6.911859366063537, "learning_rate": 4.999505493102018e-06, "loss": 1.064, "step": 1001 }, { "epoch": 0.0723896906099301, "grad_norm": 11.66220799578339, "learning_rate": 4.999499658457477e-06, "loss": 1.1255, "step": 1002 }, { "epoch": 0.07246193581013961, "grad_norm": 8.815600559940421, "learning_rate": 4.999493789596726e-06, "loss": 1.0788, "step": 1003 }, { "epoch": 0.07253418101034913, "grad_norm": 15.791114066058341, "learning_rate": 4.999487886519845e-06, "loss": 1.2195, "step": 1004 }, { "epoch": 0.07260642621055864, "grad_norm": 9.319790233166966, "learning_rate": 4.9994819492269165e-06, "loss": 1.196, "step": 1005 }, { "epoch": 0.07267867141076814, "grad_norm": 8.429510558412899, "learning_rate": 4.99947597771802e-06, "loss": 1.1572, "step": 1006 }, { "epoch": 0.07275091661097766, "grad_norm": 7.784609885023894, "learning_rate": 4.999469971993238e-06, "loss": 1.0095, "step": 1007 }, { "epoch": 0.07282316181118717, "grad_norm": 16.09034399638103, "learning_rate": 4.999463932052654e-06, "loss": 1.2012, "step": 1008 }, { "epoch": 0.07289540701139668, "grad_norm": 10.30262311106982, "learning_rate": 4.999457857896349e-06, "loss": 1.1139, "step": 1009 }, { "epoch": 0.07296765221160618, "grad_norm": 13.580707756554713, "learning_rate": 4.999451749524406e-06, "loss": 1.1026, "step": 1010 }, { "epoch": 0.0730398974118157, "grad_norm": 9.150527572014163, "learning_rate": 4.9994456069369095e-06, "loss": 1.0825, "step": 1011 }, { "epoch": 0.07311214261202521, "grad_norm": 12.158839874139348, "learning_rate": 4.999439430133943e-06, "loss": 1.188, "step": 1012 }, { "epoch": 0.07318438781223473, "grad_norm": 11.05066077556723, "learning_rate": 4.999433219115592e-06, "loss": 1.0179, "step": 1013 }, { "epoch": 0.07325663301244424, "grad_norm": 13.49997061267056, "learning_rate": 4.999426973881941e-06, "loss": 1.1698, "step": 1014 }, { "epoch": 0.07332887821265374, "grad_norm": 8.056771542496925, "learning_rate": 4.999420694433076e-06, "loss": 1.0667, "step": 1015 }, { "epoch": 0.07340112341286326, "grad_norm": 10.122579026124553, "learning_rate": 4.9994143807690805e-06, "loss": 1.0561, "step": 1016 }, { "epoch": 0.07347336861307277, "grad_norm": 13.091996614963316, "learning_rate": 4.999408032890045e-06, "loss": 1.1001, "step": 1017 }, { "epoch": 0.07354561381328228, "grad_norm": 11.430015908644878, "learning_rate": 4.999401650796052e-06, "loss": 1.145, "step": 1018 }, { "epoch": 0.07361785901349178, "grad_norm": 10.033891466739542, "learning_rate": 4.999395234487192e-06, "loss": 1.1755, "step": 1019 }, { "epoch": 0.0736901042137013, "grad_norm": 9.02972040125841, "learning_rate": 4.999388783963552e-06, "loss": 1.0965, "step": 1020 }, { "epoch": 0.07376234941391081, "grad_norm": 10.051461746420074, "learning_rate": 4.99938229922522e-06, "loss": 1.0868, "step": 1021 }, { "epoch": 0.07383459461412033, "grad_norm": 11.513462313879566, "learning_rate": 4.999375780272286e-06, "loss": 1.117, "step": 1022 }, { "epoch": 0.07390683981432984, "grad_norm": 10.49931587533672, "learning_rate": 4.9993692271048375e-06, "loss": 1.1138, "step": 1023 }, { "epoch": 0.07397908501453934, "grad_norm": 10.592615297476447, "learning_rate": 4.999362639722964e-06, "loss": 1.0211, "step": 1024 }, { "epoch": 0.07405133021474886, "grad_norm": 8.014517486383742, "learning_rate": 4.999356018126758e-06, "loss": 1.0701, "step": 1025 }, { "epoch": 0.07412357541495837, "grad_norm": 9.537454355513116, "learning_rate": 4.999349362316308e-06, "loss": 1.1702, "step": 1026 }, { "epoch": 0.07419582061516788, "grad_norm": 11.499276179879011, "learning_rate": 4.999342672291706e-06, "loss": 1.1402, "step": 1027 }, { "epoch": 0.07426806581537738, "grad_norm": 9.01688454224321, "learning_rate": 4.999335948053044e-06, "loss": 1.155, "step": 1028 }, { "epoch": 0.0743403110155869, "grad_norm": 10.333059450847367, "learning_rate": 4.999329189600413e-06, "loss": 1.1249, "step": 1029 }, { "epoch": 0.07441255621579641, "grad_norm": 12.433520116417215, "learning_rate": 4.9993223969339056e-06, "loss": 1.0675, "step": 1030 }, { "epoch": 0.07448480141600593, "grad_norm": 8.939625307291443, "learning_rate": 4.999315570053616e-06, "loss": 1.0674, "step": 1031 }, { "epoch": 0.07455704661621544, "grad_norm": 9.45522477970596, "learning_rate": 4.999308708959636e-06, "loss": 1.0398, "step": 1032 }, { "epoch": 0.07462929181642494, "grad_norm": 13.8720518537034, "learning_rate": 4.999301813652061e-06, "loss": 1.0534, "step": 1033 }, { "epoch": 0.07470153701663446, "grad_norm": 11.505012414984407, "learning_rate": 4.999294884130984e-06, "loss": 1.1643, "step": 1034 }, { "epoch": 0.07477378221684397, "grad_norm": 9.697365345975179, "learning_rate": 4.9992879203965e-06, "loss": 1.063, "step": 1035 }, { "epoch": 0.07484602741705348, "grad_norm": 12.800564896516164, "learning_rate": 4.999280922448707e-06, "loss": 0.988, "step": 1036 }, { "epoch": 0.07491827261726298, "grad_norm": 11.25646007284626, "learning_rate": 4.999273890287698e-06, "loss": 1.1378, "step": 1037 }, { "epoch": 0.0749905178174725, "grad_norm": 8.921728770966718, "learning_rate": 4.999266823913569e-06, "loss": 1.0187, "step": 1038 }, { "epoch": 0.07506276301768201, "grad_norm": 11.769766556102812, "learning_rate": 4.999259723326419e-06, "loss": 1.0744, "step": 1039 }, { "epoch": 0.07513500821789153, "grad_norm": 12.213888698872179, "learning_rate": 4.999252588526343e-06, "loss": 1.1411, "step": 1040 }, { "epoch": 0.07520725341810104, "grad_norm": 7.38209037680377, "learning_rate": 4.99924541951344e-06, "loss": 1.0931, "step": 1041 }, { "epoch": 0.07527949861831054, "grad_norm": 11.892896157114391, "learning_rate": 4.999238216287808e-06, "loss": 1.0689, "step": 1042 }, { "epoch": 0.07535174381852006, "grad_norm": 12.732171252950264, "learning_rate": 4.999230978849545e-06, "loss": 1.219, "step": 1043 }, { "epoch": 0.07542398901872957, "grad_norm": 7.93188638501308, "learning_rate": 4.99922370719875e-06, "loss": 1.0188, "step": 1044 }, { "epoch": 0.07549623421893908, "grad_norm": 11.27336034926823, "learning_rate": 4.999216401335524e-06, "loss": 1.0437, "step": 1045 }, { "epoch": 0.07556847941914858, "grad_norm": 8.143089463211084, "learning_rate": 4.9992090612599655e-06, "loss": 1.1571, "step": 1046 }, { "epoch": 0.0756407246193581, "grad_norm": 8.372680798432567, "learning_rate": 4.999201686972176e-06, "loss": 1.0619, "step": 1047 }, { "epoch": 0.07571296981956761, "grad_norm": 8.191027165465307, "learning_rate": 4.999194278472255e-06, "loss": 1.0507, "step": 1048 }, { "epoch": 0.07578521501977713, "grad_norm": 7.784751257576184, "learning_rate": 4.999186835760305e-06, "loss": 1.0829, "step": 1049 }, { "epoch": 0.07585746021998664, "grad_norm": 9.461203208361098, "learning_rate": 4.999179358836428e-06, "loss": 0.9316, "step": 1050 }, { "epoch": 0.07592970542019614, "grad_norm": 8.90134442065451, "learning_rate": 4.999171847700725e-06, "loss": 1.1381, "step": 1051 }, { "epoch": 0.07600195062040566, "grad_norm": 9.435129436338897, "learning_rate": 4.999164302353302e-06, "loss": 1.0992, "step": 1052 }, { "epoch": 0.07607419582061517, "grad_norm": 7.950773419839909, "learning_rate": 4.999156722794259e-06, "loss": 1.0634, "step": 1053 }, { "epoch": 0.07614644102082468, "grad_norm": 6.149359705398356, "learning_rate": 4.999149109023701e-06, "loss": 0.9633, "step": 1054 }, { "epoch": 0.07621868622103418, "grad_norm": 8.409094347004105, "learning_rate": 4.999141461041732e-06, "loss": 1.1221, "step": 1055 }, { "epoch": 0.0762909314212437, "grad_norm": 10.605138548036365, "learning_rate": 4.999133778848457e-06, "loss": 1.1766, "step": 1056 }, { "epoch": 0.07636317662145321, "grad_norm": 8.686196647959983, "learning_rate": 4.999126062443981e-06, "loss": 1.03, "step": 1057 }, { "epoch": 0.07643542182166273, "grad_norm": 8.343998708840084, "learning_rate": 4.999118311828409e-06, "loss": 1.1061, "step": 1058 }, { "epoch": 0.07650766702187223, "grad_norm": 8.326950935212805, "learning_rate": 4.999110527001849e-06, "loss": 1.1387, "step": 1059 }, { "epoch": 0.07657991222208174, "grad_norm": 8.29351336235745, "learning_rate": 4.999102707964406e-06, "loss": 1.0004, "step": 1060 }, { "epoch": 0.07665215742229126, "grad_norm": 8.809209547488498, "learning_rate": 4.999094854716187e-06, "loss": 1.1205, "step": 1061 }, { "epoch": 0.07672440262250077, "grad_norm": 9.26867810746836, "learning_rate": 4.9990869672573e-06, "loss": 1.0999, "step": 1062 }, { "epoch": 0.07679664782271028, "grad_norm": 9.428294561705481, "learning_rate": 4.999079045587852e-06, "loss": 1.042, "step": 1063 }, { "epoch": 0.07686889302291978, "grad_norm": 10.003404800615662, "learning_rate": 4.999071089707953e-06, "loss": 1.158, "step": 1064 }, { "epoch": 0.0769411382231293, "grad_norm": 7.3664252654463445, "learning_rate": 4.999063099617712e-06, "loss": 1.0712, "step": 1065 }, { "epoch": 0.07701338342333881, "grad_norm": 8.603267989129098, "learning_rate": 4.999055075317237e-06, "loss": 1.0203, "step": 1066 }, { "epoch": 0.07708562862354833, "grad_norm": 9.014182466239962, "learning_rate": 4.999047016806637e-06, "loss": 1.101, "step": 1067 }, { "epoch": 0.07715787382375783, "grad_norm": 10.773023568348824, "learning_rate": 4.999038924086026e-06, "loss": 1.0112, "step": 1068 }, { "epoch": 0.07723011902396734, "grad_norm": 8.711257613933169, "learning_rate": 4.999030797155511e-06, "loss": 1.0507, "step": 1069 }, { "epoch": 0.07730236422417686, "grad_norm": 11.018520715916898, "learning_rate": 4.999022636015205e-06, "loss": 0.9961, "step": 1070 }, { "epoch": 0.07737460942438637, "grad_norm": 8.808058031797016, "learning_rate": 4.99901444066522e-06, "loss": 1.0866, "step": 1071 }, { "epoch": 0.07744685462459588, "grad_norm": 7.085077706760869, "learning_rate": 4.999006211105667e-06, "loss": 0.997, "step": 1072 }, { "epoch": 0.07751909982480538, "grad_norm": 7.515437450963564, "learning_rate": 4.99899794733666e-06, "loss": 0.987, "step": 1073 }, { "epoch": 0.0775913450250149, "grad_norm": 8.220134429946187, "learning_rate": 4.998989649358311e-06, "loss": 1.0864, "step": 1074 }, { "epoch": 0.07766359022522441, "grad_norm": 7.01109361332347, "learning_rate": 4.9989813171707345e-06, "loss": 1.0873, "step": 1075 }, { "epoch": 0.07773583542543393, "grad_norm": 7.783209251487551, "learning_rate": 4.9989729507740435e-06, "loss": 1.0629, "step": 1076 }, { "epoch": 0.07780808062564343, "grad_norm": 7.85286010343997, "learning_rate": 4.998964550168354e-06, "loss": 1.0904, "step": 1077 }, { "epoch": 0.07788032582585294, "grad_norm": 10.559509576333353, "learning_rate": 4.9989561153537795e-06, "loss": 1.0421, "step": 1078 }, { "epoch": 0.07795257102606246, "grad_norm": 7.750250535422478, "learning_rate": 4.998947646330435e-06, "loss": 1.0503, "step": 1079 }, { "epoch": 0.07802481622627197, "grad_norm": 8.632834152263571, "learning_rate": 4.998939143098439e-06, "loss": 1.1292, "step": 1080 }, { "epoch": 0.07809706142648148, "grad_norm": 8.116755131651287, "learning_rate": 4.998930605657906e-06, "loss": 0.9979, "step": 1081 }, { "epoch": 0.07816930662669098, "grad_norm": 8.48251339210647, "learning_rate": 4.998922034008954e-06, "loss": 1.1144, "step": 1082 }, { "epoch": 0.0782415518269005, "grad_norm": 7.646926372123236, "learning_rate": 4.998913428151699e-06, "loss": 0.9841, "step": 1083 }, { "epoch": 0.07831379702711001, "grad_norm": 8.063860164913411, "learning_rate": 4.99890478808626e-06, "loss": 1.088, "step": 1084 }, { "epoch": 0.07838604222731953, "grad_norm": 8.69952570181406, "learning_rate": 4.998896113812754e-06, "loss": 1.0103, "step": 1085 }, { "epoch": 0.07845828742752903, "grad_norm": 7.262411083524104, "learning_rate": 4.9988874053313e-06, "loss": 1.0132, "step": 1086 }, { "epoch": 0.07853053262773854, "grad_norm": 7.728081502765024, "learning_rate": 4.998878662642018e-06, "loss": 0.9863, "step": 1087 }, { "epoch": 0.07860277782794806, "grad_norm": 7.079423730533815, "learning_rate": 4.998869885745028e-06, "loss": 0.9302, "step": 1088 }, { "epoch": 0.07867502302815757, "grad_norm": 10.197776058319997, "learning_rate": 4.998861074640449e-06, "loss": 0.9764, "step": 1089 }, { "epoch": 0.07874726822836708, "grad_norm": 7.017699073112334, "learning_rate": 4.998852229328402e-06, "loss": 1.0114, "step": 1090 }, { "epoch": 0.07881951342857658, "grad_norm": 12.024054260496955, "learning_rate": 4.9988433498090096e-06, "loss": 1.0303, "step": 1091 }, { "epoch": 0.0788917586287861, "grad_norm": 8.720532306907664, "learning_rate": 4.99883443608239e-06, "loss": 1.0227, "step": 1092 }, { "epoch": 0.07896400382899561, "grad_norm": 10.23896754066679, "learning_rate": 4.998825488148668e-06, "loss": 1.1158, "step": 1093 }, { "epoch": 0.07903624902920513, "grad_norm": 8.286403448825022, "learning_rate": 4.998816506007966e-06, "loss": 1.059, "step": 1094 }, { "epoch": 0.07910849422941463, "grad_norm": 9.157439379208945, "learning_rate": 4.998807489660405e-06, "loss": 1.0544, "step": 1095 }, { "epoch": 0.07918073942962414, "grad_norm": 8.844072207994168, "learning_rate": 4.998798439106111e-06, "loss": 1.0838, "step": 1096 }, { "epoch": 0.07925298462983366, "grad_norm": 8.374370949877509, "learning_rate": 4.998789354345206e-06, "loss": 0.9951, "step": 1097 }, { "epoch": 0.07932522983004317, "grad_norm": 7.5593075910498255, "learning_rate": 4.998780235377815e-06, "loss": 1.1335, "step": 1098 }, { "epoch": 0.07939747503025268, "grad_norm": 8.159172616038976, "learning_rate": 4.998771082204062e-06, "loss": 1.0723, "step": 1099 }, { "epoch": 0.07946972023046218, "grad_norm": 7.843636196572333, "learning_rate": 4.998761894824074e-06, "loss": 1.077, "step": 1100 }, { "epoch": 0.0795419654306717, "grad_norm": 10.013461017564953, "learning_rate": 4.998752673237976e-06, "loss": 1.0934, "step": 1101 }, { "epoch": 0.07961421063088121, "grad_norm": 9.251904678137354, "learning_rate": 4.998743417445893e-06, "loss": 1.1208, "step": 1102 }, { "epoch": 0.07968645583109073, "grad_norm": 8.063492115103234, "learning_rate": 4.9987341274479524e-06, "loss": 1.0444, "step": 1103 }, { "epoch": 0.07975870103130023, "grad_norm": 7.964504172965911, "learning_rate": 4.998724803244283e-06, "loss": 1.0694, "step": 1104 }, { "epoch": 0.07983094623150974, "grad_norm": 8.099611023405306, "learning_rate": 4.998715444835011e-06, "loss": 1.094, "step": 1105 }, { "epoch": 0.07990319143171926, "grad_norm": 9.626043089785451, "learning_rate": 4.998706052220265e-06, "loss": 0.9621, "step": 1106 }, { "epoch": 0.07997543663192877, "grad_norm": 9.898266002731987, "learning_rate": 4.998696625400172e-06, "loss": 1.1121, "step": 1107 }, { "epoch": 0.08004768183213827, "grad_norm": 8.261856344894401, "learning_rate": 4.998687164374863e-06, "loss": 1.032, "step": 1108 }, { "epoch": 0.08011992703234778, "grad_norm": 6.460485809294053, "learning_rate": 4.998677669144467e-06, "loss": 0.9505, "step": 1109 }, { "epoch": 0.0801921722325573, "grad_norm": 9.319259341038178, "learning_rate": 4.998668139709113e-06, "loss": 1.0253, "step": 1110 }, { "epoch": 0.08026441743276681, "grad_norm": 7.739720079352888, "learning_rate": 4.998658576068933e-06, "loss": 1.0211, "step": 1111 }, { "epoch": 0.08033666263297633, "grad_norm": 9.040593049843086, "learning_rate": 4.9986489782240575e-06, "loss": 1.0445, "step": 1112 }, { "epoch": 0.08040890783318583, "grad_norm": 11.828095973998401, "learning_rate": 4.998639346174618e-06, "loss": 1.0956, "step": 1113 }, { "epoch": 0.08048115303339534, "grad_norm": 9.531187964065532, "learning_rate": 4.998629679920744e-06, "loss": 1.2079, "step": 1114 }, { "epoch": 0.08055339823360486, "grad_norm": 7.734908638471938, "learning_rate": 4.998619979462571e-06, "loss": 0.9799, "step": 1115 }, { "epoch": 0.08062564343381437, "grad_norm": 11.347745669828408, "learning_rate": 4.99861024480023e-06, "loss": 1.1444, "step": 1116 }, { "epoch": 0.08069788863402387, "grad_norm": 9.159509072287253, "learning_rate": 4.9986004759338555e-06, "loss": 1.0438, "step": 1117 }, { "epoch": 0.08077013383423338, "grad_norm": 8.254305554113202, "learning_rate": 4.9985906728635805e-06, "loss": 1.0677, "step": 1118 }, { "epoch": 0.0808423790344429, "grad_norm": 9.087270414241175, "learning_rate": 4.998580835589538e-06, "loss": 0.9993, "step": 1119 }, { "epoch": 0.08091462423465241, "grad_norm": 9.578841862447739, "learning_rate": 4.998570964111865e-06, "loss": 1.0351, "step": 1120 }, { "epoch": 0.08098686943486193, "grad_norm": 10.152247558701673, "learning_rate": 4.998561058430696e-06, "loss": 1.0478, "step": 1121 }, { "epoch": 0.08105911463507143, "grad_norm": 8.402404132308606, "learning_rate": 4.998551118546165e-06, "loss": 0.9444, "step": 1122 }, { "epoch": 0.08113135983528094, "grad_norm": 11.8092436969662, "learning_rate": 4.99854114445841e-06, "loss": 1.0678, "step": 1123 }, { "epoch": 0.08120360503549046, "grad_norm": 8.492356847006285, "learning_rate": 4.998531136167566e-06, "loss": 1.0957, "step": 1124 }, { "epoch": 0.08127585023569997, "grad_norm": 8.914724500916789, "learning_rate": 4.9985210936737705e-06, "loss": 1.0199, "step": 1125 }, { "epoch": 0.08134809543590947, "grad_norm": 10.226529301297335, "learning_rate": 4.9985110169771624e-06, "loss": 1.0936, "step": 1126 }, { "epoch": 0.08142034063611898, "grad_norm": 11.26752175541672, "learning_rate": 4.998500906077878e-06, "loss": 1.0493, "step": 1127 }, { "epoch": 0.0814925858363285, "grad_norm": 7.6901009974401555, "learning_rate": 4.9984907609760556e-06, "loss": 0.9427, "step": 1128 }, { "epoch": 0.08156483103653801, "grad_norm": 8.110694381018506, "learning_rate": 4.9984805816718355e-06, "loss": 1.0684, "step": 1129 }, { "epoch": 0.08163707623674753, "grad_norm": 10.169367280405849, "learning_rate": 4.998470368165355e-06, "loss": 1.2313, "step": 1130 }, { "epoch": 0.08170932143695703, "grad_norm": 8.312233368462254, "learning_rate": 4.998460120456756e-06, "loss": 1.0566, "step": 1131 }, { "epoch": 0.08178156663716654, "grad_norm": 12.509159241987698, "learning_rate": 4.998449838546178e-06, "loss": 1.057, "step": 1132 }, { "epoch": 0.08185381183737606, "grad_norm": 7.44460643130261, "learning_rate": 4.998439522433761e-06, "loss": 0.9894, "step": 1133 }, { "epoch": 0.08192605703758557, "grad_norm": 7.34645089337428, "learning_rate": 4.998429172119647e-06, "loss": 1.0724, "step": 1134 }, { "epoch": 0.08199830223779507, "grad_norm": 9.75773665778172, "learning_rate": 4.998418787603978e-06, "loss": 0.9903, "step": 1135 }, { "epoch": 0.08207054743800458, "grad_norm": 8.746829957290688, "learning_rate": 4.9984083688868945e-06, "loss": 1.0508, "step": 1136 }, { "epoch": 0.0821427926382141, "grad_norm": 9.26367496343244, "learning_rate": 4.998397915968541e-06, "loss": 1.0882, "step": 1137 }, { "epoch": 0.08221503783842361, "grad_norm": 8.051920729192458, "learning_rate": 4.998387428849061e-06, "loss": 1.0418, "step": 1138 }, { "epoch": 0.08228728303863313, "grad_norm": 9.871850006274746, "learning_rate": 4.998376907528596e-06, "loss": 1.1238, "step": 1139 }, { "epoch": 0.08235952823884263, "grad_norm": 7.55332453207304, "learning_rate": 4.998366352007291e-06, "loss": 1.038, "step": 1140 }, { "epoch": 0.08243177343905214, "grad_norm": 7.612874379841177, "learning_rate": 4.9983557622852906e-06, "loss": 1.0458, "step": 1141 }, { "epoch": 0.08250401863926166, "grad_norm": 9.162435583952453, "learning_rate": 4.9983451383627394e-06, "loss": 1.1471, "step": 1142 }, { "epoch": 0.08257626383947117, "grad_norm": 8.699883506258054, "learning_rate": 4.998334480239783e-06, "loss": 1.0271, "step": 1143 }, { "epoch": 0.08264850903968067, "grad_norm": 8.003606936814391, "learning_rate": 4.998323787916568e-06, "loss": 0.9465, "step": 1144 }, { "epoch": 0.08272075423989018, "grad_norm": 9.08944967519757, "learning_rate": 4.998313061393239e-06, "loss": 1.023, "step": 1145 }, { "epoch": 0.0827929994400997, "grad_norm": 10.64499783954446, "learning_rate": 4.998302300669946e-06, "loss": 1.0594, "step": 1146 }, { "epoch": 0.08286524464030921, "grad_norm": 7.027268023928428, "learning_rate": 4.998291505746833e-06, "loss": 1.0061, "step": 1147 }, { "epoch": 0.08293748984051873, "grad_norm": 6.857355676481956, "learning_rate": 4.998280676624049e-06, "loss": 0.9767, "step": 1148 }, { "epoch": 0.08300973504072823, "grad_norm": 9.301179495227549, "learning_rate": 4.9982698133017425e-06, "loss": 1.0287, "step": 1149 }, { "epoch": 0.08308198024093774, "grad_norm": 7.371449537236845, "learning_rate": 4.998258915780062e-06, "loss": 1.0113, "step": 1150 }, { "epoch": 0.08315422544114726, "grad_norm": 7.118158904361727, "learning_rate": 4.998247984059157e-06, "loss": 1.0289, "step": 1151 }, { "epoch": 0.08322647064135677, "grad_norm": 7.493440429809104, "learning_rate": 4.998237018139177e-06, "loss": 0.9469, "step": 1152 }, { "epoch": 0.08329871584156627, "grad_norm": 8.059626104045504, "learning_rate": 4.998226018020271e-06, "loss": 1.1325, "step": 1153 }, { "epoch": 0.08337096104177578, "grad_norm": 7.627840107274696, "learning_rate": 4.9982149837025915e-06, "loss": 0.9679, "step": 1154 }, { "epoch": 0.0834432062419853, "grad_norm": 10.390496137006382, "learning_rate": 4.9982039151862886e-06, "loss": 1.0214, "step": 1155 }, { "epoch": 0.08351545144219481, "grad_norm": 8.294970852442527, "learning_rate": 4.998192812471514e-06, "loss": 1.0068, "step": 1156 }, { "epoch": 0.08358769664240433, "grad_norm": 7.2687658596767815, "learning_rate": 4.998181675558419e-06, "loss": 1.0359, "step": 1157 }, { "epoch": 0.08365994184261383, "grad_norm": 11.96708233323029, "learning_rate": 4.998170504447156e-06, "loss": 1.1325, "step": 1158 }, { "epoch": 0.08373218704282334, "grad_norm": 7.53534583945329, "learning_rate": 4.99815929913788e-06, "loss": 0.9961, "step": 1159 }, { "epoch": 0.08380443224303286, "grad_norm": 8.456074480376564, "learning_rate": 4.998148059630742e-06, "loss": 1.0054, "step": 1160 }, { "epoch": 0.08387667744324237, "grad_norm": 10.585953715970053, "learning_rate": 4.998136785925896e-06, "loss": 1.1631, "step": 1161 }, { "epoch": 0.08394892264345187, "grad_norm": 9.7325467127128, "learning_rate": 4.998125478023498e-06, "loss": 1.1169, "step": 1162 }, { "epoch": 0.08402116784366138, "grad_norm": 9.187471766006787, "learning_rate": 4.998114135923702e-06, "loss": 1.0181, "step": 1163 }, { "epoch": 0.0840934130438709, "grad_norm": 8.152130840685295, "learning_rate": 4.998102759626663e-06, "loss": 1.0458, "step": 1164 }, { "epoch": 0.08416565824408041, "grad_norm": 10.813768610097702, "learning_rate": 4.998091349132536e-06, "loss": 1.042, "step": 1165 }, { "epoch": 0.08423790344428991, "grad_norm": 12.487774783481994, "learning_rate": 4.99807990444148e-06, "loss": 1.0521, "step": 1166 }, { "epoch": 0.08431014864449943, "grad_norm": 8.111434645565318, "learning_rate": 4.9980684255536484e-06, "loss": 1.0093, "step": 1167 }, { "epoch": 0.08438239384470894, "grad_norm": 7.229229343226888, "learning_rate": 4.9980569124692e-06, "loss": 0.9784, "step": 1168 }, { "epoch": 0.08445463904491846, "grad_norm": 11.523341498540786, "learning_rate": 4.9980453651882924e-06, "loss": 0.9999, "step": 1169 }, { "epoch": 0.08452688424512797, "grad_norm": 9.631893315202843, "learning_rate": 4.998033783711083e-06, "loss": 1.0699, "step": 1170 }, { "epoch": 0.08459912944533747, "grad_norm": 10.038918104506301, "learning_rate": 4.99802216803773e-06, "loss": 1.0899, "step": 1171 }, { "epoch": 0.08467137464554698, "grad_norm": 9.196129722871618, "learning_rate": 4.998010518168393e-06, "loss": 1.213, "step": 1172 }, { "epoch": 0.0847436198457565, "grad_norm": 11.634530047608754, "learning_rate": 4.997998834103233e-06, "loss": 1.1776, "step": 1173 }, { "epoch": 0.08481586504596601, "grad_norm": 8.441376424345735, "learning_rate": 4.9979871158424075e-06, "loss": 0.9978, "step": 1174 }, { "epoch": 0.08488811024617551, "grad_norm": 7.7491075555650175, "learning_rate": 4.997975363386078e-06, "loss": 1.0773, "step": 1175 }, { "epoch": 0.08496035544638503, "grad_norm": 9.233312452088397, "learning_rate": 4.997963576734406e-06, "loss": 0.9961, "step": 1176 }, { "epoch": 0.08503260064659454, "grad_norm": 9.806789802004687, "learning_rate": 4.997951755887552e-06, "loss": 0.9672, "step": 1177 }, { "epoch": 0.08510484584680406, "grad_norm": 9.566787057932062, "learning_rate": 4.997939900845678e-06, "loss": 1.0264, "step": 1178 }, { "epoch": 0.08517709104701357, "grad_norm": 7.884842791686425, "learning_rate": 4.997928011608946e-06, "loss": 1.0766, "step": 1179 }, { "epoch": 0.08524933624722307, "grad_norm": 9.559610167020942, "learning_rate": 4.9979160881775194e-06, "loss": 1.0885, "step": 1180 }, { "epoch": 0.08532158144743258, "grad_norm": 9.250748062815228, "learning_rate": 4.997904130551561e-06, "loss": 1.1334, "step": 1181 }, { "epoch": 0.0853938266476421, "grad_norm": 7.939635462620237, "learning_rate": 4.997892138731234e-06, "loss": 0.9784, "step": 1182 }, { "epoch": 0.08546607184785161, "grad_norm": 6.920102411295118, "learning_rate": 4.997880112716703e-06, "loss": 1.029, "step": 1183 }, { "epoch": 0.08553831704806111, "grad_norm": 7.195743245538496, "learning_rate": 4.997868052508133e-06, "loss": 1.0498, "step": 1184 }, { "epoch": 0.08561056224827063, "grad_norm": 8.418912631972443, "learning_rate": 4.99785595810569e-06, "loss": 1.0402, "step": 1185 }, { "epoch": 0.08568280744848014, "grad_norm": 7.743907133144584, "learning_rate": 4.997843829509536e-06, "loss": 1.0653, "step": 1186 }, { "epoch": 0.08575505264868966, "grad_norm": 8.158732186355419, "learning_rate": 4.997831666719842e-06, "loss": 1.0036, "step": 1187 }, { "epoch": 0.08582729784889917, "grad_norm": 7.8889102696709585, "learning_rate": 4.9978194697367705e-06, "loss": 0.986, "step": 1188 }, { "epoch": 0.08589954304910867, "grad_norm": 9.042432995738585, "learning_rate": 4.99780723856049e-06, "loss": 1.0258, "step": 1189 }, { "epoch": 0.08597178824931818, "grad_norm": 6.668155821738742, "learning_rate": 4.997794973191168e-06, "loss": 0.9792, "step": 1190 }, { "epoch": 0.0860440334495277, "grad_norm": 8.493749227516751, "learning_rate": 4.997782673628973e-06, "loss": 1.0146, "step": 1191 }, { "epoch": 0.08611627864973721, "grad_norm": 8.381222363454155, "learning_rate": 4.997770339874071e-06, "loss": 1.0213, "step": 1192 }, { "epoch": 0.08618852384994671, "grad_norm": 8.04084127930678, "learning_rate": 4.997757971926634e-06, "loss": 1.0865, "step": 1193 }, { "epoch": 0.08626076905015623, "grad_norm": 8.849180246092404, "learning_rate": 4.9977455697868284e-06, "loss": 0.9883, "step": 1194 }, { "epoch": 0.08633301425036574, "grad_norm": 9.541466812195212, "learning_rate": 4.997733133454826e-06, "loss": 1.1456, "step": 1195 }, { "epoch": 0.08640525945057526, "grad_norm": 7.734661551667768, "learning_rate": 4.997720662930796e-06, "loss": 0.9471, "step": 1196 }, { "epoch": 0.08647750465078477, "grad_norm": 12.614228440743945, "learning_rate": 4.99770815821491e-06, "loss": 1.063, "step": 1197 }, { "epoch": 0.08654974985099427, "grad_norm": 9.355804730397077, "learning_rate": 4.997695619307338e-06, "loss": 0.9665, "step": 1198 }, { "epoch": 0.08662199505120378, "grad_norm": 7.6980746351525395, "learning_rate": 4.9976830462082525e-06, "loss": 1.0062, "step": 1199 }, { "epoch": 0.0866942402514133, "grad_norm": 9.033183065856107, "learning_rate": 4.997670438917826e-06, "loss": 0.9619, "step": 1200 }, { "epoch": 0.08676648545162281, "grad_norm": 8.392159717650904, "learning_rate": 4.997657797436231e-06, "loss": 1.01, "step": 1201 }, { "epoch": 0.08683873065183231, "grad_norm": 14.59124198453878, "learning_rate": 4.997645121763638e-06, "loss": 1.1509, "step": 1202 }, { "epoch": 0.08691097585204183, "grad_norm": 10.620290273374012, "learning_rate": 4.997632411900224e-06, "loss": 1.105, "step": 1203 }, { "epoch": 0.08698322105225134, "grad_norm": 6.610973841135195, "learning_rate": 4.997619667846162e-06, "loss": 1.0147, "step": 1204 }, { "epoch": 0.08705546625246086, "grad_norm": 10.12523999341825, "learning_rate": 4.997606889601625e-06, "loss": 1.0926, "step": 1205 }, { "epoch": 0.08712771145267037, "grad_norm": 9.81527379187025, "learning_rate": 4.997594077166789e-06, "loss": 1.1371, "step": 1206 }, { "epoch": 0.08719995665287987, "grad_norm": 11.050979736530469, "learning_rate": 4.99758123054183e-06, "loss": 0.9712, "step": 1207 }, { "epoch": 0.08727220185308938, "grad_norm": 7.89698604600767, "learning_rate": 4.9975683497269225e-06, "loss": 1.0224, "step": 1208 }, { "epoch": 0.0873444470532989, "grad_norm": 9.178702573491375, "learning_rate": 4.997555434722244e-06, "loss": 1.1017, "step": 1209 }, { "epoch": 0.08741669225350841, "grad_norm": 10.762319203983754, "learning_rate": 4.997542485527971e-06, "loss": 0.999, "step": 1210 }, { "epoch": 0.08748893745371791, "grad_norm": 7.619951671937692, "learning_rate": 4.997529502144281e-06, "loss": 1.0271, "step": 1211 }, { "epoch": 0.08756118265392743, "grad_norm": 7.020186065207725, "learning_rate": 4.99751648457135e-06, "loss": 0.9666, "step": 1212 }, { "epoch": 0.08763342785413694, "grad_norm": 8.187727830352312, "learning_rate": 4.997503432809358e-06, "loss": 1.0493, "step": 1213 }, { "epoch": 0.08770567305434646, "grad_norm": 12.288750219530375, "learning_rate": 4.9974903468584835e-06, "loss": 1.1465, "step": 1214 }, { "epoch": 0.08777791825455596, "grad_norm": 12.846954317249368, "learning_rate": 4.997477226718905e-06, "loss": 1.0951, "step": 1215 }, { "epoch": 0.08785016345476547, "grad_norm": 7.384135132847736, "learning_rate": 4.997464072390803e-06, "loss": 0.9689, "step": 1216 }, { "epoch": 0.08792240865497498, "grad_norm": 8.630582467907457, "learning_rate": 4.997450883874356e-06, "loss": 0.991, "step": 1217 }, { "epoch": 0.0879946538551845, "grad_norm": 10.715823827875232, "learning_rate": 4.997437661169746e-06, "loss": 1.0127, "step": 1218 }, { "epoch": 0.08806689905539401, "grad_norm": 7.665405280241323, "learning_rate": 4.997424404277154e-06, "loss": 1.0252, "step": 1219 }, { "epoch": 0.08813914425560351, "grad_norm": 8.277917500908998, "learning_rate": 4.9974111131967604e-06, "loss": 1.0161, "step": 1220 }, { "epoch": 0.08821138945581303, "grad_norm": 7.94931832515951, "learning_rate": 4.997397787928748e-06, "loss": 1.0223, "step": 1221 }, { "epoch": 0.08828363465602254, "grad_norm": 12.318375942641863, "learning_rate": 4.9973844284733e-06, "loss": 0.9923, "step": 1222 }, { "epoch": 0.08835587985623206, "grad_norm": 11.634357582917396, "learning_rate": 4.997371034830597e-06, "loss": 1.0476, "step": 1223 }, { "epoch": 0.08842812505644156, "grad_norm": 8.727590584023964, "learning_rate": 4.997357607000824e-06, "loss": 0.991, "step": 1224 }, { "epoch": 0.08850037025665107, "grad_norm": 9.445898096249321, "learning_rate": 4.997344144984164e-06, "loss": 0.9266, "step": 1225 }, { "epoch": 0.08857261545686058, "grad_norm": 15.154415058865427, "learning_rate": 4.997330648780802e-06, "loss": 1.0491, "step": 1226 }, { "epoch": 0.0886448606570701, "grad_norm": 12.147092218520216, "learning_rate": 4.997317118390923e-06, "loss": 1.0141, "step": 1227 }, { "epoch": 0.08871710585727961, "grad_norm": 8.807405771470952, "learning_rate": 4.997303553814711e-06, "loss": 1.0545, "step": 1228 }, { "epoch": 0.08878935105748911, "grad_norm": 8.773896637585334, "learning_rate": 4.997289955052353e-06, "loss": 0.9648, "step": 1229 }, { "epoch": 0.08886159625769863, "grad_norm": 12.582803138190359, "learning_rate": 4.997276322104034e-06, "loss": 0.982, "step": 1230 }, { "epoch": 0.08893384145790814, "grad_norm": 8.20721624166806, "learning_rate": 4.997262654969942e-06, "loss": 1.1456, "step": 1231 }, { "epoch": 0.08900608665811766, "grad_norm": 8.216809642030134, "learning_rate": 4.997248953650262e-06, "loss": 1.0748, "step": 1232 }, { "epoch": 0.08907833185832716, "grad_norm": 8.933539947094216, "learning_rate": 4.997235218145184e-06, "loss": 1.0812, "step": 1233 }, { "epoch": 0.08915057705853667, "grad_norm": 9.513865341070755, "learning_rate": 4.997221448454894e-06, "loss": 0.9748, "step": 1234 }, { "epoch": 0.08922282225874618, "grad_norm": 12.238541206742763, "learning_rate": 4.997207644579581e-06, "loss": 1.0394, "step": 1235 }, { "epoch": 0.0892950674589557, "grad_norm": 9.030921798075168, "learning_rate": 4.997193806519436e-06, "loss": 1.0685, "step": 1236 }, { "epoch": 0.08936731265916521, "grad_norm": 10.022809527045283, "learning_rate": 4.997179934274645e-06, "loss": 0.9943, "step": 1237 }, { "epoch": 0.08943955785937471, "grad_norm": 11.31666176467662, "learning_rate": 4.997166027845401e-06, "loss": 0.982, "step": 1238 }, { "epoch": 0.08951180305958423, "grad_norm": 11.069126364428211, "learning_rate": 4.997152087231892e-06, "loss": 1.0336, "step": 1239 }, { "epoch": 0.08958404825979374, "grad_norm": 9.509747774487765, "learning_rate": 4.9971381124343095e-06, "loss": 1.0249, "step": 1240 }, { "epoch": 0.08965629346000326, "grad_norm": 9.540294520792166, "learning_rate": 4.9971241034528465e-06, "loss": 0.9429, "step": 1241 }, { "epoch": 0.08972853866021276, "grad_norm": 10.411457030598552, "learning_rate": 4.997110060287692e-06, "loss": 1.1491, "step": 1242 }, { "epoch": 0.08980078386042227, "grad_norm": 10.482388940755511, "learning_rate": 4.997095982939041e-06, "loss": 1.0594, "step": 1243 }, { "epoch": 0.08987302906063178, "grad_norm": 10.118509355007921, "learning_rate": 4.997081871407084e-06, "loss": 1.0699, "step": 1244 }, { "epoch": 0.0899452742608413, "grad_norm": 9.172298389131146, "learning_rate": 4.9970677256920154e-06, "loss": 1.0776, "step": 1245 }, { "epoch": 0.09001751946105081, "grad_norm": 8.362438289488766, "learning_rate": 4.9970535457940285e-06, "loss": 1.0118, "step": 1246 }, { "epoch": 0.09008976466126031, "grad_norm": 8.090159205130359, "learning_rate": 4.997039331713317e-06, "loss": 0.979, "step": 1247 }, { "epoch": 0.09016200986146983, "grad_norm": 7.958686487906969, "learning_rate": 4.997025083450076e-06, "loss": 0.9991, "step": 1248 }, { "epoch": 0.09023425506167934, "grad_norm": 8.477703696222079, "learning_rate": 4.997010801004501e-06, "loss": 1.0602, "step": 1249 }, { "epoch": 0.09030650026188886, "grad_norm": 8.156121848550232, "learning_rate": 4.996996484376786e-06, "loss": 1.0094, "step": 1250 }, { "epoch": 0.09037874546209836, "grad_norm": 8.01067069799641, "learning_rate": 4.9969821335671284e-06, "loss": 1.0767, "step": 1251 }, { "epoch": 0.09045099066230787, "grad_norm": 9.088012143570214, "learning_rate": 4.996967748575724e-06, "loss": 1.0258, "step": 1252 }, { "epoch": 0.09052323586251738, "grad_norm": 11.172493358651913, "learning_rate": 4.99695332940277e-06, "loss": 1.0625, "step": 1253 }, { "epoch": 0.0905954810627269, "grad_norm": 8.543679597197587, "learning_rate": 4.996938876048464e-06, "loss": 0.9319, "step": 1254 }, { "epoch": 0.09066772626293641, "grad_norm": 8.44583283549682, "learning_rate": 4.996924388513003e-06, "loss": 0.9843, "step": 1255 }, { "epoch": 0.09073997146314591, "grad_norm": 9.912610632337973, "learning_rate": 4.996909866796587e-06, "loss": 0.9995, "step": 1256 }, { "epoch": 0.09081221666335543, "grad_norm": 10.974965825820584, "learning_rate": 4.996895310899412e-06, "loss": 0.9536, "step": 1257 }, { "epoch": 0.09088446186356494, "grad_norm": 7.7812061231977045, "learning_rate": 4.9968807208216795e-06, "loss": 1.0097, "step": 1258 }, { "epoch": 0.09095670706377446, "grad_norm": 9.898632116895977, "learning_rate": 4.996866096563589e-06, "loss": 1.0198, "step": 1259 }, { "epoch": 0.09102895226398396, "grad_norm": 9.135253997595575, "learning_rate": 4.99685143812534e-06, "loss": 1.0731, "step": 1260 }, { "epoch": 0.09110119746419347, "grad_norm": 9.64758306882217, "learning_rate": 4.996836745507134e-06, "loss": 0.9839, "step": 1261 }, { "epoch": 0.09117344266440298, "grad_norm": 7.748854275583489, "learning_rate": 4.996822018709171e-06, "loss": 0.9661, "step": 1262 }, { "epoch": 0.0912456878646125, "grad_norm": 7.718057331094688, "learning_rate": 4.996807257731653e-06, "loss": 1.1335, "step": 1263 }, { "epoch": 0.09131793306482201, "grad_norm": 8.27364406021653, "learning_rate": 4.996792462574783e-06, "loss": 0.9964, "step": 1264 }, { "epoch": 0.09139017826503151, "grad_norm": 7.711454457246791, "learning_rate": 4.996777633238763e-06, "loss": 1.0548, "step": 1265 }, { "epoch": 0.09146242346524103, "grad_norm": 8.45389755018728, "learning_rate": 4.996762769723795e-06, "loss": 1.1132, "step": 1266 }, { "epoch": 0.09153466866545054, "grad_norm": 7.993358955034686, "learning_rate": 4.996747872030084e-06, "loss": 1.0494, "step": 1267 }, { "epoch": 0.09160691386566006, "grad_norm": 9.106131916943085, "learning_rate": 4.996732940157833e-06, "loss": 1.1574, "step": 1268 }, { "epoch": 0.09167915906586956, "grad_norm": 7.888912203878178, "learning_rate": 4.996717974107246e-06, "loss": 0.9449, "step": 1269 }, { "epoch": 0.09175140426607907, "grad_norm": 7.5295696995804375, "learning_rate": 4.9967029738785295e-06, "loss": 0.9847, "step": 1270 }, { "epoch": 0.09182364946628858, "grad_norm": 8.033991126938988, "learning_rate": 4.9966879394718875e-06, "loss": 1.0168, "step": 1271 }, { "epoch": 0.0918958946664981, "grad_norm": 7.349434458455348, "learning_rate": 4.996672870887526e-06, "loss": 1.0579, "step": 1272 }, { "epoch": 0.0919681398667076, "grad_norm": 9.189182672361843, "learning_rate": 4.9966577681256515e-06, "loss": 0.9804, "step": 1273 }, { "epoch": 0.09204038506691711, "grad_norm": 8.132016013771873, "learning_rate": 4.996642631186471e-06, "loss": 1.0015, "step": 1274 }, { "epoch": 0.09211263026712663, "grad_norm": 8.625386989591531, "learning_rate": 4.996627460070191e-06, "loss": 1.0529, "step": 1275 }, { "epoch": 0.09218487546733614, "grad_norm": 8.25005872300952, "learning_rate": 4.996612254777019e-06, "loss": 1.0277, "step": 1276 }, { "epoch": 0.09225712066754566, "grad_norm": 8.5577829552336, "learning_rate": 4.996597015307165e-06, "loss": 1.067, "step": 1277 }, { "epoch": 0.09232936586775516, "grad_norm": 8.436781110867777, "learning_rate": 4.996581741660836e-06, "loss": 1.0608, "step": 1278 }, { "epoch": 0.09240161106796467, "grad_norm": 7.437232261134157, "learning_rate": 4.996566433838241e-06, "loss": 0.9662, "step": 1279 }, { "epoch": 0.09247385626817418, "grad_norm": 8.37979535274034, "learning_rate": 4.9965510918395895e-06, "loss": 1.0893, "step": 1280 }, { "epoch": 0.0925461014683837, "grad_norm": 8.732865040966251, "learning_rate": 4.996535715665093e-06, "loss": 1.0636, "step": 1281 }, { "epoch": 0.0926183466685932, "grad_norm": 7.034580916750934, "learning_rate": 4.996520305314961e-06, "loss": 0.9279, "step": 1282 }, { "epoch": 0.09269059186880271, "grad_norm": 9.531770285492126, "learning_rate": 4.996504860789404e-06, "loss": 1.1575, "step": 1283 }, { "epoch": 0.09276283706901223, "grad_norm": 6.979645153742987, "learning_rate": 4.996489382088634e-06, "loss": 0.9101, "step": 1284 }, { "epoch": 0.09283508226922174, "grad_norm": 8.894210441074417, "learning_rate": 4.996473869212863e-06, "loss": 1.1211, "step": 1285 }, { "epoch": 0.09290732746943126, "grad_norm": 10.109754976799287, "learning_rate": 4.996458322162302e-06, "loss": 1.0854, "step": 1286 }, { "epoch": 0.09297957266964076, "grad_norm": 6.680004732507183, "learning_rate": 4.996442740937166e-06, "loss": 1.0216, "step": 1287 }, { "epoch": 0.09305181786985027, "grad_norm": 8.298889030651416, "learning_rate": 4.996427125537667e-06, "loss": 0.9832, "step": 1288 }, { "epoch": 0.09312406307005978, "grad_norm": 12.369646860446277, "learning_rate": 4.9964114759640196e-06, "loss": 1.1166, "step": 1289 }, { "epoch": 0.0931963082702693, "grad_norm": 8.553765289903952, "learning_rate": 4.9963957922164365e-06, "loss": 1.0469, "step": 1290 }, { "epoch": 0.0932685534704788, "grad_norm": 7.415385217724437, "learning_rate": 4.996380074295134e-06, "loss": 1.0494, "step": 1291 }, { "epoch": 0.09334079867068831, "grad_norm": 7.536417980564985, "learning_rate": 4.996364322200326e-06, "loss": 1.0358, "step": 1292 }, { "epoch": 0.09341304387089783, "grad_norm": 9.503062908919361, "learning_rate": 4.9963485359322295e-06, "loss": 1.034, "step": 1293 }, { "epoch": 0.09348528907110734, "grad_norm": 6.798321168886627, "learning_rate": 4.996332715491059e-06, "loss": 1.0092, "step": 1294 }, { "epoch": 0.09355753427131686, "grad_norm": 7.650129090260912, "learning_rate": 4.996316860877032e-06, "loss": 0.9791, "step": 1295 }, { "epoch": 0.09362977947152636, "grad_norm": 9.530319018313097, "learning_rate": 4.996300972090366e-06, "loss": 1.0153, "step": 1296 }, { "epoch": 0.09370202467173587, "grad_norm": 8.788728726301189, "learning_rate": 4.996285049131278e-06, "loss": 0.9792, "step": 1297 }, { "epoch": 0.09377426987194538, "grad_norm": 7.4643534085586625, "learning_rate": 4.996269091999985e-06, "loss": 1.0143, "step": 1298 }, { "epoch": 0.0938465150721549, "grad_norm": 8.782940576254381, "learning_rate": 4.996253100696707e-06, "loss": 1.1465, "step": 1299 }, { "epoch": 0.0939187602723644, "grad_norm": 8.896562628291957, "learning_rate": 4.996237075221662e-06, "loss": 1.0202, "step": 1300 }, { "epoch": 0.09399100547257391, "grad_norm": 8.679668162107172, "learning_rate": 4.99622101557507e-06, "loss": 1.0216, "step": 1301 }, { "epoch": 0.09406325067278343, "grad_norm": 8.873943857025282, "learning_rate": 4.996204921757151e-06, "loss": 0.9941, "step": 1302 }, { "epoch": 0.09413549587299294, "grad_norm": 8.022917822599432, "learning_rate": 4.996188793768123e-06, "loss": 0.9741, "step": 1303 }, { "epoch": 0.09420774107320246, "grad_norm": 8.346704767077245, "learning_rate": 4.99617263160821e-06, "loss": 1.024, "step": 1304 }, { "epoch": 0.09427998627341196, "grad_norm": 9.57734674524792, "learning_rate": 4.996156435277631e-06, "loss": 0.9899, "step": 1305 }, { "epoch": 0.09435223147362147, "grad_norm": 11.054190235613675, "learning_rate": 4.99614020477661e-06, "loss": 1.1201, "step": 1306 }, { "epoch": 0.09442447667383098, "grad_norm": 7.501479702257993, "learning_rate": 4.996123940105366e-06, "loss": 0.9674, "step": 1307 }, { "epoch": 0.0944967218740405, "grad_norm": 6.358825809843484, "learning_rate": 4.996107641264125e-06, "loss": 0.9878, "step": 1308 }, { "epoch": 0.09456896707425, "grad_norm": 8.310144535281298, "learning_rate": 4.996091308253107e-06, "loss": 1.0772, "step": 1309 }, { "epoch": 0.09464121227445951, "grad_norm": 9.475756177696857, "learning_rate": 4.996074941072538e-06, "loss": 0.952, "step": 1310 }, { "epoch": 0.09471345747466903, "grad_norm": 8.688062032179559, "learning_rate": 4.996058539722641e-06, "loss": 0.9904, "step": 1311 }, { "epoch": 0.09478570267487854, "grad_norm": 8.999182346077294, "learning_rate": 4.99604210420364e-06, "loss": 1.0451, "step": 1312 }, { "epoch": 0.09485794787508806, "grad_norm": 7.950583660845014, "learning_rate": 4.9960256345157615e-06, "loss": 1.0908, "step": 1313 }, { "epoch": 0.09493019307529756, "grad_norm": 7.500309492719102, "learning_rate": 4.99600913065923e-06, "loss": 0.9923, "step": 1314 }, { "epoch": 0.09500243827550707, "grad_norm": 11.469660003066242, "learning_rate": 4.995992592634271e-06, "loss": 0.9773, "step": 1315 }, { "epoch": 0.09507468347571658, "grad_norm": 7.875078957782493, "learning_rate": 4.995976020441112e-06, "loss": 1.0759, "step": 1316 }, { "epoch": 0.0951469286759261, "grad_norm": 9.680657314335512, "learning_rate": 4.995959414079979e-06, "loss": 1.0787, "step": 1317 }, { "epoch": 0.0952191738761356, "grad_norm": 8.795564868997676, "learning_rate": 4.995942773551099e-06, "loss": 1.0619, "step": 1318 }, { "epoch": 0.09529141907634511, "grad_norm": 8.111761488017049, "learning_rate": 4.9959260988547015e-06, "loss": 0.983, "step": 1319 }, { "epoch": 0.09536366427655463, "grad_norm": 7.870649559039023, "learning_rate": 4.995909389991012e-06, "loss": 0.9242, "step": 1320 }, { "epoch": 0.09543590947676414, "grad_norm": 8.528398980603761, "learning_rate": 4.995892646960263e-06, "loss": 1.1365, "step": 1321 }, { "epoch": 0.09550815467697364, "grad_norm": 7.686129067847314, "learning_rate": 4.995875869762681e-06, "loss": 1.0881, "step": 1322 }, { "epoch": 0.09558039987718316, "grad_norm": 7.619341642519364, "learning_rate": 4.995859058398495e-06, "loss": 0.9283, "step": 1323 }, { "epoch": 0.09565264507739267, "grad_norm": 8.021740935527571, "learning_rate": 4.995842212867938e-06, "loss": 0.9556, "step": 1324 }, { "epoch": 0.09572489027760218, "grad_norm": 7.488387718498461, "learning_rate": 4.995825333171238e-06, "loss": 1.0256, "step": 1325 }, { "epoch": 0.0957971354778117, "grad_norm": 8.910053498421126, "learning_rate": 4.995808419308627e-06, "loss": 1.0858, "step": 1326 }, { "epoch": 0.0958693806780212, "grad_norm": 7.901927625907373, "learning_rate": 4.995791471280338e-06, "loss": 1.0606, "step": 1327 }, { "epoch": 0.09594162587823071, "grad_norm": 7.740923827482147, "learning_rate": 4.9957744890866e-06, "loss": 0.9872, "step": 1328 }, { "epoch": 0.09601387107844023, "grad_norm": 7.019898062723928, "learning_rate": 4.995757472727648e-06, "loss": 1.0145, "step": 1329 }, { "epoch": 0.09608611627864974, "grad_norm": 8.088834116590762, "learning_rate": 4.9957404222037146e-06, "loss": 0.9568, "step": 1330 }, { "epoch": 0.09615836147885924, "grad_norm": 7.746081192075676, "learning_rate": 4.995723337515031e-06, "loss": 1.0068, "step": 1331 }, { "epoch": 0.09623060667906876, "grad_norm": 7.648225027373079, "learning_rate": 4.995706218661833e-06, "loss": 1.0134, "step": 1332 }, { "epoch": 0.09630285187927827, "grad_norm": 8.003933893009854, "learning_rate": 4.995689065644356e-06, "loss": 0.9283, "step": 1333 }, { "epoch": 0.09637509707948778, "grad_norm": 8.215415365128049, "learning_rate": 4.9956718784628325e-06, "loss": 1.0737, "step": 1334 }, { "epoch": 0.0964473422796973, "grad_norm": 7.450292020873347, "learning_rate": 4.995654657117499e-06, "loss": 1.0778, "step": 1335 }, { "epoch": 0.0965195874799068, "grad_norm": 7.755127717865784, "learning_rate": 4.99563740160859e-06, "loss": 0.9483, "step": 1336 }, { "epoch": 0.09659183268011631, "grad_norm": 7.956223399559078, "learning_rate": 4.995620111936345e-06, "loss": 0.8622, "step": 1337 }, { "epoch": 0.09666407788032583, "grad_norm": 8.317171777383928, "learning_rate": 4.9956027881009964e-06, "loss": 1.0225, "step": 1338 }, { "epoch": 0.09673632308053534, "grad_norm": 8.530764834962548, "learning_rate": 4.995585430102784e-06, "loss": 1.0961, "step": 1339 }, { "epoch": 0.09680856828074484, "grad_norm": 8.065054082340318, "learning_rate": 4.995568037941945e-06, "loss": 0.981, "step": 1340 }, { "epoch": 0.09688081348095436, "grad_norm": 8.544731024654398, "learning_rate": 4.995550611618717e-06, "loss": 1.0443, "step": 1341 }, { "epoch": 0.09695305868116387, "grad_norm": 8.084188461953303, "learning_rate": 4.995533151133339e-06, "loss": 1.0245, "step": 1342 }, { "epoch": 0.09702530388137338, "grad_norm": 9.618345660361115, "learning_rate": 4.995515656486049e-06, "loss": 1.0871, "step": 1343 }, { "epoch": 0.0970975490815829, "grad_norm": 8.163963441384682, "learning_rate": 4.995498127677087e-06, "loss": 1.034, "step": 1344 }, { "epoch": 0.0971697942817924, "grad_norm": 8.217036195483784, "learning_rate": 4.995480564706695e-06, "loss": 1.0156, "step": 1345 }, { "epoch": 0.09724203948200191, "grad_norm": 6.639671059699107, "learning_rate": 4.99546296757511e-06, "loss": 1.0039, "step": 1346 }, { "epoch": 0.09731428468221143, "grad_norm": 8.311192000885669, "learning_rate": 4.995445336282576e-06, "loss": 1.0209, "step": 1347 }, { "epoch": 0.09738652988242094, "grad_norm": 8.106137961831763, "learning_rate": 4.995427670829331e-06, "loss": 0.9845, "step": 1348 }, { "epoch": 0.09745877508263044, "grad_norm": 9.874286094645337, "learning_rate": 4.995409971215621e-06, "loss": 1.0154, "step": 1349 }, { "epoch": 0.09753102028283996, "grad_norm": 8.316492484287528, "learning_rate": 4.9953922374416855e-06, "loss": 1.0171, "step": 1350 }, { "epoch": 0.09760326548304947, "grad_norm": 10.393345068076089, "learning_rate": 4.995374469507767e-06, "loss": 1.0948, "step": 1351 }, { "epoch": 0.09767551068325898, "grad_norm": 8.017429914510137, "learning_rate": 4.9953566674141094e-06, "loss": 1.0048, "step": 1352 }, { "epoch": 0.0977477558834685, "grad_norm": 8.378591649833997, "learning_rate": 4.995338831160958e-06, "loss": 1.0452, "step": 1353 }, { "epoch": 0.097820001083678, "grad_norm": 8.884620717314125, "learning_rate": 4.995320960748554e-06, "loss": 1.0543, "step": 1354 }, { "epoch": 0.09789224628388751, "grad_norm": 8.77345924001091, "learning_rate": 4.995303056177145e-06, "loss": 0.8894, "step": 1355 }, { "epoch": 0.09796449148409703, "grad_norm": 8.825762785454705, "learning_rate": 4.995285117446973e-06, "loss": 1.0676, "step": 1356 }, { "epoch": 0.09803673668430654, "grad_norm": 7.907008681252373, "learning_rate": 4.995267144558286e-06, "loss": 1.0341, "step": 1357 }, { "epoch": 0.09810898188451604, "grad_norm": 9.475258582620294, "learning_rate": 4.995249137511329e-06, "loss": 0.9321, "step": 1358 }, { "epoch": 0.09818122708472556, "grad_norm": 6.341514456581121, "learning_rate": 4.995231096306349e-06, "loss": 0.9002, "step": 1359 }, { "epoch": 0.09825347228493507, "grad_norm": 8.712643470841671, "learning_rate": 4.995213020943593e-06, "loss": 1.0457, "step": 1360 }, { "epoch": 0.09832571748514458, "grad_norm": 10.168878867019185, "learning_rate": 4.995194911423308e-06, "loss": 0.9368, "step": 1361 }, { "epoch": 0.0983979626853541, "grad_norm": 8.166928137920705, "learning_rate": 4.9951767677457415e-06, "loss": 0.9875, "step": 1362 }, { "epoch": 0.0984702078855636, "grad_norm": 7.388200745371383, "learning_rate": 4.995158589911143e-06, "loss": 0.973, "step": 1363 }, { "epoch": 0.09854245308577311, "grad_norm": 8.513391604342344, "learning_rate": 4.99514037791976e-06, "loss": 1.0028, "step": 1364 }, { "epoch": 0.09861469828598263, "grad_norm": 9.929158866730921, "learning_rate": 4.995122131771843e-06, "loss": 1.0452, "step": 1365 }, { "epoch": 0.09868694348619214, "grad_norm": 9.362396720449002, "learning_rate": 4.995103851467642e-06, "loss": 1.1052, "step": 1366 }, { "epoch": 0.09875918868640164, "grad_norm": 7.166559913490736, "learning_rate": 4.995085537007407e-06, "loss": 1.0391, "step": 1367 }, { "epoch": 0.09883143388661116, "grad_norm": 6.747836578669614, "learning_rate": 4.995067188391387e-06, "loss": 1.0132, "step": 1368 }, { "epoch": 0.09890367908682067, "grad_norm": 9.055316472034061, "learning_rate": 4.9950488056198345e-06, "loss": 0.9714, "step": 1369 }, { "epoch": 0.09897592428703018, "grad_norm": 11.124379001387174, "learning_rate": 4.995030388693002e-06, "loss": 1.1118, "step": 1370 }, { "epoch": 0.0990481694872397, "grad_norm": 7.101550145238153, "learning_rate": 4.99501193761114e-06, "loss": 0.9405, "step": 1371 }, { "epoch": 0.0991204146874492, "grad_norm": 7.16310424386294, "learning_rate": 4.994993452374503e-06, "loss": 0.9823, "step": 1372 }, { "epoch": 0.09919265988765871, "grad_norm": 11.736649090978466, "learning_rate": 4.9949749329833415e-06, "loss": 1.0722, "step": 1373 }, { "epoch": 0.09926490508786823, "grad_norm": 10.330164720933597, "learning_rate": 4.994956379437911e-06, "loss": 0.9035, "step": 1374 }, { "epoch": 0.09933715028807774, "grad_norm": 8.050916764082679, "learning_rate": 4.994937791738464e-06, "loss": 1.0588, "step": 1375 }, { "epoch": 0.09940939548828724, "grad_norm": 9.0004446131768, "learning_rate": 4.994919169885258e-06, "loss": 1.0447, "step": 1376 }, { "epoch": 0.09948164068849676, "grad_norm": 8.242970101323223, "learning_rate": 4.994900513878543e-06, "loss": 1.0005, "step": 1377 }, { "epoch": 0.09955388588870627, "grad_norm": 10.394462622495173, "learning_rate": 4.99488182371858e-06, "loss": 1.0059, "step": 1378 }, { "epoch": 0.09962613108891578, "grad_norm": 9.147388740190292, "learning_rate": 4.994863099405619e-06, "loss": 1.0263, "step": 1379 }, { "epoch": 0.09969837628912528, "grad_norm": 8.39786289822144, "learning_rate": 4.9948443409399215e-06, "loss": 0.9454, "step": 1380 }, { "epoch": 0.0997706214893348, "grad_norm": 8.673594811163488, "learning_rate": 4.994825548321741e-06, "loss": 1.096, "step": 1381 }, { "epoch": 0.09984286668954431, "grad_norm": 8.880866462431195, "learning_rate": 4.9948067215513364e-06, "loss": 0.9099, "step": 1382 }, { "epoch": 0.09991511188975383, "grad_norm": 9.031859486219247, "learning_rate": 4.994787860628965e-06, "loss": 0.9634, "step": 1383 }, { "epoch": 0.09998735708996334, "grad_norm": 7.032215916726597, "learning_rate": 4.994768965554884e-06, "loss": 0.9658, "step": 1384 }, { "epoch": 0.10005960229017284, "grad_norm": 7.552850793888761, "learning_rate": 4.994750036329353e-06, "loss": 0.9624, "step": 1385 }, { "epoch": 0.10013184749038236, "grad_norm": 7.2875425465095915, "learning_rate": 4.994731072952632e-06, "loss": 1.0368, "step": 1386 }, { "epoch": 0.10020409269059187, "grad_norm": 8.926347082721517, "learning_rate": 4.994712075424979e-06, "loss": 1.0837, "step": 1387 }, { "epoch": 0.10027633789080138, "grad_norm": 11.117663075079069, "learning_rate": 4.9946930437466545e-06, "loss": 0.9865, "step": 1388 }, { "epoch": 0.10034858309101088, "grad_norm": 6.480662117085728, "learning_rate": 4.99467397791792e-06, "loss": 0.9841, "step": 1389 }, { "epoch": 0.1004208282912204, "grad_norm": 8.701462318034134, "learning_rate": 4.9946548779390355e-06, "loss": 1.0506, "step": 1390 }, { "epoch": 0.10049307349142991, "grad_norm": 12.063378494767036, "learning_rate": 4.9946357438102626e-06, "loss": 0.988, "step": 1391 }, { "epoch": 0.10056531869163943, "grad_norm": 8.600180921204007, "learning_rate": 4.994616575531863e-06, "loss": 0.942, "step": 1392 }, { "epoch": 0.10063756389184894, "grad_norm": 8.340293368295763, "learning_rate": 4.9945973731041e-06, "loss": 0.9563, "step": 1393 }, { "epoch": 0.10070980909205844, "grad_norm": 8.133341102387934, "learning_rate": 4.994578136527235e-06, "loss": 1.0102, "step": 1394 }, { "epoch": 0.10078205429226796, "grad_norm": 8.282956044785204, "learning_rate": 4.9945588658015335e-06, "loss": 1.0685, "step": 1395 }, { "epoch": 0.10085429949247747, "grad_norm": 8.353652849512947, "learning_rate": 4.994539560927257e-06, "loss": 0.9452, "step": 1396 }, { "epoch": 0.10092654469268698, "grad_norm": 8.443610345347688, "learning_rate": 4.994520221904671e-06, "loss": 0.9165, "step": 1397 }, { "epoch": 0.10099878989289648, "grad_norm": 10.985953986684933, "learning_rate": 4.99450084873404e-06, "loss": 0.9819, "step": 1398 }, { "epoch": 0.101071035093106, "grad_norm": 9.649906580971878, "learning_rate": 4.99448144141563e-06, "loss": 1.0011, "step": 1399 }, { "epoch": 0.10114328029331551, "grad_norm": 7.9447865119804995, "learning_rate": 4.9944619999497045e-06, "loss": 0.9704, "step": 1400 }, { "epoch": 0.10121552549352503, "grad_norm": 9.706215383218614, "learning_rate": 4.994442524336533e-06, "loss": 1.0234, "step": 1401 }, { "epoch": 0.10128777069373454, "grad_norm": 8.968155791373155, "learning_rate": 4.994423014576379e-06, "loss": 0.9934, "step": 1402 }, { "epoch": 0.10136001589394404, "grad_norm": 7.360926503039838, "learning_rate": 4.99440347066951e-06, "loss": 1.0011, "step": 1403 }, { "epoch": 0.10143226109415356, "grad_norm": 7.540922040341344, "learning_rate": 4.994383892616195e-06, "loss": 0.9943, "step": 1404 }, { "epoch": 0.10150450629436307, "grad_norm": 15.34381215380105, "learning_rate": 4.994364280416701e-06, "loss": 1.029, "step": 1405 }, { "epoch": 0.10157675149457258, "grad_norm": 8.401907894901852, "learning_rate": 4.994344634071297e-06, "loss": 1.0124, "step": 1406 }, { "epoch": 0.10164899669478208, "grad_norm": 8.051933047005361, "learning_rate": 4.994324953580251e-06, "loss": 1.0348, "step": 1407 }, { "epoch": 0.1017212418949916, "grad_norm": 7.854882353563934, "learning_rate": 4.994305238943835e-06, "loss": 0.97, "step": 1408 }, { "epoch": 0.10179348709520111, "grad_norm": 9.089579776329261, "learning_rate": 4.994285490162315e-06, "loss": 0.9641, "step": 1409 }, { "epoch": 0.10186573229541063, "grad_norm": 6.553201536532962, "learning_rate": 4.994265707235965e-06, "loss": 0.9502, "step": 1410 }, { "epoch": 0.10193797749562014, "grad_norm": 8.825065582291062, "learning_rate": 4.994245890165053e-06, "loss": 1.0312, "step": 1411 }, { "epoch": 0.10201022269582964, "grad_norm": 9.162462646075161, "learning_rate": 4.994226038949851e-06, "loss": 0.9768, "step": 1412 }, { "epoch": 0.10208246789603916, "grad_norm": 8.578237953206704, "learning_rate": 4.994206153590632e-06, "loss": 1.0096, "step": 1413 }, { "epoch": 0.10215471309624867, "grad_norm": 9.680093013221976, "learning_rate": 4.994186234087667e-06, "loss": 1.0235, "step": 1414 }, { "epoch": 0.10222695829645818, "grad_norm": 11.306352662758577, "learning_rate": 4.99416628044123e-06, "loss": 0.9626, "step": 1415 }, { "epoch": 0.10229920349666768, "grad_norm": 9.732876731230702, "learning_rate": 4.994146292651592e-06, "loss": 0.9991, "step": 1416 }, { "epoch": 0.1023714486968772, "grad_norm": 7.584511997162864, "learning_rate": 4.9941262707190285e-06, "loss": 1.0046, "step": 1417 }, { "epoch": 0.10244369389708671, "grad_norm": 7.520035240821239, "learning_rate": 4.994106214643812e-06, "loss": 1.0152, "step": 1418 }, { "epoch": 0.10251593909729623, "grad_norm": 10.015300489575157, "learning_rate": 4.99408612442622e-06, "loss": 1.0038, "step": 1419 }, { "epoch": 0.10258818429750574, "grad_norm": 7.898913934150145, "learning_rate": 4.994066000066524e-06, "loss": 0.9398, "step": 1420 }, { "epoch": 0.10266042949771524, "grad_norm": 10.329855631053052, "learning_rate": 4.994045841565e-06, "loss": 1.022, "step": 1421 }, { "epoch": 0.10273267469792476, "grad_norm": 6.554213169108347, "learning_rate": 4.994025648921927e-06, "loss": 0.9099, "step": 1422 }, { "epoch": 0.10280491989813427, "grad_norm": 7.41804974407797, "learning_rate": 4.994005422137579e-06, "loss": 1.0025, "step": 1423 }, { "epoch": 0.10287716509834378, "grad_norm": 9.413528819805492, "learning_rate": 4.993985161212232e-06, "loss": 0.9881, "step": 1424 }, { "epoch": 0.10294941029855328, "grad_norm": 8.292607188597263, "learning_rate": 4.993964866146165e-06, "loss": 0.9254, "step": 1425 }, { "epoch": 0.1030216554987628, "grad_norm": 7.244104816742181, "learning_rate": 4.993944536939656e-06, "loss": 0.8753, "step": 1426 }, { "epoch": 0.10309390069897231, "grad_norm": 7.265388731037448, "learning_rate": 4.9939241735929824e-06, "loss": 0.9981, "step": 1427 }, { "epoch": 0.10316614589918183, "grad_norm": 7.392203487039493, "learning_rate": 4.993903776106424e-06, "loss": 1.0287, "step": 1428 }, { "epoch": 0.10323839109939133, "grad_norm": 7.400501708472837, "learning_rate": 4.993883344480258e-06, "loss": 0.9884, "step": 1429 }, { "epoch": 0.10331063629960084, "grad_norm": 6.145909954390844, "learning_rate": 4.993862878714766e-06, "loss": 0.9843, "step": 1430 }, { "epoch": 0.10338288149981036, "grad_norm": 6.527546247700521, "learning_rate": 4.993842378810227e-06, "loss": 0.9275, "step": 1431 }, { "epoch": 0.10345512670001987, "grad_norm": 7.449947167535638, "learning_rate": 4.9938218447669235e-06, "loss": 1.0013, "step": 1432 }, { "epoch": 0.10352737190022938, "grad_norm": 9.287331383716047, "learning_rate": 4.993801276585135e-06, "loss": 1.0307, "step": 1433 }, { "epoch": 0.10359961710043888, "grad_norm": 7.1047511899739, "learning_rate": 4.993780674265142e-06, "loss": 1.0046, "step": 1434 }, { "epoch": 0.1036718623006484, "grad_norm": 6.718493363557925, "learning_rate": 4.993760037807229e-06, "loss": 0.9829, "step": 1435 }, { "epoch": 0.10374410750085791, "grad_norm": 7.66339301277585, "learning_rate": 4.993739367211677e-06, "loss": 1.0881, "step": 1436 }, { "epoch": 0.10381635270106743, "grad_norm": 9.527419824675777, "learning_rate": 4.9937186624787696e-06, "loss": 0.9727, "step": 1437 }, { "epoch": 0.10388859790127693, "grad_norm": 7.945239041419268, "learning_rate": 4.993697923608789e-06, "loss": 0.9445, "step": 1438 }, { "epoch": 0.10396084310148644, "grad_norm": 9.503988532961726, "learning_rate": 4.9936771506020215e-06, "loss": 0.9497, "step": 1439 }, { "epoch": 0.10403308830169596, "grad_norm": 14.7170367691546, "learning_rate": 4.9936563434587495e-06, "loss": 1.0142, "step": 1440 }, { "epoch": 0.10410533350190547, "grad_norm": 8.376181903397788, "learning_rate": 4.993635502179259e-06, "loss": 0.9569, "step": 1441 }, { "epoch": 0.10417757870211498, "grad_norm": 8.344628677106552, "learning_rate": 4.993614626763833e-06, "loss": 1.0993, "step": 1442 }, { "epoch": 0.10424982390232448, "grad_norm": 7.8335821504507965, "learning_rate": 4.993593717212759e-06, "loss": 1.039, "step": 1443 }, { "epoch": 0.104322069102534, "grad_norm": 11.455831210592532, "learning_rate": 4.993572773526324e-06, "loss": 0.9481, "step": 1444 }, { "epoch": 0.10439431430274351, "grad_norm": 10.58412151455487, "learning_rate": 4.993551795704814e-06, "loss": 1.0068, "step": 1445 }, { "epoch": 0.10446655950295303, "grad_norm": 7.081812556951566, "learning_rate": 4.9935307837485155e-06, "loss": 0.924, "step": 1446 }, { "epoch": 0.10453880470316253, "grad_norm": 6.487616553757165, "learning_rate": 4.993509737657718e-06, "loss": 0.9241, "step": 1447 }, { "epoch": 0.10461104990337204, "grad_norm": 9.430704461783744, "learning_rate": 4.993488657432707e-06, "loss": 1.065, "step": 1448 }, { "epoch": 0.10468329510358156, "grad_norm": 7.699224200118568, "learning_rate": 4.9934675430737726e-06, "loss": 1.0432, "step": 1449 }, { "epoch": 0.10475554030379107, "grad_norm": 6.956665783406695, "learning_rate": 4.993446394581203e-06, "loss": 0.9319, "step": 1450 }, { "epoch": 0.10482778550400058, "grad_norm": 9.905782116932743, "learning_rate": 4.993425211955289e-06, "loss": 1.0024, "step": 1451 }, { "epoch": 0.10490003070421008, "grad_norm": 8.032682892147603, "learning_rate": 4.99340399519632e-06, "loss": 1.0414, "step": 1452 }, { "epoch": 0.1049722759044196, "grad_norm": 8.574632944864764, "learning_rate": 4.993382744304586e-06, "loss": 1.003, "step": 1453 }, { "epoch": 0.10504452110462911, "grad_norm": 7.791184544734351, "learning_rate": 4.9933614592803785e-06, "loss": 0.9943, "step": 1454 }, { "epoch": 0.10511676630483863, "grad_norm": 9.42273815749783, "learning_rate": 4.993340140123988e-06, "loss": 0.9563, "step": 1455 }, { "epoch": 0.10518901150504813, "grad_norm": 8.074581586549954, "learning_rate": 4.993318786835708e-06, "loss": 1.1059, "step": 1456 }, { "epoch": 0.10526125670525764, "grad_norm": 7.6041906347724435, "learning_rate": 4.9932973994158285e-06, "loss": 0.8899, "step": 1457 }, { "epoch": 0.10533350190546716, "grad_norm": 8.143466094916986, "learning_rate": 4.993275977864644e-06, "loss": 0.9596, "step": 1458 }, { "epoch": 0.10540574710567667, "grad_norm": 6.933076290109782, "learning_rate": 4.993254522182448e-06, "loss": 1.0132, "step": 1459 }, { "epoch": 0.10547799230588618, "grad_norm": 8.830212695626306, "learning_rate": 4.993233032369533e-06, "loss": 1.0578, "step": 1460 }, { "epoch": 0.10555023750609568, "grad_norm": 8.167438652244876, "learning_rate": 4.993211508426194e-06, "loss": 0.9878, "step": 1461 }, { "epoch": 0.1056224827063052, "grad_norm": 8.374200583494382, "learning_rate": 4.993189950352724e-06, "loss": 1.036, "step": 1462 }, { "epoch": 0.10569472790651471, "grad_norm": 10.479860155177182, "learning_rate": 4.9931683581494205e-06, "loss": 1.0579, "step": 1463 }, { "epoch": 0.10576697310672423, "grad_norm": 7.9204208574316235, "learning_rate": 4.993146731816577e-06, "loss": 0.9765, "step": 1464 }, { "epoch": 0.10583921830693373, "grad_norm": 8.021256340729387, "learning_rate": 4.9931250713544914e-06, "loss": 1.0277, "step": 1465 }, { "epoch": 0.10591146350714324, "grad_norm": 9.867223454135802, "learning_rate": 4.99310337676346e-06, "loss": 1.0412, "step": 1466 }, { "epoch": 0.10598370870735276, "grad_norm": 7.379761791158186, "learning_rate": 4.993081648043778e-06, "loss": 0.9779, "step": 1467 }, { "epoch": 0.10605595390756227, "grad_norm": 8.5696319059466, "learning_rate": 4.993059885195745e-06, "loss": 1.0107, "step": 1468 }, { "epoch": 0.10612819910777178, "grad_norm": 7.747172178144261, "learning_rate": 4.993038088219656e-06, "loss": 0.9793, "step": 1469 }, { "epoch": 0.10620044430798128, "grad_norm": 8.144520477360981, "learning_rate": 4.9930162571158134e-06, "loss": 0.9422, "step": 1470 }, { "epoch": 0.1062726895081908, "grad_norm": 10.748615264420765, "learning_rate": 4.9929943918845124e-06, "loss": 1.0039, "step": 1471 }, { "epoch": 0.10634493470840031, "grad_norm": 9.983402592880994, "learning_rate": 4.992972492526055e-06, "loss": 1.1095, "step": 1472 }, { "epoch": 0.10641717990860983, "grad_norm": 9.782962344499069, "learning_rate": 4.992950559040739e-06, "loss": 1.0342, "step": 1473 }, { "epoch": 0.10648942510881933, "grad_norm": 7.0731089083165015, "learning_rate": 4.9929285914288665e-06, "loss": 1.0367, "step": 1474 }, { "epoch": 0.10656167030902884, "grad_norm": 8.764652300225473, "learning_rate": 4.992906589690736e-06, "loss": 0.9446, "step": 1475 }, { "epoch": 0.10663391550923836, "grad_norm": 8.445559121341246, "learning_rate": 4.992884553826651e-06, "loss": 0.9871, "step": 1476 }, { "epoch": 0.10670616070944787, "grad_norm": 8.442009519020829, "learning_rate": 4.992862483836911e-06, "loss": 0.9809, "step": 1477 }, { "epoch": 0.10677840590965738, "grad_norm": 10.74935023983372, "learning_rate": 4.99284037972182e-06, "loss": 1.011, "step": 1478 }, { "epoch": 0.10685065110986688, "grad_norm": 9.651627999185836, "learning_rate": 4.992818241481679e-06, "loss": 0.9567, "step": 1479 }, { "epoch": 0.1069228963100764, "grad_norm": 7.363094483291182, "learning_rate": 4.992796069116793e-06, "loss": 1.0151, "step": 1480 }, { "epoch": 0.10699514151028591, "grad_norm": 8.821183924374138, "learning_rate": 4.9927738626274635e-06, "loss": 0.9477, "step": 1481 }, { "epoch": 0.10706738671049543, "grad_norm": 7.48439946499133, "learning_rate": 4.992751622013996e-06, "loss": 0.8696, "step": 1482 }, { "epoch": 0.10713963191070493, "grad_norm": 13.164987448959076, "learning_rate": 4.992729347276694e-06, "loss": 1.0181, "step": 1483 }, { "epoch": 0.10721187711091444, "grad_norm": 8.296832700574848, "learning_rate": 4.992707038415862e-06, "loss": 0.956, "step": 1484 }, { "epoch": 0.10728412231112396, "grad_norm": 7.988146344750231, "learning_rate": 4.992684695431806e-06, "loss": 1.0046, "step": 1485 }, { "epoch": 0.10735636751133347, "grad_norm": 8.364744825375867, "learning_rate": 4.992662318324833e-06, "loss": 0.8697, "step": 1486 }, { "epoch": 0.10742861271154297, "grad_norm": 7.600957769978471, "learning_rate": 4.992639907095248e-06, "loss": 1.0109, "step": 1487 }, { "epoch": 0.10750085791175248, "grad_norm": 8.250783709668463, "learning_rate": 4.992617461743358e-06, "loss": 0.9849, "step": 1488 }, { "epoch": 0.107573103111962, "grad_norm": 7.3883476377773665, "learning_rate": 4.992594982269471e-06, "loss": 0.9599, "step": 1489 }, { "epoch": 0.10764534831217151, "grad_norm": 7.198147524836278, "learning_rate": 4.992572468673893e-06, "loss": 0.9363, "step": 1490 }, { "epoch": 0.10771759351238103, "grad_norm": 9.380115791696017, "learning_rate": 4.992549920956934e-06, "loss": 0.9126, "step": 1491 }, { "epoch": 0.10778983871259053, "grad_norm": 7.710786858568422, "learning_rate": 4.992527339118901e-06, "loss": 0.9649, "step": 1492 }, { "epoch": 0.10786208391280004, "grad_norm": 6.914757093464751, "learning_rate": 4.992504723160105e-06, "loss": 0.9131, "step": 1493 }, { "epoch": 0.10793432911300956, "grad_norm": 7.364395275889326, "learning_rate": 4.992482073080854e-06, "loss": 0.9457, "step": 1494 }, { "epoch": 0.10800657431321907, "grad_norm": 8.331228626220941, "learning_rate": 4.992459388881459e-06, "loss": 0.9429, "step": 1495 }, { "epoch": 0.10807881951342857, "grad_norm": 7.948311478860792, "learning_rate": 4.99243667056223e-06, "loss": 1.0276, "step": 1496 }, { "epoch": 0.10815106471363808, "grad_norm": 9.320333373693098, "learning_rate": 4.9924139181234785e-06, "loss": 1.0976, "step": 1497 }, { "epoch": 0.1082233099138476, "grad_norm": 8.28881674500206, "learning_rate": 4.9923911315655164e-06, "loss": 0.9506, "step": 1498 }, { "epoch": 0.10829555511405711, "grad_norm": 8.472186190280135, "learning_rate": 4.992368310888653e-06, "loss": 0.9617, "step": 1499 }, { "epoch": 0.10836780031426663, "grad_norm": 7.055075194392697, "learning_rate": 4.9923454560932035e-06, "loss": 0.9138, "step": 1500 }, { "epoch": 0.10844004551447613, "grad_norm": 7.691898731217709, "learning_rate": 4.99232256717948e-06, "loss": 0.9997, "step": 1501 }, { "epoch": 0.10851229071468564, "grad_norm": 9.093454257835988, "learning_rate": 4.992299644147797e-06, "loss": 1.0152, "step": 1502 }, { "epoch": 0.10858453591489516, "grad_norm": 8.698946869348163, "learning_rate": 4.9922766869984655e-06, "loss": 1.012, "step": 1503 }, { "epoch": 0.10865678111510467, "grad_norm": 12.381943113210829, "learning_rate": 4.992253695731802e-06, "loss": 1.0386, "step": 1504 }, { "epoch": 0.10872902631531417, "grad_norm": 7.669770925351244, "learning_rate": 4.99223067034812e-06, "loss": 0.9479, "step": 1505 }, { "epoch": 0.10880127151552368, "grad_norm": 7.290797706778859, "learning_rate": 4.992207610847736e-06, "loss": 1.0017, "step": 1506 }, { "epoch": 0.1088735167157332, "grad_norm": 9.725887511092136, "learning_rate": 4.992184517230964e-06, "loss": 1.0379, "step": 1507 }, { "epoch": 0.10894576191594271, "grad_norm": 7.677201482794487, "learning_rate": 4.992161389498121e-06, "loss": 1.1857, "step": 1508 }, { "epoch": 0.10901800711615223, "grad_norm": 8.586014362855455, "learning_rate": 4.992138227649524e-06, "loss": 1.0316, "step": 1509 }, { "epoch": 0.10909025231636173, "grad_norm": 9.32899642273332, "learning_rate": 4.992115031685489e-06, "loss": 0.9922, "step": 1510 }, { "epoch": 0.10916249751657124, "grad_norm": 8.378824755576881, "learning_rate": 4.992091801606336e-06, "loss": 1.0567, "step": 1511 }, { "epoch": 0.10923474271678076, "grad_norm": 6.992545927440397, "learning_rate": 4.992068537412379e-06, "loss": 1.0289, "step": 1512 }, { "epoch": 0.10930698791699027, "grad_norm": 8.441987829196437, "learning_rate": 4.99204523910394e-06, "loss": 1.0336, "step": 1513 }, { "epoch": 0.10937923311719977, "grad_norm": 7.632226989841359, "learning_rate": 4.992021906681337e-06, "loss": 0.9982, "step": 1514 }, { "epoch": 0.10945147831740928, "grad_norm": 7.751524498417105, "learning_rate": 4.9919985401448875e-06, "loss": 0.9477, "step": 1515 }, { "epoch": 0.1095237235176188, "grad_norm": 7.380832499454907, "learning_rate": 4.991975139494915e-06, "loss": 0.9473, "step": 1516 }, { "epoch": 0.10959596871782831, "grad_norm": 10.987194931082877, "learning_rate": 4.991951704731736e-06, "loss": 1.0503, "step": 1517 }, { "epoch": 0.10966821391803783, "grad_norm": 7.687961967667933, "learning_rate": 4.991928235855673e-06, "loss": 0.8783, "step": 1518 }, { "epoch": 0.10974045911824733, "grad_norm": 7.138661187577499, "learning_rate": 4.9919047328670486e-06, "loss": 1.0082, "step": 1519 }, { "epoch": 0.10981270431845684, "grad_norm": 10.544927482229015, "learning_rate": 4.991881195766182e-06, "loss": 0.9466, "step": 1520 }, { "epoch": 0.10988494951866636, "grad_norm": 7.105324331098311, "learning_rate": 4.991857624553397e-06, "loss": 0.9767, "step": 1521 }, { "epoch": 0.10995719471887587, "grad_norm": 7.481165570106941, "learning_rate": 4.991834019229017e-06, "loss": 1.0615, "step": 1522 }, { "epoch": 0.11002943991908537, "grad_norm": 7.557588858427491, "learning_rate": 4.991810379793362e-06, "loss": 0.9785, "step": 1523 }, { "epoch": 0.11010168511929488, "grad_norm": 6.603136080485704, "learning_rate": 4.991786706246759e-06, "loss": 1.0123, "step": 1524 }, { "epoch": 0.1101739303195044, "grad_norm": 7.622462616525714, "learning_rate": 4.991762998589531e-06, "loss": 1.1021, "step": 1525 }, { "epoch": 0.11024617551971391, "grad_norm": 7.02023958889567, "learning_rate": 4.9917392568220015e-06, "loss": 0.9431, "step": 1526 }, { "epoch": 0.11031842071992343, "grad_norm": 8.790957261039875, "learning_rate": 4.991715480944497e-06, "loss": 1.016, "step": 1527 }, { "epoch": 0.11039066592013293, "grad_norm": 6.902865171572185, "learning_rate": 4.991691670957342e-06, "loss": 1.0098, "step": 1528 }, { "epoch": 0.11046291112034244, "grad_norm": 6.812149178807388, "learning_rate": 4.991667826860862e-06, "loss": 1.0107, "step": 1529 }, { "epoch": 0.11053515632055196, "grad_norm": 7.115902898871099, "learning_rate": 4.9916439486553845e-06, "loss": 0.986, "step": 1530 }, { "epoch": 0.11060740152076147, "grad_norm": 7.999283520085833, "learning_rate": 4.9916200363412374e-06, "loss": 1.0289, "step": 1531 }, { "epoch": 0.11067964672097097, "grad_norm": 6.163159825283003, "learning_rate": 4.991596089918745e-06, "loss": 0.9766, "step": 1532 }, { "epoch": 0.11075189192118048, "grad_norm": 9.967665849525302, "learning_rate": 4.991572109388237e-06, "loss": 1.0656, "step": 1533 }, { "epoch": 0.11082413712139, "grad_norm": 6.507305148384349, "learning_rate": 4.9915480947500415e-06, "loss": 0.8628, "step": 1534 }, { "epoch": 0.11089638232159951, "grad_norm": 7.55850341112035, "learning_rate": 4.9915240460044865e-06, "loss": 1.1237, "step": 1535 }, { "epoch": 0.11096862752180901, "grad_norm": 7.738289630390464, "learning_rate": 4.991499963151903e-06, "loss": 1.0563, "step": 1536 }, { "epoch": 0.11104087272201853, "grad_norm": 7.868684446320839, "learning_rate": 4.99147584619262e-06, "loss": 1.049, "step": 1537 }, { "epoch": 0.11111311792222804, "grad_norm": 7.669400874764079, "learning_rate": 4.991451695126965e-06, "loss": 0.95, "step": 1538 }, { "epoch": 0.11118536312243756, "grad_norm": 8.309880582499977, "learning_rate": 4.991427509955273e-06, "loss": 0.9949, "step": 1539 }, { "epoch": 0.11125760832264707, "grad_norm": 6.9668237050927155, "learning_rate": 4.991403290677871e-06, "loss": 0.9875, "step": 1540 }, { "epoch": 0.11132985352285657, "grad_norm": 8.018212092398866, "learning_rate": 4.991379037295093e-06, "loss": 1.0368, "step": 1541 }, { "epoch": 0.11140209872306608, "grad_norm": 7.487544589156673, "learning_rate": 4.991354749807271e-06, "loss": 1.0696, "step": 1542 }, { "epoch": 0.1114743439232756, "grad_norm": 7.535242818806764, "learning_rate": 4.991330428214737e-06, "loss": 1.0016, "step": 1543 }, { "epoch": 0.11154658912348511, "grad_norm": 7.372461431641074, "learning_rate": 4.991306072517823e-06, "loss": 1.078, "step": 1544 }, { "epoch": 0.11161883432369461, "grad_norm": 7.33421505337952, "learning_rate": 4.991281682716864e-06, "loss": 0.972, "step": 1545 }, { "epoch": 0.11169107952390413, "grad_norm": 7.981723412294559, "learning_rate": 4.9912572588121925e-06, "loss": 0.9719, "step": 1546 }, { "epoch": 0.11176332472411364, "grad_norm": 7.232556635677404, "learning_rate": 4.991232800804144e-06, "loss": 1.107, "step": 1547 }, { "epoch": 0.11183556992432316, "grad_norm": 6.786858414874638, "learning_rate": 4.9912083086930515e-06, "loss": 0.945, "step": 1548 }, { "epoch": 0.11190781512453267, "grad_norm": 6.249781185134448, "learning_rate": 4.991183782479253e-06, "loss": 0.9651, "step": 1549 }, { "epoch": 0.11198006032474217, "grad_norm": 7.006038241287219, "learning_rate": 4.991159222163082e-06, "loss": 1.0488, "step": 1550 }, { "epoch": 0.11205230552495168, "grad_norm": 6.4216929314785185, "learning_rate": 4.9911346277448756e-06, "loss": 1.0705, "step": 1551 }, { "epoch": 0.1121245507251612, "grad_norm": 8.965245429454319, "learning_rate": 4.991109999224971e-06, "loss": 0.8845, "step": 1552 }, { "epoch": 0.11219679592537071, "grad_norm": 6.985579487076478, "learning_rate": 4.991085336603705e-06, "loss": 0.9131, "step": 1553 }, { "epoch": 0.11226904112558021, "grad_norm": 7.939863438813161, "learning_rate": 4.991060639881414e-06, "loss": 0.9675, "step": 1554 }, { "epoch": 0.11234128632578973, "grad_norm": 6.888730296835481, "learning_rate": 4.991035909058437e-06, "loss": 0.9453, "step": 1555 }, { "epoch": 0.11241353152599924, "grad_norm": 8.306587123990063, "learning_rate": 4.991011144135113e-06, "loss": 1.0662, "step": 1556 }, { "epoch": 0.11248577672620876, "grad_norm": 8.661142006651028, "learning_rate": 4.9909863451117805e-06, "loss": 0.9124, "step": 1557 }, { "epoch": 0.11255802192641827, "grad_norm": 7.562253837677544, "learning_rate": 4.990961511988779e-06, "loss": 0.9314, "step": 1558 }, { "epoch": 0.11263026712662777, "grad_norm": 7.889197010715156, "learning_rate": 4.990936644766449e-06, "loss": 0.961, "step": 1559 }, { "epoch": 0.11270251232683728, "grad_norm": 8.163395700191812, "learning_rate": 4.990911743445129e-06, "loss": 1.0203, "step": 1560 }, { "epoch": 0.1127747575270468, "grad_norm": 7.270829106924437, "learning_rate": 4.990886808025162e-06, "loss": 1.0488, "step": 1561 }, { "epoch": 0.11284700272725631, "grad_norm": 9.568291798789236, "learning_rate": 4.99086183850689e-06, "loss": 0.9464, "step": 1562 }, { "epoch": 0.11291924792746581, "grad_norm": 8.661528262285987, "learning_rate": 4.990836834890652e-06, "loss": 1.042, "step": 1563 }, { "epoch": 0.11299149312767533, "grad_norm": 6.980675316963207, "learning_rate": 4.990811797176792e-06, "loss": 0.9589, "step": 1564 }, { "epoch": 0.11306373832788484, "grad_norm": 7.534942861237844, "learning_rate": 4.990786725365653e-06, "loss": 0.9888, "step": 1565 }, { "epoch": 0.11313598352809436, "grad_norm": 10.410553096915235, "learning_rate": 4.990761619457577e-06, "loss": 1.0067, "step": 1566 }, { "epoch": 0.11320822872830387, "grad_norm": 8.822546460620352, "learning_rate": 4.990736479452909e-06, "loss": 0.9455, "step": 1567 }, { "epoch": 0.11328047392851337, "grad_norm": 6.852225572782391, "learning_rate": 4.9907113053519915e-06, "loss": 0.9683, "step": 1568 }, { "epoch": 0.11335271912872288, "grad_norm": 9.915993889391679, "learning_rate": 4.9906860971551716e-06, "loss": 0.9444, "step": 1569 }, { "epoch": 0.1134249643289324, "grad_norm": 8.34055956017673, "learning_rate": 4.990660854862792e-06, "loss": 0.966, "step": 1570 }, { "epoch": 0.11349720952914191, "grad_norm": 7.776573399740578, "learning_rate": 4.990635578475199e-06, "loss": 1.022, "step": 1571 }, { "epoch": 0.11356945472935141, "grad_norm": 8.17827243460674, "learning_rate": 4.990610267992739e-06, "loss": 1.1344, "step": 1572 }, { "epoch": 0.11364169992956093, "grad_norm": 7.174354540139967, "learning_rate": 4.990584923415759e-06, "loss": 0.9661, "step": 1573 }, { "epoch": 0.11371394512977044, "grad_norm": 8.046837075153348, "learning_rate": 4.9905595447446046e-06, "loss": 1.0138, "step": 1574 }, { "epoch": 0.11378619032997996, "grad_norm": 10.375483122458157, "learning_rate": 4.990534131979623e-06, "loss": 1.0806, "step": 1575 }, { "epoch": 0.11385843553018947, "grad_norm": 7.814352075390147, "learning_rate": 4.990508685121165e-06, "loss": 0.9817, "step": 1576 }, { "epoch": 0.11393068073039897, "grad_norm": 8.6388527016599, "learning_rate": 4.990483204169575e-06, "loss": 1.0189, "step": 1577 }, { "epoch": 0.11400292593060848, "grad_norm": 8.629659969486912, "learning_rate": 4.990457689125204e-06, "loss": 0.9739, "step": 1578 }, { "epoch": 0.114075171130818, "grad_norm": 9.972769856506257, "learning_rate": 4.990432139988401e-06, "loss": 1.0113, "step": 1579 }, { "epoch": 0.11414741633102751, "grad_norm": 7.317336871039719, "learning_rate": 4.990406556759516e-06, "loss": 0.9876, "step": 1580 }, { "epoch": 0.11421966153123701, "grad_norm": 9.279975777956166, "learning_rate": 4.990380939438899e-06, "loss": 1.0097, "step": 1581 }, { "epoch": 0.11429190673144653, "grad_norm": 7.981948035774273, "learning_rate": 4.990355288026901e-06, "loss": 0.9058, "step": 1582 }, { "epoch": 0.11436415193165604, "grad_norm": 6.4845420723958505, "learning_rate": 4.990329602523872e-06, "loss": 0.9778, "step": 1583 }, { "epoch": 0.11443639713186556, "grad_norm": 7.604408350896702, "learning_rate": 4.990303882930164e-06, "loss": 0.9941, "step": 1584 }, { "epoch": 0.11450864233207507, "grad_norm": 6.6330704386131405, "learning_rate": 4.99027812924613e-06, "loss": 1.0252, "step": 1585 }, { "epoch": 0.11458088753228457, "grad_norm": 8.590738108567855, "learning_rate": 4.990252341472122e-06, "loss": 0.9633, "step": 1586 }, { "epoch": 0.11465313273249408, "grad_norm": 7.617997002258272, "learning_rate": 4.9902265196084935e-06, "loss": 0.9699, "step": 1587 }, { "epoch": 0.1147253779327036, "grad_norm": 7.502177367450436, "learning_rate": 4.990200663655596e-06, "loss": 1.0292, "step": 1588 }, { "epoch": 0.11479762313291311, "grad_norm": 9.1029073744852, "learning_rate": 4.990174773613786e-06, "loss": 1.0369, "step": 1589 }, { "epoch": 0.11486986833312261, "grad_norm": 7.9446263800049834, "learning_rate": 4.990148849483417e-06, "loss": 1.0145, "step": 1590 }, { "epoch": 0.11494211353333213, "grad_norm": 7.893871644110474, "learning_rate": 4.9901228912648435e-06, "loss": 0.9509, "step": 1591 }, { "epoch": 0.11501435873354164, "grad_norm": 7.117475317485796, "learning_rate": 4.990096898958421e-06, "loss": 0.9542, "step": 1592 }, { "epoch": 0.11508660393375116, "grad_norm": 7.053958014436547, "learning_rate": 4.990070872564505e-06, "loss": 1.0486, "step": 1593 }, { "epoch": 0.11515884913396066, "grad_norm": 10.951939060150313, "learning_rate": 4.990044812083453e-06, "loss": 1.0359, "step": 1594 }, { "epoch": 0.11523109433417017, "grad_norm": 7.551146757433266, "learning_rate": 4.990018717515621e-06, "loss": 0.934, "step": 1595 }, { "epoch": 0.11530333953437968, "grad_norm": 7.103703846403111, "learning_rate": 4.989992588861365e-06, "loss": 0.9687, "step": 1596 }, { "epoch": 0.1153755847345892, "grad_norm": 6.911220781220028, "learning_rate": 4.989966426121045e-06, "loss": 1.0145, "step": 1597 }, { "epoch": 0.11544782993479871, "grad_norm": 6.594909855224342, "learning_rate": 4.989940229295017e-06, "loss": 0.9493, "step": 1598 }, { "epoch": 0.11552007513500821, "grad_norm": 7.738553608044599, "learning_rate": 4.989913998383641e-06, "loss": 0.9985, "step": 1599 }, { "epoch": 0.11559232033521773, "grad_norm": 10.040221388606257, "learning_rate": 4.989887733387275e-06, "loss": 0.9336, "step": 1600 }, { "epoch": 0.11566456553542724, "grad_norm": 9.632941994643588, "learning_rate": 4.98986143430628e-06, "loss": 0.9504, "step": 1601 }, { "epoch": 0.11573681073563676, "grad_norm": 9.225491195875469, "learning_rate": 4.989835101141015e-06, "loss": 0.9549, "step": 1602 }, { "epoch": 0.11580905593584626, "grad_norm": 7.5299691658351255, "learning_rate": 4.989808733891841e-06, "loss": 0.9961, "step": 1603 }, { "epoch": 0.11588130113605577, "grad_norm": 7.303578991341414, "learning_rate": 4.989782332559119e-06, "loss": 0.9127, "step": 1604 }, { "epoch": 0.11595354633626528, "grad_norm": 6.611806724275876, "learning_rate": 4.989755897143209e-06, "loss": 0.8495, "step": 1605 }, { "epoch": 0.1160257915364748, "grad_norm": 7.727032252767082, "learning_rate": 4.989729427644474e-06, "loss": 0.9042, "step": 1606 }, { "epoch": 0.11609803673668431, "grad_norm": 8.50529898232911, "learning_rate": 4.9897029240632775e-06, "loss": 1.0256, "step": 1607 }, { "epoch": 0.11617028193689381, "grad_norm": 7.501611917845843, "learning_rate": 4.9896763863999795e-06, "loss": 0.9821, "step": 1608 }, { "epoch": 0.11624252713710333, "grad_norm": 7.9996390261273564, "learning_rate": 4.989649814654946e-06, "loss": 1.0302, "step": 1609 }, { "epoch": 0.11631477233731284, "grad_norm": 7.845514228261865, "learning_rate": 4.989623208828539e-06, "loss": 0.961, "step": 1610 }, { "epoch": 0.11638701753752236, "grad_norm": 7.6467345604824395, "learning_rate": 4.989596568921124e-06, "loss": 0.9386, "step": 1611 }, { "epoch": 0.11645926273773186, "grad_norm": 6.572059571435582, "learning_rate": 4.989569894933064e-06, "loss": 0.9094, "step": 1612 }, { "epoch": 0.11653150793794137, "grad_norm": 8.112114651172623, "learning_rate": 4.989543186864726e-06, "loss": 0.9852, "step": 1613 }, { "epoch": 0.11660375313815088, "grad_norm": 9.413694559718405, "learning_rate": 4.989516444716475e-06, "loss": 1.0511, "step": 1614 }, { "epoch": 0.1166759983383604, "grad_norm": 7.92966729523288, "learning_rate": 4.989489668488676e-06, "loss": 0.9844, "step": 1615 }, { "epoch": 0.11674824353856991, "grad_norm": 8.96166414542361, "learning_rate": 4.989462858181697e-06, "loss": 0.9128, "step": 1616 }, { "epoch": 0.11682048873877941, "grad_norm": 11.443571886627621, "learning_rate": 4.9894360137959045e-06, "loss": 1.0484, "step": 1617 }, { "epoch": 0.11689273393898893, "grad_norm": 8.485037705759714, "learning_rate": 4.989409135331666e-06, "loss": 1.0057, "step": 1618 }, { "epoch": 0.11696497913919844, "grad_norm": 7.435986276664198, "learning_rate": 4.9893822227893484e-06, "loss": 1.0225, "step": 1619 }, { "epoch": 0.11703722433940796, "grad_norm": 6.994304247073812, "learning_rate": 4.989355276169322e-06, "loss": 0.9845, "step": 1620 }, { "epoch": 0.11710946953961746, "grad_norm": 9.686886085008215, "learning_rate": 4.989328295471954e-06, "loss": 1.0764, "step": 1621 }, { "epoch": 0.11718171473982697, "grad_norm": 8.167615666687, "learning_rate": 4.989301280697615e-06, "loss": 0.9564, "step": 1622 }, { "epoch": 0.11725395994003648, "grad_norm": 6.653374422414087, "learning_rate": 4.989274231846674e-06, "loss": 0.873, "step": 1623 }, { "epoch": 0.117326205140246, "grad_norm": 7.984620092403866, "learning_rate": 4.989247148919502e-06, "loss": 1.0323, "step": 1624 }, { "epoch": 0.11739845034045551, "grad_norm": 8.29503247623168, "learning_rate": 4.989220031916468e-06, "loss": 1.037, "step": 1625 }, { "epoch": 0.11747069554066501, "grad_norm": 8.602521712950066, "learning_rate": 4.989192880837946e-06, "loss": 1.0488, "step": 1626 }, { "epoch": 0.11754294074087453, "grad_norm": 9.618825542111173, "learning_rate": 4.989165695684306e-06, "loss": 1.0005, "step": 1627 }, { "epoch": 0.11761518594108404, "grad_norm": 7.737790241868051, "learning_rate": 4.98913847645592e-06, "loss": 1.0002, "step": 1628 }, { "epoch": 0.11768743114129356, "grad_norm": 11.069443414853723, "learning_rate": 4.9891112231531605e-06, "loss": 1.0201, "step": 1629 }, { "epoch": 0.11775967634150306, "grad_norm": 7.501881172770673, "learning_rate": 4.989083935776402e-06, "loss": 0.9785, "step": 1630 }, { "epoch": 0.11783192154171257, "grad_norm": 8.940261520895891, "learning_rate": 4.989056614326017e-06, "loss": 0.8392, "step": 1631 }, { "epoch": 0.11790416674192208, "grad_norm": 7.616345606519522, "learning_rate": 4.989029258802378e-06, "loss": 1.0527, "step": 1632 }, { "epoch": 0.1179764119421316, "grad_norm": 8.497254769809299, "learning_rate": 4.989001869205863e-06, "loss": 0.9463, "step": 1633 }, { "epoch": 0.11804865714234111, "grad_norm": 7.939855991854431, "learning_rate": 4.988974445536844e-06, "loss": 0.9139, "step": 1634 }, { "epoch": 0.11812090234255061, "grad_norm": 7.927083547210357, "learning_rate": 4.988946987795698e-06, "loss": 0.9172, "step": 1635 }, { "epoch": 0.11819314754276013, "grad_norm": 6.74518017527507, "learning_rate": 4.9889194959827995e-06, "loss": 1.0636, "step": 1636 }, { "epoch": 0.11826539274296964, "grad_norm": 6.224799659293449, "learning_rate": 4.988891970098526e-06, "loss": 0.9249, "step": 1637 }, { "epoch": 0.11833763794317916, "grad_norm": 6.423459342561227, "learning_rate": 4.988864410143254e-06, "loss": 0.8689, "step": 1638 }, { "epoch": 0.11840988314338866, "grad_norm": 7.4835627678559264, "learning_rate": 4.988836816117361e-06, "loss": 1.0034, "step": 1639 }, { "epoch": 0.11848212834359817, "grad_norm": 8.090062542309333, "learning_rate": 4.9888091880212235e-06, "loss": 1.1369, "step": 1640 }, { "epoch": 0.11855437354380768, "grad_norm": 7.637416048727946, "learning_rate": 4.988781525855221e-06, "loss": 1.0647, "step": 1641 }, { "epoch": 0.1186266187440172, "grad_norm": 7.733513308217612, "learning_rate": 4.988753829619732e-06, "loss": 1.0274, "step": 1642 }, { "epoch": 0.1186988639442267, "grad_norm": 8.059860862117858, "learning_rate": 4.988726099315136e-06, "loss": 0.9893, "step": 1643 }, { "epoch": 0.11877110914443621, "grad_norm": 7.915551545426923, "learning_rate": 4.988698334941812e-06, "loss": 1.0508, "step": 1644 }, { "epoch": 0.11884335434464573, "grad_norm": 7.245464353140312, "learning_rate": 4.988670536500139e-06, "loss": 0.8989, "step": 1645 }, { "epoch": 0.11891559954485524, "grad_norm": 8.19299178816371, "learning_rate": 4.9886427039904996e-06, "loss": 0.8919, "step": 1646 }, { "epoch": 0.11898784474506476, "grad_norm": 7.876966442850755, "learning_rate": 4.988614837413274e-06, "loss": 1.0411, "step": 1647 }, { "epoch": 0.11906008994527426, "grad_norm": 8.337760461402015, "learning_rate": 4.988586936768843e-06, "loss": 1.0056, "step": 1648 }, { "epoch": 0.11913233514548377, "grad_norm": 7.942222091716549, "learning_rate": 4.988559002057589e-06, "loss": 1.0142, "step": 1649 }, { "epoch": 0.11920458034569328, "grad_norm": 7.7341996606545615, "learning_rate": 4.988531033279895e-06, "loss": 0.8887, "step": 1650 }, { "epoch": 0.1192768255459028, "grad_norm": 8.793760463163418, "learning_rate": 4.988503030436144e-06, "loss": 0.9804, "step": 1651 }, { "epoch": 0.1193490707461123, "grad_norm": 10.403149758719412, "learning_rate": 4.988474993526717e-06, "loss": 0.9642, "step": 1652 }, { "epoch": 0.11942131594632181, "grad_norm": 8.786135797435444, "learning_rate": 4.988446922552001e-06, "loss": 1.0178, "step": 1653 }, { "epoch": 0.11949356114653133, "grad_norm": 9.073764719054479, "learning_rate": 4.988418817512378e-06, "loss": 0.8691, "step": 1654 }, { "epoch": 0.11956580634674084, "grad_norm": 8.538882017191456, "learning_rate": 4.988390678408234e-06, "loss": 1.0076, "step": 1655 }, { "epoch": 0.11963805154695036, "grad_norm": 8.802431672255489, "learning_rate": 4.988362505239954e-06, "loss": 1.0731, "step": 1656 }, { "epoch": 0.11971029674715986, "grad_norm": 8.062885127369015, "learning_rate": 4.988334298007922e-06, "loss": 0.9113, "step": 1657 }, { "epoch": 0.11978254194736937, "grad_norm": 8.344066370991762, "learning_rate": 4.988306056712527e-06, "loss": 0.9978, "step": 1658 }, { "epoch": 0.11985478714757888, "grad_norm": 6.685810196483178, "learning_rate": 4.988277781354154e-06, "loss": 0.8468, "step": 1659 }, { "epoch": 0.1199270323477884, "grad_norm": 8.427066936563415, "learning_rate": 4.988249471933189e-06, "loss": 0.953, "step": 1660 }, { "epoch": 0.1199992775479979, "grad_norm": 7.3961989048257815, "learning_rate": 4.988221128450021e-06, "loss": 0.9413, "step": 1661 }, { "epoch": 0.12007152274820741, "grad_norm": 8.367662090444576, "learning_rate": 4.988192750905039e-06, "loss": 0.9897, "step": 1662 }, { "epoch": 0.12014376794841693, "grad_norm": 8.432451560421065, "learning_rate": 4.988164339298629e-06, "loss": 1.0171, "step": 1663 }, { "epoch": 0.12021601314862644, "grad_norm": 7.044850399034417, "learning_rate": 4.988135893631182e-06, "loss": 0.9311, "step": 1664 }, { "epoch": 0.12028825834883596, "grad_norm": 8.340578769571437, "learning_rate": 4.9881074139030865e-06, "loss": 1.0334, "step": 1665 }, { "epoch": 0.12036050354904546, "grad_norm": 7.77888863517389, "learning_rate": 4.988078900114731e-06, "loss": 0.9718, "step": 1666 }, { "epoch": 0.12043274874925497, "grad_norm": 7.490967461273569, "learning_rate": 4.988050352266509e-06, "loss": 0.8726, "step": 1667 }, { "epoch": 0.12050499394946448, "grad_norm": 11.056800537893665, "learning_rate": 4.988021770358809e-06, "loss": 0.8691, "step": 1668 }, { "epoch": 0.120577239149674, "grad_norm": 8.295708008500823, "learning_rate": 4.987993154392022e-06, "loss": 1.0534, "step": 1669 }, { "epoch": 0.1206494843498835, "grad_norm": 7.377123510823532, "learning_rate": 4.98796450436654e-06, "loss": 0.9813, "step": 1670 }, { "epoch": 0.12072172955009301, "grad_norm": 7.346598620552501, "learning_rate": 4.9879358202827574e-06, "loss": 1.0795, "step": 1671 }, { "epoch": 0.12079397475030253, "grad_norm": 7.792948190762354, "learning_rate": 4.987907102141065e-06, "loss": 1.1076, "step": 1672 }, { "epoch": 0.12086621995051204, "grad_norm": 8.669490451996912, "learning_rate": 4.987878349941855e-06, "loss": 0.9987, "step": 1673 }, { "epoch": 0.12093846515072156, "grad_norm": 7.491000816393039, "learning_rate": 4.987849563685523e-06, "loss": 0.9119, "step": 1674 }, { "epoch": 0.12101071035093106, "grad_norm": 9.475102374245958, "learning_rate": 4.987820743372462e-06, "loss": 1.0878, "step": 1675 }, { "epoch": 0.12108295555114057, "grad_norm": 8.263373430633616, "learning_rate": 4.987791889003067e-06, "loss": 0.9575, "step": 1676 }, { "epoch": 0.12115520075135008, "grad_norm": 7.297081235793362, "learning_rate": 4.987763000577732e-06, "loss": 0.9767, "step": 1677 }, { "epoch": 0.1212274459515596, "grad_norm": 7.319839055110748, "learning_rate": 4.987734078096853e-06, "loss": 1.1022, "step": 1678 }, { "epoch": 0.1212996911517691, "grad_norm": 6.532236709164828, "learning_rate": 4.987705121560826e-06, "loss": 0.9859, "step": 1679 }, { "epoch": 0.12137193635197861, "grad_norm": 7.613790057635425, "learning_rate": 4.9876761309700485e-06, "loss": 1.0064, "step": 1680 }, { "epoch": 0.12144418155218813, "grad_norm": 9.950325229256014, "learning_rate": 4.987647106324916e-06, "loss": 1.0547, "step": 1681 }, { "epoch": 0.12151642675239764, "grad_norm": 9.53460214169885, "learning_rate": 4.9876180476258255e-06, "loss": 0.9226, "step": 1682 }, { "epoch": 0.12158867195260716, "grad_norm": 7.052186169759645, "learning_rate": 4.987588954873176e-06, "loss": 0.9975, "step": 1683 }, { "epoch": 0.12166091715281666, "grad_norm": 6.772624493901931, "learning_rate": 4.987559828067365e-06, "loss": 0.9943, "step": 1684 }, { "epoch": 0.12173316235302617, "grad_norm": 7.1837679254305895, "learning_rate": 4.9875306672087916e-06, "loss": 0.9592, "step": 1685 }, { "epoch": 0.12180540755323568, "grad_norm": 7.354505608576421, "learning_rate": 4.987501472297854e-06, "loss": 1.0128, "step": 1686 }, { "epoch": 0.1218776527534452, "grad_norm": 8.47090149748985, "learning_rate": 4.9874722433349536e-06, "loss": 0.9542, "step": 1687 }, { "epoch": 0.1219498979536547, "grad_norm": 7.979620486535112, "learning_rate": 4.98744298032049e-06, "loss": 1.0254, "step": 1688 }, { "epoch": 0.12202214315386421, "grad_norm": 7.446040018888605, "learning_rate": 4.9874136832548625e-06, "loss": 0.8951, "step": 1689 }, { "epoch": 0.12209438835407373, "grad_norm": 7.644633296856574, "learning_rate": 4.987384352138473e-06, "loss": 0.9713, "step": 1690 }, { "epoch": 0.12216663355428324, "grad_norm": 6.858308261657102, "learning_rate": 4.987354986971723e-06, "loss": 0.9863, "step": 1691 }, { "epoch": 0.12223887875449276, "grad_norm": 6.512133497589687, "learning_rate": 4.987325587755015e-06, "loss": 0.9073, "step": 1692 }, { "epoch": 0.12231112395470226, "grad_norm": 8.578054737018705, "learning_rate": 4.987296154488751e-06, "loss": 1.0397, "step": 1693 }, { "epoch": 0.12238336915491177, "grad_norm": 8.231772429528343, "learning_rate": 4.987266687173333e-06, "loss": 0.9585, "step": 1694 }, { "epoch": 0.12245561435512128, "grad_norm": 7.803254294708755, "learning_rate": 4.987237185809166e-06, "loss": 0.9922, "step": 1695 }, { "epoch": 0.1225278595553308, "grad_norm": 8.159059471829075, "learning_rate": 4.9872076503966536e-06, "loss": 0.9718, "step": 1696 }, { "epoch": 0.1226001047555403, "grad_norm": 10.242174091810446, "learning_rate": 4.987178080936199e-06, "loss": 1.0809, "step": 1697 }, { "epoch": 0.12267234995574981, "grad_norm": 7.215093305781486, "learning_rate": 4.987148477428208e-06, "loss": 1.0334, "step": 1698 }, { "epoch": 0.12274459515595933, "grad_norm": 6.553899160634352, "learning_rate": 4.9871188398730855e-06, "loss": 0.949, "step": 1699 }, { "epoch": 0.12281684035616884, "grad_norm": 6.350150651721623, "learning_rate": 4.987089168271237e-06, "loss": 0.9762, "step": 1700 }, { "epoch": 0.12288908555637834, "grad_norm": 7.242466662971795, "learning_rate": 4.987059462623069e-06, "loss": 0.9393, "step": 1701 }, { "epoch": 0.12296133075658786, "grad_norm": 7.03283592140554, "learning_rate": 4.9870297229289875e-06, "loss": 1.0918, "step": 1702 }, { "epoch": 0.12303357595679737, "grad_norm": 7.152129873747891, "learning_rate": 4.9869999491894e-06, "loss": 0.9487, "step": 1703 }, { "epoch": 0.12310582115700688, "grad_norm": 7.0927090994474575, "learning_rate": 4.986970141404716e-06, "loss": 0.9748, "step": 1704 }, { "epoch": 0.1231780663572164, "grad_norm": 8.22504511765741, "learning_rate": 4.986940299575341e-06, "loss": 1.0185, "step": 1705 }, { "epoch": 0.1232503115574259, "grad_norm": 7.051958437047936, "learning_rate": 4.986910423701683e-06, "loss": 0.977, "step": 1706 }, { "epoch": 0.12332255675763541, "grad_norm": 6.157766087707292, "learning_rate": 4.986880513784153e-06, "loss": 0.8982, "step": 1707 }, { "epoch": 0.12339480195784493, "grad_norm": 6.753830846980041, "learning_rate": 4.98685056982316e-06, "loss": 0.9816, "step": 1708 }, { "epoch": 0.12346704715805444, "grad_norm": 6.854885848171002, "learning_rate": 4.986820591819114e-06, "loss": 0.9235, "step": 1709 }, { "epoch": 0.12353929235826394, "grad_norm": 8.607921996972152, "learning_rate": 4.986790579772424e-06, "loss": 1.0071, "step": 1710 }, { "epoch": 0.12361153755847346, "grad_norm": 7.51876619484696, "learning_rate": 4.986760533683502e-06, "loss": 0.9897, "step": 1711 }, { "epoch": 0.12368378275868297, "grad_norm": 7.681620512702503, "learning_rate": 4.98673045355276e-06, "loss": 0.9724, "step": 1712 }, { "epoch": 0.12375602795889248, "grad_norm": 8.236304272478504, "learning_rate": 4.986700339380608e-06, "loss": 1.0207, "step": 1713 }, { "epoch": 0.123828273159102, "grad_norm": 8.509116276938112, "learning_rate": 4.98667019116746e-06, "loss": 0.9748, "step": 1714 }, { "epoch": 0.1239005183593115, "grad_norm": 7.501169495000403, "learning_rate": 4.986640008913727e-06, "loss": 0.9561, "step": 1715 }, { "epoch": 0.12397276355952101, "grad_norm": 6.526085961250649, "learning_rate": 4.986609792619823e-06, "loss": 0.8294, "step": 1716 }, { "epoch": 0.12404500875973053, "grad_norm": 8.677775916721972, "learning_rate": 4.986579542286162e-06, "loss": 0.9352, "step": 1717 }, { "epoch": 0.12411725395994004, "grad_norm": 7.887798989216644, "learning_rate": 4.986549257913158e-06, "loss": 0.9992, "step": 1718 }, { "epoch": 0.12418949916014954, "grad_norm": 7.128550932926012, "learning_rate": 4.986518939501225e-06, "loss": 0.9502, "step": 1719 }, { "epoch": 0.12426174436035906, "grad_norm": 8.28137022686902, "learning_rate": 4.986488587050779e-06, "loss": 1.1193, "step": 1720 }, { "epoch": 0.12433398956056857, "grad_norm": 9.236753722154255, "learning_rate": 4.986458200562234e-06, "loss": 0.9539, "step": 1721 }, { "epoch": 0.12440623476077808, "grad_norm": 7.459364097021514, "learning_rate": 4.986427780036007e-06, "loss": 1.0201, "step": 1722 }, { "epoch": 0.1244784799609876, "grad_norm": 6.554861218738641, "learning_rate": 4.986397325472515e-06, "loss": 1.0141, "step": 1723 }, { "epoch": 0.1245507251611971, "grad_norm": 7.74137127343181, "learning_rate": 4.9863668368721735e-06, "loss": 0.9062, "step": 1724 }, { "epoch": 0.12462297036140661, "grad_norm": 8.284711927471673, "learning_rate": 4.9863363142354e-06, "loss": 0.9387, "step": 1725 }, { "epoch": 0.12469521556161613, "grad_norm": 9.722323530365648, "learning_rate": 4.986305757562614e-06, "loss": 1.0212, "step": 1726 }, { "epoch": 0.12476746076182564, "grad_norm": 8.592441306603256, "learning_rate": 4.986275166854233e-06, "loss": 0.968, "step": 1727 }, { "epoch": 0.12483970596203514, "grad_norm": 7.261241222027585, "learning_rate": 4.986244542110674e-06, "loss": 0.8959, "step": 1728 }, { "epoch": 0.12491195116224466, "grad_norm": 8.539770992165588, "learning_rate": 4.986213883332359e-06, "loss": 0.9725, "step": 1729 }, { "epoch": 0.12498419636245417, "grad_norm": 10.453344322834596, "learning_rate": 4.9861831905197056e-06, "loss": 1.0724, "step": 1730 }, { "epoch": 0.12505644156266368, "grad_norm": 8.042593103782375, "learning_rate": 4.986152463673134e-06, "loss": 0.9462, "step": 1731 }, { "epoch": 0.1251286867628732, "grad_norm": 7.295839027854256, "learning_rate": 4.986121702793067e-06, "loss": 0.9846, "step": 1732 }, { "epoch": 0.1252009319630827, "grad_norm": 8.416883812045212, "learning_rate": 4.986090907879924e-06, "loss": 0.9701, "step": 1733 }, { "epoch": 0.12527317716329223, "grad_norm": 8.953649908260179, "learning_rate": 4.986060078934126e-06, "loss": 1.0822, "step": 1734 }, { "epoch": 0.1253454223635017, "grad_norm": 7.917682569137575, "learning_rate": 4.986029215956096e-06, "loss": 0.9708, "step": 1735 }, { "epoch": 0.12541766756371123, "grad_norm": 7.227419448767847, "learning_rate": 4.985998318946257e-06, "loss": 0.9886, "step": 1736 }, { "epoch": 0.12548991276392074, "grad_norm": 7.019715202184159, "learning_rate": 4.985967387905031e-06, "loss": 1.0245, "step": 1737 }, { "epoch": 0.12556215796413026, "grad_norm": 7.27550309267097, "learning_rate": 4.985936422832841e-06, "loss": 0.9738, "step": 1738 }, { "epoch": 0.12563440316433977, "grad_norm": 6.970053512389713, "learning_rate": 4.985905423730112e-06, "loss": 0.9329, "step": 1739 }, { "epoch": 0.12570664836454928, "grad_norm": 8.693098500986375, "learning_rate": 4.985874390597269e-06, "loss": 1.0577, "step": 1740 }, { "epoch": 0.1257788935647588, "grad_norm": 8.30654762946122, "learning_rate": 4.985843323434734e-06, "loss": 0.9587, "step": 1741 }, { "epoch": 0.1258511387649683, "grad_norm": 7.48051923530614, "learning_rate": 4.985812222242935e-06, "loss": 0.9435, "step": 1742 }, { "epoch": 0.12592338396517783, "grad_norm": 7.888797843061477, "learning_rate": 4.985781087022297e-06, "loss": 0.868, "step": 1743 }, { "epoch": 0.1259956291653873, "grad_norm": 7.055641016939007, "learning_rate": 4.985749917773245e-06, "loss": 1.0181, "step": 1744 }, { "epoch": 0.12606787436559683, "grad_norm": 7.136850239962742, "learning_rate": 4.9857187144962075e-06, "loss": 0.9571, "step": 1745 }, { "epoch": 0.12614011956580634, "grad_norm": 7.782090719987855, "learning_rate": 4.9856874771916105e-06, "loss": 1.0095, "step": 1746 }, { "epoch": 0.12621236476601586, "grad_norm": 7.171179245255143, "learning_rate": 4.985656205859882e-06, "loss": 0.9804, "step": 1747 }, { "epoch": 0.12628460996622537, "grad_norm": 8.711306221176107, "learning_rate": 4.98562490050145e-06, "loss": 1.0101, "step": 1748 }, { "epoch": 0.12635685516643488, "grad_norm": 7.371286782879175, "learning_rate": 4.985593561116743e-06, "loss": 0.906, "step": 1749 }, { "epoch": 0.1264291003666444, "grad_norm": 10.612547994085013, "learning_rate": 4.98556218770619e-06, "loss": 1.0397, "step": 1750 }, { "epoch": 0.1265013455668539, "grad_norm": 7.583688608867044, "learning_rate": 4.9855307802702215e-06, "loss": 0.9265, "step": 1751 }, { "epoch": 0.12657359076706343, "grad_norm": 8.236361703675326, "learning_rate": 4.985499338809265e-06, "loss": 1.0036, "step": 1752 }, { "epoch": 0.1266458359672729, "grad_norm": 7.4197892320007615, "learning_rate": 4.985467863323754e-06, "loss": 0.9202, "step": 1753 }, { "epoch": 0.12671808116748243, "grad_norm": 7.368841153552508, "learning_rate": 4.985436353814117e-06, "loss": 1.045, "step": 1754 }, { "epoch": 0.12679032636769194, "grad_norm": 8.029849631681895, "learning_rate": 4.985404810280786e-06, "loss": 0.8961, "step": 1755 }, { "epoch": 0.12686257156790146, "grad_norm": 7.824239958392998, "learning_rate": 4.985373232724193e-06, "loss": 0.9815, "step": 1756 }, { "epoch": 0.12693481676811097, "grad_norm": 6.808274104570041, "learning_rate": 4.98534162114477e-06, "loss": 0.9654, "step": 1757 }, { "epoch": 0.12700706196832048, "grad_norm": 6.670771130048041, "learning_rate": 4.9853099755429505e-06, "loss": 0.9449, "step": 1758 }, { "epoch": 0.12707930716853, "grad_norm": 7.702342769376269, "learning_rate": 4.985278295919167e-06, "loss": 1.0055, "step": 1759 }, { "epoch": 0.1271515523687395, "grad_norm": 6.983559511781213, "learning_rate": 4.985246582273853e-06, "loss": 0.8943, "step": 1760 }, { "epoch": 0.12722379756894903, "grad_norm": 7.873463314231348, "learning_rate": 4.985214834607443e-06, "loss": 1.0164, "step": 1761 }, { "epoch": 0.1272960427691585, "grad_norm": 7.2111758175170415, "learning_rate": 4.9851830529203725e-06, "loss": 0.9237, "step": 1762 }, { "epoch": 0.12736828796936803, "grad_norm": 7.425162460656243, "learning_rate": 4.985151237213075e-06, "loss": 0.9574, "step": 1763 }, { "epoch": 0.12744053316957754, "grad_norm": 6.423930857722157, "learning_rate": 4.985119387485986e-06, "loss": 0.9757, "step": 1764 }, { "epoch": 0.12751277836978706, "grad_norm": 9.065235225026353, "learning_rate": 4.985087503739543e-06, "loss": 1.0078, "step": 1765 }, { "epoch": 0.12758502356999657, "grad_norm": 9.898222838906605, "learning_rate": 4.985055585974181e-06, "loss": 0.9955, "step": 1766 }, { "epoch": 0.12765726877020608, "grad_norm": 8.311223211682746, "learning_rate": 4.985023634190338e-06, "loss": 0.8921, "step": 1767 }, { "epoch": 0.1277295139704156, "grad_norm": 8.222343094541571, "learning_rate": 4.984991648388451e-06, "loss": 1.0571, "step": 1768 }, { "epoch": 0.1278017591706251, "grad_norm": 8.46288318752558, "learning_rate": 4.984959628568957e-06, "loss": 1.0144, "step": 1769 }, { "epoch": 0.12787400437083463, "grad_norm": 7.40295877845724, "learning_rate": 4.984927574732297e-06, "loss": 1.0224, "step": 1770 }, { "epoch": 0.1279462495710441, "grad_norm": 10.851376888261404, "learning_rate": 4.984895486878905e-06, "loss": 1.0222, "step": 1771 }, { "epoch": 0.12801849477125363, "grad_norm": 7.628636477965079, "learning_rate": 4.984863365009226e-06, "loss": 1.0165, "step": 1772 }, { "epoch": 0.12809073997146314, "grad_norm": 7.711362693681698, "learning_rate": 4.984831209123696e-06, "loss": 0.964, "step": 1773 }, { "epoch": 0.12816298517167266, "grad_norm": 7.454550041061987, "learning_rate": 4.984799019222756e-06, "loss": 0.9324, "step": 1774 }, { "epoch": 0.12823523037188217, "grad_norm": 6.822264506691863, "learning_rate": 4.984766795306845e-06, "loss": 0.9423, "step": 1775 }, { "epoch": 0.12830747557209168, "grad_norm": 7.726013719531228, "learning_rate": 4.984734537376409e-06, "loss": 1.0539, "step": 1776 }, { "epoch": 0.1283797207723012, "grad_norm": 6.7912383461284715, "learning_rate": 4.984702245431885e-06, "loss": 1.0321, "step": 1777 }, { "epoch": 0.1284519659725107, "grad_norm": 7.284115207119251, "learning_rate": 4.984669919473716e-06, "loss": 0.9908, "step": 1778 }, { "epoch": 0.1285242111727202, "grad_norm": 6.174930328099643, "learning_rate": 4.984637559502346e-06, "loss": 0.8643, "step": 1779 }, { "epoch": 0.1285964563729297, "grad_norm": 6.956521565499511, "learning_rate": 4.984605165518216e-06, "loss": 0.8869, "step": 1780 }, { "epoch": 0.12866870157313923, "grad_norm": 7.3006305056501315, "learning_rate": 4.984572737521771e-06, "loss": 0.9913, "step": 1781 }, { "epoch": 0.12874094677334874, "grad_norm": 6.1879386361056445, "learning_rate": 4.984540275513454e-06, "loss": 0.9938, "step": 1782 }, { "epoch": 0.12881319197355826, "grad_norm": 7.00707840662789, "learning_rate": 4.98450777949371e-06, "loss": 0.944, "step": 1783 }, { "epoch": 0.12888543717376777, "grad_norm": 6.857213542210609, "learning_rate": 4.984475249462984e-06, "loss": 0.9533, "step": 1784 }, { "epoch": 0.12895768237397728, "grad_norm": 6.12321975682971, "learning_rate": 4.9844426854217206e-06, "loss": 0.9672, "step": 1785 }, { "epoch": 0.1290299275741868, "grad_norm": 7.768847527969757, "learning_rate": 4.984410087370365e-06, "loss": 0.9893, "step": 1786 }, { "epoch": 0.1291021727743963, "grad_norm": 7.049703702719894, "learning_rate": 4.984377455309366e-06, "loss": 1.007, "step": 1787 }, { "epoch": 0.1291744179746058, "grad_norm": 7.634958986423256, "learning_rate": 4.984344789239167e-06, "loss": 0.9898, "step": 1788 }, { "epoch": 0.1292466631748153, "grad_norm": 6.167106234718414, "learning_rate": 4.984312089160218e-06, "loss": 0.9627, "step": 1789 }, { "epoch": 0.12931890837502483, "grad_norm": 7.9744283154347695, "learning_rate": 4.984279355072965e-06, "loss": 1.0664, "step": 1790 }, { "epoch": 0.12939115357523434, "grad_norm": 7.477889976522559, "learning_rate": 4.984246586977857e-06, "loss": 0.8809, "step": 1791 }, { "epoch": 0.12946339877544386, "grad_norm": 8.209029218218584, "learning_rate": 4.984213784875341e-06, "loss": 1.0023, "step": 1792 }, { "epoch": 0.12953564397565337, "grad_norm": 7.532272079569621, "learning_rate": 4.984180948765868e-06, "loss": 0.9323, "step": 1793 }, { "epoch": 0.12960788917586288, "grad_norm": 7.031283094540171, "learning_rate": 4.9841480786498864e-06, "loss": 0.9449, "step": 1794 }, { "epoch": 0.1296801343760724, "grad_norm": 9.401245408038658, "learning_rate": 4.984115174527847e-06, "loss": 1.0713, "step": 1795 }, { "epoch": 0.1297523795762819, "grad_norm": 9.216965945617668, "learning_rate": 4.984082236400199e-06, "loss": 1.0561, "step": 1796 }, { "epoch": 0.1298246247764914, "grad_norm": 9.013166333669785, "learning_rate": 4.984049264267394e-06, "loss": 1.0835, "step": 1797 }, { "epoch": 0.1298968699767009, "grad_norm": 7.813334916324043, "learning_rate": 4.9840162581298836e-06, "loss": 0.9989, "step": 1798 }, { "epoch": 0.12996911517691043, "grad_norm": 8.198863755565414, "learning_rate": 4.983983217988119e-06, "loss": 1.0055, "step": 1799 }, { "epoch": 0.13004136037711994, "grad_norm": 9.362259001743258, "learning_rate": 4.983950143842553e-06, "loss": 1.0262, "step": 1800 }, { "epoch": 0.13011360557732946, "grad_norm": 8.042575079911634, "learning_rate": 4.9839170356936386e-06, "loss": 1.0391, "step": 1801 }, { "epoch": 0.13018585077753897, "grad_norm": 7.686013675071032, "learning_rate": 4.983883893541828e-06, "loss": 0.9637, "step": 1802 }, { "epoch": 0.13025809597774848, "grad_norm": 8.318147275486272, "learning_rate": 4.983850717387576e-06, "loss": 0.9503, "step": 1803 }, { "epoch": 0.130330341177958, "grad_norm": 7.055036263700486, "learning_rate": 4.983817507231335e-06, "loss": 0.9917, "step": 1804 }, { "epoch": 0.1304025863781675, "grad_norm": 8.684256627448702, "learning_rate": 4.983784263073562e-06, "loss": 0.9431, "step": 1805 }, { "epoch": 0.130474831578377, "grad_norm": 8.026100972076344, "learning_rate": 4.983750984914711e-06, "loss": 0.9222, "step": 1806 }, { "epoch": 0.1305470767785865, "grad_norm": 6.691161107228747, "learning_rate": 4.983717672755237e-06, "loss": 1.0179, "step": 1807 }, { "epoch": 0.13061932197879603, "grad_norm": 7.266265250268246, "learning_rate": 4.9836843265955975e-06, "loss": 0.9732, "step": 1808 }, { "epoch": 0.13069156717900554, "grad_norm": 7.410222915405842, "learning_rate": 4.983650946436247e-06, "loss": 0.9806, "step": 1809 }, { "epoch": 0.13076381237921506, "grad_norm": 7.5432790082974845, "learning_rate": 4.983617532277644e-06, "loss": 0.8889, "step": 1810 }, { "epoch": 0.13083605757942457, "grad_norm": 8.23439584689605, "learning_rate": 4.983584084120245e-06, "loss": 0.9939, "step": 1811 }, { "epoch": 0.13090830277963408, "grad_norm": 8.078876275963397, "learning_rate": 4.9835506019645095e-06, "loss": 0.9797, "step": 1812 }, { "epoch": 0.1309805479798436, "grad_norm": 9.282932116218259, "learning_rate": 4.983517085810893e-06, "loss": 1.012, "step": 1813 }, { "epoch": 0.1310527931800531, "grad_norm": 7.168434398192635, "learning_rate": 4.983483535659856e-06, "loss": 0.9675, "step": 1814 }, { "epoch": 0.1311250383802626, "grad_norm": 8.128275988451277, "learning_rate": 4.9834499515118595e-06, "loss": 0.9439, "step": 1815 }, { "epoch": 0.1311972835804721, "grad_norm": 6.667297079161606, "learning_rate": 4.98341633336736e-06, "loss": 0.8304, "step": 1816 }, { "epoch": 0.13126952878068163, "grad_norm": 8.582739306390755, "learning_rate": 4.983382681226819e-06, "loss": 0.9611, "step": 1817 }, { "epoch": 0.13134177398089114, "grad_norm": 7.95913798739797, "learning_rate": 4.983348995090698e-06, "loss": 0.958, "step": 1818 }, { "epoch": 0.13141401918110066, "grad_norm": 7.863198535254848, "learning_rate": 4.9833152749594574e-06, "loss": 0.9472, "step": 1819 }, { "epoch": 0.13148626438131017, "grad_norm": 9.290243088218068, "learning_rate": 4.9832815208335584e-06, "loss": 1.0334, "step": 1820 }, { "epoch": 0.13155850958151968, "grad_norm": 6.869797003577422, "learning_rate": 4.983247732713463e-06, "loss": 0.9853, "step": 1821 }, { "epoch": 0.1316307547817292, "grad_norm": 7.558406005420285, "learning_rate": 4.983213910599636e-06, "loss": 0.9988, "step": 1822 }, { "epoch": 0.1317029999819387, "grad_norm": 6.4563597914453155, "learning_rate": 4.983180054492538e-06, "loss": 0.9789, "step": 1823 }, { "epoch": 0.1317752451821482, "grad_norm": 8.795317218865318, "learning_rate": 4.983146164392632e-06, "loss": 1.0715, "step": 1824 }, { "epoch": 0.1318474903823577, "grad_norm": 9.269276508957521, "learning_rate": 4.983112240300384e-06, "loss": 1.1283, "step": 1825 }, { "epoch": 0.13191973558256723, "grad_norm": 9.207610042928442, "learning_rate": 4.983078282216257e-06, "loss": 0.9678, "step": 1826 }, { "epoch": 0.13199198078277674, "grad_norm": 7.652344996251174, "learning_rate": 4.983044290140717e-06, "loss": 0.9794, "step": 1827 }, { "epoch": 0.13206422598298626, "grad_norm": 11.058470601023064, "learning_rate": 4.9830102640742276e-06, "loss": 1.0473, "step": 1828 }, { "epoch": 0.13213647118319577, "grad_norm": 9.805058664538375, "learning_rate": 4.982976204017257e-06, "loss": 1.0345, "step": 1829 }, { "epoch": 0.13220871638340528, "grad_norm": 7.621170332937871, "learning_rate": 4.982942109970269e-06, "loss": 0.9301, "step": 1830 }, { "epoch": 0.1322809615836148, "grad_norm": 7.181901692035509, "learning_rate": 4.982907981933731e-06, "loss": 0.9869, "step": 1831 }, { "epoch": 0.1323532067838243, "grad_norm": 9.529974780214449, "learning_rate": 4.982873819908112e-06, "loss": 1.0587, "step": 1832 }, { "epoch": 0.1324254519840338, "grad_norm": 10.123056484234779, "learning_rate": 4.982839623893877e-06, "loss": 0.9403, "step": 1833 }, { "epoch": 0.1324976971842433, "grad_norm": 9.762961355485892, "learning_rate": 4.982805393891496e-06, "loss": 0.9923, "step": 1834 }, { "epoch": 0.13256994238445283, "grad_norm": 9.369295953540963, "learning_rate": 4.982771129901437e-06, "loss": 1.0665, "step": 1835 }, { "epoch": 0.13264218758466234, "grad_norm": 8.132616903717814, "learning_rate": 4.982736831924169e-06, "loss": 1.0233, "step": 1836 }, { "epoch": 0.13271443278487186, "grad_norm": 8.260609596055094, "learning_rate": 4.9827024999601606e-06, "loss": 0.9744, "step": 1837 }, { "epoch": 0.13278667798508137, "grad_norm": 8.722209285178447, "learning_rate": 4.982668134009883e-06, "loss": 0.9912, "step": 1838 }, { "epoch": 0.13285892318529088, "grad_norm": 10.135511052213932, "learning_rate": 4.982633734073807e-06, "loss": 0.9275, "step": 1839 }, { "epoch": 0.1329311683855004, "grad_norm": 7.679764988800317, "learning_rate": 4.982599300152402e-06, "loss": 0.9357, "step": 1840 }, { "epoch": 0.1330034135857099, "grad_norm": 8.127833415209162, "learning_rate": 4.982564832246141e-06, "loss": 0.9804, "step": 1841 }, { "epoch": 0.1330756587859194, "grad_norm": 10.616334859872007, "learning_rate": 4.9825303303554945e-06, "loss": 0.9213, "step": 1842 }, { "epoch": 0.1331479039861289, "grad_norm": 9.03791199057692, "learning_rate": 4.982495794480935e-06, "loss": 0.9746, "step": 1843 }, { "epoch": 0.13322014918633843, "grad_norm": 5.95104591729602, "learning_rate": 4.982461224622936e-06, "loss": 0.8141, "step": 1844 }, { "epoch": 0.13329239438654794, "grad_norm": 6.5403901168706575, "learning_rate": 4.982426620781971e-06, "loss": 0.9242, "step": 1845 }, { "epoch": 0.13336463958675746, "grad_norm": 7.865941969039864, "learning_rate": 4.9823919829585125e-06, "loss": 1.0484, "step": 1846 }, { "epoch": 0.13343688478696697, "grad_norm": 8.439588500201557, "learning_rate": 4.982357311153036e-06, "loss": 0.984, "step": 1847 }, { "epoch": 0.13350912998717648, "grad_norm": 8.056435366703509, "learning_rate": 4.982322605366013e-06, "loss": 1.0598, "step": 1848 }, { "epoch": 0.133581375187386, "grad_norm": 6.053167967085016, "learning_rate": 4.982287865597923e-06, "loss": 0.8954, "step": 1849 }, { "epoch": 0.1336536203875955, "grad_norm": 6.013574186239678, "learning_rate": 4.982253091849239e-06, "loss": 0.986, "step": 1850 }, { "epoch": 0.133725865587805, "grad_norm": 7.816570228789707, "learning_rate": 4.982218284120438e-06, "loss": 0.9665, "step": 1851 }, { "epoch": 0.1337981107880145, "grad_norm": 7.241217456693795, "learning_rate": 4.982183442411995e-06, "loss": 1.0057, "step": 1852 }, { "epoch": 0.13387035598822403, "grad_norm": 8.066454486841245, "learning_rate": 4.982148566724389e-06, "loss": 0.9711, "step": 1853 }, { "epoch": 0.13394260118843354, "grad_norm": 7.023433969600753, "learning_rate": 4.9821136570580955e-06, "loss": 1.0455, "step": 1854 }, { "epoch": 0.13401484638864306, "grad_norm": 7.00264063483167, "learning_rate": 4.982078713413594e-06, "loss": 0.9653, "step": 1855 }, { "epoch": 0.13408709158885257, "grad_norm": 7.266671841587476, "learning_rate": 4.982043735791361e-06, "loss": 1.039, "step": 1856 }, { "epoch": 0.13415933678906208, "grad_norm": 8.308544164167973, "learning_rate": 4.982008724191877e-06, "loss": 0.9182, "step": 1857 }, { "epoch": 0.1342315819892716, "grad_norm": 6.271385227727647, "learning_rate": 4.981973678615621e-06, "loss": 1.0054, "step": 1858 }, { "epoch": 0.1343038271894811, "grad_norm": 7.878157936157476, "learning_rate": 4.981938599063072e-06, "loss": 0.9589, "step": 1859 }, { "epoch": 0.1343760723896906, "grad_norm": 6.272217590372368, "learning_rate": 4.981903485534711e-06, "loss": 0.8277, "step": 1860 }, { "epoch": 0.1344483175899001, "grad_norm": 7.65804671400462, "learning_rate": 4.981868338031019e-06, "loss": 0.9954, "step": 1861 }, { "epoch": 0.13452056279010963, "grad_norm": 8.364577455308748, "learning_rate": 4.9818331565524754e-06, "loss": 1.0007, "step": 1862 }, { "epoch": 0.13459280799031914, "grad_norm": 6.24127075949757, "learning_rate": 4.981797941099564e-06, "loss": 0.9143, "step": 1863 }, { "epoch": 0.13466505319052866, "grad_norm": 7.3136945221602305, "learning_rate": 4.981762691672765e-06, "loss": 0.9479, "step": 1864 }, { "epoch": 0.13473729839073817, "grad_norm": 6.63301724139244, "learning_rate": 4.981727408272562e-06, "loss": 0.9467, "step": 1865 }, { "epoch": 0.13480954359094768, "grad_norm": 7.871754779935011, "learning_rate": 4.981692090899438e-06, "loss": 1.0075, "step": 1866 }, { "epoch": 0.1348817887911572, "grad_norm": 8.099139094814241, "learning_rate": 4.981656739553875e-06, "loss": 0.9334, "step": 1867 }, { "epoch": 0.1349540339913667, "grad_norm": 6.5070061705041065, "learning_rate": 4.981621354236359e-06, "loss": 0.93, "step": 1868 }, { "epoch": 0.1350262791915762, "grad_norm": 6.548436381531759, "learning_rate": 4.981585934947374e-06, "loss": 1.0082, "step": 1869 }, { "epoch": 0.1350985243917857, "grad_norm": 6.818821205445699, "learning_rate": 4.981550481687403e-06, "loss": 0.9383, "step": 1870 }, { "epoch": 0.13517076959199523, "grad_norm": 7.515236953583424, "learning_rate": 4.981514994456934e-06, "loss": 0.9463, "step": 1871 }, { "epoch": 0.13524301479220474, "grad_norm": 9.004295171914832, "learning_rate": 4.981479473256451e-06, "loss": 0.9027, "step": 1872 }, { "epoch": 0.13531525999241426, "grad_norm": 8.295243097368129, "learning_rate": 4.98144391808644e-06, "loss": 0.9564, "step": 1873 }, { "epoch": 0.13538750519262377, "grad_norm": 8.632041820803103, "learning_rate": 4.98140832894739e-06, "loss": 0.9663, "step": 1874 }, { "epoch": 0.13545975039283328, "grad_norm": 7.098680841102655, "learning_rate": 4.981372705839786e-06, "loss": 0.9634, "step": 1875 }, { "epoch": 0.1355319955930428, "grad_norm": 7.1180005409988665, "learning_rate": 4.981337048764116e-06, "loss": 0.9566, "step": 1876 }, { "epoch": 0.1356042407932523, "grad_norm": 8.203065941007639, "learning_rate": 4.981301357720869e-06, "loss": 1.019, "step": 1877 }, { "epoch": 0.1356764859934618, "grad_norm": 8.293973771089908, "learning_rate": 4.981265632710533e-06, "loss": 1.0432, "step": 1878 }, { "epoch": 0.1357487311936713, "grad_norm": 7.9671545488477005, "learning_rate": 4.981229873733596e-06, "loss": 0.9333, "step": 1879 }, { "epoch": 0.13582097639388083, "grad_norm": 6.504052586097733, "learning_rate": 4.981194080790549e-06, "loss": 1.008, "step": 1880 }, { "epoch": 0.13589322159409034, "grad_norm": 8.211718434110688, "learning_rate": 4.981158253881882e-06, "loss": 0.9716, "step": 1881 }, { "epoch": 0.13596546679429986, "grad_norm": 7.121392122966054, "learning_rate": 4.981122393008086e-06, "loss": 0.9849, "step": 1882 }, { "epoch": 0.13603771199450937, "grad_norm": 7.934047398634527, "learning_rate": 4.981086498169649e-06, "loss": 0.9891, "step": 1883 }, { "epoch": 0.13610995719471888, "grad_norm": 6.807781860478733, "learning_rate": 4.981050569367065e-06, "loss": 0.965, "step": 1884 }, { "epoch": 0.1361822023949284, "grad_norm": 8.184343734279484, "learning_rate": 4.981014606600825e-06, "loss": 0.9641, "step": 1885 }, { "epoch": 0.13625444759513788, "grad_norm": 7.132308691476261, "learning_rate": 4.980978609871422e-06, "loss": 1.001, "step": 1886 }, { "epoch": 0.1363266927953474, "grad_norm": 6.252379002794063, "learning_rate": 4.980942579179348e-06, "loss": 0.973, "step": 1887 }, { "epoch": 0.1363989379955569, "grad_norm": 8.814760175310624, "learning_rate": 4.980906514525096e-06, "loss": 1.0263, "step": 1888 }, { "epoch": 0.13647118319576643, "grad_norm": 6.3333950374341335, "learning_rate": 4.98087041590916e-06, "loss": 1.0157, "step": 1889 }, { "epoch": 0.13654342839597594, "grad_norm": 6.649876081416357, "learning_rate": 4.980834283332034e-06, "loss": 0.9543, "step": 1890 }, { "epoch": 0.13661567359618546, "grad_norm": 7.647382809606821, "learning_rate": 4.980798116794215e-06, "loss": 0.8973, "step": 1891 }, { "epoch": 0.13668791879639497, "grad_norm": 7.5949721118246725, "learning_rate": 4.980761916296194e-06, "loss": 0.9476, "step": 1892 }, { "epoch": 0.13676016399660448, "grad_norm": 6.5417789846444325, "learning_rate": 4.9807256818384685e-06, "loss": 0.9794, "step": 1893 }, { "epoch": 0.136832409196814, "grad_norm": 8.110599373942092, "learning_rate": 4.980689413421535e-06, "loss": 0.8384, "step": 1894 }, { "epoch": 0.13690465439702348, "grad_norm": 8.197088552784306, "learning_rate": 4.98065311104589e-06, "loss": 1.0335, "step": 1895 }, { "epoch": 0.136976899597233, "grad_norm": 7.430720982438222, "learning_rate": 4.980616774712029e-06, "loss": 0.9609, "step": 1896 }, { "epoch": 0.1370491447974425, "grad_norm": 10.08370084066574, "learning_rate": 4.980580404420452e-06, "loss": 1.009, "step": 1897 }, { "epoch": 0.13712138999765203, "grad_norm": 7.196620561323357, "learning_rate": 4.980544000171654e-06, "loss": 0.9277, "step": 1898 }, { "epoch": 0.13719363519786154, "grad_norm": 9.059148339669504, "learning_rate": 4.980507561966135e-06, "loss": 1.0231, "step": 1899 }, { "epoch": 0.13726588039807106, "grad_norm": 6.932703507989325, "learning_rate": 4.980471089804394e-06, "loss": 0.8983, "step": 1900 }, { "epoch": 0.13733812559828057, "grad_norm": 7.6965442581762895, "learning_rate": 4.98043458368693e-06, "loss": 0.8802, "step": 1901 }, { "epoch": 0.13741037079849008, "grad_norm": 7.402568690925938, "learning_rate": 4.980398043614241e-06, "loss": 1.0523, "step": 1902 }, { "epoch": 0.1374826159986996, "grad_norm": 8.136136917357542, "learning_rate": 4.98036146958683e-06, "loss": 1.0597, "step": 1903 }, { "epoch": 0.13755486119890908, "grad_norm": 8.350034939812753, "learning_rate": 4.980324861605196e-06, "loss": 0.9397, "step": 1904 }, { "epoch": 0.1376271063991186, "grad_norm": 9.578397414050242, "learning_rate": 4.98028821966984e-06, "loss": 0.8744, "step": 1905 }, { "epoch": 0.1376993515993281, "grad_norm": 12.843264679661319, "learning_rate": 4.980251543781264e-06, "loss": 0.9894, "step": 1906 }, { "epoch": 0.13777159679953763, "grad_norm": 6.486260785652408, "learning_rate": 4.98021483393997e-06, "loss": 0.9647, "step": 1907 }, { "epoch": 0.13784384199974714, "grad_norm": 6.305173342301133, "learning_rate": 4.9801780901464614e-06, "loss": 1.0046, "step": 1908 }, { "epoch": 0.13791608719995666, "grad_norm": 7.0399961879026325, "learning_rate": 4.980141312401241e-06, "loss": 0.9935, "step": 1909 }, { "epoch": 0.13798833240016617, "grad_norm": 8.628856736503485, "learning_rate": 4.980104500704811e-06, "loss": 0.9499, "step": 1910 }, { "epoch": 0.13806057760037568, "grad_norm": 7.526063518206455, "learning_rate": 4.980067655057676e-06, "loss": 1.1008, "step": 1911 }, { "epoch": 0.1381328228005852, "grad_norm": 7.954349925622949, "learning_rate": 4.9800307754603405e-06, "loss": 0.9098, "step": 1912 }, { "epoch": 0.13820506800079468, "grad_norm": 8.00498616281108, "learning_rate": 4.979993861913309e-06, "loss": 1.0663, "step": 1913 }, { "epoch": 0.1382773132010042, "grad_norm": 7.484966979377601, "learning_rate": 4.9799569144170874e-06, "loss": 1.0479, "step": 1914 }, { "epoch": 0.1383495584012137, "grad_norm": 8.174648303011555, "learning_rate": 4.9799199329721815e-06, "loss": 0.972, "step": 1915 }, { "epoch": 0.13842180360142323, "grad_norm": 6.30121260583668, "learning_rate": 4.979882917579097e-06, "loss": 0.9772, "step": 1916 }, { "epoch": 0.13849404880163274, "grad_norm": 7.099793670235221, "learning_rate": 4.979845868238341e-06, "loss": 0.9574, "step": 1917 }, { "epoch": 0.13856629400184226, "grad_norm": 8.459932924550326, "learning_rate": 4.979808784950421e-06, "loss": 1.0147, "step": 1918 }, { "epoch": 0.13863853920205177, "grad_norm": 7.232188741010269, "learning_rate": 4.979771667715844e-06, "loss": 0.8707, "step": 1919 }, { "epoch": 0.13871078440226128, "grad_norm": 7.763408014384066, "learning_rate": 4.9797345165351175e-06, "loss": 0.9813, "step": 1920 }, { "epoch": 0.1387830296024708, "grad_norm": 7.55725066798048, "learning_rate": 4.979697331408751e-06, "loss": 1.0155, "step": 1921 }, { "epoch": 0.13885527480268028, "grad_norm": 6.074841708409081, "learning_rate": 4.979660112337253e-06, "loss": 0.8682, "step": 1922 }, { "epoch": 0.1389275200028898, "grad_norm": 6.936101274131583, "learning_rate": 4.9796228593211345e-06, "loss": 0.9624, "step": 1923 }, { "epoch": 0.1389997652030993, "grad_norm": 9.665355549610714, "learning_rate": 4.979585572360904e-06, "loss": 1.0503, "step": 1924 }, { "epoch": 0.13907201040330883, "grad_norm": 8.26321324027156, "learning_rate": 4.979548251457073e-06, "loss": 1.0759, "step": 1925 }, { "epoch": 0.13914425560351834, "grad_norm": 8.637767243287975, "learning_rate": 4.97951089661015e-06, "loss": 1.0327, "step": 1926 }, { "epoch": 0.13921650080372786, "grad_norm": 9.96669334314491, "learning_rate": 4.979473507820649e-06, "loss": 0.9892, "step": 1927 }, { "epoch": 0.13928874600393737, "grad_norm": 9.241479609539544, "learning_rate": 4.979436085089081e-06, "loss": 1.0298, "step": 1928 }, { "epoch": 0.13936099120414688, "grad_norm": 9.429006632549378, "learning_rate": 4.979398628415958e-06, "loss": 0.9719, "step": 1929 }, { "epoch": 0.1394332364043564, "grad_norm": 6.8277956536168265, "learning_rate": 4.979361137801793e-06, "loss": 0.9816, "step": 1930 }, { "epoch": 0.13950548160456588, "grad_norm": 9.537003578475757, "learning_rate": 4.979323613247099e-06, "loss": 0.9865, "step": 1931 }, { "epoch": 0.1395777268047754, "grad_norm": 7.946498065969133, "learning_rate": 4.97928605475239e-06, "loss": 0.9273, "step": 1932 }, { "epoch": 0.1396499720049849, "grad_norm": 8.450308565002228, "learning_rate": 4.97924846231818e-06, "loss": 0.898, "step": 1933 }, { "epoch": 0.13972221720519443, "grad_norm": 6.726500968341535, "learning_rate": 4.979210835944983e-06, "loss": 0.9576, "step": 1934 }, { "epoch": 0.13979446240540394, "grad_norm": 6.0839323332445145, "learning_rate": 4.979173175633315e-06, "loss": 0.9343, "step": 1935 }, { "epoch": 0.13986670760561346, "grad_norm": 10.073858259835594, "learning_rate": 4.979135481383691e-06, "loss": 0.9344, "step": 1936 }, { "epoch": 0.13993895280582297, "grad_norm": 6.747834882704826, "learning_rate": 4.979097753196627e-06, "loss": 0.9727, "step": 1937 }, { "epoch": 0.14001119800603248, "grad_norm": 7.141442850968727, "learning_rate": 4.97905999107264e-06, "loss": 0.9768, "step": 1938 }, { "epoch": 0.140083443206242, "grad_norm": 7.085238151989682, "learning_rate": 4.979022195012247e-06, "loss": 0.9354, "step": 1939 }, { "epoch": 0.14015568840645148, "grad_norm": 8.100602360686336, "learning_rate": 4.978984365015964e-06, "loss": 1.0052, "step": 1940 }, { "epoch": 0.140227933606661, "grad_norm": 6.562669733668084, "learning_rate": 4.978946501084311e-06, "loss": 0.8593, "step": 1941 }, { "epoch": 0.1403001788068705, "grad_norm": 6.750206273070765, "learning_rate": 4.978908603217805e-06, "loss": 0.961, "step": 1942 }, { "epoch": 0.14037242400708003, "grad_norm": 7.581780941981723, "learning_rate": 4.978870671416964e-06, "loss": 0.9959, "step": 1943 }, { "epoch": 0.14044466920728954, "grad_norm": 7.127938718590148, "learning_rate": 4.978832705682308e-06, "loss": 0.9913, "step": 1944 }, { "epoch": 0.14051691440749906, "grad_norm": 7.276536455490754, "learning_rate": 4.978794706014359e-06, "loss": 1.0336, "step": 1945 }, { "epoch": 0.14058915960770857, "grad_norm": 8.443852498683954, "learning_rate": 4.978756672413633e-06, "loss": 1.0141, "step": 1946 }, { "epoch": 0.14066140480791808, "grad_norm": 6.334006959661401, "learning_rate": 4.978718604880654e-06, "loss": 0.8607, "step": 1947 }, { "epoch": 0.1407336500081276, "grad_norm": 6.509845026987249, "learning_rate": 4.978680503415941e-06, "loss": 0.9247, "step": 1948 }, { "epoch": 0.14080589520833708, "grad_norm": 5.794641439751316, "learning_rate": 4.9786423680200175e-06, "loss": 0.9746, "step": 1949 }, { "epoch": 0.1408781404085466, "grad_norm": 8.06221421792445, "learning_rate": 4.978604198693404e-06, "loss": 1.02, "step": 1950 }, { "epoch": 0.1409503856087561, "grad_norm": 7.917374937264042, "learning_rate": 4.978565995436624e-06, "loss": 0.9701, "step": 1951 }, { "epoch": 0.14102263080896563, "grad_norm": 10.047569808546601, "learning_rate": 4.978527758250199e-06, "loss": 1.0093, "step": 1952 }, { "epoch": 0.14109487600917514, "grad_norm": 13.258095049839774, "learning_rate": 4.978489487134655e-06, "loss": 1.0906, "step": 1953 }, { "epoch": 0.14116712120938466, "grad_norm": 6.351356493622605, "learning_rate": 4.978451182090512e-06, "loss": 0.9803, "step": 1954 }, { "epoch": 0.14123936640959417, "grad_norm": 6.370174601271428, "learning_rate": 4.978412843118299e-06, "loss": 0.9931, "step": 1955 }, { "epoch": 0.14131161160980368, "grad_norm": 6.8688773895752115, "learning_rate": 4.9783744702185375e-06, "loss": 0.952, "step": 1956 }, { "epoch": 0.1413838568100132, "grad_norm": 7.353129399302764, "learning_rate": 4.978336063391753e-06, "loss": 0.9759, "step": 1957 }, { "epoch": 0.14145610201022268, "grad_norm": 6.899449931899859, "learning_rate": 4.9782976226384734e-06, "loss": 0.9398, "step": 1958 }, { "epoch": 0.1415283472104322, "grad_norm": 8.533605676517118, "learning_rate": 4.978259147959224e-06, "loss": 0.9212, "step": 1959 }, { "epoch": 0.1416005924106417, "grad_norm": 6.89240461624555, "learning_rate": 4.97822063935453e-06, "loss": 0.9166, "step": 1960 }, { "epoch": 0.14167283761085123, "grad_norm": 6.90733059675736, "learning_rate": 4.97818209682492e-06, "loss": 0.9576, "step": 1961 }, { "epoch": 0.14174508281106074, "grad_norm": 6.894560297831868, "learning_rate": 4.978143520370922e-06, "loss": 0.9504, "step": 1962 }, { "epoch": 0.14181732801127026, "grad_norm": 6.447623875327663, "learning_rate": 4.978104909993062e-06, "loss": 0.9383, "step": 1963 }, { "epoch": 0.14188957321147977, "grad_norm": 8.429040355430537, "learning_rate": 4.978066265691871e-06, "loss": 0.9663, "step": 1964 }, { "epoch": 0.14196181841168928, "grad_norm": 8.596448883733594, "learning_rate": 4.978027587467876e-06, "loss": 0.9876, "step": 1965 }, { "epoch": 0.1420340636118988, "grad_norm": 7.776128715203353, "learning_rate": 4.977988875321607e-06, "loss": 1.0647, "step": 1966 }, { "epoch": 0.14210630881210828, "grad_norm": 7.9739939470614996, "learning_rate": 4.977950129253596e-06, "loss": 0.9269, "step": 1967 }, { "epoch": 0.1421785540123178, "grad_norm": 8.012556712028724, "learning_rate": 4.977911349264371e-06, "loss": 0.9091, "step": 1968 }, { "epoch": 0.1422507992125273, "grad_norm": 9.001657121475867, "learning_rate": 4.977872535354463e-06, "loss": 0.9502, "step": 1969 }, { "epoch": 0.14232304441273683, "grad_norm": 6.213089863887234, "learning_rate": 4.977833687524405e-06, "loss": 0.8442, "step": 1970 }, { "epoch": 0.14239528961294634, "grad_norm": 7.286243478271791, "learning_rate": 4.977794805774727e-06, "loss": 0.9752, "step": 1971 }, { "epoch": 0.14246753481315585, "grad_norm": 7.632026311705077, "learning_rate": 4.977755890105963e-06, "loss": 0.9571, "step": 1972 }, { "epoch": 0.14253978001336537, "grad_norm": 7.279337750589555, "learning_rate": 4.977716940518643e-06, "loss": 0.9802, "step": 1973 }, { "epoch": 0.14261202521357488, "grad_norm": 7.901812005172477, "learning_rate": 4.977677957013303e-06, "loss": 1.0341, "step": 1974 }, { "epoch": 0.1426842704137844, "grad_norm": 9.222811770713301, "learning_rate": 4.9776389395904755e-06, "loss": 0.8298, "step": 1975 }, { "epoch": 0.14275651561399388, "grad_norm": 9.082035870294524, "learning_rate": 4.977599888250695e-06, "loss": 0.9942, "step": 1976 }, { "epoch": 0.1428287608142034, "grad_norm": 10.227809051939213, "learning_rate": 4.977560802994496e-06, "loss": 1.0392, "step": 1977 }, { "epoch": 0.1429010060144129, "grad_norm": 6.746187334234351, "learning_rate": 4.977521683822412e-06, "loss": 0.9386, "step": 1978 }, { "epoch": 0.14297325121462243, "grad_norm": 8.296379807344577, "learning_rate": 4.977482530734981e-06, "loss": 0.9537, "step": 1979 }, { "epoch": 0.14304549641483194, "grad_norm": 9.001338011570011, "learning_rate": 4.977443343732736e-06, "loss": 0.9752, "step": 1980 }, { "epoch": 0.14311774161504145, "grad_norm": 7.112692392224357, "learning_rate": 4.977404122816217e-06, "loss": 0.9272, "step": 1981 }, { "epoch": 0.14318998681525097, "grad_norm": 7.142866254528228, "learning_rate": 4.977364867985959e-06, "loss": 0.9064, "step": 1982 }, { "epoch": 0.14326223201546048, "grad_norm": 8.660591001012367, "learning_rate": 4.977325579242499e-06, "loss": 0.9828, "step": 1983 }, { "epoch": 0.14333447721567, "grad_norm": 7.687207472283015, "learning_rate": 4.977286256586375e-06, "loss": 0.9757, "step": 1984 }, { "epoch": 0.14340672241587948, "grad_norm": 8.071933166317962, "learning_rate": 4.977246900018126e-06, "loss": 1.0186, "step": 1985 }, { "epoch": 0.143478967616089, "grad_norm": 7.491606529710173, "learning_rate": 4.977207509538291e-06, "loss": 1.0251, "step": 1986 }, { "epoch": 0.1435512128162985, "grad_norm": 9.830501687489523, "learning_rate": 4.977168085147408e-06, "loss": 1.0394, "step": 1987 }, { "epoch": 0.14362345801650803, "grad_norm": 9.644012301246901, "learning_rate": 4.977128626846017e-06, "loss": 1.0137, "step": 1988 }, { "epoch": 0.14369570321671754, "grad_norm": 8.35691315238608, "learning_rate": 4.97708913463466e-06, "loss": 1.0104, "step": 1989 }, { "epoch": 0.14376794841692705, "grad_norm": 8.168018255170399, "learning_rate": 4.977049608513874e-06, "loss": 0.9895, "step": 1990 }, { "epoch": 0.14384019361713657, "grad_norm": 7.4670094014025965, "learning_rate": 4.977010048484204e-06, "loss": 0.9591, "step": 1991 }, { "epoch": 0.14391243881734608, "grad_norm": 8.5831095348428, "learning_rate": 4.976970454546189e-06, "loss": 0.9746, "step": 1992 }, { "epoch": 0.14398468401755557, "grad_norm": 6.30352144673075, "learning_rate": 4.976930826700371e-06, "loss": 0.9543, "step": 1993 }, { "epoch": 0.14405692921776508, "grad_norm": 6.869699827963254, "learning_rate": 4.976891164947294e-06, "loss": 0.9689, "step": 1994 }, { "epoch": 0.1441291744179746, "grad_norm": 8.552889365817043, "learning_rate": 4.9768514692875e-06, "loss": 0.9649, "step": 1995 }, { "epoch": 0.1442014196181841, "grad_norm": 10.71305566902264, "learning_rate": 4.976811739721532e-06, "loss": 0.9741, "step": 1996 }, { "epoch": 0.14427366481839363, "grad_norm": 7.86785661566156, "learning_rate": 4.976771976249935e-06, "loss": 0.9867, "step": 1997 }, { "epoch": 0.14434591001860314, "grad_norm": 7.7834060317026825, "learning_rate": 4.976732178873253e-06, "loss": 1.1076, "step": 1998 }, { "epoch": 0.14441815521881265, "grad_norm": 6.907045344474365, "learning_rate": 4.97669234759203e-06, "loss": 0.9609, "step": 1999 }, { "epoch": 0.14449040041902217, "grad_norm": 7.623173479284311, "learning_rate": 4.976652482406812e-06, "loss": 1.0356, "step": 2000 }, { "epoch": 0.14456264561923168, "grad_norm": 7.683748151568887, "learning_rate": 4.976612583318144e-06, "loss": 1.0453, "step": 2001 }, { "epoch": 0.14463489081944117, "grad_norm": 6.875376188216335, "learning_rate": 4.976572650326573e-06, "loss": 0.8921, "step": 2002 }, { "epoch": 0.14470713601965068, "grad_norm": 8.382349689431834, "learning_rate": 4.9765326834326456e-06, "loss": 1.0062, "step": 2003 }, { "epoch": 0.1447793812198602, "grad_norm": 7.7386687103275795, "learning_rate": 4.976492682636909e-06, "loss": 1.0785, "step": 2004 }, { "epoch": 0.1448516264200697, "grad_norm": 8.445705012993521, "learning_rate": 4.97645264793991e-06, "loss": 0.8895, "step": 2005 }, { "epoch": 0.14492387162027923, "grad_norm": 6.852667863996332, "learning_rate": 4.9764125793421966e-06, "loss": 0.9158, "step": 2006 }, { "epoch": 0.14499611682048874, "grad_norm": 6.87644305256546, "learning_rate": 4.976372476844319e-06, "loss": 0.9157, "step": 2007 }, { "epoch": 0.14506836202069825, "grad_norm": 6.281506585341566, "learning_rate": 4.9763323404468235e-06, "loss": 0.925, "step": 2008 }, { "epoch": 0.14514060722090777, "grad_norm": 8.041032940760443, "learning_rate": 4.976292170150262e-06, "loss": 1.0327, "step": 2009 }, { "epoch": 0.14521285242111728, "grad_norm": 8.988007078336908, "learning_rate": 4.976251965955183e-06, "loss": 1.0029, "step": 2010 }, { "epoch": 0.14528509762132677, "grad_norm": 8.914656463011664, "learning_rate": 4.976211727862138e-06, "loss": 0.9504, "step": 2011 }, { "epoch": 0.14535734282153628, "grad_norm": 7.441999853292544, "learning_rate": 4.976171455871676e-06, "loss": 0.9529, "step": 2012 }, { "epoch": 0.1454295880217458, "grad_norm": 8.02244851499723, "learning_rate": 4.9761311499843504e-06, "loss": 1.0321, "step": 2013 }, { "epoch": 0.1455018332219553, "grad_norm": 9.230414611166877, "learning_rate": 4.976090810200711e-06, "loss": 0.9951, "step": 2014 }, { "epoch": 0.14557407842216483, "grad_norm": 9.186139271974984, "learning_rate": 4.976050436521311e-06, "loss": 0.9925, "step": 2015 }, { "epoch": 0.14564632362237434, "grad_norm": 10.493491562551515, "learning_rate": 4.976010028946704e-06, "loss": 0.9992, "step": 2016 }, { "epoch": 0.14571856882258385, "grad_norm": 8.371319773546855, "learning_rate": 4.975969587477441e-06, "loss": 0.9789, "step": 2017 }, { "epoch": 0.14579081402279337, "grad_norm": 8.007525242561144, "learning_rate": 4.975929112114078e-06, "loss": 0.9291, "step": 2018 }, { "epoch": 0.14586305922300288, "grad_norm": 8.028621020646327, "learning_rate": 4.975888602857168e-06, "loss": 1.066, "step": 2019 }, { "epoch": 0.14593530442321237, "grad_norm": 7.592880638813155, "learning_rate": 4.975848059707265e-06, "loss": 1.0512, "step": 2020 }, { "epoch": 0.14600754962342188, "grad_norm": 8.410142188939709, "learning_rate": 4.975807482664924e-06, "loss": 0.9222, "step": 2021 }, { "epoch": 0.1460797948236314, "grad_norm": 7.7821833652059045, "learning_rate": 4.975766871730701e-06, "loss": 0.9584, "step": 2022 }, { "epoch": 0.1461520400238409, "grad_norm": 8.393222854206309, "learning_rate": 4.975726226905152e-06, "loss": 0.9873, "step": 2023 }, { "epoch": 0.14622428522405043, "grad_norm": 6.294213728881769, "learning_rate": 4.9756855481888334e-06, "loss": 0.9207, "step": 2024 }, { "epoch": 0.14629653042425994, "grad_norm": 6.872639476239193, "learning_rate": 4.975644835582302e-06, "loss": 0.871, "step": 2025 }, { "epoch": 0.14636877562446945, "grad_norm": 7.514381099553454, "learning_rate": 4.975604089086115e-06, "loss": 1.0528, "step": 2026 }, { "epoch": 0.14644102082467897, "grad_norm": 6.415594663701566, "learning_rate": 4.975563308700829e-06, "loss": 1.0123, "step": 2027 }, { "epoch": 0.14651326602488848, "grad_norm": 8.107233429889554, "learning_rate": 4.975522494427005e-06, "loss": 0.9507, "step": 2028 }, { "epoch": 0.14658551122509797, "grad_norm": 7.264042638930823, "learning_rate": 4.9754816462652e-06, "loss": 0.9847, "step": 2029 }, { "epoch": 0.14665775642530748, "grad_norm": 6.637913623860439, "learning_rate": 4.975440764215972e-06, "loss": 0.9024, "step": 2030 }, { "epoch": 0.146730001625517, "grad_norm": 7.36991474242049, "learning_rate": 4.9753998482798835e-06, "loss": 0.9248, "step": 2031 }, { "epoch": 0.1468022468257265, "grad_norm": 8.80998637827276, "learning_rate": 4.975358898457492e-06, "loss": 0.972, "step": 2032 }, { "epoch": 0.14687449202593603, "grad_norm": 8.070150368198354, "learning_rate": 4.97531791474936e-06, "loss": 0.9685, "step": 2033 }, { "epoch": 0.14694673722614554, "grad_norm": 8.678428689095668, "learning_rate": 4.975276897156047e-06, "loss": 1.0179, "step": 2034 }, { "epoch": 0.14701898242635505, "grad_norm": 7.693402017367224, "learning_rate": 4.975235845678116e-06, "loss": 0.9897, "step": 2035 }, { "epoch": 0.14709122762656457, "grad_norm": 9.210095911510301, "learning_rate": 4.975194760316128e-06, "loss": 0.9616, "step": 2036 }, { "epoch": 0.14716347282677408, "grad_norm": 8.013166559973097, "learning_rate": 4.975153641070644e-06, "loss": 0.9543, "step": 2037 }, { "epoch": 0.14723571802698357, "grad_norm": 8.110364673472997, "learning_rate": 4.975112487942231e-06, "loss": 0.9713, "step": 2038 }, { "epoch": 0.14730796322719308, "grad_norm": 7.829119529670197, "learning_rate": 4.975071300931449e-06, "loss": 0.9907, "step": 2039 }, { "epoch": 0.1473802084274026, "grad_norm": 12.434680408256126, "learning_rate": 4.9750300800388615e-06, "loss": 1.0158, "step": 2040 }, { "epoch": 0.1474524536276121, "grad_norm": 8.174305774947726, "learning_rate": 4.974988825265035e-06, "loss": 1.1059, "step": 2041 }, { "epoch": 0.14752469882782163, "grad_norm": 7.367935935521642, "learning_rate": 4.974947536610533e-06, "loss": 0.8223, "step": 2042 }, { "epoch": 0.14759694402803114, "grad_norm": 6.95705427950989, "learning_rate": 4.974906214075921e-06, "loss": 0.9371, "step": 2043 }, { "epoch": 0.14766918922824065, "grad_norm": 8.27139605730595, "learning_rate": 4.974864857661764e-06, "loss": 0.9142, "step": 2044 }, { "epoch": 0.14774143442845017, "grad_norm": 7.143179205037029, "learning_rate": 4.9748234673686295e-06, "loss": 0.963, "step": 2045 }, { "epoch": 0.14781367962865968, "grad_norm": 9.18732561218426, "learning_rate": 4.974782043197083e-06, "loss": 1.0088, "step": 2046 }, { "epoch": 0.14788592482886917, "grad_norm": 7.2984617415401045, "learning_rate": 4.974740585147692e-06, "loss": 0.8995, "step": 2047 }, { "epoch": 0.14795817002907868, "grad_norm": 6.516220102188539, "learning_rate": 4.974699093221024e-06, "loss": 0.916, "step": 2048 }, { "epoch": 0.1480304152292882, "grad_norm": 8.505722363635325, "learning_rate": 4.9746575674176464e-06, "loss": 0.9726, "step": 2049 }, { "epoch": 0.1481026604294977, "grad_norm": 8.349263744673838, "learning_rate": 4.974616007738128e-06, "loss": 0.9698, "step": 2050 }, { "epoch": 0.14817490562970723, "grad_norm": 8.633764706393793, "learning_rate": 4.974574414183039e-06, "loss": 0.9617, "step": 2051 }, { "epoch": 0.14824715082991674, "grad_norm": 9.417675088095862, "learning_rate": 4.974532786752947e-06, "loss": 1.0048, "step": 2052 }, { "epoch": 0.14831939603012625, "grad_norm": 7.594087318495076, "learning_rate": 4.974491125448422e-06, "loss": 1.0039, "step": 2053 }, { "epoch": 0.14839164123033577, "grad_norm": 8.265322241619652, "learning_rate": 4.974449430270035e-06, "loss": 0.8842, "step": 2054 }, { "epoch": 0.14846388643054528, "grad_norm": 7.978141008264639, "learning_rate": 4.974407701218357e-06, "loss": 0.9564, "step": 2055 }, { "epoch": 0.14853613163075477, "grad_norm": 8.002973957416451, "learning_rate": 4.974365938293959e-06, "loss": 0.9387, "step": 2056 }, { "epoch": 0.14860837683096428, "grad_norm": 7.460242625527556, "learning_rate": 4.974324141497412e-06, "loss": 1.0456, "step": 2057 }, { "epoch": 0.1486806220311738, "grad_norm": 6.542006692330726, "learning_rate": 4.974282310829288e-06, "loss": 1.0484, "step": 2058 }, { "epoch": 0.1487528672313833, "grad_norm": 8.135499244756021, "learning_rate": 4.9742404462901614e-06, "loss": 0.9664, "step": 2059 }, { "epoch": 0.14882511243159283, "grad_norm": 8.383268914502928, "learning_rate": 4.974198547880604e-06, "loss": 0.901, "step": 2060 }, { "epoch": 0.14889735763180234, "grad_norm": 9.288708910296137, "learning_rate": 4.97415661560119e-06, "loss": 1.0158, "step": 2061 }, { "epoch": 0.14896960283201185, "grad_norm": 8.029628248528715, "learning_rate": 4.974114649452492e-06, "loss": 1.028, "step": 2062 }, { "epoch": 0.14904184803222137, "grad_norm": 8.16060548443304, "learning_rate": 4.974072649435087e-06, "loss": 0.9343, "step": 2063 }, { "epoch": 0.14911409323243088, "grad_norm": 8.224412019346115, "learning_rate": 4.9740306155495464e-06, "loss": 0.9674, "step": 2064 }, { "epoch": 0.14918633843264037, "grad_norm": 10.153898090060775, "learning_rate": 4.973988547796449e-06, "loss": 1.0692, "step": 2065 }, { "epoch": 0.14925858363284988, "grad_norm": 6.980648813342826, "learning_rate": 4.973946446176368e-06, "loss": 0.9394, "step": 2066 }, { "epoch": 0.1493308288330594, "grad_norm": 8.81336094323401, "learning_rate": 4.9739043106898835e-06, "loss": 0.9696, "step": 2067 }, { "epoch": 0.1494030740332689, "grad_norm": 8.094024771367563, "learning_rate": 4.973862141337568e-06, "loss": 0.8782, "step": 2068 }, { "epoch": 0.14947531923347843, "grad_norm": 7.567386782265219, "learning_rate": 4.973819938120001e-06, "loss": 1.0072, "step": 2069 }, { "epoch": 0.14954756443368794, "grad_norm": 7.7860421127956, "learning_rate": 4.97377770103776e-06, "loss": 0.9285, "step": 2070 }, { "epoch": 0.14961980963389745, "grad_norm": 7.021156492257492, "learning_rate": 4.973735430091422e-06, "loss": 0.9885, "step": 2071 }, { "epoch": 0.14969205483410697, "grad_norm": 8.31638377389518, "learning_rate": 4.973693125281568e-06, "loss": 1.1409, "step": 2072 }, { "epoch": 0.14976430003431648, "grad_norm": 6.261790636284181, "learning_rate": 4.973650786608776e-06, "loss": 1.0005, "step": 2073 }, { "epoch": 0.14983654523452597, "grad_norm": 8.342286399459411, "learning_rate": 4.9736084140736245e-06, "loss": 1.1011, "step": 2074 }, { "epoch": 0.14990879043473548, "grad_norm": 8.470363336581904, "learning_rate": 4.973566007676695e-06, "loss": 1.0795, "step": 2075 }, { "epoch": 0.149981035634945, "grad_norm": 6.297734582884932, "learning_rate": 4.973523567418567e-06, "loss": 0.9205, "step": 2076 }, { "epoch": 0.1500532808351545, "grad_norm": 7.910789664068364, "learning_rate": 4.9734810932998215e-06, "loss": 0.9813, "step": 2077 }, { "epoch": 0.15012552603536403, "grad_norm": 7.404217533701786, "learning_rate": 4.973438585321041e-06, "loss": 0.957, "step": 2078 }, { "epoch": 0.15019777123557354, "grad_norm": 6.41520846092845, "learning_rate": 4.973396043482807e-06, "loss": 0.9213, "step": 2079 }, { "epoch": 0.15027001643578305, "grad_norm": 6.742102913088306, "learning_rate": 4.973353467785701e-06, "loss": 0.9102, "step": 2080 }, { "epoch": 0.15034226163599257, "grad_norm": 6.305727810217686, "learning_rate": 4.973310858230307e-06, "loss": 1.0018, "step": 2081 }, { "epoch": 0.15041450683620208, "grad_norm": 8.122286299374327, "learning_rate": 4.973268214817208e-06, "loss": 0.9655, "step": 2082 }, { "epoch": 0.15048675203641157, "grad_norm": 8.196560338300117, "learning_rate": 4.973225537546987e-06, "loss": 0.8959, "step": 2083 }, { "epoch": 0.15055899723662108, "grad_norm": 7.378161512427523, "learning_rate": 4.9731828264202286e-06, "loss": 0.972, "step": 2084 }, { "epoch": 0.1506312424368306, "grad_norm": 7.278688430434015, "learning_rate": 4.973140081437518e-06, "loss": 0.9137, "step": 2085 }, { "epoch": 0.1507034876370401, "grad_norm": 7.283653026463583, "learning_rate": 4.97309730259944e-06, "loss": 0.9094, "step": 2086 }, { "epoch": 0.15077573283724963, "grad_norm": 7.314627313513362, "learning_rate": 4.97305448990658e-06, "loss": 0.9486, "step": 2087 }, { "epoch": 0.15084797803745914, "grad_norm": 9.95697684738963, "learning_rate": 4.973011643359524e-06, "loss": 0.9675, "step": 2088 }, { "epoch": 0.15092022323766865, "grad_norm": 7.724409615384855, "learning_rate": 4.972968762958859e-06, "loss": 0.9852, "step": 2089 }, { "epoch": 0.15099246843787817, "grad_norm": 8.068027228169981, "learning_rate": 4.972925848705171e-06, "loss": 0.9104, "step": 2090 }, { "epoch": 0.15106471363808768, "grad_norm": 8.402848133251505, "learning_rate": 4.97288290059905e-06, "loss": 0.9969, "step": 2091 }, { "epoch": 0.15113695883829717, "grad_norm": 8.171592576232223, "learning_rate": 4.9728399186410805e-06, "loss": 0.9265, "step": 2092 }, { "epoch": 0.15120920403850668, "grad_norm": 6.436271772527476, "learning_rate": 4.972796902831853e-06, "loss": 0.9895, "step": 2093 }, { "epoch": 0.1512814492387162, "grad_norm": 6.582082541064992, "learning_rate": 4.9727538531719564e-06, "loss": 0.9101, "step": 2094 }, { "epoch": 0.1513536944389257, "grad_norm": 8.137036607149666, "learning_rate": 4.972710769661979e-06, "loss": 1.0104, "step": 2095 }, { "epoch": 0.15142593963913523, "grad_norm": 7.097626420978184, "learning_rate": 4.972667652302512e-06, "loss": 0.9291, "step": 2096 }, { "epoch": 0.15149818483934474, "grad_norm": 6.504598018297416, "learning_rate": 4.972624501094143e-06, "loss": 0.9062, "step": 2097 }, { "epoch": 0.15157043003955425, "grad_norm": 6.15099005444671, "learning_rate": 4.972581316037466e-06, "loss": 0.8526, "step": 2098 }, { "epoch": 0.15164267523976377, "grad_norm": 6.086498151426192, "learning_rate": 4.97253809713307e-06, "loss": 0.9256, "step": 2099 }, { "epoch": 0.15171492043997328, "grad_norm": 8.501724068316324, "learning_rate": 4.9724948443815474e-06, "loss": 0.9512, "step": 2100 }, { "epoch": 0.15178716564018277, "grad_norm": 8.063826577516757, "learning_rate": 4.97245155778349e-06, "loss": 0.926, "step": 2101 }, { "epoch": 0.15185941084039228, "grad_norm": 7.329732299745844, "learning_rate": 4.972408237339491e-06, "loss": 0.9068, "step": 2102 }, { "epoch": 0.1519316560406018, "grad_norm": 8.068300511540906, "learning_rate": 4.9723648830501425e-06, "loss": 1.0268, "step": 2103 }, { "epoch": 0.1520039012408113, "grad_norm": 6.421945687529147, "learning_rate": 4.972321494916038e-06, "loss": 0.8591, "step": 2104 }, { "epoch": 0.15207614644102083, "grad_norm": 8.027117070248362, "learning_rate": 4.972278072937773e-06, "loss": 1.0937, "step": 2105 }, { "epoch": 0.15214839164123034, "grad_norm": 7.986875019486927, "learning_rate": 4.9722346171159394e-06, "loss": 1.0572, "step": 2106 }, { "epoch": 0.15222063684143985, "grad_norm": 7.911139502932765, "learning_rate": 4.972191127451135e-06, "loss": 0.9851, "step": 2107 }, { "epoch": 0.15229288204164937, "grad_norm": 9.332510321524815, "learning_rate": 4.972147603943952e-06, "loss": 1.0022, "step": 2108 }, { "epoch": 0.15236512724185886, "grad_norm": 6.367906700658468, "learning_rate": 4.972104046594989e-06, "loss": 0.9249, "step": 2109 }, { "epoch": 0.15243737244206837, "grad_norm": 6.543657846175371, "learning_rate": 4.97206045540484e-06, "loss": 0.9562, "step": 2110 }, { "epoch": 0.15250961764227788, "grad_norm": 8.048988079656057, "learning_rate": 4.972016830374103e-06, "loss": 0.959, "step": 2111 }, { "epoch": 0.1525818628424874, "grad_norm": 7.139517466591262, "learning_rate": 4.971973171503376e-06, "loss": 0.9301, "step": 2112 }, { "epoch": 0.1526541080426969, "grad_norm": 7.992723732239974, "learning_rate": 4.971929478793255e-06, "loss": 1.0069, "step": 2113 }, { "epoch": 0.15272635324290643, "grad_norm": 7.442218726150898, "learning_rate": 4.971885752244339e-06, "loss": 1.0689, "step": 2114 }, { "epoch": 0.15279859844311594, "grad_norm": 7.258607161824333, "learning_rate": 4.971841991857226e-06, "loss": 0.9992, "step": 2115 }, { "epoch": 0.15287084364332545, "grad_norm": 7.247498968781592, "learning_rate": 4.971798197632516e-06, "loss": 0.9095, "step": 2116 }, { "epoch": 0.15294308884353497, "grad_norm": 7.328614826546618, "learning_rate": 4.971754369570807e-06, "loss": 0.8476, "step": 2117 }, { "epoch": 0.15301533404374446, "grad_norm": 7.126498951426815, "learning_rate": 4.9717105076727e-06, "loss": 1.0288, "step": 2118 }, { "epoch": 0.15308757924395397, "grad_norm": 8.45223729237307, "learning_rate": 4.9716666119387966e-06, "loss": 0.9852, "step": 2119 }, { "epoch": 0.15315982444416348, "grad_norm": 9.7440151528427, "learning_rate": 4.971622682369695e-06, "loss": 0.9996, "step": 2120 }, { "epoch": 0.153232069644373, "grad_norm": 7.479448516833764, "learning_rate": 4.971578718965999e-06, "loss": 0.9965, "step": 2121 }, { "epoch": 0.1533043148445825, "grad_norm": 6.867740752031149, "learning_rate": 4.971534721728308e-06, "loss": 0.9139, "step": 2122 }, { "epoch": 0.15337656004479203, "grad_norm": 7.767527786563693, "learning_rate": 4.971490690657227e-06, "loss": 1.0193, "step": 2123 }, { "epoch": 0.15344880524500154, "grad_norm": 10.342617007435788, "learning_rate": 4.971446625753357e-06, "loss": 1.0003, "step": 2124 }, { "epoch": 0.15352105044521105, "grad_norm": 7.36980060985728, "learning_rate": 4.971402527017301e-06, "loss": 0.9265, "step": 2125 }, { "epoch": 0.15359329564542057, "grad_norm": 7.134124802268036, "learning_rate": 4.971358394449664e-06, "loss": 0.8922, "step": 2126 }, { "epoch": 0.15366554084563006, "grad_norm": 6.446490185967356, "learning_rate": 4.9713142280510495e-06, "loss": 0.9051, "step": 2127 }, { "epoch": 0.15373778604583957, "grad_norm": 9.853252172208961, "learning_rate": 4.971270027822062e-06, "loss": 0.9707, "step": 2128 }, { "epoch": 0.15381003124604908, "grad_norm": 8.468645045907534, "learning_rate": 4.971225793763307e-06, "loss": 1.0475, "step": 2129 }, { "epoch": 0.1538822764462586, "grad_norm": 6.134494507102114, "learning_rate": 4.97118152587539e-06, "loss": 0.9562, "step": 2130 }, { "epoch": 0.1539545216464681, "grad_norm": 8.435647492670936, "learning_rate": 4.971137224158915e-06, "loss": 1.0174, "step": 2131 }, { "epoch": 0.15402676684667763, "grad_norm": 9.573401924815572, "learning_rate": 4.971092888614491e-06, "loss": 0.8861, "step": 2132 }, { "epoch": 0.15409901204688714, "grad_norm": 7.426331412911393, "learning_rate": 4.971048519242724e-06, "loss": 1.0222, "step": 2133 }, { "epoch": 0.15417125724709665, "grad_norm": 7.526053887767615, "learning_rate": 4.9710041160442215e-06, "loss": 0.9749, "step": 2134 }, { "epoch": 0.15424350244730617, "grad_norm": 7.526937809338173, "learning_rate": 4.970959679019591e-06, "loss": 0.9366, "step": 2135 }, { "epoch": 0.15431574764751566, "grad_norm": 7.1043023089961075, "learning_rate": 4.9709152081694416e-06, "loss": 0.955, "step": 2136 }, { "epoch": 0.15438799284772517, "grad_norm": 6.927091471349386, "learning_rate": 4.970870703494381e-06, "loss": 0.9887, "step": 2137 }, { "epoch": 0.15446023804793468, "grad_norm": 7.023102104147654, "learning_rate": 4.970826164995019e-06, "loss": 1.0018, "step": 2138 }, { "epoch": 0.1545324832481442, "grad_norm": 8.425184968904475, "learning_rate": 4.970781592671966e-06, "loss": 0.9707, "step": 2139 }, { "epoch": 0.1546047284483537, "grad_norm": 7.091815432621685, "learning_rate": 4.970736986525831e-06, "loss": 0.9616, "step": 2140 }, { "epoch": 0.15467697364856323, "grad_norm": 7.264027672198182, "learning_rate": 4.9706923465572245e-06, "loss": 0.9433, "step": 2141 }, { "epoch": 0.15474921884877274, "grad_norm": 8.892065270119557, "learning_rate": 4.970647672766759e-06, "loss": 0.9391, "step": 2142 }, { "epoch": 0.15482146404898225, "grad_norm": 7.1869734861501335, "learning_rate": 4.970602965155045e-06, "loss": 0.9582, "step": 2143 }, { "epoch": 0.15489370924919177, "grad_norm": 7.858688658215047, "learning_rate": 4.9705582237226945e-06, "loss": 0.9691, "step": 2144 }, { "epoch": 0.15496595444940126, "grad_norm": 6.981699591138837, "learning_rate": 4.97051344847032e-06, "loss": 1.0345, "step": 2145 }, { "epoch": 0.15503819964961077, "grad_norm": 6.254855291825678, "learning_rate": 4.9704686393985345e-06, "loss": 0.962, "step": 2146 }, { "epoch": 0.15511044484982028, "grad_norm": 9.001888500866428, "learning_rate": 4.970423796507952e-06, "loss": 1.0684, "step": 2147 }, { "epoch": 0.1551826900500298, "grad_norm": 5.888803053576218, "learning_rate": 4.970378919799186e-06, "loss": 0.9136, "step": 2148 }, { "epoch": 0.1552549352502393, "grad_norm": 8.220023517064519, "learning_rate": 4.97033400927285e-06, "loss": 0.9324, "step": 2149 }, { "epoch": 0.15532718045044883, "grad_norm": 6.538117371213645, "learning_rate": 4.97028906492956e-06, "loss": 0.923, "step": 2150 }, { "epoch": 0.15539942565065834, "grad_norm": 6.267629554848979, "learning_rate": 4.970244086769931e-06, "loss": 0.9571, "step": 2151 }, { "epoch": 0.15547167085086785, "grad_norm": 8.14140096469697, "learning_rate": 4.970199074794578e-06, "loss": 1.0054, "step": 2152 }, { "epoch": 0.15554391605107737, "grad_norm": 10.331564187812731, "learning_rate": 4.9701540290041186e-06, "loss": 0.9419, "step": 2153 }, { "epoch": 0.15561616125128686, "grad_norm": 8.424106847130739, "learning_rate": 4.970108949399167e-06, "loss": 0.9581, "step": 2154 }, { "epoch": 0.15568840645149637, "grad_norm": 6.1022233006852336, "learning_rate": 4.9700638359803435e-06, "loss": 0.924, "step": 2155 }, { "epoch": 0.15576065165170588, "grad_norm": 8.548642716675161, "learning_rate": 4.970018688748263e-06, "loss": 0.9022, "step": 2156 }, { "epoch": 0.1558328968519154, "grad_norm": 8.547037015003495, "learning_rate": 4.9699735077035434e-06, "loss": 0.8812, "step": 2157 }, { "epoch": 0.1559051420521249, "grad_norm": 6.476885264122669, "learning_rate": 4.969928292846806e-06, "loss": 0.9184, "step": 2158 }, { "epoch": 0.15597738725233443, "grad_norm": 7.393678965674729, "learning_rate": 4.969883044178668e-06, "loss": 0.9714, "step": 2159 }, { "epoch": 0.15604963245254394, "grad_norm": 7.433237858089275, "learning_rate": 4.969837761699747e-06, "loss": 0.9857, "step": 2160 }, { "epoch": 0.15612187765275345, "grad_norm": 7.3297872061241005, "learning_rate": 4.969792445410667e-06, "loss": 1.0262, "step": 2161 }, { "epoch": 0.15619412285296297, "grad_norm": 6.920157811564914, "learning_rate": 4.969747095312045e-06, "loss": 0.9538, "step": 2162 }, { "epoch": 0.15626636805317246, "grad_norm": 6.853750229758454, "learning_rate": 4.969701711404503e-06, "loss": 1.0342, "step": 2163 }, { "epoch": 0.15633861325338197, "grad_norm": 6.453943676315341, "learning_rate": 4.969656293688661e-06, "loss": 0.9154, "step": 2164 }, { "epoch": 0.15641085845359148, "grad_norm": 7.070430657679813, "learning_rate": 4.969610842165144e-06, "loss": 0.95, "step": 2165 }, { "epoch": 0.156483103653801, "grad_norm": 7.23665732845979, "learning_rate": 4.969565356834571e-06, "loss": 0.9691, "step": 2166 }, { "epoch": 0.1565553488540105, "grad_norm": 9.006487309693384, "learning_rate": 4.969519837697566e-06, "loss": 1.0342, "step": 2167 }, { "epoch": 0.15662759405422003, "grad_norm": 6.481107398646698, "learning_rate": 4.969474284754752e-06, "loss": 0.9055, "step": 2168 }, { "epoch": 0.15669983925442954, "grad_norm": 7.111441041657088, "learning_rate": 4.9694286980067525e-06, "loss": 0.9624, "step": 2169 }, { "epoch": 0.15677208445463905, "grad_norm": 6.218311083462526, "learning_rate": 4.9693830774541915e-06, "loss": 0.8953, "step": 2170 }, { "epoch": 0.15684432965484857, "grad_norm": 8.078065499357756, "learning_rate": 4.969337423097693e-06, "loss": 0.9723, "step": 2171 }, { "epoch": 0.15691657485505806, "grad_norm": 7.381245441742215, "learning_rate": 4.969291734937883e-06, "loss": 0.9243, "step": 2172 }, { "epoch": 0.15698882005526757, "grad_norm": 6.575147956372796, "learning_rate": 4.969246012975386e-06, "loss": 0.9082, "step": 2173 }, { "epoch": 0.15706106525547708, "grad_norm": 6.667511346075748, "learning_rate": 4.969200257210829e-06, "loss": 0.9697, "step": 2174 }, { "epoch": 0.1571333104556866, "grad_norm": 6.909054010009794, "learning_rate": 4.969154467644838e-06, "loss": 1.0061, "step": 2175 }, { "epoch": 0.1572055556558961, "grad_norm": 11.41077139089867, "learning_rate": 4.969108644278038e-06, "loss": 1.0069, "step": 2176 }, { "epoch": 0.15727780085610563, "grad_norm": 9.133131192979963, "learning_rate": 4.969062787111059e-06, "loss": 0.9582, "step": 2177 }, { "epoch": 0.15735004605631514, "grad_norm": 7.36346542168546, "learning_rate": 4.969016896144526e-06, "loss": 0.9695, "step": 2178 }, { "epoch": 0.15742229125652465, "grad_norm": 7.493635656201894, "learning_rate": 4.968970971379071e-06, "loss": 0.966, "step": 2179 }, { "epoch": 0.15749453645673417, "grad_norm": 7.491132707700803, "learning_rate": 4.968925012815319e-06, "loss": 0.9827, "step": 2180 }, { "epoch": 0.15756678165694366, "grad_norm": 10.303868606108733, "learning_rate": 4.968879020453901e-06, "loss": 0.8345, "step": 2181 }, { "epoch": 0.15763902685715317, "grad_norm": 9.3128907806536, "learning_rate": 4.968832994295446e-06, "loss": 0.929, "step": 2182 }, { "epoch": 0.15771127205736268, "grad_norm": 6.518036393837985, "learning_rate": 4.968786934340584e-06, "loss": 0.9485, "step": 2183 }, { "epoch": 0.1577835172575722, "grad_norm": 8.71339432492419, "learning_rate": 4.968740840589946e-06, "loss": 0.9205, "step": 2184 }, { "epoch": 0.1578557624577817, "grad_norm": 7.417847385789707, "learning_rate": 4.968694713044163e-06, "loss": 0.9645, "step": 2185 }, { "epoch": 0.15792800765799123, "grad_norm": 7.963317215930995, "learning_rate": 4.968648551703866e-06, "loss": 1.0453, "step": 2186 }, { "epoch": 0.15800025285820074, "grad_norm": 8.22419077130227, "learning_rate": 4.968602356569687e-06, "loss": 0.8779, "step": 2187 }, { "epoch": 0.15807249805841025, "grad_norm": 7.252552503930297, "learning_rate": 4.968556127642259e-06, "loss": 1.0164, "step": 2188 }, { "epoch": 0.15814474325861977, "grad_norm": 7.560931334677301, "learning_rate": 4.968509864922214e-06, "loss": 0.9608, "step": 2189 }, { "epoch": 0.15821698845882926, "grad_norm": 7.348167097472866, "learning_rate": 4.968463568410186e-06, "loss": 1.0464, "step": 2190 }, { "epoch": 0.15828923365903877, "grad_norm": 7.549573206680451, "learning_rate": 4.968417238106807e-06, "loss": 0.9951, "step": 2191 }, { "epoch": 0.15836147885924828, "grad_norm": 7.674447741194426, "learning_rate": 4.968370874012714e-06, "loss": 0.9513, "step": 2192 }, { "epoch": 0.1584337240594578, "grad_norm": 8.869903390523854, "learning_rate": 4.9683244761285396e-06, "loss": 0.9688, "step": 2193 }, { "epoch": 0.1585059692596673, "grad_norm": 7.439147253901779, "learning_rate": 4.968278044454921e-06, "loss": 0.9641, "step": 2194 }, { "epoch": 0.15857821445987683, "grad_norm": 9.252029196370291, "learning_rate": 4.968231578992491e-06, "loss": 1.1167, "step": 2195 }, { "epoch": 0.15865045966008634, "grad_norm": 9.255994349419428, "learning_rate": 4.968185079741887e-06, "loss": 0.8801, "step": 2196 }, { "epoch": 0.15872270486029585, "grad_norm": 7.4794956938928, "learning_rate": 4.968138546703746e-06, "loss": 1.0019, "step": 2197 }, { "epoch": 0.15879495006050537, "grad_norm": 8.210898939163735, "learning_rate": 4.968091979878705e-06, "loss": 0.98, "step": 2198 }, { "epoch": 0.15886719526071486, "grad_norm": 7.635838794011747, "learning_rate": 4.968045379267401e-06, "loss": 0.9799, "step": 2199 }, { "epoch": 0.15893944046092437, "grad_norm": 6.803057925481097, "learning_rate": 4.967998744870472e-06, "loss": 0.9147, "step": 2200 }, { "epoch": 0.15901168566113388, "grad_norm": 8.426114913566103, "learning_rate": 4.967952076688557e-06, "loss": 1.0388, "step": 2201 }, { "epoch": 0.1590839308613434, "grad_norm": 7.635201056030916, "learning_rate": 4.9679053747222935e-06, "loss": 0.9328, "step": 2202 }, { "epoch": 0.1591561760615529, "grad_norm": 8.09474818139003, "learning_rate": 4.967858638972322e-06, "loss": 1.0085, "step": 2203 }, { "epoch": 0.15922842126176243, "grad_norm": 6.924223146347893, "learning_rate": 4.967811869439282e-06, "loss": 0.9207, "step": 2204 }, { "epoch": 0.15930066646197194, "grad_norm": 6.670199538967964, "learning_rate": 4.9677650661238136e-06, "loss": 0.9592, "step": 2205 }, { "epoch": 0.15937291166218145, "grad_norm": 7.700775840612638, "learning_rate": 4.967718229026558e-06, "loss": 0.9396, "step": 2206 }, { "epoch": 0.15944515686239097, "grad_norm": 6.755253760528958, "learning_rate": 4.967671358148155e-06, "loss": 1.0001, "step": 2207 }, { "epoch": 0.15951740206260046, "grad_norm": 7.765407628292601, "learning_rate": 4.967624453489247e-06, "loss": 0.9726, "step": 2208 }, { "epoch": 0.15958964726280997, "grad_norm": 7.919375414640211, "learning_rate": 4.967577515050477e-06, "loss": 0.9096, "step": 2209 }, { "epoch": 0.15966189246301948, "grad_norm": 7.938494282122092, "learning_rate": 4.967530542832486e-06, "loss": 1.0089, "step": 2210 }, { "epoch": 0.159734137663229, "grad_norm": 7.653967941547974, "learning_rate": 4.967483536835919e-06, "loss": 0.9839, "step": 2211 }, { "epoch": 0.1598063828634385, "grad_norm": 7.0199985932351465, "learning_rate": 4.967436497061417e-06, "loss": 1.0273, "step": 2212 }, { "epoch": 0.15987862806364803, "grad_norm": 8.236165324378105, "learning_rate": 4.967389423509626e-06, "loss": 0.9903, "step": 2213 }, { "epoch": 0.15995087326385754, "grad_norm": 7.561985723490583, "learning_rate": 4.967342316181189e-06, "loss": 0.9332, "step": 2214 }, { "epoch": 0.16002311846406705, "grad_norm": 9.231979549973712, "learning_rate": 4.967295175076752e-06, "loss": 0.9594, "step": 2215 }, { "epoch": 0.16009536366427654, "grad_norm": 7.092099707800283, "learning_rate": 4.96724800019696e-06, "loss": 0.8508, "step": 2216 }, { "epoch": 0.16016760886448606, "grad_norm": 6.600999074514273, "learning_rate": 4.967200791542458e-06, "loss": 0.967, "step": 2217 }, { "epoch": 0.16023985406469557, "grad_norm": 8.11132083546205, "learning_rate": 4.967153549113893e-06, "loss": 0.9161, "step": 2218 }, { "epoch": 0.16031209926490508, "grad_norm": 9.423308154372625, "learning_rate": 4.967106272911912e-06, "loss": 0.919, "step": 2219 }, { "epoch": 0.1603843444651146, "grad_norm": 6.612205963231584, "learning_rate": 4.967058962937161e-06, "loss": 0.9395, "step": 2220 }, { "epoch": 0.1604565896653241, "grad_norm": 7.353361552170773, "learning_rate": 4.9670116191902884e-06, "loss": 0.929, "step": 2221 }, { "epoch": 0.16052883486553363, "grad_norm": 7.465003953589014, "learning_rate": 4.966964241671942e-06, "loss": 0.9567, "step": 2222 }, { "epoch": 0.16060108006574314, "grad_norm": 6.596220448601075, "learning_rate": 4.966916830382771e-06, "loss": 0.9251, "step": 2223 }, { "epoch": 0.16067332526595265, "grad_norm": 7.402017792542993, "learning_rate": 4.966869385323424e-06, "loss": 0.9682, "step": 2224 }, { "epoch": 0.16074557046616214, "grad_norm": 6.759498412354358, "learning_rate": 4.966821906494551e-06, "loss": 0.9441, "step": 2225 }, { "epoch": 0.16081781566637166, "grad_norm": 6.43802973965723, "learning_rate": 4.966774393896801e-06, "loss": 0.8931, "step": 2226 }, { "epoch": 0.16089006086658117, "grad_norm": 7.307968430958813, "learning_rate": 4.966726847530825e-06, "loss": 0.9392, "step": 2227 }, { "epoch": 0.16096230606679068, "grad_norm": 7.941841440055598, "learning_rate": 4.966679267397273e-06, "loss": 0.9424, "step": 2228 }, { "epoch": 0.1610345512670002, "grad_norm": 7.799603124449717, "learning_rate": 4.966631653496799e-06, "loss": 0.923, "step": 2229 }, { "epoch": 0.1611067964672097, "grad_norm": 7.01283993908534, "learning_rate": 4.966584005830051e-06, "loss": 0.9971, "step": 2230 }, { "epoch": 0.16117904166741923, "grad_norm": 7.981207949199741, "learning_rate": 4.9665363243976835e-06, "loss": 0.9284, "step": 2231 }, { "epoch": 0.16125128686762874, "grad_norm": 9.532658707372775, "learning_rate": 4.966488609200349e-06, "loss": 0.9461, "step": 2232 }, { "epoch": 0.16132353206783825, "grad_norm": 6.818032913893963, "learning_rate": 4.966440860238701e-06, "loss": 0.9396, "step": 2233 }, { "epoch": 0.16139577726804774, "grad_norm": 7.490737281438407, "learning_rate": 4.9663930775133915e-06, "loss": 0.8997, "step": 2234 }, { "epoch": 0.16146802246825726, "grad_norm": 7.622049980454144, "learning_rate": 4.966345261025077e-06, "loss": 0.9127, "step": 2235 }, { "epoch": 0.16154026766846677, "grad_norm": 8.190883257773882, "learning_rate": 4.966297410774411e-06, "loss": 0.9711, "step": 2236 }, { "epoch": 0.16161251286867628, "grad_norm": 8.629663505845468, "learning_rate": 4.966249526762048e-06, "loss": 0.9964, "step": 2237 }, { "epoch": 0.1616847580688858, "grad_norm": 9.107128880594251, "learning_rate": 4.966201608988643e-06, "loss": 0.9493, "step": 2238 }, { "epoch": 0.1617570032690953, "grad_norm": 7.515204467428405, "learning_rate": 4.966153657454854e-06, "loss": 0.9287, "step": 2239 }, { "epoch": 0.16182924846930483, "grad_norm": 9.595314122038213, "learning_rate": 4.966105672161335e-06, "loss": 1.0362, "step": 2240 }, { "epoch": 0.16190149366951434, "grad_norm": 6.806375532857933, "learning_rate": 4.966057653108746e-06, "loss": 0.9385, "step": 2241 }, { "epoch": 0.16197373886972385, "grad_norm": 11.418848724857154, "learning_rate": 4.966009600297742e-06, "loss": 1.0434, "step": 2242 }, { "epoch": 0.16204598406993334, "grad_norm": 13.391954745454335, "learning_rate": 4.965961513728981e-06, "loss": 0.9409, "step": 2243 }, { "epoch": 0.16211822927014286, "grad_norm": 9.03022058025314, "learning_rate": 4.9659133934031216e-06, "loss": 1.0527, "step": 2244 }, { "epoch": 0.16219047447035237, "grad_norm": 8.648146378907773, "learning_rate": 4.965865239320822e-06, "loss": 0.9877, "step": 2245 }, { "epoch": 0.16226271967056188, "grad_norm": 10.639635485378188, "learning_rate": 4.965817051482742e-06, "loss": 1.0514, "step": 2246 }, { "epoch": 0.1623349648707714, "grad_norm": 10.373800185353703, "learning_rate": 4.965768829889542e-06, "loss": 0.9354, "step": 2247 }, { "epoch": 0.1624072100709809, "grad_norm": 6.849693469686224, "learning_rate": 4.96572057454188e-06, "loss": 0.9102, "step": 2248 }, { "epoch": 0.16247945527119043, "grad_norm": 6.58367787035006, "learning_rate": 4.965672285440418e-06, "loss": 0.9794, "step": 2249 }, { "epoch": 0.16255170047139994, "grad_norm": 7.271392042960663, "learning_rate": 4.965623962585818e-06, "loss": 0.9645, "step": 2250 }, { "epoch": 0.16262394567160945, "grad_norm": 9.326244068765076, "learning_rate": 4.96557560597874e-06, "loss": 0.9534, "step": 2251 }, { "epoch": 0.16269619087181894, "grad_norm": 7.65972323363378, "learning_rate": 4.9655272156198455e-06, "loss": 0.8919, "step": 2252 }, { "epoch": 0.16276843607202846, "grad_norm": 6.777969058869204, "learning_rate": 4.965478791509799e-06, "loss": 0.934, "step": 2253 }, { "epoch": 0.16284068127223797, "grad_norm": 8.376236553855934, "learning_rate": 4.9654303336492615e-06, "loss": 0.9281, "step": 2254 }, { "epoch": 0.16291292647244748, "grad_norm": 6.45959650627853, "learning_rate": 4.9653818420388965e-06, "loss": 0.9116, "step": 2255 }, { "epoch": 0.162985171672657, "grad_norm": 9.283356604098929, "learning_rate": 4.965333316679369e-06, "loss": 0.9369, "step": 2256 }, { "epoch": 0.1630574168728665, "grad_norm": 9.91703022095427, "learning_rate": 4.9652847575713426e-06, "loss": 1.0058, "step": 2257 }, { "epoch": 0.16312966207307603, "grad_norm": 9.85011807288647, "learning_rate": 4.965236164715483e-06, "loss": 0.91, "step": 2258 }, { "epoch": 0.16320190727328554, "grad_norm": 7.908379188889698, "learning_rate": 4.965187538112453e-06, "loss": 0.9307, "step": 2259 }, { "epoch": 0.16327415247349505, "grad_norm": 7.339581284390532, "learning_rate": 4.96513887776292e-06, "loss": 0.9227, "step": 2260 }, { "epoch": 0.16334639767370454, "grad_norm": 9.29959420580157, "learning_rate": 4.965090183667549e-06, "loss": 0.936, "step": 2261 }, { "epoch": 0.16341864287391406, "grad_norm": 9.993330829697241, "learning_rate": 4.965041455827009e-06, "loss": 0.9082, "step": 2262 }, { "epoch": 0.16349088807412357, "grad_norm": 7.106571122482099, "learning_rate": 4.964992694241965e-06, "loss": 0.9569, "step": 2263 }, { "epoch": 0.16356313327433308, "grad_norm": 8.852935442122167, "learning_rate": 4.964943898913084e-06, "loss": 1.0164, "step": 2264 }, { "epoch": 0.1636353784745426, "grad_norm": 6.651218572268825, "learning_rate": 4.964895069841036e-06, "loss": 0.9463, "step": 2265 }, { "epoch": 0.1637076236747521, "grad_norm": 8.893493725856867, "learning_rate": 4.964846207026488e-06, "loss": 0.9311, "step": 2266 }, { "epoch": 0.16377986887496163, "grad_norm": 6.808067069416964, "learning_rate": 4.964797310470109e-06, "loss": 0.9036, "step": 2267 }, { "epoch": 0.16385211407517114, "grad_norm": 8.14908726735796, "learning_rate": 4.964748380172569e-06, "loss": 0.9331, "step": 2268 }, { "epoch": 0.16392435927538065, "grad_norm": 8.134552490080027, "learning_rate": 4.964699416134537e-06, "loss": 0.9101, "step": 2269 }, { "epoch": 0.16399660447559014, "grad_norm": 7.89845754317128, "learning_rate": 4.964650418356684e-06, "loss": 0.9805, "step": 2270 }, { "epoch": 0.16406884967579966, "grad_norm": 6.024073941935819, "learning_rate": 4.964601386839681e-06, "loss": 0.8884, "step": 2271 }, { "epoch": 0.16414109487600917, "grad_norm": 8.534652087512578, "learning_rate": 4.964552321584198e-06, "loss": 0.9446, "step": 2272 }, { "epoch": 0.16421334007621868, "grad_norm": 11.526483972299266, "learning_rate": 4.964503222590908e-06, "loss": 1.0255, "step": 2273 }, { "epoch": 0.1642855852764282, "grad_norm": 10.22600183778713, "learning_rate": 4.964454089860481e-06, "loss": 0.9966, "step": 2274 }, { "epoch": 0.1643578304766377, "grad_norm": 6.111236054655609, "learning_rate": 4.964404923393592e-06, "loss": 0.9485, "step": 2275 }, { "epoch": 0.16443007567684723, "grad_norm": 7.49657972548591, "learning_rate": 4.9643557231909135e-06, "loss": 0.9312, "step": 2276 }, { "epoch": 0.16450232087705674, "grad_norm": 9.31599022918097, "learning_rate": 4.964306489253118e-06, "loss": 1.0629, "step": 2277 }, { "epoch": 0.16457456607726625, "grad_norm": 9.197328046812487, "learning_rate": 4.9642572215808806e-06, "loss": 1.0107, "step": 2278 }, { "epoch": 0.16464681127747574, "grad_norm": 6.77711353625574, "learning_rate": 4.964207920174874e-06, "loss": 0.9206, "step": 2279 }, { "epoch": 0.16471905647768526, "grad_norm": 6.615515775864561, "learning_rate": 4.964158585035775e-06, "loss": 0.9454, "step": 2280 }, { "epoch": 0.16479130167789477, "grad_norm": 6.254801012467655, "learning_rate": 4.964109216164259e-06, "loss": 0.8958, "step": 2281 }, { "epoch": 0.16486354687810428, "grad_norm": 9.474596289737923, "learning_rate": 4.964059813561e-06, "loss": 1.0003, "step": 2282 }, { "epoch": 0.1649357920783138, "grad_norm": 7.869198312024841, "learning_rate": 4.964010377226675e-06, "loss": 1.0007, "step": 2283 }, { "epoch": 0.1650080372785233, "grad_norm": 7.309971559732744, "learning_rate": 4.963960907161963e-06, "loss": 0.777, "step": 2284 }, { "epoch": 0.16508028247873283, "grad_norm": 8.00125159963393, "learning_rate": 4.963911403367539e-06, "loss": 0.989, "step": 2285 }, { "epoch": 0.16515252767894234, "grad_norm": 7.472553641317223, "learning_rate": 4.963861865844079e-06, "loss": 1.0216, "step": 2286 }, { "epoch": 0.16522477287915185, "grad_norm": 7.186534783610591, "learning_rate": 4.9638122945922655e-06, "loss": 0.984, "step": 2287 }, { "epoch": 0.16529701807936134, "grad_norm": 9.922434008612678, "learning_rate": 4.963762689612773e-06, "loss": 1.0522, "step": 2288 }, { "epoch": 0.16536926327957086, "grad_norm": 6.667067070698727, "learning_rate": 4.963713050906282e-06, "loss": 0.9034, "step": 2289 }, { "epoch": 0.16544150847978037, "grad_norm": 7.921477479008537, "learning_rate": 4.963663378473474e-06, "loss": 0.9879, "step": 2290 }, { "epoch": 0.16551375367998988, "grad_norm": 7.971540134623918, "learning_rate": 4.963613672315027e-06, "loss": 0.9887, "step": 2291 }, { "epoch": 0.1655859988801994, "grad_norm": 8.541591985112419, "learning_rate": 4.96356393243162e-06, "loss": 1.053, "step": 2292 }, { "epoch": 0.1656582440804089, "grad_norm": 8.295953099529303, "learning_rate": 4.963514158823937e-06, "loss": 1.0347, "step": 2293 }, { "epoch": 0.16573048928061843, "grad_norm": 7.977736727586198, "learning_rate": 4.963464351492657e-06, "loss": 0.8912, "step": 2294 }, { "epoch": 0.16580273448082794, "grad_norm": 7.323779153474468, "learning_rate": 4.963414510438464e-06, "loss": 0.9312, "step": 2295 }, { "epoch": 0.16587497968103745, "grad_norm": 8.658517038001559, "learning_rate": 4.963364635662039e-06, "loss": 0.9956, "step": 2296 }, { "epoch": 0.16594722488124694, "grad_norm": 8.215776609093467, "learning_rate": 4.963314727164064e-06, "loss": 0.8889, "step": 2297 }, { "epoch": 0.16601947008145646, "grad_norm": 9.850169967501671, "learning_rate": 4.963264784945223e-06, "loss": 1.1332, "step": 2298 }, { "epoch": 0.16609171528166597, "grad_norm": 7.4193684089611445, "learning_rate": 4.9632148090062e-06, "loss": 0.8875, "step": 2299 }, { "epoch": 0.16616396048187548, "grad_norm": 7.684914751243941, "learning_rate": 4.963164799347679e-06, "loss": 1.0172, "step": 2300 }, { "epoch": 0.166236205682085, "grad_norm": 8.460538930478402, "learning_rate": 4.963114755970344e-06, "loss": 1.0498, "step": 2301 }, { "epoch": 0.1663084508822945, "grad_norm": 5.723664720228626, "learning_rate": 4.963064678874882e-06, "loss": 0.8993, "step": 2302 }, { "epoch": 0.16638069608250403, "grad_norm": 7.385329948601283, "learning_rate": 4.963014568061975e-06, "loss": 0.97, "step": 2303 }, { "epoch": 0.16645294128271354, "grad_norm": 8.749839127287883, "learning_rate": 4.962964423532312e-06, "loss": 0.9917, "step": 2304 }, { "epoch": 0.16652518648292305, "grad_norm": 7.962687261705961, "learning_rate": 4.962914245286578e-06, "loss": 1.0192, "step": 2305 }, { "epoch": 0.16659743168313254, "grad_norm": 8.593359810033968, "learning_rate": 4.96286403332546e-06, "loss": 1.0406, "step": 2306 }, { "epoch": 0.16666967688334205, "grad_norm": 10.18671961878783, "learning_rate": 4.962813787649647e-06, "loss": 1.0022, "step": 2307 }, { "epoch": 0.16674192208355157, "grad_norm": 7.162362365998053, "learning_rate": 4.962763508259824e-06, "loss": 1.0422, "step": 2308 }, { "epoch": 0.16681416728376108, "grad_norm": 7.075484770756767, "learning_rate": 4.962713195156681e-06, "loss": 0.9428, "step": 2309 }, { "epoch": 0.1668864124839706, "grad_norm": 7.705494235833093, "learning_rate": 4.962662848340908e-06, "loss": 1.0138, "step": 2310 }, { "epoch": 0.1669586576841801, "grad_norm": 8.292951730287529, "learning_rate": 4.962612467813192e-06, "loss": 0.9569, "step": 2311 }, { "epoch": 0.16703090288438963, "grad_norm": 7.761841372254209, "learning_rate": 4.962562053574222e-06, "loss": 0.9799, "step": 2312 }, { "epoch": 0.16710314808459914, "grad_norm": 6.719427030296446, "learning_rate": 4.962511605624691e-06, "loss": 0.9365, "step": 2313 }, { "epoch": 0.16717539328480865, "grad_norm": 6.758786734327189, "learning_rate": 4.962461123965287e-06, "loss": 1.0343, "step": 2314 }, { "epoch": 0.16724763848501814, "grad_norm": 7.480843302062259, "learning_rate": 4.962410608596704e-06, "loss": 1.0329, "step": 2315 }, { "epoch": 0.16731988368522765, "grad_norm": 7.163557960440321, "learning_rate": 4.96236005951963e-06, "loss": 0.9821, "step": 2316 }, { "epoch": 0.16739212888543717, "grad_norm": 8.512209930183102, "learning_rate": 4.9623094767347596e-06, "loss": 0.9278, "step": 2317 }, { "epoch": 0.16746437408564668, "grad_norm": 8.755735860647338, "learning_rate": 4.962258860242784e-06, "loss": 0.9823, "step": 2318 }, { "epoch": 0.1675366192858562, "grad_norm": 6.315210950020729, "learning_rate": 4.962208210044397e-06, "loss": 0.9716, "step": 2319 }, { "epoch": 0.1676088644860657, "grad_norm": 7.324433851008075, "learning_rate": 4.962157526140291e-06, "loss": 0.9614, "step": 2320 }, { "epoch": 0.16768110968627523, "grad_norm": 7.1333529047237025, "learning_rate": 4.9621068085311596e-06, "loss": 0.9757, "step": 2321 }, { "epoch": 0.16775335488648474, "grad_norm": 8.06567083890379, "learning_rate": 4.962056057217698e-06, "loss": 0.9281, "step": 2322 }, { "epoch": 0.16782560008669423, "grad_norm": 7.664963353337563, "learning_rate": 4.962005272200601e-06, "loss": 1.0151, "step": 2323 }, { "epoch": 0.16789784528690374, "grad_norm": 6.771286073855848, "learning_rate": 4.961954453480563e-06, "loss": 0.91, "step": 2324 }, { "epoch": 0.16797009048711325, "grad_norm": 6.737767190536283, "learning_rate": 4.961903601058281e-06, "loss": 0.9174, "step": 2325 }, { "epoch": 0.16804233568732277, "grad_norm": 7.849418871319815, "learning_rate": 4.961852714934449e-06, "loss": 1.0418, "step": 2326 }, { "epoch": 0.16811458088753228, "grad_norm": 9.688339098614689, "learning_rate": 4.961801795109766e-06, "loss": 1.0213, "step": 2327 }, { "epoch": 0.1681868260877418, "grad_norm": 9.16709949743106, "learning_rate": 4.961750841584927e-06, "loss": 1.0579, "step": 2328 }, { "epoch": 0.1682590712879513, "grad_norm": 7.252657173186758, "learning_rate": 4.961699854360631e-06, "loss": 0.8674, "step": 2329 }, { "epoch": 0.16833131648816083, "grad_norm": 9.154763787897918, "learning_rate": 4.961648833437575e-06, "loss": 1.0379, "step": 2330 }, { "epoch": 0.16840356168837034, "grad_norm": 6.512203791173418, "learning_rate": 4.961597778816458e-06, "loss": 0.9443, "step": 2331 }, { "epoch": 0.16847580688857983, "grad_norm": 6.922582310345313, "learning_rate": 4.961546690497979e-06, "loss": 0.994, "step": 2332 }, { "epoch": 0.16854805208878934, "grad_norm": 8.877335362026312, "learning_rate": 4.961495568482837e-06, "loss": 0.972, "step": 2333 }, { "epoch": 0.16862029728899885, "grad_norm": 9.28285198322061, "learning_rate": 4.961444412771731e-06, "loss": 1.0168, "step": 2334 }, { "epoch": 0.16869254248920837, "grad_norm": 6.424545735134277, "learning_rate": 4.961393223365363e-06, "loss": 0.9555, "step": 2335 }, { "epoch": 0.16876478768941788, "grad_norm": 6.810472939267444, "learning_rate": 4.961342000264433e-06, "loss": 0.9976, "step": 2336 }, { "epoch": 0.1688370328896274, "grad_norm": 9.25086847300743, "learning_rate": 4.961290743469642e-06, "loss": 1.0618, "step": 2337 }, { "epoch": 0.1689092780898369, "grad_norm": 10.129513711658026, "learning_rate": 4.961239452981691e-06, "loss": 0.9458, "step": 2338 }, { "epoch": 0.16898152329004643, "grad_norm": 8.826768510966593, "learning_rate": 4.961188128801284e-06, "loss": 0.9626, "step": 2339 }, { "epoch": 0.16905376849025594, "grad_norm": 6.625496611785656, "learning_rate": 4.961136770929122e-06, "loss": 0.8931, "step": 2340 }, { "epoch": 0.16912601369046543, "grad_norm": 6.81427958631854, "learning_rate": 4.961085379365908e-06, "loss": 0.9353, "step": 2341 }, { "epoch": 0.16919825889067494, "grad_norm": 12.066487814348802, "learning_rate": 4.961033954112348e-06, "loss": 0.9445, "step": 2342 }, { "epoch": 0.16927050409088445, "grad_norm": 7.8443766187858115, "learning_rate": 4.9609824951691425e-06, "loss": 0.9454, "step": 2343 }, { "epoch": 0.16934274929109397, "grad_norm": 8.189499472576339, "learning_rate": 4.9609310025369975e-06, "loss": 0.9581, "step": 2344 }, { "epoch": 0.16941499449130348, "grad_norm": 6.6372305777659735, "learning_rate": 4.9608794762166176e-06, "loss": 0.9983, "step": 2345 }, { "epoch": 0.169487239691513, "grad_norm": 10.623420508172357, "learning_rate": 4.960827916208709e-06, "loss": 0.9396, "step": 2346 }, { "epoch": 0.1695594848917225, "grad_norm": 7.694253823512373, "learning_rate": 4.960776322513977e-06, "loss": 0.9735, "step": 2347 }, { "epoch": 0.16963173009193203, "grad_norm": 9.18216888005208, "learning_rate": 4.9607246951331274e-06, "loss": 0.964, "step": 2348 }, { "epoch": 0.16970397529214154, "grad_norm": 8.07201114293096, "learning_rate": 4.960673034066868e-06, "loss": 1.0112, "step": 2349 }, { "epoch": 0.16977622049235103, "grad_norm": 28.70753168574897, "learning_rate": 4.960621339315904e-06, "loss": 1.0382, "step": 2350 }, { "epoch": 0.16984846569256054, "grad_norm": 9.043578711879979, "learning_rate": 4.9605696108809465e-06, "loss": 0.8868, "step": 2351 }, { "epoch": 0.16992071089277005, "grad_norm": 8.774663117726506, "learning_rate": 4.9605178487627e-06, "loss": 0.8521, "step": 2352 }, { "epoch": 0.16999295609297957, "grad_norm": 8.230389957204716, "learning_rate": 4.960466052961876e-06, "loss": 0.9756, "step": 2353 }, { "epoch": 0.17006520129318908, "grad_norm": 6.880761801217064, "learning_rate": 4.9604142234791805e-06, "loss": 1.0196, "step": 2354 }, { "epoch": 0.1701374464933986, "grad_norm": 6.582485611172201, "learning_rate": 4.960362360315325e-06, "loss": 1.0548, "step": 2355 }, { "epoch": 0.1702096916936081, "grad_norm": 8.617683292627506, "learning_rate": 4.96031046347102e-06, "loss": 1.0204, "step": 2356 }, { "epoch": 0.17028193689381763, "grad_norm": 7.65770523877263, "learning_rate": 4.960258532946974e-06, "loss": 1.0347, "step": 2357 }, { "epoch": 0.17035418209402714, "grad_norm": 6.382500518308709, "learning_rate": 4.9602065687438996e-06, "loss": 0.9056, "step": 2358 }, { "epoch": 0.17042642729423663, "grad_norm": 8.15144340979473, "learning_rate": 4.960154570862508e-06, "loss": 0.8964, "step": 2359 }, { "epoch": 0.17049867249444614, "grad_norm": 14.097800203615268, "learning_rate": 4.96010253930351e-06, "loss": 0.9179, "step": 2360 }, { "epoch": 0.17057091769465565, "grad_norm": 6.211945918528795, "learning_rate": 4.960050474067618e-06, "loss": 0.9805, "step": 2361 }, { "epoch": 0.17064316289486517, "grad_norm": 8.615556937894246, "learning_rate": 4.959998375155545e-06, "loss": 1.0204, "step": 2362 }, { "epoch": 0.17071540809507468, "grad_norm": 7.719567383024432, "learning_rate": 4.9599462425680054e-06, "loss": 0.8515, "step": 2363 }, { "epoch": 0.1707876532952842, "grad_norm": 6.500419309736301, "learning_rate": 4.959894076305711e-06, "loss": 0.9256, "step": 2364 }, { "epoch": 0.1708598984954937, "grad_norm": 7.083165544504757, "learning_rate": 4.959841876369377e-06, "loss": 0.983, "step": 2365 }, { "epoch": 0.17093214369570323, "grad_norm": 7.494123827189931, "learning_rate": 4.959789642759717e-06, "loss": 1.0266, "step": 2366 }, { "epoch": 0.17100438889591274, "grad_norm": 8.150122200453453, "learning_rate": 4.959737375477447e-06, "loss": 1.0181, "step": 2367 }, { "epoch": 0.17107663409612223, "grad_norm": 7.64232231275489, "learning_rate": 4.9596850745232825e-06, "loss": 0.9891, "step": 2368 }, { "epoch": 0.17114887929633174, "grad_norm": 6.345640004302874, "learning_rate": 4.959632739897939e-06, "loss": 0.9358, "step": 2369 }, { "epoch": 0.17122112449654125, "grad_norm": 7.149639691430972, "learning_rate": 4.959580371602133e-06, "loss": 0.9115, "step": 2370 }, { "epoch": 0.17129336969675077, "grad_norm": 7.2928808745375315, "learning_rate": 4.9595279696365815e-06, "loss": 0.8856, "step": 2371 }, { "epoch": 0.17136561489696028, "grad_norm": 7.9221391201193505, "learning_rate": 4.959475534002002e-06, "loss": 0.9047, "step": 2372 }, { "epoch": 0.1714378600971698, "grad_norm": 9.061712401794267, "learning_rate": 4.9594230646991115e-06, "loss": 0.9803, "step": 2373 }, { "epoch": 0.1715101052973793, "grad_norm": 6.03627966637509, "learning_rate": 4.959370561728629e-06, "loss": 0.9526, "step": 2374 }, { "epoch": 0.17158235049758883, "grad_norm": 7.083130538131769, "learning_rate": 4.9593180250912735e-06, "loss": 0.906, "step": 2375 }, { "epoch": 0.17165459569779834, "grad_norm": 8.348846593447384, "learning_rate": 4.959265454787764e-06, "loss": 0.9846, "step": 2376 }, { "epoch": 0.17172684089800783, "grad_norm": 7.40198249036011, "learning_rate": 4.959212850818819e-06, "loss": 0.9856, "step": 2377 }, { "epoch": 0.17179908609821734, "grad_norm": 6.8733385332583055, "learning_rate": 4.95916021318516e-06, "loss": 0.8904, "step": 2378 }, { "epoch": 0.17187133129842685, "grad_norm": 8.101473977939717, "learning_rate": 4.959107541887507e-06, "loss": 0.9841, "step": 2379 }, { "epoch": 0.17194357649863637, "grad_norm": 5.961670836689918, "learning_rate": 4.959054836926582e-06, "loss": 0.9709, "step": 2380 }, { "epoch": 0.17201582169884588, "grad_norm": 7.880098418187571, "learning_rate": 4.959002098303105e-06, "loss": 1.0484, "step": 2381 }, { "epoch": 0.1720880668990554, "grad_norm": 8.62110525458, "learning_rate": 4.9589493260177975e-06, "loss": 0.9846, "step": 2382 }, { "epoch": 0.1721603120992649, "grad_norm": 7.597475495921841, "learning_rate": 4.958896520071385e-06, "loss": 0.973, "step": 2383 }, { "epoch": 0.17223255729947443, "grad_norm": 7.7271783812776045, "learning_rate": 4.9588436804645865e-06, "loss": 0.9374, "step": 2384 }, { "epoch": 0.17230480249968394, "grad_norm": 6.876819439200453, "learning_rate": 4.958790807198129e-06, "loss": 0.8867, "step": 2385 }, { "epoch": 0.17237704769989343, "grad_norm": 6.6077977095298825, "learning_rate": 4.958737900272733e-06, "loss": 0.8981, "step": 2386 }, { "epoch": 0.17244929290010294, "grad_norm": 6.941454610833223, "learning_rate": 4.958684959689125e-06, "loss": 0.9535, "step": 2387 }, { "epoch": 0.17252153810031245, "grad_norm": 6.871223105838934, "learning_rate": 4.958631985448028e-06, "loss": 0.8725, "step": 2388 }, { "epoch": 0.17259378330052197, "grad_norm": 7.278037217865054, "learning_rate": 4.958578977550169e-06, "loss": 0.9783, "step": 2389 }, { "epoch": 0.17266602850073148, "grad_norm": 8.304184045181682, "learning_rate": 4.958525935996272e-06, "loss": 0.9845, "step": 2390 }, { "epoch": 0.172738273700941, "grad_norm": 7.753102819679327, "learning_rate": 4.958472860787065e-06, "loss": 0.9234, "step": 2391 }, { "epoch": 0.1728105189011505, "grad_norm": 8.327547837059576, "learning_rate": 4.958419751923273e-06, "loss": 0.9737, "step": 2392 }, { "epoch": 0.17288276410136003, "grad_norm": 6.802349683172587, "learning_rate": 4.958366609405623e-06, "loss": 0.8978, "step": 2393 }, { "epoch": 0.17295500930156954, "grad_norm": 7.7084467149248646, "learning_rate": 4.958313433234843e-06, "loss": 0.9191, "step": 2394 }, { "epoch": 0.17302725450177903, "grad_norm": 6.649381577267725, "learning_rate": 4.95826022341166e-06, "loss": 0.8745, "step": 2395 }, { "epoch": 0.17309949970198854, "grad_norm": 6.656857458343473, "learning_rate": 4.958206979936805e-06, "loss": 0.8519, "step": 2396 }, { "epoch": 0.17317174490219805, "grad_norm": 7.075425734364665, "learning_rate": 4.958153702811005e-06, "loss": 0.9794, "step": 2397 }, { "epoch": 0.17324399010240757, "grad_norm": 9.070000011439886, "learning_rate": 4.958100392034988e-06, "loss": 0.8491, "step": 2398 }, { "epoch": 0.17331623530261708, "grad_norm": 8.274987497925318, "learning_rate": 4.958047047609487e-06, "loss": 0.8826, "step": 2399 }, { "epoch": 0.1733884805028266, "grad_norm": 7.408763346369606, "learning_rate": 4.9579936695352295e-06, "loss": 0.9219, "step": 2400 }, { "epoch": 0.1734607257030361, "grad_norm": 7.839656415144705, "learning_rate": 4.957940257812947e-06, "loss": 0.9638, "step": 2401 }, { "epoch": 0.17353297090324563, "grad_norm": 7.0176792323289705, "learning_rate": 4.957886812443371e-06, "loss": 1.0606, "step": 2402 }, { "epoch": 0.17360521610345514, "grad_norm": 8.006387544711615, "learning_rate": 4.957833333427233e-06, "loss": 0.9639, "step": 2403 }, { "epoch": 0.17367746130366463, "grad_norm": 9.69010722853911, "learning_rate": 4.957779820765265e-06, "loss": 0.9298, "step": 2404 }, { "epoch": 0.17374970650387414, "grad_norm": 8.891601508872185, "learning_rate": 4.9577262744581985e-06, "loss": 0.9436, "step": 2405 }, { "epoch": 0.17382195170408365, "grad_norm": 6.35482948056443, "learning_rate": 4.95767269450677e-06, "loss": 1.0245, "step": 2406 }, { "epoch": 0.17389419690429317, "grad_norm": 7.136824850829395, "learning_rate": 4.9576190809117085e-06, "loss": 0.9641, "step": 2407 }, { "epoch": 0.17396644210450268, "grad_norm": 9.458359055897246, "learning_rate": 4.957565433673751e-06, "loss": 0.9368, "step": 2408 }, { "epoch": 0.1740386873047122, "grad_norm": 7.814033052708961, "learning_rate": 4.957511752793632e-06, "loss": 0.9528, "step": 2409 }, { "epoch": 0.1741109325049217, "grad_norm": 6.381812550765297, "learning_rate": 4.957458038272083e-06, "loss": 0.9632, "step": 2410 }, { "epoch": 0.17418317770513123, "grad_norm": 6.698570429179478, "learning_rate": 4.957404290109843e-06, "loss": 0.9148, "step": 2411 }, { "epoch": 0.17425542290534074, "grad_norm": 6.360001816659344, "learning_rate": 4.957350508307645e-06, "loss": 0.9078, "step": 2412 }, { "epoch": 0.17432766810555023, "grad_norm": 7.103911931308215, "learning_rate": 4.957296692866228e-06, "loss": 0.9575, "step": 2413 }, { "epoch": 0.17439991330575974, "grad_norm": 6.721593068983704, "learning_rate": 4.957242843786326e-06, "loss": 1.0229, "step": 2414 }, { "epoch": 0.17447215850596925, "grad_norm": 7.329403893768461, "learning_rate": 4.957188961068678e-06, "loss": 0.9649, "step": 2415 }, { "epoch": 0.17454440370617877, "grad_norm": 6.909861729255632, "learning_rate": 4.957135044714021e-06, "loss": 1.016, "step": 2416 }, { "epoch": 0.17461664890638828, "grad_norm": 6.9455478486734865, "learning_rate": 4.957081094723093e-06, "loss": 0.9814, "step": 2417 }, { "epoch": 0.1746888941065978, "grad_norm": 7.364000814476155, "learning_rate": 4.957027111096633e-06, "loss": 0.9556, "step": 2418 }, { "epoch": 0.1747611393068073, "grad_norm": 7.111088338788975, "learning_rate": 4.9569730938353785e-06, "loss": 0.9283, "step": 2419 }, { "epoch": 0.17483338450701683, "grad_norm": 7.10359563984355, "learning_rate": 4.956919042940071e-06, "loss": 0.891, "step": 2420 }, { "epoch": 0.17490562970722634, "grad_norm": 7.408622265199394, "learning_rate": 4.95686495841145e-06, "loss": 0.9327, "step": 2421 }, { "epoch": 0.17497787490743583, "grad_norm": 8.498671932812917, "learning_rate": 4.956810840250254e-06, "loss": 1.0071, "step": 2422 }, { "epoch": 0.17505012010764534, "grad_norm": 6.874715070888989, "learning_rate": 4.956756688457226e-06, "loss": 0.9183, "step": 2423 }, { "epoch": 0.17512236530785485, "grad_norm": 7.960706293793593, "learning_rate": 4.956702503033107e-06, "loss": 0.9693, "step": 2424 }, { "epoch": 0.17519461050806437, "grad_norm": 7.6364594944804, "learning_rate": 4.956648283978637e-06, "loss": 0.9593, "step": 2425 }, { "epoch": 0.17526685570827388, "grad_norm": 6.8141541878756, "learning_rate": 4.95659403129456e-06, "loss": 0.9935, "step": 2426 }, { "epoch": 0.1753391009084834, "grad_norm": 6.971724213354977, "learning_rate": 4.956539744981619e-06, "loss": 0.9922, "step": 2427 }, { "epoch": 0.1754113461086929, "grad_norm": 7.098069812438862, "learning_rate": 4.956485425040555e-06, "loss": 0.9488, "step": 2428 }, { "epoch": 0.17548359130890243, "grad_norm": 8.596444446205606, "learning_rate": 4.956431071472113e-06, "loss": 0.9629, "step": 2429 }, { "epoch": 0.1755558365091119, "grad_norm": 6.490752170512573, "learning_rate": 4.956376684277038e-06, "loss": 0.9645, "step": 2430 }, { "epoch": 0.17562808170932143, "grad_norm": 6.8532409352012404, "learning_rate": 4.9563222634560725e-06, "loss": 0.9356, "step": 2431 }, { "epoch": 0.17570032690953094, "grad_norm": 8.800781683449834, "learning_rate": 4.956267809009963e-06, "loss": 0.9044, "step": 2432 }, { "epoch": 0.17577257210974045, "grad_norm": 7.160961482527891, "learning_rate": 4.956213320939454e-06, "loss": 0.9076, "step": 2433 }, { "epoch": 0.17584481730994997, "grad_norm": 5.957739455853044, "learning_rate": 4.956158799245291e-06, "loss": 0.8615, "step": 2434 }, { "epoch": 0.17591706251015948, "grad_norm": 6.94448638225184, "learning_rate": 4.956104243928222e-06, "loss": 0.9268, "step": 2435 }, { "epoch": 0.175989307710369, "grad_norm": 8.096929330764986, "learning_rate": 4.956049654988993e-06, "loss": 0.946, "step": 2436 }, { "epoch": 0.1760615529105785, "grad_norm": 9.077688814015469, "learning_rate": 4.955995032428352e-06, "loss": 1.0942, "step": 2437 }, { "epoch": 0.17613379811078803, "grad_norm": 6.989371268574912, "learning_rate": 4.955940376247044e-06, "loss": 0.9561, "step": 2438 }, { "epoch": 0.1762060433109975, "grad_norm": 6.5704286181772416, "learning_rate": 4.955885686445821e-06, "loss": 0.9771, "step": 2439 }, { "epoch": 0.17627828851120703, "grad_norm": 7.636854868112352, "learning_rate": 4.955830963025428e-06, "loss": 0.937, "step": 2440 }, { "epoch": 0.17635053371141654, "grad_norm": 6.862808793143665, "learning_rate": 4.955776205986616e-06, "loss": 0.9246, "step": 2441 }, { "epoch": 0.17642277891162605, "grad_norm": 7.73839142676172, "learning_rate": 4.955721415330136e-06, "loss": 0.9288, "step": 2442 }, { "epoch": 0.17649502411183557, "grad_norm": 6.878221346688616, "learning_rate": 4.955666591056736e-06, "loss": 0.9379, "step": 2443 }, { "epoch": 0.17656726931204508, "grad_norm": 7.176012495423392, "learning_rate": 4.955611733167168e-06, "loss": 0.9768, "step": 2444 }, { "epoch": 0.1766395145122546, "grad_norm": 9.908004268468215, "learning_rate": 4.955556841662181e-06, "loss": 0.9838, "step": 2445 }, { "epoch": 0.1767117597124641, "grad_norm": 7.65538673983716, "learning_rate": 4.955501916542527e-06, "loss": 1.0007, "step": 2446 }, { "epoch": 0.17678400491267363, "grad_norm": 8.315875522491087, "learning_rate": 4.955446957808959e-06, "loss": 0.9428, "step": 2447 }, { "epoch": 0.1768562501128831, "grad_norm": 8.612196555141699, "learning_rate": 4.955391965462229e-06, "loss": 0.9905, "step": 2448 }, { "epoch": 0.17692849531309263, "grad_norm": 9.565719162366294, "learning_rate": 4.955336939503089e-06, "loss": 0.8933, "step": 2449 }, { "epoch": 0.17700074051330214, "grad_norm": 7.6617762959722, "learning_rate": 4.9552818799322926e-06, "loss": 0.9668, "step": 2450 }, { "epoch": 0.17707298571351165, "grad_norm": 6.935480597138258, "learning_rate": 4.955226786750594e-06, "loss": 0.9744, "step": 2451 }, { "epoch": 0.17714523091372117, "grad_norm": 7.64077902172533, "learning_rate": 4.9551716599587475e-06, "loss": 0.9521, "step": 2452 }, { "epoch": 0.17721747611393068, "grad_norm": 6.891063719469659, "learning_rate": 4.955116499557506e-06, "loss": 0.8485, "step": 2453 }, { "epoch": 0.1772897213141402, "grad_norm": 7.270068050942304, "learning_rate": 4.955061305547627e-06, "loss": 0.9884, "step": 2454 }, { "epoch": 0.1773619665143497, "grad_norm": 7.462044103552936, "learning_rate": 4.955006077929865e-06, "loss": 1.0163, "step": 2455 }, { "epoch": 0.17743421171455923, "grad_norm": 7.851634405765266, "learning_rate": 4.954950816704976e-06, "loss": 0.9161, "step": 2456 }, { "epoch": 0.1775064569147687, "grad_norm": 6.732933226428357, "learning_rate": 4.954895521873715e-06, "loss": 0.9474, "step": 2457 }, { "epoch": 0.17757870211497823, "grad_norm": 8.492567964539596, "learning_rate": 4.954840193436842e-06, "loss": 1.0034, "step": 2458 }, { "epoch": 0.17765094731518774, "grad_norm": 6.932988254811732, "learning_rate": 4.954784831395112e-06, "loss": 0.9725, "step": 2459 }, { "epoch": 0.17772319251539725, "grad_norm": 7.107454614860015, "learning_rate": 4.954729435749284e-06, "loss": 1.0011, "step": 2460 }, { "epoch": 0.17779543771560677, "grad_norm": 7.517137403049656, "learning_rate": 4.9546740065001155e-06, "loss": 0.9148, "step": 2461 }, { "epoch": 0.17786768291581628, "grad_norm": 6.705755397905815, "learning_rate": 4.9546185436483664e-06, "loss": 0.8974, "step": 2462 }, { "epoch": 0.1779399281160258, "grad_norm": 7.281221970925799, "learning_rate": 4.954563047194795e-06, "loss": 0.9671, "step": 2463 }, { "epoch": 0.1780121733162353, "grad_norm": 6.930804896875592, "learning_rate": 4.954507517140162e-06, "loss": 0.9478, "step": 2464 }, { "epoch": 0.17808441851644483, "grad_norm": 8.675410579232647, "learning_rate": 4.9544519534852265e-06, "loss": 0.9808, "step": 2465 }, { "epoch": 0.1781566637166543, "grad_norm": 8.353787560236524, "learning_rate": 4.954396356230749e-06, "loss": 1.0261, "step": 2466 }, { "epoch": 0.17822890891686383, "grad_norm": 7.487591205735592, "learning_rate": 4.954340725377491e-06, "loss": 0.9135, "step": 2467 }, { "epoch": 0.17830115411707334, "grad_norm": 7.457334341420957, "learning_rate": 4.954285060926215e-06, "loss": 0.9921, "step": 2468 }, { "epoch": 0.17837339931728285, "grad_norm": 6.777535119776561, "learning_rate": 4.9542293628776815e-06, "loss": 0.9732, "step": 2469 }, { "epoch": 0.17844564451749237, "grad_norm": 6.720322819630678, "learning_rate": 4.954173631232654e-06, "loss": 0.9718, "step": 2470 }, { "epoch": 0.17851788971770188, "grad_norm": 10.009053137752614, "learning_rate": 4.9541178659918955e-06, "loss": 0.9129, "step": 2471 }, { "epoch": 0.1785901349179114, "grad_norm": 8.285920060707888, "learning_rate": 4.954062067156168e-06, "loss": 1.0349, "step": 2472 }, { "epoch": 0.1786623801181209, "grad_norm": 7.483760290844457, "learning_rate": 4.954006234726237e-06, "loss": 0.9906, "step": 2473 }, { "epoch": 0.17873462531833043, "grad_norm": 7.47652780490905, "learning_rate": 4.953950368702865e-06, "loss": 0.9459, "step": 2474 }, { "epoch": 0.1788068705185399, "grad_norm": 10.904685708912304, "learning_rate": 4.953894469086819e-06, "loss": 0.9273, "step": 2475 }, { "epoch": 0.17887911571874943, "grad_norm": 7.696999983522015, "learning_rate": 4.953838535878862e-06, "loss": 1.0126, "step": 2476 }, { "epoch": 0.17895136091895894, "grad_norm": 7.244611908368927, "learning_rate": 4.953782569079761e-06, "loss": 0.9816, "step": 2477 }, { "epoch": 0.17902360611916845, "grad_norm": 9.430511514457548, "learning_rate": 4.953726568690282e-06, "loss": 0.9323, "step": 2478 }, { "epoch": 0.17909585131937797, "grad_norm": 9.633174051026135, "learning_rate": 4.953670534711192e-06, "loss": 1.0652, "step": 2479 }, { "epoch": 0.17916809651958748, "grad_norm": 14.33113690484295, "learning_rate": 4.953614467143257e-06, "loss": 0.9599, "step": 2480 }, { "epoch": 0.179240341719797, "grad_norm": 7.5656988771369775, "learning_rate": 4.953558365987246e-06, "loss": 0.8991, "step": 2481 }, { "epoch": 0.1793125869200065, "grad_norm": 6.562184934773178, "learning_rate": 4.9535022312439246e-06, "loss": 0.9583, "step": 2482 }, { "epoch": 0.17938483212021603, "grad_norm": 9.463722430224173, "learning_rate": 4.953446062914063e-06, "loss": 0.9884, "step": 2483 }, { "epoch": 0.1794570773204255, "grad_norm": 8.65890913790508, "learning_rate": 4.95338986099843e-06, "loss": 0.9143, "step": 2484 }, { "epoch": 0.17952932252063503, "grad_norm": 8.918749366979435, "learning_rate": 4.9533336254977945e-06, "loss": 0.9586, "step": 2485 }, { "epoch": 0.17960156772084454, "grad_norm": 11.051400171007568, "learning_rate": 4.953277356412927e-06, "loss": 0.9624, "step": 2486 }, { "epoch": 0.17967381292105405, "grad_norm": 7.273177620642577, "learning_rate": 4.953221053744597e-06, "loss": 1.0278, "step": 2487 }, { "epoch": 0.17974605812126357, "grad_norm": 6.402048212367395, "learning_rate": 4.953164717493576e-06, "loss": 0.9511, "step": 2488 }, { "epoch": 0.17981830332147308, "grad_norm": 6.921909995238145, "learning_rate": 4.953108347660635e-06, "loss": 0.9818, "step": 2489 }, { "epoch": 0.1798905485216826, "grad_norm": 9.146297318514847, "learning_rate": 4.953051944246544e-06, "loss": 1.027, "step": 2490 }, { "epoch": 0.1799627937218921, "grad_norm": 7.071985676936494, "learning_rate": 4.9529955072520784e-06, "loss": 0.9687, "step": 2491 }, { "epoch": 0.18003503892210163, "grad_norm": 8.344755762181943, "learning_rate": 4.952939036678008e-06, "loss": 0.9232, "step": 2492 }, { "epoch": 0.1801072841223111, "grad_norm": 8.319415664693832, "learning_rate": 4.9528825325251064e-06, "loss": 1.0159, "step": 2493 }, { "epoch": 0.18017952932252063, "grad_norm": 7.493611221364696, "learning_rate": 4.952825994794148e-06, "loss": 0.9227, "step": 2494 }, { "epoch": 0.18025177452273014, "grad_norm": 6.8680820685289445, "learning_rate": 4.952769423485906e-06, "loss": 0.9268, "step": 2495 }, { "epoch": 0.18032401972293965, "grad_norm": 7.29454692811518, "learning_rate": 4.952712818601155e-06, "loss": 0.8863, "step": 2496 }, { "epoch": 0.18039626492314917, "grad_norm": 6.459468060964094, "learning_rate": 4.95265618014067e-06, "loss": 1.0324, "step": 2497 }, { "epoch": 0.18046851012335868, "grad_norm": 7.867183863346354, "learning_rate": 4.952599508105227e-06, "loss": 1.0258, "step": 2498 }, { "epoch": 0.1805407553235682, "grad_norm": 7.533467453069342, "learning_rate": 4.9525428024956e-06, "loss": 0.9291, "step": 2499 }, { "epoch": 0.1806130005237777, "grad_norm": 7.026760448189495, "learning_rate": 4.952486063312567e-06, "loss": 0.9759, "step": 2500 }, { "epoch": 0.18068524572398723, "grad_norm": 8.471660268799791, "learning_rate": 4.952429290556904e-06, "loss": 1.0299, "step": 2501 }, { "epoch": 0.1807574909241967, "grad_norm": 8.008621815048132, "learning_rate": 4.9523724842293875e-06, "loss": 0.9523, "step": 2502 }, { "epoch": 0.18082973612440623, "grad_norm": 7.8313266808240805, "learning_rate": 4.9523156443307964e-06, "loss": 0.9093, "step": 2503 }, { "epoch": 0.18090198132461574, "grad_norm": 7.808440840017345, "learning_rate": 4.952258770861909e-06, "loss": 0.9142, "step": 2504 }, { "epoch": 0.18097422652482525, "grad_norm": 7.825903784870398, "learning_rate": 4.952201863823502e-06, "loss": 0.993, "step": 2505 }, { "epoch": 0.18104647172503477, "grad_norm": 7.373509401883082, "learning_rate": 4.952144923216355e-06, "loss": 0.9433, "step": 2506 }, { "epoch": 0.18111871692524428, "grad_norm": 9.26720292806105, "learning_rate": 4.952087949041249e-06, "loss": 0.9893, "step": 2507 }, { "epoch": 0.1811909621254538, "grad_norm": 8.234932288868324, "learning_rate": 4.952030941298962e-06, "loss": 1.0124, "step": 2508 }, { "epoch": 0.1812632073256633, "grad_norm": 7.418259296080632, "learning_rate": 4.951973899990276e-06, "loss": 0.9573, "step": 2509 }, { "epoch": 0.18133545252587283, "grad_norm": 8.909092285637405, "learning_rate": 4.951916825115971e-06, "loss": 0.9359, "step": 2510 }, { "epoch": 0.1814076977260823, "grad_norm": 7.57250063110631, "learning_rate": 4.951859716676829e-06, "loss": 0.9165, "step": 2511 }, { "epoch": 0.18147994292629183, "grad_norm": 7.76082445818432, "learning_rate": 4.95180257467363e-06, "loss": 0.9609, "step": 2512 }, { "epoch": 0.18155218812650134, "grad_norm": 7.29263554993393, "learning_rate": 4.9517453991071586e-06, "loss": 0.9384, "step": 2513 }, { "epoch": 0.18162443332671085, "grad_norm": 7.56184371748814, "learning_rate": 4.951688189978196e-06, "loss": 0.9004, "step": 2514 }, { "epoch": 0.18169667852692037, "grad_norm": 8.23582489162487, "learning_rate": 4.951630947287525e-06, "loss": 1.0106, "step": 2515 }, { "epoch": 0.18176892372712988, "grad_norm": 7.321621414462697, "learning_rate": 4.951573671035931e-06, "loss": 0.8926, "step": 2516 }, { "epoch": 0.1818411689273394, "grad_norm": 7.610812386299071, "learning_rate": 4.951516361224196e-06, "loss": 1.0075, "step": 2517 }, { "epoch": 0.1819134141275489, "grad_norm": 6.657548518073887, "learning_rate": 4.951459017853106e-06, "loss": 0.8819, "step": 2518 }, { "epoch": 0.18198565932775843, "grad_norm": 9.264950603882506, "learning_rate": 4.951401640923445e-06, "loss": 0.9283, "step": 2519 }, { "epoch": 0.1820579045279679, "grad_norm": 8.277809666423549, "learning_rate": 4.951344230436e-06, "loss": 0.9438, "step": 2520 }, { "epoch": 0.18213014972817743, "grad_norm": 8.13202914845299, "learning_rate": 4.951286786391555e-06, "loss": 1.0357, "step": 2521 }, { "epoch": 0.18220239492838694, "grad_norm": 6.800808454587317, "learning_rate": 4.951229308790897e-06, "loss": 0.9443, "step": 2522 }, { "epoch": 0.18227464012859645, "grad_norm": 7.882614813784207, "learning_rate": 4.951171797634814e-06, "loss": 1.0425, "step": 2523 }, { "epoch": 0.18234688532880597, "grad_norm": 8.71094713424355, "learning_rate": 4.951114252924091e-06, "loss": 0.9443, "step": 2524 }, { "epoch": 0.18241913052901548, "grad_norm": 6.671621420550659, "learning_rate": 4.951056674659518e-06, "loss": 0.9846, "step": 2525 }, { "epoch": 0.182491375729225, "grad_norm": 7.832481770864071, "learning_rate": 4.950999062841882e-06, "loss": 0.9232, "step": 2526 }, { "epoch": 0.1825636209294345, "grad_norm": 7.560988598323924, "learning_rate": 4.950941417471972e-06, "loss": 0.9038, "step": 2527 }, { "epoch": 0.18263586612964403, "grad_norm": 6.983158014079975, "learning_rate": 4.950883738550577e-06, "loss": 0.9421, "step": 2528 }, { "epoch": 0.1827081113298535, "grad_norm": 5.781117700661048, "learning_rate": 4.950826026078486e-06, "loss": 0.8135, "step": 2529 }, { "epoch": 0.18278035653006303, "grad_norm": 7.493950502132901, "learning_rate": 4.9507682800564906e-06, "loss": 0.8649, "step": 2530 }, { "epoch": 0.18285260173027254, "grad_norm": 7.045707792054367, "learning_rate": 4.95071050048538e-06, "loss": 0.9203, "step": 2531 }, { "epoch": 0.18292484693048205, "grad_norm": 7.903282362463718, "learning_rate": 4.950652687365945e-06, "loss": 0.9654, "step": 2532 }, { "epoch": 0.18299709213069157, "grad_norm": 8.21931345353497, "learning_rate": 4.950594840698978e-06, "loss": 0.9475, "step": 2533 }, { "epoch": 0.18306933733090108, "grad_norm": 8.389988482607187, "learning_rate": 4.950536960485271e-06, "loss": 0.9081, "step": 2534 }, { "epoch": 0.1831415825311106, "grad_norm": 8.27229625406038, "learning_rate": 4.950479046725615e-06, "loss": 0.9847, "step": 2535 }, { "epoch": 0.1832138277313201, "grad_norm": 6.97243439991956, "learning_rate": 4.950421099420803e-06, "loss": 0.9599, "step": 2536 }, { "epoch": 0.1832860729315296, "grad_norm": 13.303940291150882, "learning_rate": 4.95036311857163e-06, "loss": 1.1352, "step": 2537 }, { "epoch": 0.1833583181317391, "grad_norm": 8.04908807932249, "learning_rate": 4.950305104178887e-06, "loss": 0.9773, "step": 2538 }, { "epoch": 0.18343056333194863, "grad_norm": 7.688049296781733, "learning_rate": 4.9502470562433704e-06, "loss": 0.9687, "step": 2539 }, { "epoch": 0.18350280853215814, "grad_norm": 7.822376081851215, "learning_rate": 4.950188974765875e-06, "loss": 0.8817, "step": 2540 }, { "epoch": 0.18357505373236765, "grad_norm": 6.593889714067054, "learning_rate": 4.950130859747194e-06, "loss": 0.9091, "step": 2541 }, { "epoch": 0.18364729893257717, "grad_norm": 6.356537056801619, "learning_rate": 4.950072711188124e-06, "loss": 0.9443, "step": 2542 }, { "epoch": 0.18371954413278668, "grad_norm": 7.4326345733972525, "learning_rate": 4.950014529089461e-06, "loss": 0.9234, "step": 2543 }, { "epoch": 0.1837917893329962, "grad_norm": 7.794478971381136, "learning_rate": 4.949956313452002e-06, "loss": 0.9146, "step": 2544 }, { "epoch": 0.1838640345332057, "grad_norm": 6.5061144313558215, "learning_rate": 4.949898064276542e-06, "loss": 0.9634, "step": 2545 }, { "epoch": 0.1839362797334152, "grad_norm": 6.707718846706744, "learning_rate": 4.949839781563881e-06, "loss": 0.894, "step": 2546 }, { "epoch": 0.1840085249336247, "grad_norm": 7.62785586046801, "learning_rate": 4.949781465314815e-06, "loss": 0.9724, "step": 2547 }, { "epoch": 0.18408077013383423, "grad_norm": 7.3130187804258595, "learning_rate": 4.949723115530143e-06, "loss": 1.0402, "step": 2548 }, { "epoch": 0.18415301533404374, "grad_norm": 8.40376466222412, "learning_rate": 4.949664732210664e-06, "loss": 1.0384, "step": 2549 }, { "epoch": 0.18422526053425325, "grad_norm": 7.019200286959835, "learning_rate": 4.949606315357176e-06, "loss": 0.8904, "step": 2550 }, { "epoch": 0.18429750573446277, "grad_norm": 9.149036260828412, "learning_rate": 4.94954786497048e-06, "loss": 0.969, "step": 2551 }, { "epoch": 0.18436975093467228, "grad_norm": 7.139035505017084, "learning_rate": 4.949489381051375e-06, "loss": 0.9706, "step": 2552 }, { "epoch": 0.1844419961348818, "grad_norm": 8.69936038768283, "learning_rate": 4.9494308636006635e-06, "loss": 1.0166, "step": 2553 }, { "epoch": 0.1845142413350913, "grad_norm": 6.5222561617474915, "learning_rate": 4.9493723126191454e-06, "loss": 0.9731, "step": 2554 }, { "epoch": 0.1845864865353008, "grad_norm": 6.8130028215383165, "learning_rate": 4.949313728107621e-06, "loss": 0.9914, "step": 2555 }, { "epoch": 0.1846587317355103, "grad_norm": 9.033138724432597, "learning_rate": 4.949255110066894e-06, "loss": 1.0037, "step": 2556 }, { "epoch": 0.18473097693571983, "grad_norm": 6.730199523960952, "learning_rate": 4.949196458497766e-06, "loss": 0.9683, "step": 2557 }, { "epoch": 0.18480322213592934, "grad_norm": 7.006256304966916, "learning_rate": 4.949137773401041e-06, "loss": 0.9681, "step": 2558 }, { "epoch": 0.18487546733613885, "grad_norm": 7.658776687667718, "learning_rate": 4.9490790547775206e-06, "loss": 0.9663, "step": 2559 }, { "epoch": 0.18494771253634837, "grad_norm": 7.4330641396992405, "learning_rate": 4.949020302628009e-06, "loss": 0.9721, "step": 2560 }, { "epoch": 0.18501995773655788, "grad_norm": 8.841262952939887, "learning_rate": 4.948961516953312e-06, "loss": 0.9046, "step": 2561 }, { "epoch": 0.1850922029367674, "grad_norm": 7.112488854907376, "learning_rate": 4.948902697754234e-06, "loss": 0.8815, "step": 2562 }, { "epoch": 0.1851644481369769, "grad_norm": 7.623034614789654, "learning_rate": 4.948843845031579e-06, "loss": 0.8934, "step": 2563 }, { "epoch": 0.1852366933371864, "grad_norm": 9.246061156601826, "learning_rate": 4.948784958786152e-06, "loss": 1.0074, "step": 2564 }, { "epoch": 0.1853089385373959, "grad_norm": 6.9234672420054855, "learning_rate": 4.948726039018762e-06, "loss": 0.9234, "step": 2565 }, { "epoch": 0.18538118373760543, "grad_norm": 8.00126971659103, "learning_rate": 4.948667085730212e-06, "loss": 0.9824, "step": 2566 }, { "epoch": 0.18545342893781494, "grad_norm": 7.821614556238208, "learning_rate": 4.9486080989213125e-06, "loss": 0.9045, "step": 2567 }, { "epoch": 0.18552567413802445, "grad_norm": 7.200225974881307, "learning_rate": 4.948549078592868e-06, "loss": 0.959, "step": 2568 }, { "epoch": 0.18559791933823397, "grad_norm": 7.553633102479561, "learning_rate": 4.948490024745689e-06, "loss": 0.9405, "step": 2569 }, { "epoch": 0.18567016453844348, "grad_norm": 8.175412639504618, "learning_rate": 4.948430937380582e-06, "loss": 1.0104, "step": 2570 }, { "epoch": 0.185742409738653, "grad_norm": 7.211825662224803, "learning_rate": 4.948371816498357e-06, "loss": 0.9141, "step": 2571 }, { "epoch": 0.1858146549388625, "grad_norm": 8.225947140595014, "learning_rate": 4.948312662099822e-06, "loss": 1.0263, "step": 2572 }, { "epoch": 0.185886900139072, "grad_norm": 7.78051435677654, "learning_rate": 4.948253474185789e-06, "loss": 0.9158, "step": 2573 }, { "epoch": 0.1859591453392815, "grad_norm": 8.80575108302696, "learning_rate": 4.9481942527570656e-06, "loss": 0.9628, "step": 2574 }, { "epoch": 0.18603139053949103, "grad_norm": 7.07548045761238, "learning_rate": 4.9481349978144644e-06, "loss": 0.9676, "step": 2575 }, { "epoch": 0.18610363573970054, "grad_norm": 6.890376979663377, "learning_rate": 4.948075709358796e-06, "loss": 0.9815, "step": 2576 }, { "epoch": 0.18617588093991005, "grad_norm": 7.7442785493707635, "learning_rate": 4.948016387390871e-06, "loss": 1.0177, "step": 2577 }, { "epoch": 0.18624812614011957, "grad_norm": 5.978410661954292, "learning_rate": 4.9479570319115036e-06, "loss": 1.015, "step": 2578 }, { "epoch": 0.18632037134032908, "grad_norm": 8.91143326762316, "learning_rate": 4.947897642921505e-06, "loss": 0.9204, "step": 2579 }, { "epoch": 0.1863926165405386, "grad_norm": 7.394936979178754, "learning_rate": 4.947838220421688e-06, "loss": 0.9561, "step": 2580 }, { "epoch": 0.1864648617407481, "grad_norm": 7.65423283376182, "learning_rate": 4.947778764412867e-06, "loss": 0.9484, "step": 2581 }, { "epoch": 0.1865371069409576, "grad_norm": 7.159719633868054, "learning_rate": 4.9477192748958546e-06, "loss": 0.9534, "step": 2582 }, { "epoch": 0.1866093521411671, "grad_norm": 6.882403462235572, "learning_rate": 4.947659751871466e-06, "loss": 0.9036, "step": 2583 }, { "epoch": 0.18668159734137663, "grad_norm": 7.258530432400621, "learning_rate": 4.9476001953405155e-06, "loss": 1.0291, "step": 2584 }, { "epoch": 0.18675384254158614, "grad_norm": 6.81600833813511, "learning_rate": 4.947540605303819e-06, "loss": 0.9565, "step": 2585 }, { "epoch": 0.18682608774179565, "grad_norm": 8.723565853267175, "learning_rate": 4.9474809817621924e-06, "loss": 1.0396, "step": 2586 }, { "epoch": 0.18689833294200517, "grad_norm": 7.00997568210623, "learning_rate": 4.947421324716452e-06, "loss": 0.9756, "step": 2587 }, { "epoch": 0.18697057814221468, "grad_norm": 6.420466136231398, "learning_rate": 4.9473616341674125e-06, "loss": 0.9271, "step": 2588 }, { "epoch": 0.1870428233424242, "grad_norm": 8.41578105635919, "learning_rate": 4.947301910115894e-06, "loss": 1.037, "step": 2589 }, { "epoch": 0.1871150685426337, "grad_norm": 6.317072052977531, "learning_rate": 4.947242152562711e-06, "loss": 0.9516, "step": 2590 }, { "epoch": 0.1871873137428432, "grad_norm": 6.786195981704301, "learning_rate": 4.9471823615086845e-06, "loss": 0.9438, "step": 2591 }, { "epoch": 0.1872595589430527, "grad_norm": 7.444809599440662, "learning_rate": 4.947122536954631e-06, "loss": 1.0955, "step": 2592 }, { "epoch": 0.18733180414326223, "grad_norm": 7.576027617294423, "learning_rate": 4.94706267890137e-06, "loss": 0.9581, "step": 2593 }, { "epoch": 0.18740404934347174, "grad_norm": 7.599832492789366, "learning_rate": 4.947002787349721e-06, "loss": 0.9602, "step": 2594 }, { "epoch": 0.18747629454368125, "grad_norm": 8.243359291703193, "learning_rate": 4.9469428623005034e-06, "loss": 0.9876, "step": 2595 }, { "epoch": 0.18754853974389077, "grad_norm": 6.539676760463598, "learning_rate": 4.946882903754538e-06, "loss": 0.8996, "step": 2596 }, { "epoch": 0.18762078494410028, "grad_norm": 6.536789000963335, "learning_rate": 4.946822911712646e-06, "loss": 0.9958, "step": 2597 }, { "epoch": 0.1876930301443098, "grad_norm": 7.313764153132536, "learning_rate": 4.946762886175648e-06, "loss": 0.9424, "step": 2598 }, { "epoch": 0.1877652753445193, "grad_norm": 6.273894144923009, "learning_rate": 4.946702827144366e-06, "loss": 0.8166, "step": 2599 }, { "epoch": 0.1878375205447288, "grad_norm": 7.578037410898455, "learning_rate": 4.946642734619622e-06, "loss": 1.0095, "step": 2600 }, { "epoch": 0.1879097657449383, "grad_norm": 7.2985568671162175, "learning_rate": 4.9465826086022385e-06, "loss": 1.0039, "step": 2601 }, { "epoch": 0.18798201094514783, "grad_norm": 6.95249569220274, "learning_rate": 4.9465224490930385e-06, "loss": 0.9981, "step": 2602 }, { "epoch": 0.18805425614535734, "grad_norm": 6.323190531280316, "learning_rate": 4.946462256092847e-06, "loss": 0.9509, "step": 2603 }, { "epoch": 0.18812650134556685, "grad_norm": 6.7285734531259545, "learning_rate": 4.9464020296024855e-06, "loss": 0.8719, "step": 2604 }, { "epoch": 0.18819874654577637, "grad_norm": 8.876846766070752, "learning_rate": 4.94634176962278e-06, "loss": 0.9916, "step": 2605 }, { "epoch": 0.18827099174598588, "grad_norm": 7.808017024334343, "learning_rate": 4.946281476154555e-06, "loss": 0.9543, "step": 2606 }, { "epoch": 0.1883432369461954, "grad_norm": 8.015099580732763, "learning_rate": 4.9462211491986366e-06, "loss": 0.8705, "step": 2607 }, { "epoch": 0.1884154821464049, "grad_norm": 6.389071026108581, "learning_rate": 4.94616078875585e-06, "loss": 0.8902, "step": 2608 }, { "epoch": 0.1884877273466144, "grad_norm": 8.110012374849674, "learning_rate": 4.9461003948270204e-06, "loss": 0.933, "step": 2609 }, { "epoch": 0.1885599725468239, "grad_norm": 8.14944349389907, "learning_rate": 4.946039967412976e-06, "loss": 0.899, "step": 2610 }, { "epoch": 0.18863221774703343, "grad_norm": 7.798797064723398, "learning_rate": 4.945979506514544e-06, "loss": 1.0134, "step": 2611 }, { "epoch": 0.18870446294724294, "grad_norm": 7.746581769643274, "learning_rate": 4.9459190121325525e-06, "loss": 0.9645, "step": 2612 }, { "epoch": 0.18877670814745245, "grad_norm": 6.720560654912462, "learning_rate": 4.945858484267828e-06, "loss": 0.9995, "step": 2613 }, { "epoch": 0.18884895334766197, "grad_norm": 7.295920593448657, "learning_rate": 4.945797922921201e-06, "loss": 0.9451, "step": 2614 }, { "epoch": 0.18892119854787148, "grad_norm": 10.798037343593178, "learning_rate": 4.945737328093499e-06, "loss": 0.8805, "step": 2615 }, { "epoch": 0.188993443748081, "grad_norm": 6.32915030756283, "learning_rate": 4.945676699785551e-06, "loss": 0.9414, "step": 2616 }, { "epoch": 0.1890656889482905, "grad_norm": 6.789932809758515, "learning_rate": 4.945616037998188e-06, "loss": 0.9933, "step": 2617 }, { "epoch": 0.1891379341485, "grad_norm": 7.088875461168371, "learning_rate": 4.945555342732241e-06, "loss": 0.894, "step": 2618 }, { "epoch": 0.1892101793487095, "grad_norm": 8.369357813328303, "learning_rate": 4.94549461398854e-06, "loss": 0.9622, "step": 2619 }, { "epoch": 0.18928242454891903, "grad_norm": 6.7177239632618155, "learning_rate": 4.945433851767917e-06, "loss": 0.8959, "step": 2620 }, { "epoch": 0.18935466974912854, "grad_norm": 6.700456563851638, "learning_rate": 4.945373056071203e-06, "loss": 0.9463, "step": 2621 }, { "epoch": 0.18942691494933805, "grad_norm": 6.7585121454117205, "learning_rate": 4.9453122268992305e-06, "loss": 0.8948, "step": 2622 }, { "epoch": 0.18949916014954757, "grad_norm": 6.569116944028049, "learning_rate": 4.945251364252832e-06, "loss": 0.9008, "step": 2623 }, { "epoch": 0.18957140534975708, "grad_norm": 7.379870859065514, "learning_rate": 4.945190468132841e-06, "loss": 0.9088, "step": 2624 }, { "epoch": 0.1896436505499666, "grad_norm": 6.973200313595286, "learning_rate": 4.945129538540092e-06, "loss": 0.8675, "step": 2625 }, { "epoch": 0.1897158957501761, "grad_norm": 12.386804364454992, "learning_rate": 4.945068575475417e-06, "loss": 0.9208, "step": 2626 }, { "epoch": 0.1897881409503856, "grad_norm": 9.982355573236577, "learning_rate": 4.945007578939652e-06, "loss": 1.1001, "step": 2627 }, { "epoch": 0.1898603861505951, "grad_norm": 6.8634707794334435, "learning_rate": 4.944946548933632e-06, "loss": 0.9752, "step": 2628 }, { "epoch": 0.18993263135080463, "grad_norm": 11.181538447950187, "learning_rate": 4.944885485458192e-06, "loss": 0.9303, "step": 2629 }, { "epoch": 0.19000487655101414, "grad_norm": 11.546521502750366, "learning_rate": 4.944824388514168e-06, "loss": 1.058, "step": 2630 }, { "epoch": 0.19007712175122365, "grad_norm": 10.585823627155063, "learning_rate": 4.944763258102396e-06, "loss": 0.9448, "step": 2631 }, { "epoch": 0.19014936695143317, "grad_norm": 7.695408916353476, "learning_rate": 4.944702094223714e-06, "loss": 0.9548, "step": 2632 }, { "epoch": 0.19022161215164268, "grad_norm": 7.820998551404063, "learning_rate": 4.944640896878958e-06, "loss": 0.9294, "step": 2633 }, { "epoch": 0.1902938573518522, "grad_norm": 7.119764043777188, "learning_rate": 4.944579666068966e-06, "loss": 0.9667, "step": 2634 }, { "epoch": 0.1903661025520617, "grad_norm": 9.63581894043428, "learning_rate": 4.944518401794577e-06, "loss": 0.9265, "step": 2635 }, { "epoch": 0.1904383477522712, "grad_norm": 9.581222655573953, "learning_rate": 4.944457104056629e-06, "loss": 1.0564, "step": 2636 }, { "epoch": 0.1905105929524807, "grad_norm": 7.236362126691373, "learning_rate": 4.9443957728559615e-06, "loss": 0.9067, "step": 2637 }, { "epoch": 0.19058283815269023, "grad_norm": 6.802613249890654, "learning_rate": 4.944334408193413e-06, "loss": 0.9651, "step": 2638 }, { "epoch": 0.19065508335289974, "grad_norm": 7.581207089094346, "learning_rate": 4.944273010069825e-06, "loss": 1.0696, "step": 2639 }, { "epoch": 0.19072732855310925, "grad_norm": 10.364976200089316, "learning_rate": 4.944211578486037e-06, "loss": 0.9594, "step": 2640 }, { "epoch": 0.19079957375331877, "grad_norm": 7.856452292278596, "learning_rate": 4.9441501134428905e-06, "loss": 0.9041, "step": 2641 }, { "epoch": 0.19087181895352828, "grad_norm": 7.129681571708855, "learning_rate": 4.9440886149412275e-06, "loss": 0.939, "step": 2642 }, { "epoch": 0.1909440641537378, "grad_norm": 6.358683630550252, "learning_rate": 4.944027082981888e-06, "loss": 0.905, "step": 2643 }, { "epoch": 0.19101630935394728, "grad_norm": 8.553235910663112, "learning_rate": 4.943965517565715e-06, "loss": 0.9699, "step": 2644 }, { "epoch": 0.1910885545541568, "grad_norm": 9.705476878191275, "learning_rate": 4.9439039186935525e-06, "loss": 0.9611, "step": 2645 }, { "epoch": 0.1911607997543663, "grad_norm": 8.106209021165457, "learning_rate": 4.943842286366243e-06, "loss": 0.993, "step": 2646 }, { "epoch": 0.19123304495457583, "grad_norm": 6.115599600340075, "learning_rate": 4.943780620584629e-06, "loss": 0.9432, "step": 2647 }, { "epoch": 0.19130529015478534, "grad_norm": 7.3616696159846855, "learning_rate": 4.943718921349557e-06, "loss": 1.052, "step": 2648 }, { "epoch": 0.19137753535499485, "grad_norm": 7.89236159146795, "learning_rate": 4.943657188661871e-06, "loss": 0.9938, "step": 2649 }, { "epoch": 0.19144978055520437, "grad_norm": 6.631862325198754, "learning_rate": 4.9435954225224145e-06, "loss": 0.9293, "step": 2650 }, { "epoch": 0.19152202575541388, "grad_norm": 7.780260873456185, "learning_rate": 4.943533622932034e-06, "loss": 0.9856, "step": 2651 }, { "epoch": 0.1915942709556234, "grad_norm": 7.614723159055916, "learning_rate": 4.943471789891575e-06, "loss": 1.0355, "step": 2652 }, { "epoch": 0.19166651615583288, "grad_norm": 6.715656934744772, "learning_rate": 4.9434099234018854e-06, "loss": 0.8268, "step": 2653 }, { "epoch": 0.1917387613560424, "grad_norm": 7.243490519578704, "learning_rate": 4.943348023463811e-06, "loss": 0.9753, "step": 2654 }, { "epoch": 0.1918110065562519, "grad_norm": 6.39489454240616, "learning_rate": 4.943286090078199e-06, "loss": 0.9692, "step": 2655 }, { "epoch": 0.19188325175646143, "grad_norm": 6.723302531943298, "learning_rate": 4.943224123245897e-06, "loss": 0.9644, "step": 2656 }, { "epoch": 0.19195549695667094, "grad_norm": 6.028142415086301, "learning_rate": 4.943162122967754e-06, "loss": 0.8227, "step": 2657 }, { "epoch": 0.19202774215688045, "grad_norm": 8.164787180008075, "learning_rate": 4.943100089244619e-06, "loss": 0.8431, "step": 2658 }, { "epoch": 0.19209998735708997, "grad_norm": 6.953012255493933, "learning_rate": 4.943038022077341e-06, "loss": 0.9751, "step": 2659 }, { "epoch": 0.19217223255729948, "grad_norm": 7.442575726263194, "learning_rate": 4.942975921466769e-06, "loss": 0.9887, "step": 2660 }, { "epoch": 0.192244477757509, "grad_norm": 6.960989835223435, "learning_rate": 4.942913787413753e-06, "loss": 0.8914, "step": 2661 }, { "epoch": 0.19231672295771848, "grad_norm": 6.773312471813205, "learning_rate": 4.942851619919145e-06, "loss": 0.9029, "step": 2662 }, { "epoch": 0.192388968157928, "grad_norm": 6.818883582326815, "learning_rate": 4.942789418983794e-06, "loss": 0.9835, "step": 2663 }, { "epoch": 0.1924612133581375, "grad_norm": 6.855537192500228, "learning_rate": 4.942727184608553e-06, "loss": 0.8992, "step": 2664 }, { "epoch": 0.19253345855834703, "grad_norm": 8.200295270860487, "learning_rate": 4.942664916794273e-06, "loss": 0.921, "step": 2665 }, { "epoch": 0.19260570375855654, "grad_norm": 7.275585934878426, "learning_rate": 4.942602615541809e-06, "loss": 0.9873, "step": 2666 }, { "epoch": 0.19267794895876605, "grad_norm": 6.022896629818294, "learning_rate": 4.942540280852009e-06, "loss": 1.002, "step": 2667 }, { "epoch": 0.19275019415897557, "grad_norm": 6.9471873787588265, "learning_rate": 4.94247791272573e-06, "loss": 1.0428, "step": 2668 }, { "epoch": 0.19282243935918508, "grad_norm": 6.354722028966309, "learning_rate": 4.942415511163826e-06, "loss": 0.9861, "step": 2669 }, { "epoch": 0.1928946845593946, "grad_norm": 7.9755681807113765, "learning_rate": 4.942353076167149e-06, "loss": 0.9305, "step": 2670 }, { "epoch": 0.19296692975960408, "grad_norm": 6.02223789327224, "learning_rate": 4.942290607736555e-06, "loss": 0.9148, "step": 2671 }, { "epoch": 0.1930391749598136, "grad_norm": 6.247913775348493, "learning_rate": 4.9422281058728984e-06, "loss": 0.8374, "step": 2672 }, { "epoch": 0.1931114201600231, "grad_norm": 8.674298471893401, "learning_rate": 4.942165570577035e-06, "loss": 0.878, "step": 2673 }, { "epoch": 0.19318366536023263, "grad_norm": 7.932434628442991, "learning_rate": 4.942103001849823e-06, "loss": 0.9997, "step": 2674 }, { "epoch": 0.19325591056044214, "grad_norm": 8.167607726807399, "learning_rate": 4.942040399692116e-06, "loss": 0.9166, "step": 2675 }, { "epoch": 0.19332815576065165, "grad_norm": 6.692219717592598, "learning_rate": 4.941977764104772e-06, "loss": 0.9031, "step": 2676 }, { "epoch": 0.19340040096086117, "grad_norm": 9.620126654728999, "learning_rate": 4.941915095088648e-06, "loss": 0.9601, "step": 2677 }, { "epoch": 0.19347264616107068, "grad_norm": 7.690624563503137, "learning_rate": 4.941852392644603e-06, "loss": 0.9384, "step": 2678 }, { "epoch": 0.1935448913612802, "grad_norm": 7.389274877092374, "learning_rate": 4.941789656773495e-06, "loss": 0.9606, "step": 2679 }, { "epoch": 0.19361713656148968, "grad_norm": 7.162072623985338, "learning_rate": 4.941726887476182e-06, "loss": 0.9663, "step": 2680 }, { "epoch": 0.1936893817616992, "grad_norm": 5.846121403762438, "learning_rate": 4.941664084753523e-06, "loss": 0.9079, "step": 2681 }, { "epoch": 0.1937616269619087, "grad_norm": 7.335206601718358, "learning_rate": 4.9416012486063804e-06, "loss": 1.0286, "step": 2682 }, { "epoch": 0.19383387216211823, "grad_norm": 7.004362109572272, "learning_rate": 4.941538379035611e-06, "loss": 1.0122, "step": 2683 }, { "epoch": 0.19390611736232774, "grad_norm": 7.227440561098305, "learning_rate": 4.941475476042078e-06, "loss": 0.8576, "step": 2684 }, { "epoch": 0.19397836256253725, "grad_norm": 7.900231524587735, "learning_rate": 4.941412539626641e-06, "loss": 1.0118, "step": 2685 }, { "epoch": 0.19405060776274677, "grad_norm": 6.854349367746508, "learning_rate": 4.941349569790162e-06, "loss": 1.0348, "step": 2686 }, { "epoch": 0.19412285296295628, "grad_norm": 6.563278297411847, "learning_rate": 4.941286566533502e-06, "loss": 0.9249, "step": 2687 }, { "epoch": 0.1941950981631658, "grad_norm": 8.077953580295578, "learning_rate": 4.9412235298575255e-06, "loss": 0.9923, "step": 2688 }, { "epoch": 0.19426734336337528, "grad_norm": 7.570784389720445, "learning_rate": 4.941160459763094e-06, "loss": 1.0453, "step": 2689 }, { "epoch": 0.1943395885635848, "grad_norm": 6.260797557785295, "learning_rate": 4.941097356251071e-06, "loss": 0.9441, "step": 2690 }, { "epoch": 0.1944118337637943, "grad_norm": 6.645716109084036, "learning_rate": 4.941034219322322e-06, "loss": 0.9157, "step": 2691 }, { "epoch": 0.19448407896400383, "grad_norm": 7.003358852528204, "learning_rate": 4.940971048977709e-06, "loss": 0.983, "step": 2692 }, { "epoch": 0.19455632416421334, "grad_norm": 5.874412344903039, "learning_rate": 4.9409078452180966e-06, "loss": 0.9091, "step": 2693 }, { "epoch": 0.19462856936442285, "grad_norm": 6.908348351597615, "learning_rate": 4.940844608044352e-06, "loss": 0.8616, "step": 2694 }, { "epoch": 0.19470081456463237, "grad_norm": 6.225383958120607, "learning_rate": 4.94078133745734e-06, "loss": 0.9456, "step": 2695 }, { "epoch": 0.19477305976484188, "grad_norm": 7.135693740105996, "learning_rate": 4.940718033457926e-06, "loss": 0.915, "step": 2696 }, { "epoch": 0.1948453049650514, "grad_norm": 6.974311285985044, "learning_rate": 4.940654696046978e-06, "loss": 1.0251, "step": 2697 }, { "epoch": 0.19491755016526088, "grad_norm": 6.839030550245225, "learning_rate": 4.940591325225361e-06, "loss": 0.9135, "step": 2698 }, { "epoch": 0.1949897953654704, "grad_norm": 6.698499528630169, "learning_rate": 4.940527920993945e-06, "loss": 0.912, "step": 2699 }, { "epoch": 0.1950620405656799, "grad_norm": 6.334733239438244, "learning_rate": 4.940464483353596e-06, "loss": 0.9126, "step": 2700 }, { "epoch": 0.19513428576588943, "grad_norm": 7.660115165541915, "learning_rate": 4.9404010123051835e-06, "loss": 0.9748, "step": 2701 }, { "epoch": 0.19520653096609894, "grad_norm": 6.75188193754055, "learning_rate": 4.940337507849576e-06, "loss": 1.0217, "step": 2702 }, { "epoch": 0.19527877616630845, "grad_norm": 8.2545902313619, "learning_rate": 4.940273969987644e-06, "loss": 0.9782, "step": 2703 }, { "epoch": 0.19535102136651797, "grad_norm": 7.826801850658585, "learning_rate": 4.9402103987202545e-06, "loss": 0.9647, "step": 2704 }, { "epoch": 0.19542326656672748, "grad_norm": 6.5773061210914365, "learning_rate": 4.94014679404828e-06, "loss": 0.9679, "step": 2705 }, { "epoch": 0.195495511766937, "grad_norm": 8.487761719029548, "learning_rate": 4.94008315597259e-06, "loss": 0.9019, "step": 2706 }, { "epoch": 0.19556775696714648, "grad_norm": 5.718907088326589, "learning_rate": 4.940019484494057e-06, "loss": 0.937, "step": 2707 }, { "epoch": 0.195640002167356, "grad_norm": 8.463318156103183, "learning_rate": 4.939955779613551e-06, "loss": 1.0458, "step": 2708 }, { "epoch": 0.1957122473675655, "grad_norm": 9.944300214217604, "learning_rate": 4.939892041331945e-06, "loss": 1.1102, "step": 2709 }, { "epoch": 0.19578449256777503, "grad_norm": 6.593851820896304, "learning_rate": 4.939828269650111e-06, "loss": 0.9111, "step": 2710 }, { "epoch": 0.19585673776798454, "grad_norm": 8.786176175371626, "learning_rate": 4.939764464568924e-06, "loss": 1.0119, "step": 2711 }, { "epoch": 0.19592898296819405, "grad_norm": 7.221092735718851, "learning_rate": 4.939700626089254e-06, "loss": 1.029, "step": 2712 }, { "epoch": 0.19600122816840357, "grad_norm": 7.683887904654702, "learning_rate": 4.939636754211977e-06, "loss": 1.005, "step": 2713 }, { "epoch": 0.19607347336861308, "grad_norm": 6.570634142320402, "learning_rate": 4.939572848937968e-06, "loss": 0.9161, "step": 2714 }, { "epoch": 0.1961457185688226, "grad_norm": 6.443294546790424, "learning_rate": 4.9395089102681006e-06, "loss": 0.8869, "step": 2715 }, { "epoch": 0.19621796376903208, "grad_norm": 6.207012812669282, "learning_rate": 4.9394449382032505e-06, "loss": 0.981, "step": 2716 }, { "epoch": 0.1962902089692416, "grad_norm": 7.159153778108382, "learning_rate": 4.939380932744293e-06, "loss": 0.8832, "step": 2717 }, { "epoch": 0.1963624541694511, "grad_norm": 9.38975970301016, "learning_rate": 4.939316893892104e-06, "loss": 0.9795, "step": 2718 }, { "epoch": 0.19643469936966063, "grad_norm": 7.182642082829902, "learning_rate": 4.939252821647562e-06, "loss": 0.9779, "step": 2719 }, { "epoch": 0.19650694456987014, "grad_norm": 8.657414657520228, "learning_rate": 4.939188716011543e-06, "loss": 0.9775, "step": 2720 }, { "epoch": 0.19657918977007965, "grad_norm": 7.593835398503862, "learning_rate": 4.939124576984922e-06, "loss": 0.9857, "step": 2721 }, { "epoch": 0.19665143497028917, "grad_norm": 7.578719220017927, "learning_rate": 4.939060404568582e-06, "loss": 1.0388, "step": 2722 }, { "epoch": 0.19672368017049868, "grad_norm": 7.973418898787826, "learning_rate": 4.938996198763397e-06, "loss": 0.9444, "step": 2723 }, { "epoch": 0.1967959253707082, "grad_norm": 7.827687386408321, "learning_rate": 4.9389319595702495e-06, "loss": 0.9154, "step": 2724 }, { "epoch": 0.19686817057091768, "grad_norm": 7.870793505827538, "learning_rate": 4.9388676869900165e-06, "loss": 0.9788, "step": 2725 }, { "epoch": 0.1969404157711272, "grad_norm": 8.518025303197799, "learning_rate": 4.938803381023578e-06, "loss": 0.9655, "step": 2726 }, { "epoch": 0.1970126609713367, "grad_norm": 6.209747699520426, "learning_rate": 4.938739041671816e-06, "loss": 0.8296, "step": 2727 }, { "epoch": 0.19708490617154623, "grad_norm": 7.370420424168754, "learning_rate": 4.938674668935609e-06, "loss": 1.0031, "step": 2728 }, { "epoch": 0.19715715137175574, "grad_norm": 8.471131162447357, "learning_rate": 4.938610262815839e-06, "loss": 0.9375, "step": 2729 }, { "epoch": 0.19722939657196525, "grad_norm": 8.154627846950811, "learning_rate": 4.938545823313389e-06, "loss": 0.9623, "step": 2730 }, { "epoch": 0.19730164177217477, "grad_norm": 9.989922977490437, "learning_rate": 4.93848135042914e-06, "loss": 0.9769, "step": 2731 }, { "epoch": 0.19737388697238428, "grad_norm": 7.018802186766834, "learning_rate": 4.938416844163974e-06, "loss": 0.9195, "step": 2732 }, { "epoch": 0.1974461321725938, "grad_norm": 7.010866722258071, "learning_rate": 4.938352304518775e-06, "loss": 0.9738, "step": 2733 }, { "epoch": 0.19751837737280328, "grad_norm": 6.57377397253232, "learning_rate": 4.938287731494426e-06, "loss": 0.9727, "step": 2734 }, { "epoch": 0.1975906225730128, "grad_norm": 7.6128641075914585, "learning_rate": 4.938223125091812e-06, "loss": 0.9102, "step": 2735 }, { "epoch": 0.1976628677732223, "grad_norm": 6.042099123856689, "learning_rate": 4.938158485311816e-06, "loss": 0.8974, "step": 2736 }, { "epoch": 0.19773511297343183, "grad_norm": 7.791076583413442, "learning_rate": 4.938093812155323e-06, "loss": 0.9635, "step": 2737 }, { "epoch": 0.19780735817364134, "grad_norm": 8.946662954148698, "learning_rate": 4.938029105623219e-06, "loss": 1.0139, "step": 2738 }, { "epoch": 0.19787960337385085, "grad_norm": 8.233718527407541, "learning_rate": 4.93796436571639e-06, "loss": 0.9063, "step": 2739 }, { "epoch": 0.19795184857406037, "grad_norm": 7.371829887145373, "learning_rate": 4.937899592435721e-06, "loss": 0.8817, "step": 2740 }, { "epoch": 0.19802409377426988, "grad_norm": 7.019243492380234, "learning_rate": 4.937834785782101e-06, "loss": 0.9882, "step": 2741 }, { "epoch": 0.1980963389744794, "grad_norm": 7.945608728038528, "learning_rate": 4.937769945756414e-06, "loss": 0.929, "step": 2742 }, { "epoch": 0.19816858417468888, "grad_norm": 6.525609551382142, "learning_rate": 4.93770507235955e-06, "loss": 0.9671, "step": 2743 }, { "epoch": 0.1982408293748984, "grad_norm": 8.163930733087367, "learning_rate": 4.937640165592396e-06, "loss": 0.8956, "step": 2744 }, { "epoch": 0.1983130745751079, "grad_norm": 7.468418768397129, "learning_rate": 4.937575225455841e-06, "loss": 0.9031, "step": 2745 }, { "epoch": 0.19838531977531743, "grad_norm": 9.54455038635329, "learning_rate": 4.937510251950775e-06, "loss": 0.9547, "step": 2746 }, { "epoch": 0.19845756497552694, "grad_norm": 7.1222234301765255, "learning_rate": 4.937445245078084e-06, "loss": 0.9245, "step": 2747 }, { "epoch": 0.19852981017573645, "grad_norm": 6.456235122294239, "learning_rate": 4.937380204838662e-06, "loss": 0.8174, "step": 2748 }, { "epoch": 0.19860205537594597, "grad_norm": 7.509505288481684, "learning_rate": 4.937315131233397e-06, "loss": 0.9209, "step": 2749 }, { "epoch": 0.19867430057615548, "grad_norm": 9.334646223461686, "learning_rate": 4.937250024263179e-06, "loss": 0.9826, "step": 2750 }, { "epoch": 0.19874654577636497, "grad_norm": 7.3359629800209785, "learning_rate": 4.937184883928902e-06, "loss": 0.9312, "step": 2751 }, { "epoch": 0.19881879097657448, "grad_norm": 6.167864537098561, "learning_rate": 4.937119710231456e-06, "loss": 0.8835, "step": 2752 }, { "epoch": 0.198891036176784, "grad_norm": 7.476486221609977, "learning_rate": 4.937054503171733e-06, "loss": 0.922, "step": 2753 }, { "epoch": 0.1989632813769935, "grad_norm": 6.505470028315144, "learning_rate": 4.936989262750627e-06, "loss": 0.87, "step": 2754 }, { "epoch": 0.19903552657720303, "grad_norm": 6.528790900788652, "learning_rate": 4.93692398896903e-06, "loss": 1.0097, "step": 2755 }, { "epoch": 0.19910777177741254, "grad_norm": 8.009638701826011, "learning_rate": 4.936858681827835e-06, "loss": 0.9679, "step": 2756 }, { "epoch": 0.19918001697762205, "grad_norm": 9.113578023055721, "learning_rate": 4.9367933413279375e-06, "loss": 0.9694, "step": 2757 }, { "epoch": 0.19925226217783157, "grad_norm": 7.310031832979247, "learning_rate": 4.93672796747023e-06, "loss": 0.8989, "step": 2758 }, { "epoch": 0.19932450737804108, "grad_norm": 7.743291350814815, "learning_rate": 4.93666256025561e-06, "loss": 0.8908, "step": 2759 }, { "epoch": 0.19939675257825057, "grad_norm": 6.404894876205552, "learning_rate": 4.9365971196849715e-06, "loss": 0.9718, "step": 2760 }, { "epoch": 0.19946899777846008, "grad_norm": 8.295769626814131, "learning_rate": 4.93653164575921e-06, "loss": 0.998, "step": 2761 }, { "epoch": 0.1995412429786696, "grad_norm": 6.1381566956904585, "learning_rate": 4.9364661384792215e-06, "loss": 0.9477, "step": 2762 }, { "epoch": 0.1996134881788791, "grad_norm": 6.487766197197013, "learning_rate": 4.936400597845904e-06, "loss": 0.9394, "step": 2763 }, { "epoch": 0.19968573337908863, "grad_norm": 8.365753535358035, "learning_rate": 4.936335023860154e-06, "loss": 0.9709, "step": 2764 }, { "epoch": 0.19975797857929814, "grad_norm": 6.770972836353338, "learning_rate": 4.936269416522869e-06, "loss": 0.9255, "step": 2765 }, { "epoch": 0.19983022377950765, "grad_norm": 6.770655922154879, "learning_rate": 4.936203775834947e-06, "loss": 0.8991, "step": 2766 }, { "epoch": 0.19990246897971717, "grad_norm": 7.892036296670143, "learning_rate": 4.936138101797288e-06, "loss": 0.8975, "step": 2767 }, { "epoch": 0.19997471417992668, "grad_norm": 7.039429378792283, "learning_rate": 4.9360723944107895e-06, "loss": 0.9045, "step": 2768 }, { "epoch": 0.20004695938013617, "grad_norm": 7.201216902151065, "learning_rate": 4.936006653676351e-06, "loss": 0.9732, "step": 2769 }, { "epoch": 0.20011920458034568, "grad_norm": 6.35531299027402, "learning_rate": 4.935940879594872e-06, "loss": 0.9526, "step": 2770 }, { "epoch": 0.2001914497805552, "grad_norm": 6.938981181619329, "learning_rate": 4.935875072167256e-06, "loss": 0.9298, "step": 2771 }, { "epoch": 0.2002636949807647, "grad_norm": 7.009321545734233, "learning_rate": 4.935809231394399e-06, "loss": 1.0036, "step": 2772 }, { "epoch": 0.20033594018097423, "grad_norm": 5.774347587162817, "learning_rate": 4.9357433572772064e-06, "loss": 0.868, "step": 2773 }, { "epoch": 0.20040818538118374, "grad_norm": 8.771145365790721, "learning_rate": 4.935677449816579e-06, "loss": 1.018, "step": 2774 }, { "epoch": 0.20048043058139325, "grad_norm": 6.726888012957194, "learning_rate": 4.935611509013417e-06, "loss": 0.843, "step": 2775 }, { "epoch": 0.20055267578160277, "grad_norm": 8.279125704101636, "learning_rate": 4.935545534868626e-06, "loss": 1.0404, "step": 2776 }, { "epoch": 0.20062492098181228, "grad_norm": 6.454669462763771, "learning_rate": 4.935479527383107e-06, "loss": 0.901, "step": 2777 }, { "epoch": 0.20069716618202177, "grad_norm": 6.262052283206155, "learning_rate": 4.935413486557764e-06, "loss": 0.9887, "step": 2778 }, { "epoch": 0.20076941138223128, "grad_norm": 6.983589554898617, "learning_rate": 4.935347412393502e-06, "loss": 0.9435, "step": 2779 }, { "epoch": 0.2008416565824408, "grad_norm": 7.530165724836729, "learning_rate": 4.935281304891224e-06, "loss": 1.0706, "step": 2780 }, { "epoch": 0.2009139017826503, "grad_norm": 7.247090775409899, "learning_rate": 4.935215164051837e-06, "loss": 0.9616, "step": 2781 }, { "epoch": 0.20098614698285983, "grad_norm": 7.154683970383629, "learning_rate": 4.935148989876245e-06, "loss": 0.9347, "step": 2782 }, { "epoch": 0.20105839218306934, "grad_norm": 7.985488604918133, "learning_rate": 4.935082782365353e-06, "loss": 0.9175, "step": 2783 }, { "epoch": 0.20113063738327885, "grad_norm": 6.22867761773899, "learning_rate": 4.93501654152007e-06, "loss": 0.9695, "step": 2784 }, { "epoch": 0.20120288258348837, "grad_norm": 6.598354088131622, "learning_rate": 4.9349502673412995e-06, "loss": 0.8628, "step": 2785 }, { "epoch": 0.20127512778369788, "grad_norm": 6.053864296356599, "learning_rate": 4.934883959829952e-06, "loss": 0.9718, "step": 2786 }, { "epoch": 0.20134737298390737, "grad_norm": 8.005556084536305, "learning_rate": 4.934817618986932e-06, "loss": 0.9485, "step": 2787 }, { "epoch": 0.20141961818411688, "grad_norm": 6.749388773053043, "learning_rate": 4.934751244813151e-06, "loss": 0.9596, "step": 2788 }, { "epoch": 0.2014918633843264, "grad_norm": 8.123929936297044, "learning_rate": 4.9346848373095155e-06, "loss": 0.9113, "step": 2789 }, { "epoch": 0.2015641085845359, "grad_norm": 7.626102305296567, "learning_rate": 4.934618396476934e-06, "loss": 0.9152, "step": 2790 }, { "epoch": 0.20163635378474543, "grad_norm": 7.893674234855743, "learning_rate": 4.934551922316318e-06, "loss": 0.9739, "step": 2791 }, { "epoch": 0.20170859898495494, "grad_norm": 6.175136660403412, "learning_rate": 4.934485414828576e-06, "loss": 0.9185, "step": 2792 }, { "epoch": 0.20178084418516445, "grad_norm": 6.445456023496714, "learning_rate": 4.934418874014619e-06, "loss": 0.8877, "step": 2793 }, { "epoch": 0.20185308938537397, "grad_norm": 6.683568065957371, "learning_rate": 4.934352299875358e-06, "loss": 0.9997, "step": 2794 }, { "epoch": 0.20192533458558348, "grad_norm": 6.7032419474610325, "learning_rate": 4.934285692411704e-06, "loss": 0.974, "step": 2795 }, { "epoch": 0.20199757978579297, "grad_norm": 8.848267173306581, "learning_rate": 4.934219051624569e-06, "loss": 0.9793, "step": 2796 }, { "epoch": 0.20206982498600248, "grad_norm": 7.471355159454085, "learning_rate": 4.934152377514866e-06, "loss": 0.9234, "step": 2797 }, { "epoch": 0.202142070186212, "grad_norm": 6.865513585627341, "learning_rate": 4.934085670083506e-06, "loss": 0.9763, "step": 2798 }, { "epoch": 0.2022143153864215, "grad_norm": 7.248838923646103, "learning_rate": 4.934018929331403e-06, "loss": 0.9523, "step": 2799 }, { "epoch": 0.20228656058663103, "grad_norm": 6.852424036751036, "learning_rate": 4.9339521552594715e-06, "loss": 0.7972, "step": 2800 }, { "epoch": 0.20235880578684054, "grad_norm": 7.652208655169179, "learning_rate": 4.9338853478686244e-06, "loss": 0.9638, "step": 2801 }, { "epoch": 0.20243105098705005, "grad_norm": 7.656879757263235, "learning_rate": 4.933818507159776e-06, "loss": 0.9381, "step": 2802 }, { "epoch": 0.20250329618725957, "grad_norm": 6.523340720184048, "learning_rate": 4.933751633133843e-06, "loss": 0.9404, "step": 2803 }, { "epoch": 0.20257554138746908, "grad_norm": 7.585026506592636, "learning_rate": 4.933684725791741e-06, "loss": 0.9433, "step": 2804 }, { "epoch": 0.20264778658767857, "grad_norm": 6.777729579705525, "learning_rate": 4.933617785134383e-06, "loss": 0.95, "step": 2805 }, { "epoch": 0.20272003178788808, "grad_norm": 7.599388007570058, "learning_rate": 4.933550811162687e-06, "loss": 0.944, "step": 2806 }, { "epoch": 0.2027922769880976, "grad_norm": 6.912390279330247, "learning_rate": 4.93348380387757e-06, "loss": 0.8614, "step": 2807 }, { "epoch": 0.2028645221883071, "grad_norm": 6.918593203483147, "learning_rate": 4.93341676327995e-06, "loss": 0.9119, "step": 2808 }, { "epoch": 0.20293676738851663, "grad_norm": 7.802969773123143, "learning_rate": 4.933349689370743e-06, "loss": 0.9916, "step": 2809 }, { "epoch": 0.20300901258872614, "grad_norm": 6.574120686842901, "learning_rate": 4.933282582150869e-06, "loss": 1.0079, "step": 2810 }, { "epoch": 0.20308125778893565, "grad_norm": 7.895092958817466, "learning_rate": 4.933215441621245e-06, "loss": 0.8953, "step": 2811 }, { "epoch": 0.20315350298914517, "grad_norm": 7.179399519863782, "learning_rate": 4.9331482677827915e-06, "loss": 0.8938, "step": 2812 }, { "epoch": 0.20322574818935468, "grad_norm": 7.028341140917623, "learning_rate": 4.933081060636427e-06, "loss": 0.962, "step": 2813 }, { "epoch": 0.20329799338956417, "grad_norm": 7.424444199641074, "learning_rate": 4.933013820183072e-06, "loss": 0.9892, "step": 2814 }, { "epoch": 0.20337023858977368, "grad_norm": 7.032315999921641, "learning_rate": 4.932946546423647e-06, "loss": 0.9417, "step": 2815 }, { "epoch": 0.2034424837899832, "grad_norm": 6.0594282725344835, "learning_rate": 4.932879239359073e-06, "loss": 0.9225, "step": 2816 }, { "epoch": 0.2035147289901927, "grad_norm": 7.597470725964317, "learning_rate": 4.932811898990271e-06, "loss": 1.0034, "step": 2817 }, { "epoch": 0.20358697419040223, "grad_norm": 7.153354642089126, "learning_rate": 4.932744525318163e-06, "loss": 0.8603, "step": 2818 }, { "epoch": 0.20365921939061174, "grad_norm": 8.889714896712055, "learning_rate": 4.932677118343671e-06, "loss": 0.9087, "step": 2819 }, { "epoch": 0.20373146459082125, "grad_norm": 7.122904686937711, "learning_rate": 4.932609678067719e-06, "loss": 0.9345, "step": 2820 }, { "epoch": 0.20380370979103077, "grad_norm": 7.474198273496828, "learning_rate": 4.932542204491229e-06, "loss": 0.9439, "step": 2821 }, { "epoch": 0.20387595499124028, "grad_norm": 6.377491482826566, "learning_rate": 4.932474697615125e-06, "loss": 1.0222, "step": 2822 }, { "epoch": 0.20394820019144977, "grad_norm": 8.241381217968431, "learning_rate": 4.932407157440331e-06, "loss": 0.8777, "step": 2823 }, { "epoch": 0.20402044539165928, "grad_norm": 8.99159908275171, "learning_rate": 4.932339583967772e-06, "loss": 0.9734, "step": 2824 }, { "epoch": 0.2040926905918688, "grad_norm": 8.654809274142172, "learning_rate": 4.932271977198372e-06, "loss": 0.9561, "step": 2825 }, { "epoch": 0.2041649357920783, "grad_norm": 7.874065283146435, "learning_rate": 4.932204337133058e-06, "loss": 1.0246, "step": 2826 }, { "epoch": 0.20423718099228783, "grad_norm": 8.339540484093824, "learning_rate": 4.932136663772754e-06, "loss": 0.8874, "step": 2827 }, { "epoch": 0.20430942619249734, "grad_norm": 6.823202982323498, "learning_rate": 4.932068957118388e-06, "loss": 0.9313, "step": 2828 }, { "epoch": 0.20438167139270685, "grad_norm": 6.607196710433353, "learning_rate": 4.932001217170886e-06, "loss": 0.9881, "step": 2829 }, { "epoch": 0.20445391659291637, "grad_norm": 6.558807787603366, "learning_rate": 4.931933443931177e-06, "loss": 0.9315, "step": 2830 }, { "epoch": 0.20452616179312588, "grad_norm": 7.862662444991325, "learning_rate": 4.931865637400186e-06, "loss": 0.9293, "step": 2831 }, { "epoch": 0.20459840699333537, "grad_norm": 8.877665374685105, "learning_rate": 4.931797797578843e-06, "loss": 0.9277, "step": 2832 }, { "epoch": 0.20467065219354488, "grad_norm": 8.777727934404762, "learning_rate": 4.9317299244680775e-06, "loss": 0.9376, "step": 2833 }, { "epoch": 0.2047428973937544, "grad_norm": 7.972045456377235, "learning_rate": 4.931662018068816e-06, "loss": 0.9795, "step": 2834 }, { "epoch": 0.2048151425939639, "grad_norm": 8.579703541860344, "learning_rate": 4.93159407838199e-06, "loss": 0.9784, "step": 2835 }, { "epoch": 0.20488738779417343, "grad_norm": 7.4684933414811345, "learning_rate": 4.931526105408529e-06, "loss": 0.9679, "step": 2836 }, { "epoch": 0.20495963299438294, "grad_norm": 7.933578363475971, "learning_rate": 4.931458099149363e-06, "loss": 0.9042, "step": 2837 }, { "epoch": 0.20503187819459245, "grad_norm": 7.5903355335560025, "learning_rate": 4.931390059605424e-06, "loss": 0.8873, "step": 2838 }, { "epoch": 0.20510412339480197, "grad_norm": 8.375464583079667, "learning_rate": 4.9313219867776445e-06, "loss": 0.9563, "step": 2839 }, { "epoch": 0.20517636859501148, "grad_norm": 7.29917332397162, "learning_rate": 4.931253880666953e-06, "loss": 0.9426, "step": 2840 }, { "epoch": 0.20524861379522097, "grad_norm": 6.992793597159898, "learning_rate": 4.931185741274284e-06, "loss": 0.908, "step": 2841 }, { "epoch": 0.20532085899543048, "grad_norm": 7.473337464457631, "learning_rate": 4.9311175686005695e-06, "loss": 1.0491, "step": 2842 }, { "epoch": 0.20539310419564, "grad_norm": 9.369975053068028, "learning_rate": 4.9310493626467435e-06, "loss": 1.0768, "step": 2843 }, { "epoch": 0.2054653493958495, "grad_norm": 9.294773922243886, "learning_rate": 4.930981123413739e-06, "loss": 0.9896, "step": 2844 }, { "epoch": 0.20553759459605903, "grad_norm": 7.314924311008368, "learning_rate": 4.930912850902491e-06, "loss": 1.0189, "step": 2845 }, { "epoch": 0.20560983979626854, "grad_norm": 5.959434389759619, "learning_rate": 4.930844545113934e-06, "loss": 0.9787, "step": 2846 }, { "epoch": 0.20568208499647805, "grad_norm": 7.353407981864865, "learning_rate": 4.9307762060490014e-06, "loss": 0.98, "step": 2847 }, { "epoch": 0.20575433019668757, "grad_norm": 6.016420782388955, "learning_rate": 4.93070783370863e-06, "loss": 0.9653, "step": 2848 }, { "epoch": 0.20582657539689708, "grad_norm": 8.8850148606914, "learning_rate": 4.9306394280937565e-06, "loss": 0.9935, "step": 2849 }, { "epoch": 0.20589882059710657, "grad_norm": 9.91675403032902, "learning_rate": 4.930570989205317e-06, "loss": 0.9709, "step": 2850 }, { "epoch": 0.20597106579731608, "grad_norm": 6.5021813473764265, "learning_rate": 4.930502517044247e-06, "loss": 0.8667, "step": 2851 }, { "epoch": 0.2060433109975256, "grad_norm": 5.679591119045544, "learning_rate": 4.930434011611485e-06, "loss": 0.9504, "step": 2852 }, { "epoch": 0.2061155561977351, "grad_norm": 7.668364737610193, "learning_rate": 4.930365472907967e-06, "loss": 0.9302, "step": 2853 }, { "epoch": 0.20618780139794463, "grad_norm": 10.007511369640135, "learning_rate": 4.930296900934635e-06, "loss": 0.9632, "step": 2854 }, { "epoch": 0.20626004659815414, "grad_norm": 7.88871467051712, "learning_rate": 4.9302282956924245e-06, "loss": 1.0631, "step": 2855 }, { "epoch": 0.20633229179836365, "grad_norm": 10.07956967903644, "learning_rate": 4.930159657182275e-06, "loss": 0.9043, "step": 2856 }, { "epoch": 0.20640453699857317, "grad_norm": 6.594555846232711, "learning_rate": 4.930090985405127e-06, "loss": 0.9279, "step": 2857 }, { "epoch": 0.20647678219878265, "grad_norm": 8.166067711281507, "learning_rate": 4.93002228036192e-06, "loss": 0.9265, "step": 2858 }, { "epoch": 0.20654902739899217, "grad_norm": 10.094411349996081, "learning_rate": 4.929953542053596e-06, "loss": 1.0322, "step": 2859 }, { "epoch": 0.20662127259920168, "grad_norm": 9.124301413354246, "learning_rate": 4.929884770481094e-06, "loss": 1.0096, "step": 2860 }, { "epoch": 0.2066935177994112, "grad_norm": 7.850320564090639, "learning_rate": 4.929815965645356e-06, "loss": 0.8971, "step": 2861 }, { "epoch": 0.2067657629996207, "grad_norm": 8.161754869739163, "learning_rate": 4.929747127547324e-06, "loss": 0.9226, "step": 2862 }, { "epoch": 0.20683800819983023, "grad_norm": 8.017153469646155, "learning_rate": 4.929678256187941e-06, "loss": 0.8883, "step": 2863 }, { "epoch": 0.20691025340003974, "grad_norm": 8.226133098064382, "learning_rate": 4.929609351568148e-06, "loss": 0.908, "step": 2864 }, { "epoch": 0.20698249860024925, "grad_norm": 6.7495645453254, "learning_rate": 4.92954041368889e-06, "loss": 0.8926, "step": 2865 }, { "epoch": 0.20705474380045877, "grad_norm": 8.037792583734705, "learning_rate": 4.9294714425511105e-06, "loss": 1.0686, "step": 2866 }, { "epoch": 0.20712698900066825, "grad_norm": 6.473047570397796, "learning_rate": 4.929402438155754e-06, "loss": 0.9855, "step": 2867 }, { "epoch": 0.20719923420087777, "grad_norm": 6.730306365477976, "learning_rate": 4.929333400503763e-06, "loss": 0.9125, "step": 2868 }, { "epoch": 0.20727147940108728, "grad_norm": 8.37586128019433, "learning_rate": 4.929264329596085e-06, "loss": 0.9693, "step": 2869 }, { "epoch": 0.2073437246012968, "grad_norm": 8.300064307273939, "learning_rate": 4.929195225433664e-06, "loss": 1.0175, "step": 2870 }, { "epoch": 0.2074159698015063, "grad_norm": 9.690628432611856, "learning_rate": 4.929126088017446e-06, "loss": 1.0135, "step": 2871 }, { "epoch": 0.20748821500171583, "grad_norm": 7.365430927916319, "learning_rate": 4.929056917348379e-06, "loss": 0.9753, "step": 2872 }, { "epoch": 0.20756046020192534, "grad_norm": 8.200111983860438, "learning_rate": 4.928987713427409e-06, "loss": 0.9929, "step": 2873 }, { "epoch": 0.20763270540213485, "grad_norm": 6.759291858488036, "learning_rate": 4.928918476255482e-06, "loss": 0.9596, "step": 2874 }, { "epoch": 0.20770495060234437, "grad_norm": 6.036811754634119, "learning_rate": 4.928849205833548e-06, "loss": 0.8504, "step": 2875 }, { "epoch": 0.20777719580255385, "grad_norm": 6.0577477775148685, "learning_rate": 4.9287799021625535e-06, "loss": 0.8913, "step": 2876 }, { "epoch": 0.20784944100276337, "grad_norm": 6.880984943769803, "learning_rate": 4.928710565243448e-06, "loss": 0.9029, "step": 2877 }, { "epoch": 0.20792168620297288, "grad_norm": 8.62145436703298, "learning_rate": 4.928641195077182e-06, "loss": 0.8602, "step": 2878 }, { "epoch": 0.2079939314031824, "grad_norm": 7.68141516561455, "learning_rate": 4.928571791664703e-06, "loss": 0.9045, "step": 2879 }, { "epoch": 0.2080661766033919, "grad_norm": 6.834864871203906, "learning_rate": 4.9285023550069614e-06, "loss": 0.9546, "step": 2880 }, { "epoch": 0.20813842180360143, "grad_norm": 7.164459716700664, "learning_rate": 4.928432885104908e-06, "loss": 0.8776, "step": 2881 }, { "epoch": 0.20821066700381094, "grad_norm": 8.162341886569209, "learning_rate": 4.9283633819594945e-06, "loss": 0.9895, "step": 2882 }, { "epoch": 0.20828291220402045, "grad_norm": 6.2815269295296545, "learning_rate": 4.928293845571671e-06, "loss": 0.9078, "step": 2883 }, { "epoch": 0.20835515740422997, "grad_norm": 7.355546542671473, "learning_rate": 4.928224275942392e-06, "loss": 0.96, "step": 2884 }, { "epoch": 0.20842740260443945, "grad_norm": 7.125304901559339, "learning_rate": 4.928154673072606e-06, "loss": 0.9607, "step": 2885 }, { "epoch": 0.20849964780464897, "grad_norm": 8.733129750371077, "learning_rate": 4.9280850369632685e-06, "loss": 0.9913, "step": 2886 }, { "epoch": 0.20857189300485848, "grad_norm": 5.978770208561883, "learning_rate": 4.9280153676153335e-06, "loss": 0.9392, "step": 2887 }, { "epoch": 0.208644138205068, "grad_norm": 6.929932737257983, "learning_rate": 4.927945665029751e-06, "loss": 0.8647, "step": 2888 }, { "epoch": 0.2087163834052775, "grad_norm": 7.157497513824255, "learning_rate": 4.92787592920748e-06, "loss": 0.9238, "step": 2889 }, { "epoch": 0.20878862860548703, "grad_norm": 7.076706093552387, "learning_rate": 4.927806160149471e-06, "loss": 0.9241, "step": 2890 }, { "epoch": 0.20886087380569654, "grad_norm": 7.525935026863071, "learning_rate": 4.9277363578566806e-06, "loss": 0.982, "step": 2891 }, { "epoch": 0.20893311900590605, "grad_norm": 7.162559692773182, "learning_rate": 4.927666522330065e-06, "loss": 0.9001, "step": 2892 }, { "epoch": 0.20900536420611557, "grad_norm": 6.0838138268631825, "learning_rate": 4.92759665357058e-06, "loss": 0.9372, "step": 2893 }, { "epoch": 0.20907760940632505, "grad_norm": 7.255108841987282, "learning_rate": 4.927526751579182e-06, "loss": 0.9098, "step": 2894 }, { "epoch": 0.20914985460653457, "grad_norm": 6.704176600203693, "learning_rate": 4.927456816356826e-06, "loss": 0.9183, "step": 2895 }, { "epoch": 0.20922209980674408, "grad_norm": 7.381487562990141, "learning_rate": 4.927386847904473e-06, "loss": 0.8559, "step": 2896 }, { "epoch": 0.2092943450069536, "grad_norm": 6.420089139230394, "learning_rate": 4.927316846223077e-06, "loss": 0.8774, "step": 2897 }, { "epoch": 0.2093665902071631, "grad_norm": 7.780785481632407, "learning_rate": 4.927246811313599e-06, "loss": 0.8442, "step": 2898 }, { "epoch": 0.20943883540737263, "grad_norm": 8.046690114684287, "learning_rate": 4.927176743176997e-06, "loss": 0.9251, "step": 2899 }, { "epoch": 0.20951108060758214, "grad_norm": 6.1912355802581045, "learning_rate": 4.927106641814229e-06, "loss": 0.8805, "step": 2900 }, { "epoch": 0.20958332580779165, "grad_norm": 6.754008198062981, "learning_rate": 4.927036507226256e-06, "loss": 0.9848, "step": 2901 }, { "epoch": 0.20965557100800117, "grad_norm": 6.234562538787211, "learning_rate": 4.926966339414039e-06, "loss": 0.9127, "step": 2902 }, { "epoch": 0.20972781620821065, "grad_norm": 5.426648591751127, "learning_rate": 4.926896138378535e-06, "loss": 0.833, "step": 2903 }, { "epoch": 0.20980006140842017, "grad_norm": 7.202039260295374, "learning_rate": 4.926825904120709e-06, "loss": 0.9493, "step": 2904 }, { "epoch": 0.20987230660862968, "grad_norm": 8.078559908052194, "learning_rate": 4.92675563664152e-06, "loss": 0.9693, "step": 2905 }, { "epoch": 0.2099445518088392, "grad_norm": 7.949690221353189, "learning_rate": 4.926685335941931e-06, "loss": 1.0673, "step": 2906 }, { "epoch": 0.2100167970090487, "grad_norm": 7.322970727003809, "learning_rate": 4.926615002022903e-06, "loss": 0.8954, "step": 2907 }, { "epoch": 0.21008904220925823, "grad_norm": 8.367423202941806, "learning_rate": 4.926544634885401e-06, "loss": 0.9462, "step": 2908 }, { "epoch": 0.21016128740946774, "grad_norm": 8.081791471793533, "learning_rate": 4.926474234530386e-06, "loss": 0.9097, "step": 2909 }, { "epoch": 0.21023353260967725, "grad_norm": 7.247801611823112, "learning_rate": 4.926403800958824e-06, "loss": 1.0008, "step": 2910 }, { "epoch": 0.21030577780988677, "grad_norm": 7.48600047385, "learning_rate": 4.926333334171676e-06, "loss": 0.9692, "step": 2911 }, { "epoch": 0.21037802301009625, "grad_norm": 9.590764976118873, "learning_rate": 4.92626283416991e-06, "loss": 0.9456, "step": 2912 }, { "epoch": 0.21045026821030577, "grad_norm": 7.170798094103367, "learning_rate": 4.926192300954489e-06, "loss": 1.0406, "step": 2913 }, { "epoch": 0.21052251341051528, "grad_norm": 8.703063635892285, "learning_rate": 4.92612173452638e-06, "loss": 0.8623, "step": 2914 }, { "epoch": 0.2105947586107248, "grad_norm": 13.660194787442066, "learning_rate": 4.926051134886548e-06, "loss": 1.0566, "step": 2915 }, { "epoch": 0.2106670038109343, "grad_norm": 8.569675974824051, "learning_rate": 4.925980502035959e-06, "loss": 1.002, "step": 2916 }, { "epoch": 0.21073924901114383, "grad_norm": 6.025702103983176, "learning_rate": 4.925909835975581e-06, "loss": 0.9284, "step": 2917 }, { "epoch": 0.21081149421135334, "grad_norm": 7.4330500265054855, "learning_rate": 4.9258391367063814e-06, "loss": 0.9188, "step": 2918 }, { "epoch": 0.21088373941156285, "grad_norm": 6.679131521543806, "learning_rate": 4.925768404229327e-06, "loss": 0.8616, "step": 2919 }, { "epoch": 0.21095598461177237, "grad_norm": 8.77856795326314, "learning_rate": 4.925697638545387e-06, "loss": 0.9136, "step": 2920 }, { "epoch": 0.21102822981198185, "grad_norm": 9.208797759575232, "learning_rate": 4.925626839655529e-06, "loss": 0.9763, "step": 2921 }, { "epoch": 0.21110047501219137, "grad_norm": 7.76864080375352, "learning_rate": 4.925556007560723e-06, "loss": 0.957, "step": 2922 }, { "epoch": 0.21117272021240088, "grad_norm": 6.337986324393053, "learning_rate": 4.92548514226194e-06, "loss": 0.9282, "step": 2923 }, { "epoch": 0.2112449654126104, "grad_norm": 6.31245830257732, "learning_rate": 4.925414243760147e-06, "loss": 0.8955, "step": 2924 }, { "epoch": 0.2113172106128199, "grad_norm": 7.290096295509596, "learning_rate": 4.925343312056318e-06, "loss": 0.9739, "step": 2925 }, { "epoch": 0.21138945581302943, "grad_norm": 7.35874544273457, "learning_rate": 4.9252723471514205e-06, "loss": 0.9718, "step": 2926 }, { "epoch": 0.21146170101323894, "grad_norm": 9.994016383034174, "learning_rate": 4.925201349046429e-06, "loss": 0.9518, "step": 2927 }, { "epoch": 0.21153394621344845, "grad_norm": 7.492637326417489, "learning_rate": 4.925130317742313e-06, "loss": 0.8837, "step": 2928 }, { "epoch": 0.21160619141365797, "grad_norm": 6.394457276603284, "learning_rate": 4.9250592532400466e-06, "loss": 0.9747, "step": 2929 }, { "epoch": 0.21167843661386745, "grad_norm": 6.737067089433598, "learning_rate": 4.924988155540601e-06, "loss": 0.9798, "step": 2930 }, { "epoch": 0.21175068181407697, "grad_norm": 5.6920154797899105, "learning_rate": 4.924917024644952e-06, "loss": 0.9058, "step": 2931 }, { "epoch": 0.21182292701428648, "grad_norm": 7.390827587488326, "learning_rate": 4.92484586055407e-06, "loss": 0.906, "step": 2932 }, { "epoch": 0.211895172214496, "grad_norm": 6.975731606893718, "learning_rate": 4.924774663268932e-06, "loss": 0.7564, "step": 2933 }, { "epoch": 0.2119674174147055, "grad_norm": 7.215922805237896, "learning_rate": 4.9247034327905115e-06, "loss": 0.9734, "step": 2934 }, { "epoch": 0.21203966261491503, "grad_norm": 6.453058496460437, "learning_rate": 4.924632169119784e-06, "loss": 0.9691, "step": 2935 }, { "epoch": 0.21211190781512454, "grad_norm": 7.726126539931582, "learning_rate": 4.924560872257724e-06, "loss": 0.9491, "step": 2936 }, { "epoch": 0.21218415301533405, "grad_norm": 6.93909497870833, "learning_rate": 4.9244895422053085e-06, "loss": 0.9156, "step": 2937 }, { "epoch": 0.21225639821554357, "grad_norm": 6.791324567857608, "learning_rate": 4.924418178963514e-06, "loss": 0.901, "step": 2938 }, { "epoch": 0.21232864341575305, "grad_norm": 7.379048674760346, "learning_rate": 4.924346782533317e-06, "loss": 1.0545, "step": 2939 }, { "epoch": 0.21240088861596257, "grad_norm": 6.923148216793387, "learning_rate": 4.924275352915694e-06, "loss": 0.9205, "step": 2940 }, { "epoch": 0.21247313381617208, "grad_norm": 6.965572436730372, "learning_rate": 4.924203890111624e-06, "loss": 0.9382, "step": 2941 }, { "epoch": 0.2125453790163816, "grad_norm": 6.742937136953759, "learning_rate": 4.924132394122086e-06, "loss": 0.9804, "step": 2942 }, { "epoch": 0.2126176242165911, "grad_norm": 6.834937426846441, "learning_rate": 4.924060864948057e-06, "loss": 0.9324, "step": 2943 }, { "epoch": 0.21268986941680063, "grad_norm": 21.430455777105003, "learning_rate": 4.923989302590518e-06, "loss": 0.9517, "step": 2944 }, { "epoch": 0.21276211461701014, "grad_norm": 7.828975303900379, "learning_rate": 4.923917707050446e-06, "loss": 0.9294, "step": 2945 }, { "epoch": 0.21283435981721965, "grad_norm": 7.0081995216080974, "learning_rate": 4.923846078328823e-06, "loss": 0.9718, "step": 2946 }, { "epoch": 0.21290660501742917, "grad_norm": 8.462625801807958, "learning_rate": 4.92377441642663e-06, "loss": 0.9833, "step": 2947 }, { "epoch": 0.21297885021763865, "grad_norm": 5.966473369912662, "learning_rate": 4.923702721344847e-06, "loss": 0.9127, "step": 2948 }, { "epoch": 0.21305109541784817, "grad_norm": 6.3287133214175135, "learning_rate": 4.923630993084455e-06, "loss": 0.8757, "step": 2949 }, { "epoch": 0.21312334061805768, "grad_norm": 6.291832658544329, "learning_rate": 4.923559231646437e-06, "loss": 0.9055, "step": 2950 }, { "epoch": 0.2131955858182672, "grad_norm": 6.364318276071729, "learning_rate": 4.923487437031774e-06, "loss": 0.9161, "step": 2951 }, { "epoch": 0.2132678310184767, "grad_norm": 6.301755920816584, "learning_rate": 4.9234156092414505e-06, "loss": 0.9306, "step": 2952 }, { "epoch": 0.21334007621868623, "grad_norm": 7.059976042363742, "learning_rate": 4.923343748276449e-06, "loss": 1.0121, "step": 2953 }, { "epoch": 0.21341232141889574, "grad_norm": 6.654759773028531, "learning_rate": 4.9232718541377535e-06, "loss": 0.9871, "step": 2954 }, { "epoch": 0.21348456661910525, "grad_norm": 7.499391912604508, "learning_rate": 4.923199926826347e-06, "loss": 0.9726, "step": 2955 }, { "epoch": 0.21355681181931477, "grad_norm": 7.269982522263959, "learning_rate": 4.9231279663432156e-06, "loss": 0.9237, "step": 2956 }, { "epoch": 0.21362905701952425, "grad_norm": 6.51844869120282, "learning_rate": 4.923055972689344e-06, "loss": 0.9785, "step": 2957 }, { "epoch": 0.21370130221973377, "grad_norm": 6.779455272656153, "learning_rate": 4.922983945865717e-06, "loss": 0.9127, "step": 2958 }, { "epoch": 0.21377354741994328, "grad_norm": 6.812669104480889, "learning_rate": 4.92291188587332e-06, "loss": 0.8746, "step": 2959 }, { "epoch": 0.2138457926201528, "grad_norm": 7.431122169004748, "learning_rate": 4.9228397927131425e-06, "loss": 0.9413, "step": 2960 }, { "epoch": 0.2139180378203623, "grad_norm": 9.219778229865952, "learning_rate": 4.922767666386169e-06, "loss": 1.0039, "step": 2961 }, { "epoch": 0.21399028302057183, "grad_norm": 7.194107866044354, "learning_rate": 4.922695506893387e-06, "loss": 0.9248, "step": 2962 }, { "epoch": 0.21406252822078134, "grad_norm": 7.512821935778906, "learning_rate": 4.922623314235784e-06, "loss": 0.9067, "step": 2963 }, { "epoch": 0.21413477342099085, "grad_norm": 6.547944703033493, "learning_rate": 4.9225510884143504e-06, "loss": 0.8541, "step": 2964 }, { "epoch": 0.21420701862120034, "grad_norm": 7.210574321755552, "learning_rate": 4.922478829430072e-06, "loss": 0.9912, "step": 2965 }, { "epoch": 0.21427926382140985, "grad_norm": 8.99875632276179, "learning_rate": 4.922406537283941e-06, "loss": 0.9952, "step": 2966 }, { "epoch": 0.21435150902161937, "grad_norm": 10.513174601896466, "learning_rate": 4.922334211976943e-06, "loss": 0.9419, "step": 2967 }, { "epoch": 0.21442375422182888, "grad_norm": 9.044640352385711, "learning_rate": 4.922261853510072e-06, "loss": 0.9351, "step": 2968 }, { "epoch": 0.2144959994220384, "grad_norm": 7.5994954293051125, "learning_rate": 4.922189461884317e-06, "loss": 0.8884, "step": 2969 }, { "epoch": 0.2145682446222479, "grad_norm": 7.936348658885923, "learning_rate": 4.922117037100668e-06, "loss": 1.0357, "step": 2970 }, { "epoch": 0.21464048982245743, "grad_norm": 8.786544344122465, "learning_rate": 4.9220445791601185e-06, "loss": 0.9456, "step": 2971 }, { "epoch": 0.21471273502266694, "grad_norm": 9.044404161859774, "learning_rate": 4.921972088063659e-06, "loss": 0.9743, "step": 2972 }, { "epoch": 0.21478498022287645, "grad_norm": 6.984863537252049, "learning_rate": 4.921899563812282e-06, "loss": 0.9081, "step": 2973 }, { "epoch": 0.21485722542308594, "grad_norm": 7.001743099622084, "learning_rate": 4.921827006406981e-06, "loss": 0.9117, "step": 2974 }, { "epoch": 0.21492947062329545, "grad_norm": 7.9182219199011294, "learning_rate": 4.921754415848748e-06, "loss": 0.8887, "step": 2975 }, { "epoch": 0.21500171582350497, "grad_norm": 7.049612253767352, "learning_rate": 4.921681792138577e-06, "loss": 1.0012, "step": 2976 }, { "epoch": 0.21507396102371448, "grad_norm": 6.13072559785554, "learning_rate": 4.921609135277463e-06, "loss": 0.9014, "step": 2977 }, { "epoch": 0.215146206223924, "grad_norm": 7.25354127885024, "learning_rate": 4.9215364452664005e-06, "loss": 0.9699, "step": 2978 }, { "epoch": 0.2152184514241335, "grad_norm": 7.3983596427067555, "learning_rate": 4.9214637221063845e-06, "loss": 0.9056, "step": 2979 }, { "epoch": 0.21529069662434303, "grad_norm": 6.800425897612297, "learning_rate": 4.92139096579841e-06, "loss": 0.9354, "step": 2980 }, { "epoch": 0.21536294182455254, "grad_norm": 7.340983157719668, "learning_rate": 4.921318176343474e-06, "loss": 0.8737, "step": 2981 }, { "epoch": 0.21543518702476205, "grad_norm": 7.045727553909249, "learning_rate": 4.921245353742571e-06, "loss": 0.9161, "step": 2982 }, { "epoch": 0.21550743222497154, "grad_norm": 7.345037339377227, "learning_rate": 4.921172497996699e-06, "loss": 0.9747, "step": 2983 }, { "epoch": 0.21557967742518105, "grad_norm": 9.034612432293807, "learning_rate": 4.9210996091068565e-06, "loss": 1.011, "step": 2984 }, { "epoch": 0.21565192262539057, "grad_norm": 6.6888332285770735, "learning_rate": 4.921026687074039e-06, "loss": 1.0055, "step": 2985 }, { "epoch": 0.21572416782560008, "grad_norm": 7.285127188698738, "learning_rate": 4.9209537318992466e-06, "loss": 0.9196, "step": 2986 }, { "epoch": 0.2157964130258096, "grad_norm": 6.747628819810808, "learning_rate": 4.920880743583478e-06, "loss": 0.903, "step": 2987 }, { "epoch": 0.2158686582260191, "grad_norm": 8.357889489065155, "learning_rate": 4.92080772212773e-06, "loss": 1.006, "step": 2988 }, { "epoch": 0.21594090342622863, "grad_norm": 7.731141310277071, "learning_rate": 4.9207346675330055e-06, "loss": 0.8892, "step": 2989 }, { "epoch": 0.21601314862643814, "grad_norm": 6.723852021322203, "learning_rate": 4.920661579800303e-06, "loss": 0.8607, "step": 2990 }, { "epoch": 0.21608539382664765, "grad_norm": 7.715864746522006, "learning_rate": 4.920588458930622e-06, "loss": 0.9562, "step": 2991 }, { "epoch": 0.21615763902685714, "grad_norm": 8.203065475974515, "learning_rate": 4.920515304924965e-06, "loss": 0.9777, "step": 2992 }, { "epoch": 0.21622988422706665, "grad_norm": 6.796612333286663, "learning_rate": 4.920442117784333e-06, "loss": 0.8827, "step": 2993 }, { "epoch": 0.21630212942727617, "grad_norm": 6.652284112328097, "learning_rate": 4.920368897509727e-06, "loss": 0.8814, "step": 2994 }, { "epoch": 0.21637437462748568, "grad_norm": 7.060376683633454, "learning_rate": 4.920295644102151e-06, "loss": 0.8752, "step": 2995 }, { "epoch": 0.2164466198276952, "grad_norm": 7.727145551971159, "learning_rate": 4.9202223575626065e-06, "loss": 0.9349, "step": 2996 }, { "epoch": 0.2165188650279047, "grad_norm": 6.943558530459578, "learning_rate": 4.920149037892097e-06, "loss": 0.8562, "step": 2997 }, { "epoch": 0.21659111022811423, "grad_norm": 10.124625870185548, "learning_rate": 4.9200756850916264e-06, "loss": 0.9096, "step": 2998 }, { "epoch": 0.21666335542832374, "grad_norm": 6.2385295132972844, "learning_rate": 4.9200022991621995e-06, "loss": 0.8847, "step": 2999 }, { "epoch": 0.21673560062853325, "grad_norm": 7.3880755360061405, "learning_rate": 4.919928880104819e-06, "loss": 0.8805, "step": 3000 }, { "epoch": 0.21680784582874274, "grad_norm": 6.772775162253469, "learning_rate": 4.919855427920491e-06, "loss": 0.9124, "step": 3001 }, { "epoch": 0.21688009102895225, "grad_norm": 6.313255944642083, "learning_rate": 4.919781942610222e-06, "loss": 1.0062, "step": 3002 }, { "epoch": 0.21695233622916177, "grad_norm": 6.877727175711146, "learning_rate": 4.919708424175017e-06, "loss": 0.9834, "step": 3003 }, { "epoch": 0.21702458142937128, "grad_norm": 8.276096107747177, "learning_rate": 4.919634872615882e-06, "loss": 0.9302, "step": 3004 }, { "epoch": 0.2170968266295808, "grad_norm": 7.332966650840828, "learning_rate": 4.919561287933824e-06, "loss": 0.9447, "step": 3005 }, { "epoch": 0.2171690718297903, "grad_norm": 7.818946315090767, "learning_rate": 4.9194876701298515e-06, "loss": 1.0125, "step": 3006 }, { "epoch": 0.21724131702999983, "grad_norm": 8.507256944240725, "learning_rate": 4.919414019204971e-06, "loss": 0.9868, "step": 3007 }, { "epoch": 0.21731356223020934, "grad_norm": 6.394062936233857, "learning_rate": 4.919340335160191e-06, "loss": 0.9752, "step": 3008 }, { "epoch": 0.21738580743041885, "grad_norm": 9.572978343633489, "learning_rate": 4.919266617996521e-06, "loss": 0.9594, "step": 3009 }, { "epoch": 0.21745805263062834, "grad_norm": 8.833193891852181, "learning_rate": 4.919192867714968e-06, "loss": 0.976, "step": 3010 }, { "epoch": 0.21753029783083785, "grad_norm": 6.553599689047031, "learning_rate": 4.919119084316544e-06, "loss": 0.9785, "step": 3011 }, { "epoch": 0.21760254303104737, "grad_norm": 8.177094114907966, "learning_rate": 4.919045267802259e-06, "loss": 0.9695, "step": 3012 }, { "epoch": 0.21767478823125688, "grad_norm": 7.401061739660572, "learning_rate": 4.918971418173121e-06, "loss": 1.0776, "step": 3013 }, { "epoch": 0.2177470334314664, "grad_norm": 8.292275053417091, "learning_rate": 4.9188975354301425e-06, "loss": 0.9349, "step": 3014 }, { "epoch": 0.2178192786316759, "grad_norm": 7.91445822446593, "learning_rate": 4.918823619574335e-06, "loss": 0.9416, "step": 3015 }, { "epoch": 0.21789152383188543, "grad_norm": 9.830337542213272, "learning_rate": 4.91874967060671e-06, "loss": 0.9388, "step": 3016 }, { "epoch": 0.21796376903209494, "grad_norm": 7.8376657666163245, "learning_rate": 4.918675688528282e-06, "loss": 0.8668, "step": 3017 }, { "epoch": 0.21803601423230445, "grad_norm": 7.063917169761407, "learning_rate": 4.91860167334006e-06, "loss": 0.9965, "step": 3018 }, { "epoch": 0.21810825943251394, "grad_norm": 7.523614962535775, "learning_rate": 4.918527625043059e-06, "loss": 0.9257, "step": 3019 }, { "epoch": 0.21818050463272345, "grad_norm": 7.304642588431453, "learning_rate": 4.918453543638294e-06, "loss": 0.9295, "step": 3020 }, { "epoch": 0.21825274983293297, "grad_norm": 6.592973563957242, "learning_rate": 4.918379429126776e-06, "loss": 0.7957, "step": 3021 }, { "epoch": 0.21832499503314248, "grad_norm": 6.256336509571404, "learning_rate": 4.9183052815095225e-06, "loss": 0.9426, "step": 3022 }, { "epoch": 0.218397240233352, "grad_norm": 7.804298184969108, "learning_rate": 4.918231100787547e-06, "loss": 1.039, "step": 3023 }, { "epoch": 0.2184694854335615, "grad_norm": 6.299374067910314, "learning_rate": 4.9181568869618655e-06, "loss": 0.9872, "step": 3024 }, { "epoch": 0.21854173063377103, "grad_norm": 5.681351576632508, "learning_rate": 4.918082640033494e-06, "loss": 0.8517, "step": 3025 }, { "epoch": 0.21861397583398054, "grad_norm": 6.245983511671814, "learning_rate": 4.918008360003449e-06, "loss": 0.8723, "step": 3026 }, { "epoch": 0.21868622103419005, "grad_norm": 8.588179124745299, "learning_rate": 4.917934046872746e-06, "loss": 0.9254, "step": 3027 }, { "epoch": 0.21875846623439954, "grad_norm": 6.239723607324349, "learning_rate": 4.917859700642404e-06, "loss": 0.9076, "step": 3028 }, { "epoch": 0.21883071143460905, "grad_norm": 7.786448262866258, "learning_rate": 4.9177853213134405e-06, "loss": 0.8577, "step": 3029 }, { "epoch": 0.21890295663481857, "grad_norm": 7.179724692164278, "learning_rate": 4.917710908886872e-06, "loss": 0.9948, "step": 3030 }, { "epoch": 0.21897520183502808, "grad_norm": 9.565763427769614, "learning_rate": 4.917636463363719e-06, "loss": 1.052, "step": 3031 }, { "epoch": 0.2190474470352376, "grad_norm": 7.437741572199468, "learning_rate": 4.9175619847450005e-06, "loss": 0.9214, "step": 3032 }, { "epoch": 0.2191196922354471, "grad_norm": 8.683447901384259, "learning_rate": 4.917487473031735e-06, "loss": 0.9935, "step": 3033 }, { "epoch": 0.21919193743565663, "grad_norm": 7.9547175103938175, "learning_rate": 4.917412928224943e-06, "loss": 0.9692, "step": 3034 }, { "epoch": 0.21926418263586614, "grad_norm": 7.430511781874528, "learning_rate": 4.917338350325645e-06, "loss": 0.9948, "step": 3035 }, { "epoch": 0.21933642783607565, "grad_norm": 8.060068636239919, "learning_rate": 4.917263739334862e-06, "loss": 0.9299, "step": 3036 }, { "epoch": 0.21940867303628514, "grad_norm": 6.31828632106144, "learning_rate": 4.917189095253615e-06, "loss": 0.9417, "step": 3037 }, { "epoch": 0.21948091823649465, "grad_norm": 7.682145153308036, "learning_rate": 4.9171144180829265e-06, "loss": 0.9582, "step": 3038 }, { "epoch": 0.21955316343670417, "grad_norm": 6.608673419194829, "learning_rate": 4.917039707823818e-06, "loss": 0.9829, "step": 3039 }, { "epoch": 0.21962540863691368, "grad_norm": 7.0174941395788455, "learning_rate": 4.916964964477314e-06, "loss": 0.8656, "step": 3040 }, { "epoch": 0.2196976538371232, "grad_norm": 6.64880613925482, "learning_rate": 4.916890188044435e-06, "loss": 0.9262, "step": 3041 }, { "epoch": 0.2197698990373327, "grad_norm": 6.434651753947534, "learning_rate": 4.916815378526206e-06, "loss": 0.9382, "step": 3042 }, { "epoch": 0.21984214423754223, "grad_norm": 6.651060561887885, "learning_rate": 4.9167405359236505e-06, "loss": 0.9153, "step": 3043 }, { "epoch": 0.21991438943775174, "grad_norm": 7.73292999640794, "learning_rate": 4.9166656602377946e-06, "loss": 0.8531, "step": 3044 }, { "epoch": 0.21998663463796125, "grad_norm": 6.033659932831696, "learning_rate": 4.916590751469662e-06, "loss": 0.9111, "step": 3045 }, { "epoch": 0.22005887983817074, "grad_norm": 7.020009189610566, "learning_rate": 4.916515809620278e-06, "loss": 0.9405, "step": 3046 }, { "epoch": 0.22013112503838025, "grad_norm": 6.437365187927755, "learning_rate": 4.9164408346906696e-06, "loss": 1.0196, "step": 3047 }, { "epoch": 0.22020337023858977, "grad_norm": 7.015454534485948, "learning_rate": 4.916365826681861e-06, "loss": 0.9849, "step": 3048 }, { "epoch": 0.22027561543879928, "grad_norm": 7.18110279168092, "learning_rate": 4.916290785594882e-06, "loss": 1.0241, "step": 3049 }, { "epoch": 0.2203478606390088, "grad_norm": 6.257179104375891, "learning_rate": 4.916215711430757e-06, "loss": 0.8653, "step": 3050 }, { "epoch": 0.2204201058392183, "grad_norm": 8.67482530049687, "learning_rate": 4.916140604190516e-06, "loss": 0.9129, "step": 3051 }, { "epoch": 0.22049235103942783, "grad_norm": 5.985682252576442, "learning_rate": 4.916065463875186e-06, "loss": 0.9338, "step": 3052 }, { "epoch": 0.22056459623963734, "grad_norm": 7.228567869981421, "learning_rate": 4.915990290485796e-06, "loss": 0.8946, "step": 3053 }, { "epoch": 0.22063684143984685, "grad_norm": 6.063167889172699, "learning_rate": 4.915915084023374e-06, "loss": 0.8654, "step": 3054 }, { "epoch": 0.22070908664005634, "grad_norm": 8.04398414016139, "learning_rate": 4.91583984448895e-06, "loss": 0.9389, "step": 3055 }, { "epoch": 0.22078133184026585, "grad_norm": 7.514227279154071, "learning_rate": 4.915764571883555e-06, "loss": 0.9598, "step": 3056 }, { "epoch": 0.22085357704047537, "grad_norm": 5.995783277792636, "learning_rate": 4.915689266208219e-06, "loss": 0.8767, "step": 3057 }, { "epoch": 0.22092582224068488, "grad_norm": 7.950901042811319, "learning_rate": 4.915613927463973e-06, "loss": 0.9243, "step": 3058 }, { "epoch": 0.2209980674408944, "grad_norm": 6.6547227996943255, "learning_rate": 4.915538555651846e-06, "loss": 0.9502, "step": 3059 }, { "epoch": 0.2210703126411039, "grad_norm": 6.580066820170609, "learning_rate": 4.915463150772874e-06, "loss": 0.8988, "step": 3060 }, { "epoch": 0.22114255784131343, "grad_norm": 9.446438428032513, "learning_rate": 4.915387712828085e-06, "loss": 0.88, "step": 3061 }, { "epoch": 0.22121480304152294, "grad_norm": 7.152902944842051, "learning_rate": 4.915312241818514e-06, "loss": 0.9792, "step": 3062 }, { "epoch": 0.22128704824173245, "grad_norm": 6.074345921598813, "learning_rate": 4.915236737745195e-06, "loss": 0.8991, "step": 3063 }, { "epoch": 0.22135929344194194, "grad_norm": 10.307272215997207, "learning_rate": 4.91516120060916e-06, "loss": 0.9635, "step": 3064 }, { "epoch": 0.22143153864215145, "grad_norm": 6.667518211657408, "learning_rate": 4.915085630411442e-06, "loss": 0.8517, "step": 3065 }, { "epoch": 0.22150378384236097, "grad_norm": 9.266247475928767, "learning_rate": 4.915010027153079e-06, "loss": 0.9712, "step": 3066 }, { "epoch": 0.22157602904257048, "grad_norm": 7.3135304825300675, "learning_rate": 4.914934390835102e-06, "loss": 0.9846, "step": 3067 }, { "epoch": 0.22164827424278, "grad_norm": 7.2406837852834585, "learning_rate": 4.9148587214585496e-06, "loss": 0.8977, "step": 3068 }, { "epoch": 0.2217205194429895, "grad_norm": 6.867658544709307, "learning_rate": 4.914783019024456e-06, "loss": 0.9922, "step": 3069 }, { "epoch": 0.22179276464319903, "grad_norm": 7.953039382443744, "learning_rate": 4.914707283533857e-06, "loss": 0.9987, "step": 3070 }, { "epoch": 0.22186500984340854, "grad_norm": 9.624108607642539, "learning_rate": 4.914631514987791e-06, "loss": 0.9623, "step": 3071 }, { "epoch": 0.22193725504361803, "grad_norm": 6.945726345850054, "learning_rate": 4.914555713387295e-06, "loss": 0.9149, "step": 3072 }, { "epoch": 0.22200950024382754, "grad_norm": 7.0390221259073815, "learning_rate": 4.9144798787334045e-06, "loss": 0.9698, "step": 3073 }, { "epoch": 0.22208174544403705, "grad_norm": 5.458073772442062, "learning_rate": 4.91440401102716e-06, "loss": 0.9429, "step": 3074 }, { "epoch": 0.22215399064424657, "grad_norm": 9.732253919695163, "learning_rate": 4.9143281102696e-06, "loss": 0.9189, "step": 3075 }, { "epoch": 0.22222623584445608, "grad_norm": 10.551355385156317, "learning_rate": 4.9142521764617616e-06, "loss": 1.0507, "step": 3076 }, { "epoch": 0.2222984810446656, "grad_norm": 6.052238037793398, "learning_rate": 4.914176209604686e-06, "loss": 0.9008, "step": 3077 }, { "epoch": 0.2223707262448751, "grad_norm": 8.704335099473468, "learning_rate": 4.914100209699412e-06, "loss": 0.919, "step": 3078 }, { "epoch": 0.22244297144508463, "grad_norm": 8.46387930015647, "learning_rate": 4.914024176746981e-06, "loss": 1.0411, "step": 3079 }, { "epoch": 0.22251521664529414, "grad_norm": 6.457711204179235, "learning_rate": 4.913948110748433e-06, "loss": 0.908, "step": 3080 }, { "epoch": 0.22258746184550363, "grad_norm": 8.367856751362808, "learning_rate": 4.913872011704811e-06, "loss": 0.865, "step": 3081 }, { "epoch": 0.22265970704571314, "grad_norm": 9.12059353553428, "learning_rate": 4.913795879617154e-06, "loss": 0.8475, "step": 3082 }, { "epoch": 0.22273195224592265, "grad_norm": 6.710141082508472, "learning_rate": 4.913719714486506e-06, "loss": 0.9367, "step": 3083 }, { "epoch": 0.22280419744613217, "grad_norm": 7.842015534389202, "learning_rate": 4.913643516313909e-06, "loss": 0.9756, "step": 3084 }, { "epoch": 0.22287644264634168, "grad_norm": 7.795251711732443, "learning_rate": 4.913567285100407e-06, "loss": 0.9476, "step": 3085 }, { "epoch": 0.2229486878465512, "grad_norm": 8.389037253756946, "learning_rate": 4.913491020847043e-06, "loss": 0.8799, "step": 3086 }, { "epoch": 0.2230209330467607, "grad_norm": 7.4743036666644525, "learning_rate": 4.91341472355486e-06, "loss": 0.8948, "step": 3087 }, { "epoch": 0.22309317824697023, "grad_norm": 6.312074552494607, "learning_rate": 4.913338393224903e-06, "loss": 1.0076, "step": 3088 }, { "epoch": 0.22316542344717974, "grad_norm": 7.203839909953731, "learning_rate": 4.913262029858219e-06, "loss": 0.9741, "step": 3089 }, { "epoch": 0.22323766864738923, "grad_norm": 8.078711482880967, "learning_rate": 4.91318563345585e-06, "loss": 0.9793, "step": 3090 }, { "epoch": 0.22330991384759874, "grad_norm": 7.648548472206309, "learning_rate": 4.913109204018844e-06, "loss": 0.9847, "step": 3091 }, { "epoch": 0.22338215904780825, "grad_norm": 6.615126251607525, "learning_rate": 4.9130327415482475e-06, "loss": 0.875, "step": 3092 }, { "epoch": 0.22345440424801777, "grad_norm": 7.094899357518086, "learning_rate": 4.9129562460451055e-06, "loss": 0.8789, "step": 3093 }, { "epoch": 0.22352664944822728, "grad_norm": 6.510341047737924, "learning_rate": 4.912879717510465e-06, "loss": 0.8718, "step": 3094 }, { "epoch": 0.2235988946484368, "grad_norm": 6.805257324465936, "learning_rate": 4.912803155945376e-06, "loss": 0.8419, "step": 3095 }, { "epoch": 0.2236711398486463, "grad_norm": 5.791595604915704, "learning_rate": 4.912726561350885e-06, "loss": 0.8943, "step": 3096 }, { "epoch": 0.22374338504885583, "grad_norm": 8.544121167841764, "learning_rate": 4.912649933728041e-06, "loss": 0.9574, "step": 3097 }, { "epoch": 0.22381563024906534, "grad_norm": 6.52938976900715, "learning_rate": 4.912573273077892e-06, "loss": 0.9323, "step": 3098 }, { "epoch": 0.22388787544927483, "grad_norm": 6.690131124400322, "learning_rate": 4.912496579401488e-06, "loss": 0.7989, "step": 3099 }, { "epoch": 0.22396012064948434, "grad_norm": 6.406505449365817, "learning_rate": 4.91241985269988e-06, "loss": 0.8848, "step": 3100 }, { "epoch": 0.22403236584969385, "grad_norm": 8.611021351773733, "learning_rate": 4.912343092974117e-06, "loss": 1.0132, "step": 3101 }, { "epoch": 0.22410461104990337, "grad_norm": 6.8359958144834145, "learning_rate": 4.9122663002252495e-06, "loss": 1.0136, "step": 3102 }, { "epoch": 0.22417685625011288, "grad_norm": 7.578926091275827, "learning_rate": 4.91218947445433e-06, "loss": 1.0134, "step": 3103 }, { "epoch": 0.2242491014503224, "grad_norm": 7.125072010412558, "learning_rate": 4.912112615662409e-06, "loss": 0.9589, "step": 3104 }, { "epoch": 0.2243213466505319, "grad_norm": 6.829310962331035, "learning_rate": 4.9120357238505395e-06, "loss": 0.9574, "step": 3105 }, { "epoch": 0.22439359185074143, "grad_norm": 6.427863454435477, "learning_rate": 4.911958799019774e-06, "loss": 0.8734, "step": 3106 }, { "epoch": 0.22446583705095094, "grad_norm": 7.47700765434507, "learning_rate": 4.911881841171165e-06, "loss": 0.9814, "step": 3107 }, { "epoch": 0.22453808225116043, "grad_norm": 6.92500348142681, "learning_rate": 4.911804850305767e-06, "loss": 0.9486, "step": 3108 }, { "epoch": 0.22461032745136994, "grad_norm": 6.269894180013284, "learning_rate": 4.911727826424632e-06, "loss": 0.9448, "step": 3109 }, { "epoch": 0.22468257265157945, "grad_norm": 7.354670549660465, "learning_rate": 4.911650769528817e-06, "loss": 0.938, "step": 3110 }, { "epoch": 0.22475481785178897, "grad_norm": 8.01496964837394, "learning_rate": 4.911573679619374e-06, "loss": 0.8855, "step": 3111 }, { "epoch": 0.22482706305199848, "grad_norm": 6.752986388613183, "learning_rate": 4.911496556697361e-06, "loss": 0.9454, "step": 3112 }, { "epoch": 0.224899308252208, "grad_norm": 6.401946916087812, "learning_rate": 4.911419400763832e-06, "loss": 0.9105, "step": 3113 }, { "epoch": 0.2249715534524175, "grad_norm": 7.940266044637242, "learning_rate": 4.911342211819843e-06, "loss": 0.9633, "step": 3114 }, { "epoch": 0.22504379865262703, "grad_norm": 6.158550007079366, "learning_rate": 4.911264989866452e-06, "loss": 0.96, "step": 3115 }, { "epoch": 0.22511604385283654, "grad_norm": 7.158171949539889, "learning_rate": 4.911187734904716e-06, "loss": 0.91, "step": 3116 }, { "epoch": 0.22518828905304603, "grad_norm": 6.257446735565353, "learning_rate": 4.911110446935692e-06, "loss": 0.9539, "step": 3117 }, { "epoch": 0.22526053425325554, "grad_norm": 6.11605025398161, "learning_rate": 4.911033125960439e-06, "loss": 0.8424, "step": 3118 }, { "epoch": 0.22533277945346505, "grad_norm": 6.970891101853926, "learning_rate": 4.910955771980013e-06, "loss": 0.9864, "step": 3119 }, { "epoch": 0.22540502465367457, "grad_norm": 7.321102462045292, "learning_rate": 4.910878384995475e-06, "loss": 0.8312, "step": 3120 }, { "epoch": 0.22547726985388408, "grad_norm": 6.442234113340942, "learning_rate": 4.910800965007884e-06, "loss": 0.9834, "step": 3121 }, { "epoch": 0.2255495150540936, "grad_norm": 8.29479103712742, "learning_rate": 4.9107235120182985e-06, "loss": 0.9536, "step": 3122 }, { "epoch": 0.2256217602543031, "grad_norm": 7.645509496443326, "learning_rate": 4.910646026027781e-06, "loss": 0.9177, "step": 3123 }, { "epoch": 0.22569400545451263, "grad_norm": 9.619875249549473, "learning_rate": 4.910568507037391e-06, "loss": 0.9854, "step": 3124 }, { "epoch": 0.22576625065472214, "grad_norm": 6.8279680107453435, "learning_rate": 4.9104909550481896e-06, "loss": 0.9662, "step": 3125 }, { "epoch": 0.22583849585493163, "grad_norm": 6.160131789444937, "learning_rate": 4.910413370061239e-06, "loss": 0.9641, "step": 3126 }, { "epoch": 0.22591074105514114, "grad_norm": 8.666627248038612, "learning_rate": 4.9103357520776e-06, "loss": 0.9675, "step": 3127 }, { "epoch": 0.22598298625535065, "grad_norm": 8.751325670363864, "learning_rate": 4.910258101098338e-06, "loss": 0.9064, "step": 3128 }, { "epoch": 0.22605523145556017, "grad_norm": 8.218529063689141, "learning_rate": 4.910180417124513e-06, "loss": 0.971, "step": 3129 }, { "epoch": 0.22612747665576968, "grad_norm": 8.386667806668752, "learning_rate": 4.910102700157189e-06, "loss": 0.9337, "step": 3130 }, { "epoch": 0.2261997218559792, "grad_norm": 6.72445167092888, "learning_rate": 4.91002495019743e-06, "loss": 0.8877, "step": 3131 }, { "epoch": 0.2262719670561887, "grad_norm": 6.741975323464707, "learning_rate": 4.909947167246303e-06, "loss": 0.8788, "step": 3132 }, { "epoch": 0.22634421225639823, "grad_norm": 6.737132488128585, "learning_rate": 4.909869351304868e-06, "loss": 0.9511, "step": 3133 }, { "epoch": 0.22641645745660774, "grad_norm": 7.136648460672608, "learning_rate": 4.909791502374194e-06, "loss": 0.8852, "step": 3134 }, { "epoch": 0.22648870265681723, "grad_norm": 8.743444984683972, "learning_rate": 4.909713620455345e-06, "loss": 0.9785, "step": 3135 }, { "epoch": 0.22656094785702674, "grad_norm": 6.790110060596202, "learning_rate": 4.909635705549387e-06, "loss": 0.8839, "step": 3136 }, { "epoch": 0.22663319305723625, "grad_norm": 7.738578501831193, "learning_rate": 4.9095577576573886e-06, "loss": 0.8585, "step": 3137 }, { "epoch": 0.22670543825744577, "grad_norm": 7.0259379347903, "learning_rate": 4.909479776780414e-06, "loss": 0.9751, "step": 3138 }, { "epoch": 0.22677768345765528, "grad_norm": 6.654353055053424, "learning_rate": 4.909401762919533e-06, "loss": 0.9292, "step": 3139 }, { "epoch": 0.2268499286578648, "grad_norm": 8.430450885701813, "learning_rate": 4.909323716075813e-06, "loss": 0.7966, "step": 3140 }, { "epoch": 0.2269221738580743, "grad_norm": 7.352844839923541, "learning_rate": 4.9092456362503206e-06, "loss": 0.9965, "step": 3141 }, { "epoch": 0.22699441905828383, "grad_norm": 7.500362133184387, "learning_rate": 4.909167523444127e-06, "loss": 1.0083, "step": 3142 }, { "epoch": 0.22706666425849334, "grad_norm": 7.735502650198027, "learning_rate": 4.9090893776583005e-06, "loss": 0.9853, "step": 3143 }, { "epoch": 0.22713890945870283, "grad_norm": 6.763169052111695, "learning_rate": 4.90901119889391e-06, "loss": 0.9042, "step": 3144 }, { "epoch": 0.22721115465891234, "grad_norm": 7.600208942151227, "learning_rate": 4.908932987152028e-06, "loss": 0.9178, "step": 3145 }, { "epoch": 0.22728339985912185, "grad_norm": 6.751637578051088, "learning_rate": 4.908854742433723e-06, "loss": 0.9124, "step": 3146 }, { "epoch": 0.22735564505933137, "grad_norm": 7.69956854069698, "learning_rate": 4.9087764647400684e-06, "loss": 0.959, "step": 3147 }, { "epoch": 0.22742789025954088, "grad_norm": 7.752516061224318, "learning_rate": 4.9086981540721325e-06, "loss": 1.0004, "step": 3148 }, { "epoch": 0.2275001354597504, "grad_norm": 6.925527878896105, "learning_rate": 4.90861981043099e-06, "loss": 0.8726, "step": 3149 }, { "epoch": 0.2275723806599599, "grad_norm": 6.257903328187597, "learning_rate": 4.908541433817712e-06, "loss": 0.8477, "step": 3150 }, { "epoch": 0.22764462586016943, "grad_norm": 7.490079804060262, "learning_rate": 4.908463024233372e-06, "loss": 0.9226, "step": 3151 }, { "epoch": 0.22771687106037894, "grad_norm": 6.844569048359705, "learning_rate": 4.908384581679044e-06, "loss": 0.9094, "step": 3152 }, { "epoch": 0.22778911626058843, "grad_norm": 7.30688260801182, "learning_rate": 4.9083061061558e-06, "loss": 0.929, "step": 3153 }, { "epoch": 0.22786136146079794, "grad_norm": 6.954651397151996, "learning_rate": 4.908227597664717e-06, "loss": 0.9446, "step": 3154 }, { "epoch": 0.22793360666100745, "grad_norm": 7.82599883605808, "learning_rate": 4.9081490562068655e-06, "loss": 0.9522, "step": 3155 }, { "epoch": 0.22800585186121697, "grad_norm": 6.930310897568946, "learning_rate": 4.908070481783325e-06, "loss": 0.8731, "step": 3156 }, { "epoch": 0.22807809706142648, "grad_norm": 6.533479592898436, "learning_rate": 4.907991874395169e-06, "loss": 0.9554, "step": 3157 }, { "epoch": 0.228150342261636, "grad_norm": 8.713540110081551, "learning_rate": 4.907913234043474e-06, "loss": 0.9764, "step": 3158 }, { "epoch": 0.2282225874618455, "grad_norm": 6.410865274906192, "learning_rate": 4.907834560729316e-06, "loss": 0.9414, "step": 3159 }, { "epoch": 0.22829483266205503, "grad_norm": 7.758964276360969, "learning_rate": 4.9077558544537725e-06, "loss": 0.9148, "step": 3160 }, { "epoch": 0.22836707786226454, "grad_norm": 10.049074682653078, "learning_rate": 4.907677115217922e-06, "loss": 1.0859, "step": 3161 }, { "epoch": 0.22843932306247403, "grad_norm": 7.544732778985537, "learning_rate": 4.90759834302284e-06, "loss": 0.9676, "step": 3162 }, { "epoch": 0.22851156826268354, "grad_norm": 10.51878057918535, "learning_rate": 4.9075195378696064e-06, "loss": 0.984, "step": 3163 }, { "epoch": 0.22858381346289305, "grad_norm": 11.11230945059053, "learning_rate": 4.907440699759299e-06, "loss": 0.9992, "step": 3164 }, { "epoch": 0.22865605866310257, "grad_norm": 9.046854760340054, "learning_rate": 4.907361828692999e-06, "loss": 0.9487, "step": 3165 }, { "epoch": 0.22872830386331208, "grad_norm": 6.2656587899455145, "learning_rate": 4.907282924671784e-06, "loss": 0.8778, "step": 3166 }, { "epoch": 0.2288005490635216, "grad_norm": 8.51278398350988, "learning_rate": 4.907203987696735e-06, "loss": 0.8884, "step": 3167 }, { "epoch": 0.2288727942637311, "grad_norm": 10.950667645728593, "learning_rate": 4.907125017768932e-06, "loss": 0.9767, "step": 3168 }, { "epoch": 0.22894503946394062, "grad_norm": 8.649853267417406, "learning_rate": 4.9070460148894575e-06, "loss": 0.8746, "step": 3169 }, { "epoch": 0.22901728466415014, "grad_norm": 8.098089167003428, "learning_rate": 4.906966979059391e-06, "loss": 0.9153, "step": 3170 }, { "epoch": 0.22908952986435963, "grad_norm": 6.798932765910477, "learning_rate": 4.9068879102798164e-06, "loss": 0.9472, "step": 3171 }, { "epoch": 0.22916177506456914, "grad_norm": 7.607708946364711, "learning_rate": 4.906808808551815e-06, "loss": 0.9214, "step": 3172 }, { "epoch": 0.22923402026477865, "grad_norm": 9.749698340811186, "learning_rate": 4.906729673876469e-06, "loss": 0.9306, "step": 3173 }, { "epoch": 0.22930626546498817, "grad_norm": 9.073806339503637, "learning_rate": 4.906650506254863e-06, "loss": 0.9931, "step": 3174 }, { "epoch": 0.22937851066519768, "grad_norm": 7.43925545115104, "learning_rate": 4.906571305688081e-06, "loss": 0.9037, "step": 3175 }, { "epoch": 0.2294507558654072, "grad_norm": 7.250732384882751, "learning_rate": 4.906492072177205e-06, "loss": 0.9495, "step": 3176 }, { "epoch": 0.2295230010656167, "grad_norm": 7.055834570279712, "learning_rate": 4.906412805723321e-06, "loss": 0.9555, "step": 3177 }, { "epoch": 0.22959524626582622, "grad_norm": 7.276937230745163, "learning_rate": 4.906333506327516e-06, "loss": 0.9014, "step": 3178 }, { "epoch": 0.2296674914660357, "grad_norm": 9.200602420488691, "learning_rate": 4.9062541739908715e-06, "loss": 0.8693, "step": 3179 }, { "epoch": 0.22973973666624523, "grad_norm": 6.8538178545500905, "learning_rate": 4.906174808714476e-06, "loss": 0.8847, "step": 3180 }, { "epoch": 0.22981198186645474, "grad_norm": 7.12151237927136, "learning_rate": 4.906095410499417e-06, "loss": 0.8977, "step": 3181 }, { "epoch": 0.22988422706666425, "grad_norm": 6.71699224145216, "learning_rate": 4.9060159793467784e-06, "loss": 0.9904, "step": 3182 }, { "epoch": 0.22995647226687377, "grad_norm": 7.822738164565999, "learning_rate": 4.905936515257651e-06, "loss": 0.9392, "step": 3183 }, { "epoch": 0.23002871746708328, "grad_norm": 6.782794547963458, "learning_rate": 4.90585701823312e-06, "loss": 0.9693, "step": 3184 }, { "epoch": 0.2301009626672928, "grad_norm": 7.488043855457178, "learning_rate": 4.905777488274274e-06, "loss": 1.0002, "step": 3185 }, { "epoch": 0.2301732078675023, "grad_norm": 8.80911904267989, "learning_rate": 4.905697925382203e-06, "loss": 0.9949, "step": 3186 }, { "epoch": 0.23024545306771182, "grad_norm": 6.668534811850744, "learning_rate": 4.905618329557994e-06, "loss": 0.9066, "step": 3187 }, { "epoch": 0.2303176982679213, "grad_norm": 7.582430719448373, "learning_rate": 4.905538700802739e-06, "loss": 1.0236, "step": 3188 }, { "epoch": 0.23038994346813083, "grad_norm": 6.997542631149585, "learning_rate": 4.905459039117527e-06, "loss": 1.0503, "step": 3189 }, { "epoch": 0.23046218866834034, "grad_norm": 8.234753478760432, "learning_rate": 4.905379344503448e-06, "loss": 0.9151, "step": 3190 }, { "epoch": 0.23053443386854985, "grad_norm": 6.841933754072542, "learning_rate": 4.905299616961594e-06, "loss": 0.9958, "step": 3191 }, { "epoch": 0.23060667906875937, "grad_norm": 6.997388625108711, "learning_rate": 4.905219856493055e-06, "loss": 0.896, "step": 3192 }, { "epoch": 0.23067892426896888, "grad_norm": 7.055883498425932, "learning_rate": 4.905140063098924e-06, "loss": 0.9188, "step": 3193 }, { "epoch": 0.2307511694691784, "grad_norm": 5.825375969668248, "learning_rate": 4.9050602367802935e-06, "loss": 0.8506, "step": 3194 }, { "epoch": 0.2308234146693879, "grad_norm": 6.348937971558646, "learning_rate": 4.9049803775382555e-06, "loss": 0.9113, "step": 3195 }, { "epoch": 0.23089565986959742, "grad_norm": 8.70202607865175, "learning_rate": 4.904900485373903e-06, "loss": 0.8969, "step": 3196 }, { "epoch": 0.2309679050698069, "grad_norm": 5.530385267692156, "learning_rate": 4.90482056028833e-06, "loss": 0.8055, "step": 3197 }, { "epoch": 0.23104015027001643, "grad_norm": 6.677645258435773, "learning_rate": 4.9047406022826315e-06, "loss": 0.9076, "step": 3198 }, { "epoch": 0.23111239547022594, "grad_norm": 5.872841945938325, "learning_rate": 4.904660611357901e-06, "loss": 0.7943, "step": 3199 }, { "epoch": 0.23118464067043545, "grad_norm": 6.600787560839982, "learning_rate": 4.9045805875152345e-06, "loss": 0.9632, "step": 3200 }, { "epoch": 0.23125688587064497, "grad_norm": 7.872829804282401, "learning_rate": 4.9045005307557256e-06, "loss": 0.9201, "step": 3201 }, { "epoch": 0.23132913107085448, "grad_norm": 7.979501928020765, "learning_rate": 4.904420441080472e-06, "loss": 0.8606, "step": 3202 }, { "epoch": 0.231401376271064, "grad_norm": 7.989371391844067, "learning_rate": 4.90434031849057e-06, "loss": 1.0711, "step": 3203 }, { "epoch": 0.2314736214712735, "grad_norm": 6.298923509452205, "learning_rate": 4.904260162987115e-06, "loss": 0.9381, "step": 3204 }, { "epoch": 0.23154586667148302, "grad_norm": 7.386017926001984, "learning_rate": 4.904179974571206e-06, "loss": 0.9326, "step": 3205 }, { "epoch": 0.2316181118716925, "grad_norm": 9.097464625370263, "learning_rate": 4.904099753243939e-06, "loss": 0.952, "step": 3206 }, { "epoch": 0.23169035707190203, "grad_norm": 7.334430641382598, "learning_rate": 4.904019499006414e-06, "loss": 0.9913, "step": 3207 }, { "epoch": 0.23176260227211154, "grad_norm": 6.696392101370183, "learning_rate": 4.903939211859727e-06, "loss": 0.8837, "step": 3208 }, { "epoch": 0.23183484747232105, "grad_norm": 6.176322937383979, "learning_rate": 4.90385889180498e-06, "loss": 0.8789, "step": 3209 }, { "epoch": 0.23190709267253057, "grad_norm": 7.097853494610679, "learning_rate": 4.9037785388432715e-06, "loss": 0.9989, "step": 3210 }, { "epoch": 0.23197933787274008, "grad_norm": 7.855875923761845, "learning_rate": 4.903698152975701e-06, "loss": 0.8965, "step": 3211 }, { "epoch": 0.2320515830729496, "grad_norm": 8.309256243422011, "learning_rate": 4.9036177342033685e-06, "loss": 0.9804, "step": 3212 }, { "epoch": 0.2321238282731591, "grad_norm": 7.6888136322856395, "learning_rate": 4.903537282527376e-06, "loss": 0.9961, "step": 3213 }, { "epoch": 0.23219607347336862, "grad_norm": 6.623830152186103, "learning_rate": 4.903456797948825e-06, "loss": 0.9167, "step": 3214 }, { "epoch": 0.2322683186735781, "grad_norm": 6.757506823379284, "learning_rate": 4.903376280468816e-06, "loss": 0.9515, "step": 3215 }, { "epoch": 0.23234056387378763, "grad_norm": 5.079277776306572, "learning_rate": 4.903295730088451e-06, "loss": 0.8399, "step": 3216 }, { "epoch": 0.23241280907399714, "grad_norm": 7.258239799215107, "learning_rate": 4.903215146808834e-06, "loss": 0.8552, "step": 3217 }, { "epoch": 0.23248505427420665, "grad_norm": 7.4234491546096155, "learning_rate": 4.903134530631068e-06, "loss": 0.9798, "step": 3218 }, { "epoch": 0.23255729947441617, "grad_norm": 6.921046360265713, "learning_rate": 4.9030538815562554e-06, "loss": 0.9304, "step": 3219 }, { "epoch": 0.23262954467462568, "grad_norm": 6.731378370954141, "learning_rate": 4.902973199585502e-06, "loss": 0.9751, "step": 3220 }, { "epoch": 0.2327017898748352, "grad_norm": 6.393426630786651, "learning_rate": 4.9028924847199115e-06, "loss": 0.8985, "step": 3221 }, { "epoch": 0.2327740350750447, "grad_norm": 9.387634715245103, "learning_rate": 4.902811736960588e-06, "loss": 0.9589, "step": 3222 }, { "epoch": 0.23284628027525422, "grad_norm": 6.484646784590386, "learning_rate": 4.9027309563086365e-06, "loss": 0.9341, "step": 3223 }, { "epoch": 0.2329185254754637, "grad_norm": 7.094398233649379, "learning_rate": 4.902650142765165e-06, "loss": 0.8992, "step": 3224 }, { "epoch": 0.23299077067567323, "grad_norm": 5.836141764016469, "learning_rate": 4.902569296331279e-06, "loss": 0.9574, "step": 3225 }, { "epoch": 0.23306301587588274, "grad_norm": 7.825118959150303, "learning_rate": 4.902488417008084e-06, "loss": 0.9011, "step": 3226 }, { "epoch": 0.23313526107609225, "grad_norm": 8.480086842937961, "learning_rate": 4.902407504796688e-06, "loss": 0.9699, "step": 3227 }, { "epoch": 0.23320750627630177, "grad_norm": 8.641231577657836, "learning_rate": 4.902326559698198e-06, "loss": 0.9182, "step": 3228 }, { "epoch": 0.23327975147651128, "grad_norm": 5.952964480180856, "learning_rate": 4.902245581713725e-06, "loss": 0.8571, "step": 3229 }, { "epoch": 0.2333519966767208, "grad_norm": 7.564519187034022, "learning_rate": 4.9021645708443735e-06, "loss": 0.9573, "step": 3230 }, { "epoch": 0.2334242418769303, "grad_norm": 9.475743295305923, "learning_rate": 4.9020835270912535e-06, "loss": 0.934, "step": 3231 }, { "epoch": 0.23349648707713982, "grad_norm": 8.05150949293026, "learning_rate": 4.902002450455477e-06, "loss": 0.9404, "step": 3232 }, { "epoch": 0.2335687322773493, "grad_norm": 6.163090192841385, "learning_rate": 4.90192134093815e-06, "loss": 0.928, "step": 3233 }, { "epoch": 0.23364097747755883, "grad_norm": 6.983439884268175, "learning_rate": 4.901840198540386e-06, "loss": 0.8767, "step": 3234 }, { "epoch": 0.23371322267776834, "grad_norm": 7.813725978025334, "learning_rate": 4.901759023263294e-06, "loss": 0.8652, "step": 3235 }, { "epoch": 0.23378546787797785, "grad_norm": 7.869720384606099, "learning_rate": 4.901677815107986e-06, "loss": 0.9858, "step": 3236 }, { "epoch": 0.23385771307818737, "grad_norm": 7.669159884640666, "learning_rate": 4.901596574075574e-06, "loss": 1.0478, "step": 3237 }, { "epoch": 0.23392995827839688, "grad_norm": 6.2890679590426615, "learning_rate": 4.901515300167169e-06, "loss": 0.9115, "step": 3238 }, { "epoch": 0.2340022034786064, "grad_norm": 8.762859812707719, "learning_rate": 4.901433993383885e-06, "loss": 1.0607, "step": 3239 }, { "epoch": 0.2340744486788159, "grad_norm": 7.870512637512635, "learning_rate": 4.901352653726833e-06, "loss": 0.8388, "step": 3240 }, { "epoch": 0.23414669387902542, "grad_norm": 7.21586808976012, "learning_rate": 4.901271281197129e-06, "loss": 0.9347, "step": 3241 }, { "epoch": 0.2342189390792349, "grad_norm": 7.139299198667591, "learning_rate": 4.901189875795885e-06, "loss": 0.9397, "step": 3242 }, { "epoch": 0.23429118427944443, "grad_norm": 7.153536219544565, "learning_rate": 4.9011084375242155e-06, "loss": 1.0497, "step": 3243 }, { "epoch": 0.23436342947965394, "grad_norm": 6.846638387877329, "learning_rate": 4.901026966383237e-06, "loss": 0.9874, "step": 3244 }, { "epoch": 0.23443567467986345, "grad_norm": 8.834560189129483, "learning_rate": 4.900945462374062e-06, "loss": 0.8827, "step": 3245 }, { "epoch": 0.23450791988007297, "grad_norm": 6.205979927387041, "learning_rate": 4.90086392549781e-06, "loss": 0.8783, "step": 3246 }, { "epoch": 0.23458016508028248, "grad_norm": 8.04646160952712, "learning_rate": 4.900782355755593e-06, "loss": 0.8824, "step": 3247 }, { "epoch": 0.234652410280492, "grad_norm": 6.37094196487265, "learning_rate": 4.900700753148531e-06, "loss": 0.9077, "step": 3248 }, { "epoch": 0.2347246554807015, "grad_norm": 7.420398701857687, "learning_rate": 4.900619117677739e-06, "loss": 0.8322, "step": 3249 }, { "epoch": 0.23479690068091102, "grad_norm": 9.756169641275, "learning_rate": 4.9005374493443355e-06, "loss": 1.0108, "step": 3250 }, { "epoch": 0.2348691458811205, "grad_norm": 9.83324751304634, "learning_rate": 4.900455748149438e-06, "loss": 0.9865, "step": 3251 }, { "epoch": 0.23494139108133003, "grad_norm": 7.607598882728903, "learning_rate": 4.900374014094165e-06, "loss": 0.9457, "step": 3252 }, { "epoch": 0.23501363628153954, "grad_norm": 6.347799277234788, "learning_rate": 4.900292247179636e-06, "loss": 0.8952, "step": 3253 }, { "epoch": 0.23508588148174905, "grad_norm": 7.915119247453952, "learning_rate": 4.90021044740697e-06, "loss": 0.9364, "step": 3254 }, { "epoch": 0.23515812668195857, "grad_norm": 7.690271885781809, "learning_rate": 4.900128614777286e-06, "loss": 0.9131, "step": 3255 }, { "epoch": 0.23523037188216808, "grad_norm": 6.9171195130920236, "learning_rate": 4.900046749291705e-06, "loss": 0.8665, "step": 3256 }, { "epoch": 0.2353026170823776, "grad_norm": 8.251839634944432, "learning_rate": 4.8999648509513475e-06, "loss": 0.9955, "step": 3257 }, { "epoch": 0.2353748622825871, "grad_norm": 6.365195420255148, "learning_rate": 4.899882919757335e-06, "loss": 0.8959, "step": 3258 }, { "epoch": 0.23544710748279662, "grad_norm": 7.338818261750587, "learning_rate": 4.899800955710789e-06, "loss": 0.9442, "step": 3259 }, { "epoch": 0.2355193526830061, "grad_norm": 6.741060058815137, "learning_rate": 4.89971895881283e-06, "loss": 0.9172, "step": 3260 }, { "epoch": 0.23559159788321563, "grad_norm": 5.708405023203552, "learning_rate": 4.899636929064583e-06, "loss": 0.9105, "step": 3261 }, { "epoch": 0.23566384308342514, "grad_norm": 6.239861466752894, "learning_rate": 4.899554866467169e-06, "loss": 0.8401, "step": 3262 }, { "epoch": 0.23573608828363465, "grad_norm": 8.867245147114375, "learning_rate": 4.899472771021712e-06, "loss": 1.0305, "step": 3263 }, { "epoch": 0.23580833348384417, "grad_norm": 8.429320941694291, "learning_rate": 4.899390642729336e-06, "loss": 0.9367, "step": 3264 }, { "epoch": 0.23588057868405368, "grad_norm": 7.391390674116212, "learning_rate": 4.899308481591164e-06, "loss": 0.9384, "step": 3265 }, { "epoch": 0.2359528238842632, "grad_norm": 7.080202854504771, "learning_rate": 4.899226287608323e-06, "loss": 0.8663, "step": 3266 }, { "epoch": 0.2360250690844727, "grad_norm": 7.719489305434573, "learning_rate": 4.899144060781937e-06, "loss": 0.9159, "step": 3267 }, { "epoch": 0.23609731428468222, "grad_norm": 8.169803976931416, "learning_rate": 4.899061801113132e-06, "loss": 1.026, "step": 3268 }, { "epoch": 0.2361695594848917, "grad_norm": 7.967057351251763, "learning_rate": 4.898979508603033e-06, "loss": 1.0581, "step": 3269 }, { "epoch": 0.23624180468510123, "grad_norm": 8.811073133598711, "learning_rate": 4.898897183252767e-06, "loss": 0.9661, "step": 3270 }, { "epoch": 0.23631404988531074, "grad_norm": 7.005539745635988, "learning_rate": 4.898814825063462e-06, "loss": 0.8429, "step": 3271 }, { "epoch": 0.23638629508552025, "grad_norm": 6.869349706709246, "learning_rate": 4.8987324340362445e-06, "loss": 0.9664, "step": 3272 }, { "epoch": 0.23645854028572977, "grad_norm": 7.728217739315419, "learning_rate": 4.8986500101722415e-06, "loss": 0.8704, "step": 3273 }, { "epoch": 0.23653078548593928, "grad_norm": 10.09122664618179, "learning_rate": 4.898567553472583e-06, "loss": 0.9946, "step": 3274 }, { "epoch": 0.2366030306861488, "grad_norm": 7.112658335661394, "learning_rate": 4.898485063938397e-06, "loss": 0.9507, "step": 3275 }, { "epoch": 0.2366752758863583, "grad_norm": 6.349056936806947, "learning_rate": 4.898402541570812e-06, "loss": 0.8483, "step": 3276 }, { "epoch": 0.23674752108656782, "grad_norm": 7.948321797522585, "learning_rate": 4.898319986370959e-06, "loss": 0.9712, "step": 3277 }, { "epoch": 0.2368197662867773, "grad_norm": 6.328759733791954, "learning_rate": 4.898237398339969e-06, "loss": 0.9402, "step": 3278 }, { "epoch": 0.23689201148698683, "grad_norm": 7.016864080966776, "learning_rate": 4.89815477747897e-06, "loss": 0.8967, "step": 3279 }, { "epoch": 0.23696425668719634, "grad_norm": 5.9942290845541795, "learning_rate": 4.898072123789094e-06, "loss": 0.9136, "step": 3280 }, { "epoch": 0.23703650188740585, "grad_norm": 7.614567357858441, "learning_rate": 4.8979894372714724e-06, "loss": 1.0067, "step": 3281 }, { "epoch": 0.23710874708761537, "grad_norm": 7.28720046093569, "learning_rate": 4.8979067179272375e-06, "loss": 0.9258, "step": 3282 }, { "epoch": 0.23718099228782488, "grad_norm": 8.097514922789873, "learning_rate": 4.897823965757521e-06, "loss": 0.915, "step": 3283 }, { "epoch": 0.2372532374880344, "grad_norm": 6.5560794484489735, "learning_rate": 4.8977411807634575e-06, "loss": 0.8833, "step": 3284 }, { "epoch": 0.2373254826882439, "grad_norm": 7.829091513047842, "learning_rate": 4.897658362946178e-06, "loss": 0.9383, "step": 3285 }, { "epoch": 0.2373977278884534, "grad_norm": 7.233626193023152, "learning_rate": 4.897575512306818e-06, "loss": 1.0069, "step": 3286 }, { "epoch": 0.2374699730886629, "grad_norm": 7.356968447751488, "learning_rate": 4.8974926288465095e-06, "loss": 0.9071, "step": 3287 }, { "epoch": 0.23754221828887243, "grad_norm": 6.643176784530677, "learning_rate": 4.8974097125663885e-06, "loss": 0.9254, "step": 3288 }, { "epoch": 0.23761446348908194, "grad_norm": 7.0222859525940695, "learning_rate": 4.897326763467591e-06, "loss": 0.8641, "step": 3289 }, { "epoch": 0.23768670868929145, "grad_norm": 7.193115429104139, "learning_rate": 4.897243781551252e-06, "loss": 0.8162, "step": 3290 }, { "epoch": 0.23775895388950097, "grad_norm": 7.522389878709072, "learning_rate": 4.897160766818506e-06, "loss": 0.945, "step": 3291 }, { "epoch": 0.23783119908971048, "grad_norm": 6.808231241245948, "learning_rate": 4.89707771927049e-06, "loss": 0.9415, "step": 3292 }, { "epoch": 0.23790344428992, "grad_norm": 6.100673713376259, "learning_rate": 4.896994638908342e-06, "loss": 0.8862, "step": 3293 }, { "epoch": 0.2379756894901295, "grad_norm": 7.768941804170745, "learning_rate": 4.896911525733198e-06, "loss": 1.0145, "step": 3294 }, { "epoch": 0.238047934690339, "grad_norm": 6.174053338881002, "learning_rate": 4.896828379746197e-06, "loss": 0.8778, "step": 3295 }, { "epoch": 0.2381201798905485, "grad_norm": 6.302139391268843, "learning_rate": 4.896745200948476e-06, "loss": 0.8998, "step": 3296 }, { "epoch": 0.23819242509075803, "grad_norm": 6.835261685343796, "learning_rate": 4.896661989341174e-06, "loss": 0.8913, "step": 3297 }, { "epoch": 0.23826467029096754, "grad_norm": 7.232306100123176, "learning_rate": 4.896578744925431e-06, "loss": 0.921, "step": 3298 }, { "epoch": 0.23833691549117705, "grad_norm": 6.586306451891125, "learning_rate": 4.896495467702385e-06, "loss": 0.9908, "step": 3299 }, { "epoch": 0.23840916069138657, "grad_norm": 6.635293700926779, "learning_rate": 4.896412157673177e-06, "loss": 0.8363, "step": 3300 }, { "epoch": 0.23848140589159608, "grad_norm": 7.289136552996687, "learning_rate": 4.896328814838948e-06, "loss": 1.0144, "step": 3301 }, { "epoch": 0.2385536510918056, "grad_norm": 7.419511599442889, "learning_rate": 4.8962454392008374e-06, "loss": 0.9995, "step": 3302 }, { "epoch": 0.2386258962920151, "grad_norm": 6.918320546179362, "learning_rate": 4.896162030759987e-06, "loss": 0.927, "step": 3303 }, { "epoch": 0.2386981414922246, "grad_norm": 6.574693668322151, "learning_rate": 4.896078589517539e-06, "loss": 0.9777, "step": 3304 }, { "epoch": 0.2387703866924341, "grad_norm": 5.705613688059598, "learning_rate": 4.895995115474636e-06, "loss": 0.916, "step": 3305 }, { "epoch": 0.23884263189264363, "grad_norm": 6.561304473970591, "learning_rate": 4.895911608632421e-06, "loss": 0.8587, "step": 3306 }, { "epoch": 0.23891487709285314, "grad_norm": 5.805844341844562, "learning_rate": 4.895828068992035e-06, "loss": 0.8484, "step": 3307 }, { "epoch": 0.23898712229306265, "grad_norm": 6.810688862962831, "learning_rate": 4.895744496554623e-06, "loss": 0.9431, "step": 3308 }, { "epoch": 0.23905936749327217, "grad_norm": 7.177634725073577, "learning_rate": 4.89566089132133e-06, "loss": 0.832, "step": 3309 }, { "epoch": 0.23913161269348168, "grad_norm": 6.548767253389182, "learning_rate": 4.8955772532932984e-06, "loss": 0.8954, "step": 3310 }, { "epoch": 0.2392038578936912, "grad_norm": 7.835633474520199, "learning_rate": 4.895493582471675e-06, "loss": 0.9235, "step": 3311 }, { "epoch": 0.2392761030939007, "grad_norm": 6.774972849436056, "learning_rate": 4.895409878857604e-06, "loss": 0.9304, "step": 3312 }, { "epoch": 0.2393483482941102, "grad_norm": 6.253596987395472, "learning_rate": 4.895326142452232e-06, "loss": 1.0255, "step": 3313 }, { "epoch": 0.2394205934943197, "grad_norm": 6.216256566706766, "learning_rate": 4.895242373256706e-06, "loss": 0.9647, "step": 3314 }, { "epoch": 0.23949283869452923, "grad_norm": 7.367086788067582, "learning_rate": 4.89515857127217e-06, "loss": 0.8936, "step": 3315 }, { "epoch": 0.23956508389473874, "grad_norm": 8.502089299750827, "learning_rate": 4.895074736499774e-06, "loss": 0.9425, "step": 3316 }, { "epoch": 0.23963732909494825, "grad_norm": 7.617588130489942, "learning_rate": 4.894990868940663e-06, "loss": 0.9586, "step": 3317 }, { "epoch": 0.23970957429515777, "grad_norm": 9.699623446676897, "learning_rate": 4.894906968595988e-06, "loss": 0.9702, "step": 3318 }, { "epoch": 0.23978181949536728, "grad_norm": 6.65363156391164, "learning_rate": 4.8948230354668955e-06, "loss": 0.9367, "step": 3319 }, { "epoch": 0.2398540646955768, "grad_norm": 7.107785493669842, "learning_rate": 4.8947390695545364e-06, "loss": 0.9421, "step": 3320 }, { "epoch": 0.2399263098957863, "grad_norm": 7.949397264580327, "learning_rate": 4.894655070860057e-06, "loss": 0.9426, "step": 3321 }, { "epoch": 0.2399985550959958, "grad_norm": 7.040017591367764, "learning_rate": 4.89457103938461e-06, "loss": 0.9475, "step": 3322 }, { "epoch": 0.2400708002962053, "grad_norm": 10.197883416271976, "learning_rate": 4.894486975129345e-06, "loss": 0.9769, "step": 3323 }, { "epoch": 0.24014304549641483, "grad_norm": 7.1721957265908, "learning_rate": 4.894402878095411e-06, "loss": 0.857, "step": 3324 }, { "epoch": 0.24021529069662434, "grad_norm": 7.725018014238705, "learning_rate": 4.894318748283962e-06, "loss": 0.9446, "step": 3325 }, { "epoch": 0.24028753589683385, "grad_norm": 7.378812418755585, "learning_rate": 4.8942345856961485e-06, "loss": 0.8946, "step": 3326 }, { "epoch": 0.24035978109704337, "grad_norm": 9.175006286452632, "learning_rate": 4.894150390333122e-06, "loss": 0.9276, "step": 3327 }, { "epoch": 0.24043202629725288, "grad_norm": 7.959537940871803, "learning_rate": 4.894066162196036e-06, "loss": 0.9183, "step": 3328 }, { "epoch": 0.2405042714974624, "grad_norm": 6.380975353820484, "learning_rate": 4.8939819012860426e-06, "loss": 0.898, "step": 3329 }, { "epoch": 0.2405765166976719, "grad_norm": 7.363498059225173, "learning_rate": 4.893897607604296e-06, "loss": 0.9293, "step": 3330 }, { "epoch": 0.2406487618978814, "grad_norm": 7.182519663363241, "learning_rate": 4.89381328115195e-06, "loss": 0.9761, "step": 3331 }, { "epoch": 0.2407210070980909, "grad_norm": 8.87652961551623, "learning_rate": 4.893728921930159e-06, "loss": 0.9433, "step": 3332 }, { "epoch": 0.24079325229830043, "grad_norm": 6.49613867289413, "learning_rate": 4.893644529940077e-06, "loss": 0.9059, "step": 3333 }, { "epoch": 0.24086549749850994, "grad_norm": 8.446415013170204, "learning_rate": 4.8935601051828605e-06, "loss": 0.9886, "step": 3334 }, { "epoch": 0.24093774269871945, "grad_norm": 7.883139143226184, "learning_rate": 4.893475647659664e-06, "loss": 0.8917, "step": 3335 }, { "epoch": 0.24100998789892897, "grad_norm": 7.094960651355799, "learning_rate": 4.8933911573716455e-06, "loss": 0.9332, "step": 3336 }, { "epoch": 0.24108223309913848, "grad_norm": 8.224022396161054, "learning_rate": 4.8933066343199594e-06, "loss": 0.9312, "step": 3337 }, { "epoch": 0.241154478299348, "grad_norm": 5.907091191853818, "learning_rate": 4.893222078505764e-06, "loss": 0.8527, "step": 3338 }, { "epoch": 0.2412267234995575, "grad_norm": 7.198716674451996, "learning_rate": 4.893137489930217e-06, "loss": 0.9313, "step": 3339 }, { "epoch": 0.241298968699767, "grad_norm": 6.912331505596481, "learning_rate": 4.893052868594475e-06, "loss": 0.9198, "step": 3340 }, { "epoch": 0.2413712138999765, "grad_norm": 6.805556372125624, "learning_rate": 4.892968214499699e-06, "loss": 0.843, "step": 3341 }, { "epoch": 0.24144345910018603, "grad_norm": 8.758002409292422, "learning_rate": 4.8928835276470445e-06, "loss": 0.9058, "step": 3342 }, { "epoch": 0.24151570430039554, "grad_norm": 7.756619733409696, "learning_rate": 4.892798808037673e-06, "loss": 0.9451, "step": 3343 }, { "epoch": 0.24158794950060505, "grad_norm": 7.683297349868994, "learning_rate": 4.892714055672744e-06, "loss": 0.9388, "step": 3344 }, { "epoch": 0.24166019470081457, "grad_norm": 6.738729038563265, "learning_rate": 4.8926292705534175e-06, "loss": 0.9185, "step": 3345 }, { "epoch": 0.24173243990102408, "grad_norm": 7.419513398946621, "learning_rate": 4.892544452680853e-06, "loss": 0.9173, "step": 3346 }, { "epoch": 0.2418046851012336, "grad_norm": 8.64741015168952, "learning_rate": 4.892459602056213e-06, "loss": 0.8537, "step": 3347 }, { "epoch": 0.2418769303014431, "grad_norm": 8.531579091638019, "learning_rate": 4.8923747186806595e-06, "loss": 0.951, "step": 3348 }, { "epoch": 0.2419491755016526, "grad_norm": 6.6483921717388395, "learning_rate": 4.8922898025553536e-06, "loss": 0.8829, "step": 3349 }, { "epoch": 0.2420214207018621, "grad_norm": 6.66245635914779, "learning_rate": 4.892204853681457e-06, "loss": 0.9797, "step": 3350 }, { "epoch": 0.24209366590207163, "grad_norm": 7.51575316004618, "learning_rate": 4.892119872060134e-06, "loss": 0.9532, "step": 3351 }, { "epoch": 0.24216591110228114, "grad_norm": 9.640878633844506, "learning_rate": 4.892034857692547e-06, "loss": 0.9324, "step": 3352 }, { "epoch": 0.24223815630249065, "grad_norm": 7.019941263849748, "learning_rate": 4.89194981057986e-06, "loss": 0.907, "step": 3353 }, { "epoch": 0.24231040150270017, "grad_norm": 9.510981186111062, "learning_rate": 4.891864730723237e-06, "loss": 0.9837, "step": 3354 }, { "epoch": 0.24238264670290968, "grad_norm": 6.298200974107917, "learning_rate": 4.891779618123844e-06, "loss": 0.8663, "step": 3355 }, { "epoch": 0.2424548919031192, "grad_norm": 6.24054429981544, "learning_rate": 4.891694472782844e-06, "loss": 0.9306, "step": 3356 }, { "epoch": 0.2425271371033287, "grad_norm": 7.544743649606741, "learning_rate": 4.891609294701404e-06, "loss": 0.9236, "step": 3357 }, { "epoch": 0.2425993823035382, "grad_norm": 7.670251118951922, "learning_rate": 4.8915240838806905e-06, "loss": 0.922, "step": 3358 }, { "epoch": 0.2426716275037477, "grad_norm": 9.59797143166343, "learning_rate": 4.891438840321868e-06, "loss": 1.01, "step": 3359 }, { "epoch": 0.24274387270395723, "grad_norm": 7.052121799501197, "learning_rate": 4.8913535640261055e-06, "loss": 0.8369, "step": 3360 }, { "epoch": 0.24281611790416674, "grad_norm": 7.996089456367924, "learning_rate": 4.89126825499457e-06, "loss": 0.9486, "step": 3361 }, { "epoch": 0.24288836310437625, "grad_norm": 8.26217215806662, "learning_rate": 4.891182913228428e-06, "loss": 0.9329, "step": 3362 }, { "epoch": 0.24296060830458577, "grad_norm": 8.477720345029974, "learning_rate": 4.891097538728849e-06, "loss": 0.9376, "step": 3363 }, { "epoch": 0.24303285350479528, "grad_norm": 7.120631702560234, "learning_rate": 4.891012131497e-06, "loss": 0.9309, "step": 3364 }, { "epoch": 0.2431050987050048, "grad_norm": 7.71056225142851, "learning_rate": 4.890926691534052e-06, "loss": 0.9334, "step": 3365 }, { "epoch": 0.2431773439052143, "grad_norm": 9.59449591206545, "learning_rate": 4.890841218841175e-06, "loss": 0.9725, "step": 3366 }, { "epoch": 0.2432495891054238, "grad_norm": 9.841338137689002, "learning_rate": 4.8907557134195375e-06, "loss": 0.9415, "step": 3367 }, { "epoch": 0.2433218343056333, "grad_norm": 9.592345889844161, "learning_rate": 4.89067017527031e-06, "loss": 0.9707, "step": 3368 }, { "epoch": 0.24339407950584283, "grad_norm": 6.529947105298636, "learning_rate": 4.890584604394665e-06, "loss": 0.9298, "step": 3369 }, { "epoch": 0.24346632470605234, "grad_norm": 9.732861837524451, "learning_rate": 4.890499000793772e-06, "loss": 1.0914, "step": 3370 }, { "epoch": 0.24353856990626185, "grad_norm": 8.921020680692964, "learning_rate": 4.890413364468804e-06, "loss": 0.9249, "step": 3371 }, { "epoch": 0.24361081510647137, "grad_norm": 8.930692200697198, "learning_rate": 4.890327695420934e-06, "loss": 1.0604, "step": 3372 }, { "epoch": 0.24368306030668088, "grad_norm": 7.230236080636529, "learning_rate": 4.890241993651332e-06, "loss": 0.9436, "step": 3373 }, { "epoch": 0.2437553055068904, "grad_norm": 7.7047573008302646, "learning_rate": 4.890156259161175e-06, "loss": 0.8822, "step": 3374 }, { "epoch": 0.2438275507070999, "grad_norm": 6.819634018192607, "learning_rate": 4.890070491951634e-06, "loss": 0.9545, "step": 3375 }, { "epoch": 0.2438997959073094, "grad_norm": 6.980923407829122, "learning_rate": 4.889984692023883e-06, "loss": 0.9752, "step": 3376 }, { "epoch": 0.2439720411075189, "grad_norm": 11.742200494836455, "learning_rate": 4.889898859379098e-06, "loss": 0.9701, "step": 3377 }, { "epoch": 0.24404428630772843, "grad_norm": 10.024275877715986, "learning_rate": 4.889812994018453e-06, "loss": 0.9259, "step": 3378 }, { "epoch": 0.24411653150793794, "grad_norm": 7.8813823691860305, "learning_rate": 4.8897270959431234e-06, "loss": 1.0822, "step": 3379 }, { "epoch": 0.24418877670814745, "grad_norm": 7.527874835312607, "learning_rate": 4.889641165154286e-06, "loss": 0.9331, "step": 3380 }, { "epoch": 0.24426102190835697, "grad_norm": 10.441113605957916, "learning_rate": 4.889555201653116e-06, "loss": 0.944, "step": 3381 }, { "epoch": 0.24433326710856648, "grad_norm": 10.254057662572572, "learning_rate": 4.889469205440791e-06, "loss": 0.9233, "step": 3382 }, { "epoch": 0.244405512308776, "grad_norm": 8.968373573192624, "learning_rate": 4.889383176518488e-06, "loss": 0.9174, "step": 3383 }, { "epoch": 0.2444777575089855, "grad_norm": 9.16120604658728, "learning_rate": 4.889297114887383e-06, "loss": 0.955, "step": 3384 }, { "epoch": 0.244550002709195, "grad_norm": 6.210532121204722, "learning_rate": 4.889211020548657e-06, "loss": 1.0021, "step": 3385 }, { "epoch": 0.2446222479094045, "grad_norm": 10.120206852182273, "learning_rate": 4.889124893503488e-06, "loss": 1.0188, "step": 3386 }, { "epoch": 0.24469449310961403, "grad_norm": 6.656123630237487, "learning_rate": 4.889038733753053e-06, "loss": 0.9342, "step": 3387 }, { "epoch": 0.24476673830982354, "grad_norm": 8.506173080107764, "learning_rate": 4.888952541298533e-06, "loss": 0.9642, "step": 3388 }, { "epoch": 0.24483898351003305, "grad_norm": 7.902283167170763, "learning_rate": 4.888866316141108e-06, "loss": 0.914, "step": 3389 }, { "epoch": 0.24491122871024257, "grad_norm": 7.081705362572292, "learning_rate": 4.888780058281958e-06, "loss": 0.9316, "step": 3390 }, { "epoch": 0.24498347391045208, "grad_norm": 7.558471867961127, "learning_rate": 4.8886937677222635e-06, "loss": 0.8887, "step": 3391 }, { "epoch": 0.2450557191106616, "grad_norm": 8.35705693984436, "learning_rate": 4.888607444463206e-06, "loss": 0.8785, "step": 3392 }, { "epoch": 0.2451279643108711, "grad_norm": 7.975481608327213, "learning_rate": 4.888521088505967e-06, "loss": 0.938, "step": 3393 }, { "epoch": 0.2452002095110806, "grad_norm": 7.368014373168843, "learning_rate": 4.888434699851729e-06, "loss": 0.9072, "step": 3394 }, { "epoch": 0.2452724547112901, "grad_norm": 7.515946284225512, "learning_rate": 4.888348278501674e-06, "loss": 0.9056, "step": 3395 }, { "epoch": 0.24534469991149963, "grad_norm": 8.326856563083572, "learning_rate": 4.888261824456987e-06, "loss": 0.9789, "step": 3396 }, { "epoch": 0.24541694511170914, "grad_norm": 6.577963494064713, "learning_rate": 4.888175337718849e-06, "loss": 0.992, "step": 3397 }, { "epoch": 0.24548919031191865, "grad_norm": 7.583498718917556, "learning_rate": 4.888088818288444e-06, "loss": 0.9563, "step": 3398 }, { "epoch": 0.24556143551212817, "grad_norm": 9.335896639554935, "learning_rate": 4.888002266166959e-06, "loss": 0.9746, "step": 3399 }, { "epoch": 0.24563368071233768, "grad_norm": 6.83143797229303, "learning_rate": 4.887915681355576e-06, "loss": 0.9189, "step": 3400 }, { "epoch": 0.2457059259125472, "grad_norm": 7.680059290100753, "learning_rate": 4.887829063855481e-06, "loss": 1.0007, "step": 3401 }, { "epoch": 0.24577817111275668, "grad_norm": 8.766836045324759, "learning_rate": 4.887742413667862e-06, "loss": 0.9515, "step": 3402 }, { "epoch": 0.2458504163129662, "grad_norm": 7.0554371854743225, "learning_rate": 4.887655730793902e-06, "loss": 0.9596, "step": 3403 }, { "epoch": 0.2459226615131757, "grad_norm": 6.535148953503847, "learning_rate": 4.8875690152347894e-06, "loss": 0.8811, "step": 3404 }, { "epoch": 0.24599490671338523, "grad_norm": 7.171580589085781, "learning_rate": 4.8874822669917105e-06, "loss": 1.0251, "step": 3405 }, { "epoch": 0.24606715191359474, "grad_norm": 6.456874690895416, "learning_rate": 4.887395486065853e-06, "loss": 0.8446, "step": 3406 }, { "epoch": 0.24613939711380425, "grad_norm": 7.050719301580353, "learning_rate": 4.887308672458405e-06, "loss": 0.9721, "step": 3407 }, { "epoch": 0.24621164231401377, "grad_norm": 6.623595466718833, "learning_rate": 4.887221826170556e-06, "loss": 0.8627, "step": 3408 }, { "epoch": 0.24628388751422328, "grad_norm": 8.159965049070417, "learning_rate": 4.887134947203492e-06, "loss": 0.9124, "step": 3409 }, { "epoch": 0.2463561327144328, "grad_norm": 9.534966216485076, "learning_rate": 4.8870480355584055e-06, "loss": 0.9994, "step": 3410 }, { "epoch": 0.24642837791464228, "grad_norm": 7.129853586456502, "learning_rate": 4.886961091236484e-06, "loss": 0.9212, "step": 3411 }, { "epoch": 0.2465006231148518, "grad_norm": 7.296707184064636, "learning_rate": 4.886874114238919e-06, "loss": 0.9492, "step": 3412 }, { "epoch": 0.2465728683150613, "grad_norm": 6.614559944491949, "learning_rate": 4.8867871045669e-06, "loss": 0.8448, "step": 3413 }, { "epoch": 0.24664511351527083, "grad_norm": 6.39018803900387, "learning_rate": 4.88670006222162e-06, "loss": 0.8646, "step": 3414 }, { "epoch": 0.24671735871548034, "grad_norm": 7.7855335379602035, "learning_rate": 4.886612987204268e-06, "loss": 0.9718, "step": 3415 }, { "epoch": 0.24678960391568985, "grad_norm": 7.583956710289486, "learning_rate": 4.886525879516037e-06, "loss": 0.9523, "step": 3416 }, { "epoch": 0.24686184911589937, "grad_norm": 8.289725633965565, "learning_rate": 4.886438739158121e-06, "loss": 0.9414, "step": 3417 }, { "epoch": 0.24693409431610888, "grad_norm": 6.913863598392561, "learning_rate": 4.886351566131712e-06, "loss": 0.8294, "step": 3418 }, { "epoch": 0.2470063395163184, "grad_norm": 7.46916446573257, "learning_rate": 4.886264360438001e-06, "loss": 0.9936, "step": 3419 }, { "epoch": 0.24707858471652788, "grad_norm": 6.04408818968401, "learning_rate": 4.886177122078185e-06, "loss": 0.8916, "step": 3420 }, { "epoch": 0.2471508299167374, "grad_norm": 7.5065510431377485, "learning_rate": 4.886089851053457e-06, "loss": 0.9663, "step": 3421 }, { "epoch": 0.2472230751169469, "grad_norm": 7.4482367457900995, "learning_rate": 4.886002547365011e-06, "loss": 0.9062, "step": 3422 }, { "epoch": 0.24729532031715643, "grad_norm": 6.743851298365991, "learning_rate": 4.885915211014043e-06, "loss": 0.8788, "step": 3423 }, { "epoch": 0.24736756551736594, "grad_norm": 7.905183139636387, "learning_rate": 4.885827842001749e-06, "loss": 0.9556, "step": 3424 }, { "epoch": 0.24743981071757545, "grad_norm": 6.579476913020868, "learning_rate": 4.885740440329324e-06, "loss": 0.9348, "step": 3425 }, { "epoch": 0.24751205591778497, "grad_norm": 6.621644807912775, "learning_rate": 4.885653005997964e-06, "loss": 0.9346, "step": 3426 }, { "epoch": 0.24758430111799448, "grad_norm": 8.405409987217913, "learning_rate": 4.8855655390088675e-06, "loss": 1.0217, "step": 3427 }, { "epoch": 0.247656546318204, "grad_norm": 6.432354991359174, "learning_rate": 4.88547803936323e-06, "loss": 0.8877, "step": 3428 }, { "epoch": 0.24772879151841348, "grad_norm": 6.2756106744410305, "learning_rate": 4.885390507062251e-06, "loss": 0.8532, "step": 3429 }, { "epoch": 0.247801036718623, "grad_norm": 7.518746661584228, "learning_rate": 4.885302942107129e-06, "loss": 1.0225, "step": 3430 }, { "epoch": 0.2478732819188325, "grad_norm": 7.233602725636374, "learning_rate": 4.885215344499061e-06, "loss": 0.8716, "step": 3431 }, { "epoch": 0.24794552711904203, "grad_norm": 6.848843843109922, "learning_rate": 4.8851277142392466e-06, "loss": 0.9746, "step": 3432 }, { "epoch": 0.24801777231925154, "grad_norm": 8.408009171325734, "learning_rate": 4.885040051328886e-06, "loss": 0.955, "step": 3433 }, { "epoch": 0.24809001751946105, "grad_norm": 7.4558399941462365, "learning_rate": 4.8849523557691795e-06, "loss": 0.9654, "step": 3434 }, { "epoch": 0.24816226271967057, "grad_norm": 6.705526708614425, "learning_rate": 4.884864627561326e-06, "loss": 0.8632, "step": 3435 }, { "epoch": 0.24823450791988008, "grad_norm": 6.222319072393497, "learning_rate": 4.884776866706528e-06, "loss": 0.8731, "step": 3436 }, { "epoch": 0.2483067531200896, "grad_norm": 8.223273711228707, "learning_rate": 4.884689073205986e-06, "loss": 0.8586, "step": 3437 }, { "epoch": 0.24837899832029908, "grad_norm": 7.077856601906289, "learning_rate": 4.884601247060903e-06, "loss": 0.9208, "step": 3438 }, { "epoch": 0.2484512435205086, "grad_norm": 8.726581733976118, "learning_rate": 4.88451338827248e-06, "loss": 0.9522, "step": 3439 }, { "epoch": 0.2485234887207181, "grad_norm": 7.077888939565265, "learning_rate": 4.884425496841921e-06, "loss": 0.988, "step": 3440 }, { "epoch": 0.24859573392092763, "grad_norm": 9.151367552238948, "learning_rate": 4.884337572770427e-06, "loss": 1.0242, "step": 3441 }, { "epoch": 0.24866797912113714, "grad_norm": 6.227319688119992, "learning_rate": 4.884249616059203e-06, "loss": 0.8627, "step": 3442 }, { "epoch": 0.24874022432134665, "grad_norm": 6.396893176345085, "learning_rate": 4.884161626709453e-06, "loss": 0.8649, "step": 3443 }, { "epoch": 0.24881246952155617, "grad_norm": 8.745652781006, "learning_rate": 4.884073604722383e-06, "loss": 0.9297, "step": 3444 }, { "epoch": 0.24888471472176568, "grad_norm": 6.663077850761801, "learning_rate": 4.883985550099195e-06, "loss": 0.9596, "step": 3445 }, { "epoch": 0.2489569599219752, "grad_norm": 7.201797992003591, "learning_rate": 4.883897462841096e-06, "loss": 0.9238, "step": 3446 }, { "epoch": 0.24902920512218468, "grad_norm": 6.659312228869605, "learning_rate": 4.883809342949292e-06, "loss": 0.9028, "step": 3447 }, { "epoch": 0.2491014503223942, "grad_norm": 7.406669508243614, "learning_rate": 4.883721190424989e-06, "loss": 0.892, "step": 3448 }, { "epoch": 0.2491736955226037, "grad_norm": 7.8415760203955545, "learning_rate": 4.883633005269394e-06, "loss": 0.8985, "step": 3449 }, { "epoch": 0.24924594072281323, "grad_norm": 8.213105442716756, "learning_rate": 4.883544787483713e-06, "loss": 0.8643, "step": 3450 }, { "epoch": 0.24931818592302274, "grad_norm": 5.8504596888241736, "learning_rate": 4.883456537069155e-06, "loss": 0.8769, "step": 3451 }, { "epoch": 0.24939043112323225, "grad_norm": 6.939346755007309, "learning_rate": 4.883368254026928e-06, "loss": 1.0508, "step": 3452 }, { "epoch": 0.24946267632344177, "grad_norm": 7.653200873625398, "learning_rate": 4.883279938358241e-06, "loss": 0.9325, "step": 3453 }, { "epoch": 0.24953492152365128, "grad_norm": 6.789116722039658, "learning_rate": 4.8831915900643e-06, "loss": 0.9184, "step": 3454 }, { "epoch": 0.2496071667238608, "grad_norm": 6.273734535795627, "learning_rate": 4.883103209146318e-06, "loss": 0.8881, "step": 3455 }, { "epoch": 0.24967941192407028, "grad_norm": 6.243132213008272, "learning_rate": 4.883014795605503e-06, "loss": 0.8914, "step": 3456 }, { "epoch": 0.2497516571242798, "grad_norm": 6.374114629561585, "learning_rate": 4.882926349443067e-06, "loss": 0.8343, "step": 3457 }, { "epoch": 0.2498239023244893, "grad_norm": 6.206470423353904, "learning_rate": 4.882837870660218e-06, "loss": 0.9313, "step": 3458 }, { "epoch": 0.24989614752469883, "grad_norm": 6.303956245832075, "learning_rate": 4.882749359258169e-06, "loss": 0.8798, "step": 3459 }, { "epoch": 0.24996839272490834, "grad_norm": 7.58755378515546, "learning_rate": 4.882660815238132e-06, "loss": 0.8613, "step": 3460 }, { "epoch": 0.2500406379251178, "grad_norm": 5.7157241237047804, "learning_rate": 4.882572238601317e-06, "loss": 0.9001, "step": 3461 }, { "epoch": 0.25011288312532737, "grad_norm": 8.720141228048396, "learning_rate": 4.8824836293489395e-06, "loss": 0.9527, "step": 3462 }, { "epoch": 0.25018512832553685, "grad_norm": 7.26981198661979, "learning_rate": 4.8823949874822105e-06, "loss": 0.9461, "step": 3463 }, { "epoch": 0.2502573735257464, "grad_norm": 7.297674817313924, "learning_rate": 4.882306313002344e-06, "loss": 0.9445, "step": 3464 }, { "epoch": 0.2503296187259559, "grad_norm": 7.154221692889162, "learning_rate": 4.882217605910553e-06, "loss": 1.014, "step": 3465 }, { "epoch": 0.2504018639261654, "grad_norm": 8.044597295968076, "learning_rate": 4.882128866208054e-06, "loss": 0.8515, "step": 3466 }, { "epoch": 0.2504741091263749, "grad_norm": 6.461473583412736, "learning_rate": 4.88204009389606e-06, "loss": 0.8886, "step": 3467 }, { "epoch": 0.25054635432658445, "grad_norm": 7.424916369034617, "learning_rate": 4.881951288975786e-06, "loss": 0.9515, "step": 3468 }, { "epoch": 0.25061859952679394, "grad_norm": 8.290018758165882, "learning_rate": 4.881862451448448e-06, "loss": 1.0045, "step": 3469 }, { "epoch": 0.2506908447270034, "grad_norm": 7.2560536334191115, "learning_rate": 4.881773581315263e-06, "loss": 0.9569, "step": 3470 }, { "epoch": 0.25076308992721297, "grad_norm": 8.376426916834314, "learning_rate": 4.881684678577448e-06, "loss": 0.9666, "step": 3471 }, { "epoch": 0.25083533512742245, "grad_norm": 6.5240886066344, "learning_rate": 4.881595743236218e-06, "loss": 0.9622, "step": 3472 }, { "epoch": 0.250907580327632, "grad_norm": 7.575302258998353, "learning_rate": 4.881506775292792e-06, "loss": 1.0698, "step": 3473 }, { "epoch": 0.2509798255278415, "grad_norm": 6.566626132639891, "learning_rate": 4.8814177747483865e-06, "loss": 0.9421, "step": 3474 }, { "epoch": 0.251052070728051, "grad_norm": 7.201517517386364, "learning_rate": 4.881328741604221e-06, "loss": 0.8925, "step": 3475 }, { "epoch": 0.2511243159282605, "grad_norm": 6.537192821397489, "learning_rate": 4.881239675861515e-06, "loss": 0.8786, "step": 3476 }, { "epoch": 0.25119656112847005, "grad_norm": 8.946010992106954, "learning_rate": 4.881150577521485e-06, "loss": 0.9888, "step": 3477 }, { "epoch": 0.25126880632867954, "grad_norm": 6.2953015200248625, "learning_rate": 4.881061446585354e-06, "loss": 0.9857, "step": 3478 }, { "epoch": 0.251341051528889, "grad_norm": 6.741417125835039, "learning_rate": 4.88097228305434e-06, "loss": 0.9579, "step": 3479 }, { "epoch": 0.25141329672909857, "grad_norm": 5.577879828186114, "learning_rate": 4.880883086929664e-06, "loss": 0.9148, "step": 3480 }, { "epoch": 0.25148554192930805, "grad_norm": 6.222309263316906, "learning_rate": 4.880793858212547e-06, "loss": 0.9961, "step": 3481 }, { "epoch": 0.2515577871295176, "grad_norm": 6.64448302451635, "learning_rate": 4.880704596904211e-06, "loss": 0.9446, "step": 3482 }, { "epoch": 0.2516300323297271, "grad_norm": 6.480029017065566, "learning_rate": 4.880615303005878e-06, "loss": 0.998, "step": 3483 }, { "epoch": 0.2517022775299366, "grad_norm": 6.793654106186298, "learning_rate": 4.880525976518769e-06, "loss": 0.9537, "step": 3484 }, { "epoch": 0.2517745227301461, "grad_norm": 6.882881501942002, "learning_rate": 4.880436617444108e-06, "loss": 0.9486, "step": 3485 }, { "epoch": 0.25184676793035565, "grad_norm": 6.623077401507356, "learning_rate": 4.880347225783119e-06, "loss": 0.8481, "step": 3486 }, { "epoch": 0.25191901313056514, "grad_norm": 6.896385362854808, "learning_rate": 4.880257801537023e-06, "loss": 0.9711, "step": 3487 }, { "epoch": 0.2519912583307746, "grad_norm": 6.435631341915334, "learning_rate": 4.880168344707047e-06, "loss": 0.905, "step": 3488 }, { "epoch": 0.25206350353098417, "grad_norm": 7.493689107130491, "learning_rate": 4.880078855294414e-06, "loss": 0.887, "step": 3489 }, { "epoch": 0.25213574873119365, "grad_norm": 8.323233880470013, "learning_rate": 4.87998933330035e-06, "loss": 0.974, "step": 3490 }, { "epoch": 0.2522079939314032, "grad_norm": 5.910491550613456, "learning_rate": 4.8798997787260795e-06, "loss": 0.9941, "step": 3491 }, { "epoch": 0.2522802391316127, "grad_norm": 7.235553950237457, "learning_rate": 4.879810191572829e-06, "loss": 0.9337, "step": 3492 }, { "epoch": 0.2523524843318222, "grad_norm": 6.816845549060511, "learning_rate": 4.879720571841825e-06, "loss": 0.9043, "step": 3493 }, { "epoch": 0.2524247295320317, "grad_norm": 7.38383446134453, "learning_rate": 4.879630919534294e-06, "loss": 0.8146, "step": 3494 }, { "epoch": 0.25249697473224125, "grad_norm": 7.607549742185981, "learning_rate": 4.879541234651462e-06, "loss": 0.8639, "step": 3495 }, { "epoch": 0.25256921993245074, "grad_norm": 8.412019812835855, "learning_rate": 4.87945151719456e-06, "loss": 0.9975, "step": 3496 }, { "epoch": 0.2526414651326602, "grad_norm": 7.117572593948513, "learning_rate": 4.879361767164814e-06, "loss": 0.867, "step": 3497 }, { "epoch": 0.25271371033286977, "grad_norm": 5.585396608427866, "learning_rate": 4.879271984563452e-06, "loss": 0.8911, "step": 3498 }, { "epoch": 0.25278595553307925, "grad_norm": 6.908547411992625, "learning_rate": 4.879182169391705e-06, "loss": 0.9546, "step": 3499 }, { "epoch": 0.2528582007332888, "grad_norm": 6.461022520601422, "learning_rate": 4.879092321650801e-06, "loss": 0.8859, "step": 3500 }, { "epoch": 0.2529304459334983, "grad_norm": 6.715059340180001, "learning_rate": 4.87900244134197e-06, "loss": 0.9174, "step": 3501 }, { "epoch": 0.2530026911337078, "grad_norm": 6.599348683442503, "learning_rate": 4.878912528466443e-06, "loss": 0.9168, "step": 3502 }, { "epoch": 0.2530749363339173, "grad_norm": 7.62729997810855, "learning_rate": 4.87882258302545e-06, "loss": 0.91, "step": 3503 }, { "epoch": 0.25314718153412685, "grad_norm": 6.207885760190201, "learning_rate": 4.878732605020224e-06, "loss": 0.8386, "step": 3504 }, { "epoch": 0.25321942673433634, "grad_norm": 7.2130445251656035, "learning_rate": 4.878642594451994e-06, "loss": 0.8404, "step": 3505 }, { "epoch": 0.2532916719345458, "grad_norm": 9.65150745061806, "learning_rate": 4.878552551321996e-06, "loss": 0.8866, "step": 3506 }, { "epoch": 0.25336391713475537, "grad_norm": 6.54313199423222, "learning_rate": 4.878462475631459e-06, "loss": 0.9719, "step": 3507 }, { "epoch": 0.25343616233496485, "grad_norm": 6.301877592737553, "learning_rate": 4.878372367381617e-06, "loss": 1.0084, "step": 3508 }, { "epoch": 0.2535084075351744, "grad_norm": 7.542754570771383, "learning_rate": 4.878282226573705e-06, "loss": 0.9535, "step": 3509 }, { "epoch": 0.2535806527353839, "grad_norm": 6.420536542201601, "learning_rate": 4.878192053208955e-06, "loss": 0.9328, "step": 3510 }, { "epoch": 0.2536528979355934, "grad_norm": 7.349138855603785, "learning_rate": 4.878101847288603e-06, "loss": 0.9451, "step": 3511 }, { "epoch": 0.2537251431358029, "grad_norm": 7.230274331783117, "learning_rate": 4.878011608813884e-06, "loss": 0.9435, "step": 3512 }, { "epoch": 0.25379738833601245, "grad_norm": 5.764082208580598, "learning_rate": 4.8779213377860315e-06, "loss": 0.9273, "step": 3513 }, { "epoch": 0.25386963353622194, "grad_norm": 6.474663873733681, "learning_rate": 4.877831034206282e-06, "loss": 0.885, "step": 3514 }, { "epoch": 0.2539418787364314, "grad_norm": 8.55338219565005, "learning_rate": 4.877740698075872e-06, "loss": 0.8968, "step": 3515 }, { "epoch": 0.25401412393664097, "grad_norm": 8.233926547323504, "learning_rate": 4.877650329396038e-06, "loss": 0.8953, "step": 3516 }, { "epoch": 0.25408636913685045, "grad_norm": 6.3287518979604105, "learning_rate": 4.8775599281680175e-06, "loss": 0.9403, "step": 3517 }, { "epoch": 0.25415861433706, "grad_norm": 7.51379626066785, "learning_rate": 4.877469494393048e-06, "loss": 0.9436, "step": 3518 }, { "epoch": 0.2542308595372695, "grad_norm": 7.656242277180416, "learning_rate": 4.877379028072367e-06, "loss": 0.9568, "step": 3519 }, { "epoch": 0.254303104737479, "grad_norm": 7.4672773497484615, "learning_rate": 4.877288529207212e-06, "loss": 1.0035, "step": 3520 }, { "epoch": 0.2543753499376885, "grad_norm": 7.18450653101786, "learning_rate": 4.877197997798824e-06, "loss": 1.0059, "step": 3521 }, { "epoch": 0.25444759513789805, "grad_norm": 6.218931569271465, "learning_rate": 4.87710743384844e-06, "loss": 0.921, "step": 3522 }, { "epoch": 0.25451984033810754, "grad_norm": 6.307055249706599, "learning_rate": 4.877016837357301e-06, "loss": 0.9575, "step": 3523 }, { "epoch": 0.254592085538317, "grad_norm": 6.488352389924567, "learning_rate": 4.876926208326649e-06, "loss": 0.8822, "step": 3524 }, { "epoch": 0.25466433073852657, "grad_norm": 7.183881562002188, "learning_rate": 4.876835546757721e-06, "loss": 0.9734, "step": 3525 }, { "epoch": 0.25473657593873605, "grad_norm": 6.5888723257770465, "learning_rate": 4.87674485265176e-06, "loss": 0.9232, "step": 3526 }, { "epoch": 0.2548088211389456, "grad_norm": 6.359421188599156, "learning_rate": 4.876654126010009e-06, "loss": 0.9054, "step": 3527 }, { "epoch": 0.2548810663391551, "grad_norm": 7.016412023905736, "learning_rate": 4.876563366833706e-06, "loss": 0.9017, "step": 3528 }, { "epoch": 0.2549533115393646, "grad_norm": 6.798399164595432, "learning_rate": 4.876472575124097e-06, "loss": 0.8595, "step": 3529 }, { "epoch": 0.2550255567395741, "grad_norm": 7.304072814210779, "learning_rate": 4.876381750882424e-06, "loss": 0.8878, "step": 3530 }, { "epoch": 0.25509780193978365, "grad_norm": 6.212495812274589, "learning_rate": 4.876290894109929e-06, "loss": 0.8993, "step": 3531 }, { "epoch": 0.25517004713999314, "grad_norm": 7.823363784494558, "learning_rate": 4.876200004807858e-06, "loss": 0.9318, "step": 3532 }, { "epoch": 0.2552422923402026, "grad_norm": 6.19392508404625, "learning_rate": 4.8761090829774535e-06, "loss": 0.9016, "step": 3533 }, { "epoch": 0.25531453754041217, "grad_norm": 6.953131857879343, "learning_rate": 4.8760181286199605e-06, "loss": 0.9837, "step": 3534 }, { "epoch": 0.25538678274062165, "grad_norm": 5.840229789707083, "learning_rate": 4.875927141736624e-06, "loss": 0.8503, "step": 3535 }, { "epoch": 0.2554590279408312, "grad_norm": 8.340354657213652, "learning_rate": 4.875836122328689e-06, "loss": 0.9188, "step": 3536 }, { "epoch": 0.2555312731410407, "grad_norm": 6.429161523045155, "learning_rate": 4.875745070397403e-06, "loss": 0.8831, "step": 3537 }, { "epoch": 0.2556035183412502, "grad_norm": 7.719427781714109, "learning_rate": 4.8756539859440115e-06, "loss": 1.0099, "step": 3538 }, { "epoch": 0.2556757635414597, "grad_norm": 6.437044590516888, "learning_rate": 4.875562868969762e-06, "loss": 0.8635, "step": 3539 }, { "epoch": 0.25574800874166925, "grad_norm": 6.840694215580332, "learning_rate": 4.8754717194759e-06, "loss": 0.9019, "step": 3540 }, { "epoch": 0.25582025394187874, "grad_norm": 7.236279626187005, "learning_rate": 4.875380537463677e-06, "loss": 0.9502, "step": 3541 }, { "epoch": 0.2558924991420882, "grad_norm": 7.220551237160293, "learning_rate": 4.875289322934336e-06, "loss": 0.878, "step": 3542 }, { "epoch": 0.25596474434229777, "grad_norm": 6.451903590144806, "learning_rate": 4.875198075889131e-06, "loss": 0.8947, "step": 3543 }, { "epoch": 0.25603698954250725, "grad_norm": 6.6460866212162015, "learning_rate": 4.875106796329307e-06, "loss": 0.9514, "step": 3544 }, { "epoch": 0.2561092347427168, "grad_norm": 5.836169216555289, "learning_rate": 4.8750154842561146e-06, "loss": 0.9059, "step": 3545 }, { "epoch": 0.2561814799429263, "grad_norm": 7.454507311704369, "learning_rate": 4.874924139670805e-06, "loss": 0.9591, "step": 3546 }, { "epoch": 0.2562537251431358, "grad_norm": 8.347926776734841, "learning_rate": 4.874832762574628e-06, "loss": 1.0304, "step": 3547 }, { "epoch": 0.2563259703433453, "grad_norm": 6.169933931315382, "learning_rate": 4.874741352968835e-06, "loss": 0.9694, "step": 3548 }, { "epoch": 0.25639821554355485, "grad_norm": 5.51153222409843, "learning_rate": 4.8746499108546764e-06, "loss": 0.8441, "step": 3549 }, { "epoch": 0.25647046074376434, "grad_norm": 9.169258537309515, "learning_rate": 4.8745584362334045e-06, "loss": 0.8878, "step": 3550 }, { "epoch": 0.2565427059439738, "grad_norm": 6.886814854600981, "learning_rate": 4.874466929106271e-06, "loss": 0.9546, "step": 3551 }, { "epoch": 0.25661495114418337, "grad_norm": 7.625986254335786, "learning_rate": 4.874375389474528e-06, "loss": 0.9144, "step": 3552 }, { "epoch": 0.25668719634439285, "grad_norm": 6.764379937400028, "learning_rate": 4.87428381733943e-06, "loss": 0.8869, "step": 3553 }, { "epoch": 0.2567594415446024, "grad_norm": 6.649156399330212, "learning_rate": 4.874192212702231e-06, "loss": 0.8783, "step": 3554 }, { "epoch": 0.2568316867448119, "grad_norm": 6.297106717297493, "learning_rate": 4.874100575564184e-06, "loss": 0.9766, "step": 3555 }, { "epoch": 0.2569039319450214, "grad_norm": 7.470065076919877, "learning_rate": 4.874008905926543e-06, "loss": 0.9411, "step": 3556 }, { "epoch": 0.2569761771452309, "grad_norm": 7.127868610271444, "learning_rate": 4.873917203790563e-06, "loss": 0.9437, "step": 3557 }, { "epoch": 0.2570484223454404, "grad_norm": 7.020860651191367, "learning_rate": 4.8738254691575e-06, "loss": 1.0402, "step": 3558 }, { "epoch": 0.25712066754564994, "grad_norm": 7.97651969756207, "learning_rate": 4.8737337020286095e-06, "loss": 0.8523, "step": 3559 }, { "epoch": 0.2571929127458594, "grad_norm": 7.783901023447194, "learning_rate": 4.873641902405148e-06, "loss": 0.8732, "step": 3560 }, { "epoch": 0.25726515794606897, "grad_norm": 7.00135000017505, "learning_rate": 4.873550070288371e-06, "loss": 1.0305, "step": 3561 }, { "epoch": 0.25733740314627845, "grad_norm": 7.594185522329474, "learning_rate": 4.873458205679538e-06, "loss": 0.9116, "step": 3562 }, { "epoch": 0.257409648346488, "grad_norm": 8.194282343351283, "learning_rate": 4.873366308579903e-06, "loss": 0.9613, "step": 3563 }, { "epoch": 0.2574818935466975, "grad_norm": 7.042176026952186, "learning_rate": 4.8732743789907274e-06, "loss": 0.933, "step": 3564 }, { "epoch": 0.257554138746907, "grad_norm": 7.4058405123151045, "learning_rate": 4.873182416913268e-06, "loss": 0.9094, "step": 3565 }, { "epoch": 0.2576263839471165, "grad_norm": 5.918841790491544, "learning_rate": 4.873090422348784e-06, "loss": 0.9634, "step": 3566 }, { "epoch": 0.257698629147326, "grad_norm": 7.037813505571811, "learning_rate": 4.872998395298535e-06, "loss": 0.9854, "step": 3567 }, { "epoch": 0.25777087434753554, "grad_norm": 8.121927003007814, "learning_rate": 4.87290633576378e-06, "loss": 0.9923, "step": 3568 }, { "epoch": 0.257843119547745, "grad_norm": 9.398714716272172, "learning_rate": 4.872814243745781e-06, "loss": 0.9057, "step": 3569 }, { "epoch": 0.25791536474795457, "grad_norm": 6.2307231408685055, "learning_rate": 4.872722119245795e-06, "loss": 0.8694, "step": 3570 }, { "epoch": 0.25798760994816405, "grad_norm": 6.570788281209083, "learning_rate": 4.872629962265087e-06, "loss": 0.9389, "step": 3571 }, { "epoch": 0.2580598551483736, "grad_norm": 7.466860735732666, "learning_rate": 4.872537772804917e-06, "loss": 0.8858, "step": 3572 }, { "epoch": 0.2581321003485831, "grad_norm": 6.4114016776495895, "learning_rate": 4.872445550866547e-06, "loss": 0.9396, "step": 3573 }, { "epoch": 0.2582043455487926, "grad_norm": 6.752568921694641, "learning_rate": 4.872353296451239e-06, "loss": 0.9846, "step": 3574 }, { "epoch": 0.2582765907490021, "grad_norm": 7.740037482907716, "learning_rate": 4.872261009560257e-06, "loss": 0.9119, "step": 3575 }, { "epoch": 0.2583488359492116, "grad_norm": 6.612789489524347, "learning_rate": 4.872168690194864e-06, "loss": 0.8894, "step": 3576 }, { "epoch": 0.25842108114942114, "grad_norm": 8.542578026758768, "learning_rate": 4.872076338356322e-06, "loss": 0.9837, "step": 3577 }, { "epoch": 0.2584933263496306, "grad_norm": 7.818200069027138, "learning_rate": 4.871983954045898e-06, "loss": 0.9012, "step": 3578 }, { "epoch": 0.25856557154984017, "grad_norm": 7.6017973531295135, "learning_rate": 4.871891537264855e-06, "loss": 0.9584, "step": 3579 }, { "epoch": 0.25863781675004965, "grad_norm": 5.6955131003552575, "learning_rate": 4.871799088014459e-06, "loss": 0.9561, "step": 3580 }, { "epoch": 0.2587100619502592, "grad_norm": 8.37459187438409, "learning_rate": 4.871706606295974e-06, "loss": 0.8873, "step": 3581 }, { "epoch": 0.2587823071504687, "grad_norm": 8.118031491854067, "learning_rate": 4.871614092110668e-06, "loss": 0.9088, "step": 3582 }, { "epoch": 0.2588545523506782, "grad_norm": 10.397013646294328, "learning_rate": 4.871521545459806e-06, "loss": 0.9381, "step": 3583 }, { "epoch": 0.2589267975508877, "grad_norm": 7.4611169603744125, "learning_rate": 4.871428966344656e-06, "loss": 0.9384, "step": 3584 }, { "epoch": 0.2589990427510972, "grad_norm": 7.521483867826522, "learning_rate": 4.871336354766484e-06, "loss": 0.9472, "step": 3585 }, { "epoch": 0.25907128795130674, "grad_norm": 6.090024464625436, "learning_rate": 4.871243710726559e-06, "loss": 0.8725, "step": 3586 }, { "epoch": 0.2591435331515162, "grad_norm": 6.54139527488853, "learning_rate": 4.871151034226149e-06, "loss": 0.8667, "step": 3587 }, { "epoch": 0.25921577835172577, "grad_norm": 6.996006507691661, "learning_rate": 4.8710583252665225e-06, "loss": 0.9323, "step": 3588 }, { "epoch": 0.25928802355193525, "grad_norm": 8.784249115988132, "learning_rate": 4.870965583848948e-06, "loss": 0.9002, "step": 3589 }, { "epoch": 0.2593602687521448, "grad_norm": 9.060488142122098, "learning_rate": 4.870872809974695e-06, "loss": 0.8659, "step": 3590 }, { "epoch": 0.2594325139523543, "grad_norm": 6.890019604304844, "learning_rate": 4.8707800036450355e-06, "loss": 0.9378, "step": 3591 }, { "epoch": 0.2595047591525638, "grad_norm": 7.030924200351188, "learning_rate": 4.870687164861237e-06, "loss": 0.9232, "step": 3592 }, { "epoch": 0.2595770043527733, "grad_norm": 10.593975773658872, "learning_rate": 4.870594293624573e-06, "loss": 0.9376, "step": 3593 }, { "epoch": 0.2596492495529828, "grad_norm": 7.90953339561677, "learning_rate": 4.870501389936314e-06, "loss": 1.0555, "step": 3594 }, { "epoch": 0.25972149475319234, "grad_norm": 6.99641953587844, "learning_rate": 4.8704084537977314e-06, "loss": 0.9444, "step": 3595 }, { "epoch": 0.2597937399534018, "grad_norm": 8.216409444232525, "learning_rate": 4.870315485210097e-06, "loss": 0.9127, "step": 3596 }, { "epoch": 0.25986598515361137, "grad_norm": 10.6619378177602, "learning_rate": 4.870222484174684e-06, "loss": 0.8827, "step": 3597 }, { "epoch": 0.25993823035382085, "grad_norm": 6.930212093482027, "learning_rate": 4.870129450692766e-06, "loss": 0.8839, "step": 3598 }, { "epoch": 0.2600104755540304, "grad_norm": 8.189423080451089, "learning_rate": 4.870036384765616e-06, "loss": 0.9393, "step": 3599 }, { "epoch": 0.2600827207542399, "grad_norm": 6.459669438458162, "learning_rate": 4.869943286394508e-06, "loss": 0.9727, "step": 3600 }, { "epoch": 0.2601549659544494, "grad_norm": 6.941092995534367, "learning_rate": 4.869850155580717e-06, "loss": 0.9973, "step": 3601 }, { "epoch": 0.2602272111546589, "grad_norm": 6.057870572067376, "learning_rate": 4.8697569923255175e-06, "loss": 0.9686, "step": 3602 }, { "epoch": 0.2602994563548684, "grad_norm": 7.103525828285221, "learning_rate": 4.869663796630184e-06, "loss": 0.8995, "step": 3603 }, { "epoch": 0.26037170155507794, "grad_norm": 9.549568957442489, "learning_rate": 4.869570568495994e-06, "loss": 1.0213, "step": 3604 }, { "epoch": 0.2604439467552874, "grad_norm": 7.725003446806599, "learning_rate": 4.869477307924222e-06, "loss": 0.8976, "step": 3605 }, { "epoch": 0.26051619195549697, "grad_norm": 6.410506458002808, "learning_rate": 4.869384014916145e-06, "loss": 0.9041, "step": 3606 }, { "epoch": 0.26058843715570645, "grad_norm": 6.417109222343373, "learning_rate": 4.869290689473041e-06, "loss": 0.8728, "step": 3607 }, { "epoch": 0.260660682355916, "grad_norm": 6.942231360551716, "learning_rate": 4.869197331596187e-06, "loss": 0.8941, "step": 3608 }, { "epoch": 0.2607329275561255, "grad_norm": 6.652866416912285, "learning_rate": 4.869103941286862e-06, "loss": 0.9211, "step": 3609 }, { "epoch": 0.260805172756335, "grad_norm": 6.727375402299681, "learning_rate": 4.869010518546343e-06, "loss": 0.9494, "step": 3610 }, { "epoch": 0.2608774179565445, "grad_norm": 7.453723299655819, "learning_rate": 4.868917063375909e-06, "loss": 0.9939, "step": 3611 }, { "epoch": 0.260949663156754, "grad_norm": 8.21291222314342, "learning_rate": 4.86882357577684e-06, "loss": 0.9769, "step": 3612 }, { "epoch": 0.26102190835696354, "grad_norm": 5.820057867227856, "learning_rate": 4.868730055750416e-06, "loss": 0.9605, "step": 3613 }, { "epoch": 0.261094153557173, "grad_norm": 7.1409532734477725, "learning_rate": 4.868636503297916e-06, "loss": 0.9108, "step": 3614 }, { "epoch": 0.26116639875738257, "grad_norm": 6.789009682260021, "learning_rate": 4.868542918420621e-06, "loss": 0.9324, "step": 3615 }, { "epoch": 0.26123864395759205, "grad_norm": 5.755989272584125, "learning_rate": 4.868449301119814e-06, "loss": 0.8625, "step": 3616 }, { "epoch": 0.2613108891578016, "grad_norm": 6.061973627579933, "learning_rate": 4.868355651396775e-06, "loss": 0.9142, "step": 3617 }, { "epoch": 0.2613831343580111, "grad_norm": 6.630917767139803, "learning_rate": 4.868261969252784e-06, "loss": 0.8956, "step": 3618 }, { "epoch": 0.2614553795582206, "grad_norm": 6.895399312385476, "learning_rate": 4.868168254689127e-06, "loss": 0.8935, "step": 3619 }, { "epoch": 0.2615276247584301, "grad_norm": 8.536695370380723, "learning_rate": 4.868074507707085e-06, "loss": 0.8901, "step": 3620 }, { "epoch": 0.2615998699586396, "grad_norm": 6.7177577505858785, "learning_rate": 4.8679807283079416e-06, "loss": 0.956, "step": 3621 }, { "epoch": 0.26167211515884914, "grad_norm": 7.437673871227892, "learning_rate": 4.867886916492981e-06, "loss": 0.9553, "step": 3622 }, { "epoch": 0.2617443603590586, "grad_norm": 7.175012238920719, "learning_rate": 4.867793072263487e-06, "loss": 0.8572, "step": 3623 }, { "epoch": 0.26181660555926817, "grad_norm": 6.537305734931834, "learning_rate": 4.8676991956207444e-06, "loss": 0.9141, "step": 3624 }, { "epoch": 0.26188885075947765, "grad_norm": 8.0036808129245, "learning_rate": 4.867605286566039e-06, "loss": 0.945, "step": 3625 }, { "epoch": 0.2619610959596872, "grad_norm": 6.457301231689392, "learning_rate": 4.867511345100654e-06, "loss": 1.0005, "step": 3626 }, { "epoch": 0.2620333411598967, "grad_norm": 9.14314576953623, "learning_rate": 4.867417371225877e-06, "loss": 0.9399, "step": 3627 }, { "epoch": 0.2621055863601062, "grad_norm": 7.320146003059196, "learning_rate": 4.8673233649429954e-06, "loss": 0.9458, "step": 3628 }, { "epoch": 0.2621778315603157, "grad_norm": 6.8506766326186375, "learning_rate": 4.867229326253295e-06, "loss": 0.8538, "step": 3629 }, { "epoch": 0.2622500767605252, "grad_norm": 7.39724918477869, "learning_rate": 4.867135255158062e-06, "loss": 0.882, "step": 3630 }, { "epoch": 0.26232232196073474, "grad_norm": 6.344171218793606, "learning_rate": 4.867041151658586e-06, "loss": 0.9947, "step": 3631 }, { "epoch": 0.2623945671609442, "grad_norm": 6.191980762481377, "learning_rate": 4.866947015756155e-06, "loss": 0.9206, "step": 3632 }, { "epoch": 0.26246681236115377, "grad_norm": 8.242715567354997, "learning_rate": 4.866852847452056e-06, "loss": 0.9611, "step": 3633 }, { "epoch": 0.26253905756136325, "grad_norm": 7.194810416962712, "learning_rate": 4.86675864674758e-06, "loss": 0.9555, "step": 3634 }, { "epoch": 0.2626113027615728, "grad_norm": 6.249729608408884, "learning_rate": 4.866664413644015e-06, "loss": 0.9303, "step": 3635 }, { "epoch": 0.2626835479617823, "grad_norm": 6.9006747703668045, "learning_rate": 4.866570148142654e-06, "loss": 0.9775, "step": 3636 }, { "epoch": 0.2627557931619918, "grad_norm": 6.800283134400201, "learning_rate": 4.8664758502447825e-06, "loss": 0.9244, "step": 3637 }, { "epoch": 0.2628280383622013, "grad_norm": 6.543029967064368, "learning_rate": 4.866381519951696e-06, "loss": 0.8834, "step": 3638 }, { "epoch": 0.2629002835624108, "grad_norm": 6.069927267176156, "learning_rate": 4.866287157264683e-06, "loss": 0.9611, "step": 3639 }, { "epoch": 0.26297252876262034, "grad_norm": 6.151972645057609, "learning_rate": 4.866192762185036e-06, "loss": 0.9002, "step": 3640 }, { "epoch": 0.2630447739628298, "grad_norm": 8.69666887725104, "learning_rate": 4.866098334714048e-06, "loss": 0.9205, "step": 3641 }, { "epoch": 0.26311701916303937, "grad_norm": 6.455445396605465, "learning_rate": 4.86600387485301e-06, "loss": 0.9696, "step": 3642 }, { "epoch": 0.26318926436324885, "grad_norm": 6.330729836187062, "learning_rate": 4.865909382603217e-06, "loss": 0.9144, "step": 3643 }, { "epoch": 0.2632615095634584, "grad_norm": 6.882164845646062, "learning_rate": 4.8658148579659615e-06, "loss": 0.9891, "step": 3644 }, { "epoch": 0.2633337547636679, "grad_norm": 6.238971593181631, "learning_rate": 4.865720300942537e-06, "loss": 1.0056, "step": 3645 }, { "epoch": 0.2634059999638774, "grad_norm": 6.906088986828518, "learning_rate": 4.865625711534238e-06, "loss": 1.0139, "step": 3646 }, { "epoch": 0.2634782451640869, "grad_norm": 6.566405959529291, "learning_rate": 4.8655310897423615e-06, "loss": 0.9065, "step": 3647 }, { "epoch": 0.2635504903642964, "grad_norm": 5.484020080384957, "learning_rate": 4.865436435568199e-06, "loss": 0.9129, "step": 3648 }, { "epoch": 0.26362273556450594, "grad_norm": 5.483192251104573, "learning_rate": 4.86534174901305e-06, "loss": 0.8202, "step": 3649 }, { "epoch": 0.2636949807647154, "grad_norm": 8.641609011696557, "learning_rate": 4.865247030078208e-06, "loss": 0.8627, "step": 3650 }, { "epoch": 0.26376722596492497, "grad_norm": 7.660845689030851, "learning_rate": 4.865152278764971e-06, "loss": 0.8962, "step": 3651 }, { "epoch": 0.26383947116513445, "grad_norm": 6.786438534742641, "learning_rate": 4.865057495074636e-06, "loss": 0.962, "step": 3652 }, { "epoch": 0.263911716365344, "grad_norm": 6.554126738248612, "learning_rate": 4.864962679008501e-06, "loss": 0.918, "step": 3653 }, { "epoch": 0.2639839615655535, "grad_norm": 5.789667713071554, "learning_rate": 4.864867830567861e-06, "loss": 0.8401, "step": 3654 }, { "epoch": 0.264056206765763, "grad_norm": 5.984709010750596, "learning_rate": 4.864772949754019e-06, "loss": 0.8858, "step": 3655 }, { "epoch": 0.2641284519659725, "grad_norm": 8.768434125413656, "learning_rate": 4.864678036568269e-06, "loss": 0.9294, "step": 3656 }, { "epoch": 0.264200697166182, "grad_norm": 8.4247783688335, "learning_rate": 4.864583091011914e-06, "loss": 0.9896, "step": 3657 }, { "epoch": 0.26427294236639154, "grad_norm": 5.5731132496419775, "learning_rate": 4.8644881130862535e-06, "loss": 0.839, "step": 3658 }, { "epoch": 0.264345187566601, "grad_norm": 5.751706740796935, "learning_rate": 4.864393102792585e-06, "loss": 0.8733, "step": 3659 }, { "epoch": 0.26441743276681057, "grad_norm": 7.063864247038298, "learning_rate": 4.864298060132211e-06, "loss": 0.9528, "step": 3660 }, { "epoch": 0.26448967796702005, "grad_norm": 6.235132768895299, "learning_rate": 4.864202985106433e-06, "loss": 0.9816, "step": 3661 }, { "epoch": 0.2645619231672296, "grad_norm": 7.802549082593412, "learning_rate": 4.864107877716552e-06, "loss": 0.955, "step": 3662 }, { "epoch": 0.2646341683674391, "grad_norm": 8.411226182377531, "learning_rate": 4.864012737963869e-06, "loss": 0.9243, "step": 3663 }, { "epoch": 0.2647064135676486, "grad_norm": 7.759345296003228, "learning_rate": 4.863917565849687e-06, "loss": 0.9572, "step": 3664 }, { "epoch": 0.2647786587678581, "grad_norm": 7.80660519795877, "learning_rate": 4.863822361375309e-06, "loss": 0.9651, "step": 3665 }, { "epoch": 0.2648509039680676, "grad_norm": 7.710229533894048, "learning_rate": 4.8637271245420395e-06, "loss": 1.0021, "step": 3666 }, { "epoch": 0.26492314916827714, "grad_norm": 7.656895948914923, "learning_rate": 4.863631855351179e-06, "loss": 0.8723, "step": 3667 }, { "epoch": 0.2649953943684866, "grad_norm": 7.216546585223379, "learning_rate": 4.863536553804036e-06, "loss": 0.9939, "step": 3668 }, { "epoch": 0.26506763956869617, "grad_norm": 6.922609036276758, "learning_rate": 4.863441219901911e-06, "loss": 0.9479, "step": 3669 }, { "epoch": 0.26513988476890565, "grad_norm": 6.151780728206239, "learning_rate": 4.8633458536461115e-06, "loss": 0.8946, "step": 3670 }, { "epoch": 0.2652121299691152, "grad_norm": 8.774430093978816, "learning_rate": 4.863250455037942e-06, "loss": 0.926, "step": 3671 }, { "epoch": 0.2652843751693247, "grad_norm": 8.838599980165128, "learning_rate": 4.863155024078709e-06, "loss": 1.0207, "step": 3672 }, { "epoch": 0.2653566203695342, "grad_norm": 6.7439409541829365, "learning_rate": 4.863059560769718e-06, "loss": 0.8994, "step": 3673 }, { "epoch": 0.2654288655697437, "grad_norm": 7.417405109576415, "learning_rate": 4.862964065112277e-06, "loss": 0.9985, "step": 3674 }, { "epoch": 0.2655011107699532, "grad_norm": 7.136880706803535, "learning_rate": 4.862868537107692e-06, "loss": 1.0391, "step": 3675 }, { "epoch": 0.26557335597016274, "grad_norm": 9.964382446569577, "learning_rate": 4.8627729767572725e-06, "loss": 0.9071, "step": 3676 }, { "epoch": 0.2656456011703722, "grad_norm": 7.461455673678138, "learning_rate": 4.862677384062325e-06, "loss": 0.9111, "step": 3677 }, { "epoch": 0.26571784637058177, "grad_norm": 6.0421483691881654, "learning_rate": 4.8625817590241585e-06, "loss": 0.9513, "step": 3678 }, { "epoch": 0.26579009157079125, "grad_norm": 7.310411464732434, "learning_rate": 4.862486101644081e-06, "loss": 0.8926, "step": 3679 }, { "epoch": 0.2658623367710008, "grad_norm": 8.941373399213353, "learning_rate": 4.862390411923405e-06, "loss": 0.9703, "step": 3680 }, { "epoch": 0.2659345819712103, "grad_norm": 6.923364758825021, "learning_rate": 4.862294689863438e-06, "loss": 0.9966, "step": 3681 }, { "epoch": 0.2660068271714198, "grad_norm": 7.047224691642248, "learning_rate": 4.862198935465491e-06, "loss": 0.9573, "step": 3682 }, { "epoch": 0.2660790723716293, "grad_norm": 6.501682650577338, "learning_rate": 4.862103148730874e-06, "loss": 0.9911, "step": 3683 }, { "epoch": 0.2661513175718388, "grad_norm": 6.15663571771107, "learning_rate": 4.862007329660899e-06, "loss": 0.9764, "step": 3684 }, { "epoch": 0.26622356277204834, "grad_norm": 6.942634126152633, "learning_rate": 4.861911478256878e-06, "loss": 0.8904, "step": 3685 }, { "epoch": 0.2662958079722578, "grad_norm": 7.088140075952797, "learning_rate": 4.8618155945201225e-06, "loss": 0.9096, "step": 3686 }, { "epoch": 0.26636805317246737, "grad_norm": 5.9308469741933845, "learning_rate": 4.861719678451946e-06, "loss": 0.9705, "step": 3687 }, { "epoch": 0.26644029837267685, "grad_norm": 6.726687546474891, "learning_rate": 4.861623730053661e-06, "loss": 0.9989, "step": 3688 }, { "epoch": 0.2665125435728864, "grad_norm": 5.684742017812421, "learning_rate": 4.86152774932658e-06, "loss": 0.8825, "step": 3689 }, { "epoch": 0.2665847887730959, "grad_norm": 7.917117644732653, "learning_rate": 4.8614317362720175e-06, "loss": 0.919, "step": 3690 }, { "epoch": 0.2666570339733054, "grad_norm": 8.327498364089077, "learning_rate": 4.861335690891289e-06, "loss": 0.9007, "step": 3691 }, { "epoch": 0.2667292791735149, "grad_norm": 6.563919640439131, "learning_rate": 4.8612396131857075e-06, "loss": 0.9453, "step": 3692 }, { "epoch": 0.2668015243737244, "grad_norm": 8.288439354227727, "learning_rate": 4.86114350315659e-06, "loss": 0.971, "step": 3693 }, { "epoch": 0.26687376957393394, "grad_norm": 6.053644063315294, "learning_rate": 4.861047360805251e-06, "loss": 0.903, "step": 3694 }, { "epoch": 0.2669460147741434, "grad_norm": 6.502854820699275, "learning_rate": 4.8609511861330065e-06, "loss": 0.8859, "step": 3695 }, { "epoch": 0.26701825997435297, "grad_norm": 9.107498734730903, "learning_rate": 4.860854979141173e-06, "loss": 0.9931, "step": 3696 }, { "epoch": 0.26709050517456245, "grad_norm": 6.586554628466942, "learning_rate": 4.8607587398310685e-06, "loss": 0.9529, "step": 3697 }, { "epoch": 0.267162750374772, "grad_norm": 7.454503985456675, "learning_rate": 4.86066246820401e-06, "loss": 0.9427, "step": 3698 }, { "epoch": 0.2672349955749815, "grad_norm": 6.555409115522463, "learning_rate": 4.860566164261315e-06, "loss": 0.9883, "step": 3699 }, { "epoch": 0.267307240775191, "grad_norm": 5.166976960411555, "learning_rate": 4.860469828004303e-06, "loss": 0.9314, "step": 3700 }, { "epoch": 0.2673794859754005, "grad_norm": 6.355267672180188, "learning_rate": 4.86037345943429e-06, "loss": 0.9308, "step": 3701 }, { "epoch": 0.26745173117561, "grad_norm": 7.237685432759839, "learning_rate": 4.860277058552599e-06, "loss": 0.9113, "step": 3702 }, { "epoch": 0.26752397637581954, "grad_norm": 5.7571718022519125, "learning_rate": 4.860180625360546e-06, "loss": 0.9178, "step": 3703 }, { "epoch": 0.267596221576029, "grad_norm": 7.745799249003254, "learning_rate": 4.860084159859453e-06, "loss": 0.9812, "step": 3704 }, { "epoch": 0.26766846677623857, "grad_norm": 5.845919772338579, "learning_rate": 4.859987662050641e-06, "loss": 0.8664, "step": 3705 }, { "epoch": 0.26774071197644805, "grad_norm": 7.258440826122257, "learning_rate": 4.85989113193543e-06, "loss": 0.9628, "step": 3706 }, { "epoch": 0.2678129571766576, "grad_norm": 6.749901099716723, "learning_rate": 4.859794569515143e-06, "loss": 1.1135, "step": 3707 }, { "epoch": 0.2678852023768671, "grad_norm": 6.629061911199032, "learning_rate": 4.859697974791099e-06, "loss": 0.8898, "step": 3708 }, { "epoch": 0.2679574475770766, "grad_norm": 6.3258887590052195, "learning_rate": 4.859601347764622e-06, "loss": 0.9103, "step": 3709 }, { "epoch": 0.2680296927772861, "grad_norm": 7.037051371881411, "learning_rate": 4.859504688437036e-06, "loss": 0.953, "step": 3710 }, { "epoch": 0.2681019379774956, "grad_norm": 7.617285906816262, "learning_rate": 4.859407996809661e-06, "loss": 0.9497, "step": 3711 }, { "epoch": 0.26817418317770514, "grad_norm": 6.644020845880022, "learning_rate": 4.859311272883823e-06, "loss": 0.8473, "step": 3712 }, { "epoch": 0.2682464283779146, "grad_norm": 7.422149203405131, "learning_rate": 4.859214516660846e-06, "loss": 0.9505, "step": 3713 }, { "epoch": 0.26831867357812417, "grad_norm": 6.418571420653606, "learning_rate": 4.859117728142055e-06, "loss": 0.8984, "step": 3714 }, { "epoch": 0.26839091877833365, "grad_norm": 6.273041633096851, "learning_rate": 4.859020907328773e-06, "loss": 0.9082, "step": 3715 }, { "epoch": 0.2684631639785432, "grad_norm": 11.375131459576906, "learning_rate": 4.858924054222326e-06, "loss": 0.9287, "step": 3716 }, { "epoch": 0.2685354091787527, "grad_norm": 6.63445629154521, "learning_rate": 4.858827168824042e-06, "loss": 0.9456, "step": 3717 }, { "epoch": 0.2686076543789622, "grad_norm": 8.699398537376556, "learning_rate": 4.858730251135244e-06, "loss": 0.9166, "step": 3718 }, { "epoch": 0.2686798995791717, "grad_norm": 6.794906436368636, "learning_rate": 4.858633301157261e-06, "loss": 0.9543, "step": 3719 }, { "epoch": 0.2687521447793812, "grad_norm": 7.591844610089186, "learning_rate": 4.858536318891419e-06, "loss": 0.8879, "step": 3720 }, { "epoch": 0.26882438997959074, "grad_norm": 6.663353796210776, "learning_rate": 4.858439304339046e-06, "loss": 0.9072, "step": 3721 }, { "epoch": 0.2688966351798002, "grad_norm": 7.497470937928862, "learning_rate": 4.858342257501471e-06, "loss": 0.9669, "step": 3722 }, { "epoch": 0.26896888038000977, "grad_norm": 7.341500185212994, "learning_rate": 4.85824517838002e-06, "loss": 1.0237, "step": 3723 }, { "epoch": 0.26904112558021925, "grad_norm": 6.2609111910049, "learning_rate": 4.858148066976025e-06, "loss": 0.8525, "step": 3724 }, { "epoch": 0.2691133707804288, "grad_norm": 7.257708693502125, "learning_rate": 4.858050923290814e-06, "loss": 0.8885, "step": 3725 }, { "epoch": 0.2691856159806383, "grad_norm": 6.181537259700568, "learning_rate": 4.857953747325716e-06, "loss": 0.9375, "step": 3726 }, { "epoch": 0.2692578611808478, "grad_norm": 6.322377248396755, "learning_rate": 4.857856539082062e-06, "loss": 0.8079, "step": 3727 }, { "epoch": 0.2693301063810573, "grad_norm": 6.692025053024139, "learning_rate": 4.857759298561183e-06, "loss": 0.8941, "step": 3728 }, { "epoch": 0.2694023515812668, "grad_norm": 6.642792902107005, "learning_rate": 4.85766202576441e-06, "loss": 0.8747, "step": 3729 }, { "epoch": 0.26947459678147634, "grad_norm": 9.283283871348283, "learning_rate": 4.857564720693074e-06, "loss": 0.8887, "step": 3730 }, { "epoch": 0.2695468419816858, "grad_norm": 6.441459549127579, "learning_rate": 4.857467383348509e-06, "loss": 0.9455, "step": 3731 }, { "epoch": 0.26961908718189537, "grad_norm": 7.422504342149523, "learning_rate": 4.857370013732045e-06, "loss": 0.9513, "step": 3732 }, { "epoch": 0.26969133238210485, "grad_norm": 7.202888534537155, "learning_rate": 4.857272611845015e-06, "loss": 1.0435, "step": 3733 }, { "epoch": 0.2697635775823144, "grad_norm": 8.05290325245852, "learning_rate": 4.857175177688755e-06, "loss": 0.8898, "step": 3734 }, { "epoch": 0.2698358227825239, "grad_norm": 7.294905402842849, "learning_rate": 4.857077711264596e-06, "loss": 0.8357, "step": 3735 }, { "epoch": 0.2699080679827334, "grad_norm": 8.494653254320204, "learning_rate": 4.856980212573873e-06, "loss": 0.9694, "step": 3736 }, { "epoch": 0.2699803131829429, "grad_norm": 7.451975095960168, "learning_rate": 4.856882681617922e-06, "loss": 0.9619, "step": 3737 }, { "epoch": 0.2700525583831524, "grad_norm": 7.400950664593441, "learning_rate": 4.856785118398075e-06, "loss": 0.8767, "step": 3738 }, { "epoch": 0.27012480358336194, "grad_norm": 7.545446920234362, "learning_rate": 4.856687522915672e-06, "loss": 0.8873, "step": 3739 }, { "epoch": 0.2701970487835714, "grad_norm": 7.464803634687477, "learning_rate": 4.856589895172046e-06, "loss": 0.9067, "step": 3740 }, { "epoch": 0.27026929398378097, "grad_norm": 6.87152538928127, "learning_rate": 4.856492235168533e-06, "loss": 0.9262, "step": 3741 }, { "epoch": 0.27034153918399045, "grad_norm": 5.93606646448646, "learning_rate": 4.8563945429064715e-06, "loss": 0.8624, "step": 3742 }, { "epoch": 0.2704137843842, "grad_norm": 6.610485659949682, "learning_rate": 4.8562968183872e-06, "loss": 0.8865, "step": 3743 }, { "epoch": 0.2704860295844095, "grad_norm": 10.11179272190551, "learning_rate": 4.8561990616120525e-06, "loss": 0.9726, "step": 3744 }, { "epoch": 0.270558274784619, "grad_norm": 9.579938558170419, "learning_rate": 4.85610127258237e-06, "loss": 0.9339, "step": 3745 }, { "epoch": 0.2706305199848285, "grad_norm": 7.488506665963625, "learning_rate": 4.856003451299491e-06, "loss": 0.9676, "step": 3746 }, { "epoch": 0.270702765185038, "grad_norm": 6.208503908549201, "learning_rate": 4.855905597764753e-06, "loss": 0.8631, "step": 3747 }, { "epoch": 0.27077501038524754, "grad_norm": 8.297027184191627, "learning_rate": 4.855807711979498e-06, "loss": 0.973, "step": 3748 }, { "epoch": 0.270847255585457, "grad_norm": 7.6270561572746205, "learning_rate": 4.855709793945064e-06, "loss": 0.9455, "step": 3749 }, { "epoch": 0.27091950078566657, "grad_norm": 8.398012798127873, "learning_rate": 4.855611843662792e-06, "loss": 0.9938, "step": 3750 }, { "epoch": 0.27099174598587605, "grad_norm": 7.17430721753074, "learning_rate": 4.855513861134022e-06, "loss": 0.895, "step": 3751 }, { "epoch": 0.2710639911860856, "grad_norm": 7.111018332564909, "learning_rate": 4.855415846360098e-06, "loss": 0.9865, "step": 3752 }, { "epoch": 0.2711362363862951, "grad_norm": 5.774749895765407, "learning_rate": 4.855317799342359e-06, "loss": 0.845, "step": 3753 }, { "epoch": 0.2712084815865046, "grad_norm": 6.863793412076307, "learning_rate": 4.855219720082147e-06, "loss": 0.9082, "step": 3754 }, { "epoch": 0.2712807267867141, "grad_norm": 8.109965808198963, "learning_rate": 4.855121608580807e-06, "loss": 0.9025, "step": 3755 }, { "epoch": 0.2713529719869236, "grad_norm": 8.11494645435392, "learning_rate": 4.8550234648396795e-06, "loss": 0.9945, "step": 3756 }, { "epoch": 0.27142521718713314, "grad_norm": 7.448994449625189, "learning_rate": 4.854925288860111e-06, "loss": 0.9118, "step": 3757 }, { "epoch": 0.2714974623873426, "grad_norm": 7.085837097466055, "learning_rate": 4.854827080643443e-06, "loss": 0.8763, "step": 3758 }, { "epoch": 0.27156970758755217, "grad_norm": 6.834068941049368, "learning_rate": 4.854728840191021e-06, "loss": 0.8374, "step": 3759 }, { "epoch": 0.27164195278776165, "grad_norm": 8.984575829956485, "learning_rate": 4.854630567504189e-06, "loss": 0.9267, "step": 3760 }, { "epoch": 0.2717141979879712, "grad_norm": 8.320656352930019, "learning_rate": 4.854532262584294e-06, "loss": 0.938, "step": 3761 }, { "epoch": 0.2717864431881807, "grad_norm": 7.908284645480052, "learning_rate": 4.854433925432679e-06, "loss": 1.048, "step": 3762 }, { "epoch": 0.2718586883883902, "grad_norm": 5.803484087276326, "learning_rate": 4.8543355560506924e-06, "loss": 0.8115, "step": 3763 }, { "epoch": 0.2719309335885997, "grad_norm": 5.661593952911714, "learning_rate": 4.8542371544396795e-06, "loss": 0.8758, "step": 3764 }, { "epoch": 0.2720031787888092, "grad_norm": 5.385961311980471, "learning_rate": 4.854138720600988e-06, "loss": 0.8645, "step": 3765 }, { "epoch": 0.27207542398901874, "grad_norm": 6.468036999315304, "learning_rate": 4.854040254535966e-06, "loss": 0.8779, "step": 3766 }, { "epoch": 0.2721476691892282, "grad_norm": 10.654116598784166, "learning_rate": 4.85394175624596e-06, "loss": 0.9939, "step": 3767 }, { "epoch": 0.27221991438943777, "grad_norm": 7.39916808117352, "learning_rate": 4.85384322573232e-06, "loss": 0.9572, "step": 3768 }, { "epoch": 0.27229215958964725, "grad_norm": 8.102994727850334, "learning_rate": 4.853744662996393e-06, "loss": 0.8966, "step": 3769 }, { "epoch": 0.2723644047898568, "grad_norm": 6.973019510845557, "learning_rate": 4.85364606803953e-06, "loss": 0.9677, "step": 3770 }, { "epoch": 0.2724366499900663, "grad_norm": 9.869642518593276, "learning_rate": 4.853547440863079e-06, "loss": 0.9118, "step": 3771 }, { "epoch": 0.27250889519027577, "grad_norm": 8.894771849100943, "learning_rate": 4.85344878146839e-06, "loss": 0.9586, "step": 3772 }, { "epoch": 0.2725811403904853, "grad_norm": 10.783861634541204, "learning_rate": 4.853350089856817e-06, "loss": 0.9409, "step": 3773 }, { "epoch": 0.2726533855906948, "grad_norm": 5.572047579965415, "learning_rate": 4.853251366029707e-06, "loss": 0.8593, "step": 3774 }, { "epoch": 0.27272563079090434, "grad_norm": 5.74637854600991, "learning_rate": 4.853152609988413e-06, "loss": 0.8472, "step": 3775 }, { "epoch": 0.2727978759911138, "grad_norm": 8.062916826222507, "learning_rate": 4.853053821734287e-06, "loss": 0.8866, "step": 3776 }, { "epoch": 0.27287012119132337, "grad_norm": 9.903281744898893, "learning_rate": 4.852955001268681e-06, "loss": 0.9421, "step": 3777 }, { "epoch": 0.27294236639153285, "grad_norm": 6.37858540556784, "learning_rate": 4.852856148592948e-06, "loss": 0.942, "step": 3778 }, { "epoch": 0.2730146115917424, "grad_norm": 8.815106378477074, "learning_rate": 4.852757263708442e-06, "loss": 0.9444, "step": 3779 }, { "epoch": 0.2730868567919519, "grad_norm": 6.56395131366348, "learning_rate": 4.852658346616515e-06, "loss": 0.8775, "step": 3780 }, { "epoch": 0.27315910199216137, "grad_norm": 6.807337493469822, "learning_rate": 4.852559397318522e-06, "loss": 0.9536, "step": 3781 }, { "epoch": 0.2732313471923709, "grad_norm": 9.494181005301394, "learning_rate": 4.852460415815818e-06, "loss": 0.9732, "step": 3782 }, { "epoch": 0.2733035923925804, "grad_norm": 7.793431563246441, "learning_rate": 4.852361402109757e-06, "loss": 0.9377, "step": 3783 }, { "epoch": 0.27337583759278994, "grad_norm": 7.449091493652803, "learning_rate": 4.852262356201695e-06, "loss": 0.8968, "step": 3784 }, { "epoch": 0.2734480827929994, "grad_norm": 8.00227943371474, "learning_rate": 4.852163278092988e-06, "loss": 0.9955, "step": 3785 }, { "epoch": 0.27352032799320897, "grad_norm": 6.686282323337315, "learning_rate": 4.852064167784992e-06, "loss": 0.8666, "step": 3786 }, { "epoch": 0.27359257319341845, "grad_norm": 6.511333854664076, "learning_rate": 4.851965025279064e-06, "loss": 0.8784, "step": 3787 }, { "epoch": 0.273664818393628, "grad_norm": 7.4157627997955835, "learning_rate": 4.851865850576561e-06, "loss": 0.973, "step": 3788 }, { "epoch": 0.2737370635938375, "grad_norm": 7.437385365860926, "learning_rate": 4.85176664367884e-06, "loss": 0.9201, "step": 3789 }, { "epoch": 0.27380930879404697, "grad_norm": 7.01006111804042, "learning_rate": 4.851667404587259e-06, "loss": 0.8216, "step": 3790 }, { "epoch": 0.2738815539942565, "grad_norm": 7.179019070829659, "learning_rate": 4.851568133303179e-06, "loss": 0.9103, "step": 3791 }, { "epoch": 0.273953799194466, "grad_norm": 7.658769714519367, "learning_rate": 4.8514688298279564e-06, "loss": 0.9526, "step": 3792 }, { "epoch": 0.27402604439467554, "grad_norm": 7.125668143014731, "learning_rate": 4.851369494162952e-06, "loss": 0.9116, "step": 3793 }, { "epoch": 0.274098289594885, "grad_norm": 6.749317275947712, "learning_rate": 4.8512701263095235e-06, "loss": 0.9065, "step": 3794 }, { "epoch": 0.27417053479509457, "grad_norm": 8.06478589203551, "learning_rate": 4.851170726269033e-06, "loss": 0.8962, "step": 3795 }, { "epoch": 0.27424277999530405, "grad_norm": 7.574635502572132, "learning_rate": 4.8510712940428415e-06, "loss": 0.9079, "step": 3796 }, { "epoch": 0.2743150251955136, "grad_norm": 6.6569024424407655, "learning_rate": 4.850971829632309e-06, "loss": 0.882, "step": 3797 }, { "epoch": 0.2743872703957231, "grad_norm": 7.5958480134516275, "learning_rate": 4.850872333038797e-06, "loss": 0.8896, "step": 3798 }, { "epoch": 0.27445951559593257, "grad_norm": 7.2825858401089025, "learning_rate": 4.850772804263669e-06, "loss": 0.8768, "step": 3799 }, { "epoch": 0.2745317607961421, "grad_norm": 6.711635211384284, "learning_rate": 4.850673243308287e-06, "loss": 0.9992, "step": 3800 }, { "epoch": 0.2746040059963516, "grad_norm": 6.099957712996197, "learning_rate": 4.8505736501740124e-06, "loss": 0.9036, "step": 3801 }, { "epoch": 0.27467625119656114, "grad_norm": 7.272515423897038, "learning_rate": 4.85047402486221e-06, "loss": 0.9075, "step": 3802 }, { "epoch": 0.2747484963967706, "grad_norm": 7.260650441270553, "learning_rate": 4.850374367374243e-06, "loss": 0.8421, "step": 3803 }, { "epoch": 0.27482074159698017, "grad_norm": 7.605156012846569, "learning_rate": 4.850274677711476e-06, "loss": 1.0071, "step": 3804 }, { "epoch": 0.27489298679718965, "grad_norm": 6.168595845654929, "learning_rate": 4.850174955875274e-06, "loss": 0.9223, "step": 3805 }, { "epoch": 0.2749652319973992, "grad_norm": 6.319438180699114, "learning_rate": 4.850075201867001e-06, "loss": 0.9271, "step": 3806 }, { "epoch": 0.2750374771976087, "grad_norm": 7.820741746461377, "learning_rate": 4.849975415688024e-06, "loss": 1.0551, "step": 3807 }, { "epoch": 0.27510972239781817, "grad_norm": 8.861054976022096, "learning_rate": 4.849875597339708e-06, "loss": 1.0196, "step": 3808 }, { "epoch": 0.2751819675980277, "grad_norm": 6.823040848007157, "learning_rate": 4.849775746823419e-06, "loss": 1.0459, "step": 3809 }, { "epoch": 0.2752542127982372, "grad_norm": 7.1524584191722695, "learning_rate": 4.849675864140525e-06, "loss": 0.9335, "step": 3810 }, { "epoch": 0.27532645799844674, "grad_norm": 7.503393041485283, "learning_rate": 4.849575949292392e-06, "loss": 0.8488, "step": 3811 }, { "epoch": 0.2753987031986562, "grad_norm": 7.243332789784303, "learning_rate": 4.84947600228039e-06, "loss": 0.9365, "step": 3812 }, { "epoch": 0.27547094839886577, "grad_norm": 6.29924689741634, "learning_rate": 4.849376023105885e-06, "loss": 0.9296, "step": 3813 }, { "epoch": 0.27554319359907525, "grad_norm": 5.846698853343607, "learning_rate": 4.849276011770247e-06, "loss": 0.9548, "step": 3814 }, { "epoch": 0.2756154387992848, "grad_norm": 7.1651414830360345, "learning_rate": 4.849175968274843e-06, "loss": 0.9619, "step": 3815 }, { "epoch": 0.2756876839994943, "grad_norm": 6.789235559503137, "learning_rate": 4.8490758926210455e-06, "loss": 1.0991, "step": 3816 }, { "epoch": 0.27575992919970377, "grad_norm": 6.349555004477307, "learning_rate": 4.848975784810222e-06, "loss": 0.9523, "step": 3817 }, { "epoch": 0.2758321743999133, "grad_norm": 8.244691007525407, "learning_rate": 4.848875644843744e-06, "loss": 0.9373, "step": 3818 }, { "epoch": 0.2759044196001228, "grad_norm": 7.216369764932263, "learning_rate": 4.848775472722983e-06, "loss": 0.9854, "step": 3819 }, { "epoch": 0.27597666480033234, "grad_norm": 7.459526208144858, "learning_rate": 4.848675268449309e-06, "loss": 0.9155, "step": 3820 }, { "epoch": 0.2760489100005418, "grad_norm": 21.236608257286225, "learning_rate": 4.8485750320240935e-06, "loss": 0.8735, "step": 3821 }, { "epoch": 0.27612115520075137, "grad_norm": 6.325321885916101, "learning_rate": 4.848474763448711e-06, "loss": 0.9107, "step": 3822 }, { "epoch": 0.27619340040096085, "grad_norm": 6.533330120682737, "learning_rate": 4.848374462724531e-06, "loss": 0.929, "step": 3823 }, { "epoch": 0.2762656456011704, "grad_norm": 7.529818703544724, "learning_rate": 4.848274129852929e-06, "loss": 0.8528, "step": 3824 }, { "epoch": 0.2763378908013799, "grad_norm": 7.095456359251667, "learning_rate": 4.848173764835277e-06, "loss": 0.9204, "step": 3825 }, { "epoch": 0.27641013600158937, "grad_norm": 7.7383238913041446, "learning_rate": 4.848073367672949e-06, "loss": 0.9061, "step": 3826 }, { "epoch": 0.2764823812017989, "grad_norm": 6.678452976653872, "learning_rate": 4.84797293836732e-06, "loss": 0.93, "step": 3827 }, { "epoch": 0.2765546264020084, "grad_norm": 6.22212901878185, "learning_rate": 4.847872476919765e-06, "loss": 0.8809, "step": 3828 }, { "epoch": 0.27662687160221794, "grad_norm": 7.772081726539787, "learning_rate": 4.847771983331658e-06, "loss": 0.848, "step": 3829 }, { "epoch": 0.2766991168024274, "grad_norm": 7.521916220805208, "learning_rate": 4.847671457604376e-06, "loss": 0.8628, "step": 3830 }, { "epoch": 0.27677136200263697, "grad_norm": 6.288243893249238, "learning_rate": 4.847570899739294e-06, "loss": 0.8848, "step": 3831 }, { "epoch": 0.27684360720284645, "grad_norm": 6.27892173014298, "learning_rate": 4.847470309737791e-06, "loss": 1.0144, "step": 3832 }, { "epoch": 0.276915852403056, "grad_norm": 7.33897393936414, "learning_rate": 4.84736968760124e-06, "loss": 0.9656, "step": 3833 }, { "epoch": 0.2769880976032655, "grad_norm": 5.702357075868112, "learning_rate": 4.847269033331021e-06, "loss": 0.8992, "step": 3834 }, { "epoch": 0.27706034280347497, "grad_norm": 7.633870952391941, "learning_rate": 4.8471683469285125e-06, "loss": 0.9166, "step": 3835 }, { "epoch": 0.2771325880036845, "grad_norm": 6.785399964588923, "learning_rate": 4.847067628395091e-06, "loss": 0.9929, "step": 3836 }, { "epoch": 0.277204833203894, "grad_norm": 6.9888612134446495, "learning_rate": 4.846966877732137e-06, "loss": 0.9059, "step": 3837 }, { "epoch": 0.27727707840410354, "grad_norm": 6.351775105989688, "learning_rate": 4.8468660949410275e-06, "loss": 0.9336, "step": 3838 }, { "epoch": 0.277349323604313, "grad_norm": 6.197839286598981, "learning_rate": 4.846765280023144e-06, "loss": 0.9643, "step": 3839 }, { "epoch": 0.27742156880452257, "grad_norm": 7.131180876414415, "learning_rate": 4.846664432979867e-06, "loss": 0.9979, "step": 3840 }, { "epoch": 0.27749381400473205, "grad_norm": 10.173962544406209, "learning_rate": 4.846563553812574e-06, "loss": 0.9121, "step": 3841 }, { "epoch": 0.2775660592049416, "grad_norm": 6.6493287973907975, "learning_rate": 4.846462642522649e-06, "loss": 0.9136, "step": 3842 }, { "epoch": 0.2776383044051511, "grad_norm": 6.344269228577688, "learning_rate": 4.846361699111471e-06, "loss": 0.8863, "step": 3843 }, { "epoch": 0.27771054960536057, "grad_norm": 6.944671772944748, "learning_rate": 4.846260723580425e-06, "loss": 0.8868, "step": 3844 }, { "epoch": 0.2777827948055701, "grad_norm": 5.90815437738154, "learning_rate": 4.84615971593089e-06, "loss": 0.9266, "step": 3845 }, { "epoch": 0.2778550400057796, "grad_norm": 6.153050554637893, "learning_rate": 4.846058676164251e-06, "loss": 0.9665, "step": 3846 }, { "epoch": 0.27792728520598914, "grad_norm": 6.111403340760372, "learning_rate": 4.845957604281891e-06, "loss": 0.8377, "step": 3847 }, { "epoch": 0.2779995304061986, "grad_norm": 5.442183658796735, "learning_rate": 4.845856500285192e-06, "loss": 0.8627, "step": 3848 }, { "epoch": 0.27807177560640817, "grad_norm": 7.309738289757445, "learning_rate": 4.845755364175539e-06, "loss": 1.0454, "step": 3849 }, { "epoch": 0.27814402080661765, "grad_norm": 6.034856636980389, "learning_rate": 4.8456541959543165e-06, "loss": 0.8905, "step": 3850 }, { "epoch": 0.2782162660068272, "grad_norm": 6.666992688154552, "learning_rate": 4.845552995622909e-06, "loss": 0.9143, "step": 3851 }, { "epoch": 0.2782885112070367, "grad_norm": 6.335671077070382, "learning_rate": 4.8454517631827025e-06, "loss": 0.9292, "step": 3852 }, { "epoch": 0.27836075640724617, "grad_norm": 6.376434763101238, "learning_rate": 4.845350498635083e-06, "loss": 0.9752, "step": 3853 }, { "epoch": 0.2784330016074557, "grad_norm": 8.250012946841167, "learning_rate": 4.845249201981436e-06, "loss": 0.9456, "step": 3854 }, { "epoch": 0.2785052468076652, "grad_norm": 7.15073632136888, "learning_rate": 4.845147873223147e-06, "loss": 0.9182, "step": 3855 }, { "epoch": 0.27857749200787474, "grad_norm": 6.757818991597316, "learning_rate": 4.845046512361606e-06, "loss": 0.9025, "step": 3856 }, { "epoch": 0.2786497372080842, "grad_norm": 5.92486480687419, "learning_rate": 4.8449451193981985e-06, "loss": 0.888, "step": 3857 }, { "epoch": 0.27872198240829377, "grad_norm": 6.500655508000051, "learning_rate": 4.844843694334314e-06, "loss": 0.9314, "step": 3858 }, { "epoch": 0.27879422760850325, "grad_norm": 6.716346204257879, "learning_rate": 4.84474223717134e-06, "loss": 0.9315, "step": 3859 }, { "epoch": 0.2788664728087128, "grad_norm": 7.248808401115592, "learning_rate": 4.844640747910664e-06, "loss": 0.9088, "step": 3860 }, { "epoch": 0.2789387180089223, "grad_norm": 6.9338206939091815, "learning_rate": 4.844539226553677e-06, "loss": 0.9232, "step": 3861 }, { "epoch": 0.27901096320913177, "grad_norm": 7.259400160208497, "learning_rate": 4.844437673101769e-06, "loss": 0.8401, "step": 3862 }, { "epoch": 0.2790832084093413, "grad_norm": 7.469362880188477, "learning_rate": 4.844336087556329e-06, "loss": 0.9084, "step": 3863 }, { "epoch": 0.2791554536095508, "grad_norm": 6.0204383518144295, "learning_rate": 4.844234469918748e-06, "loss": 0.8566, "step": 3864 }, { "epoch": 0.27922769880976034, "grad_norm": 6.194581266449767, "learning_rate": 4.844132820190418e-06, "loss": 0.8317, "step": 3865 }, { "epoch": 0.2792999440099698, "grad_norm": 7.641839615401486, "learning_rate": 4.84403113837273e-06, "loss": 0.8624, "step": 3866 }, { "epoch": 0.27937218921017937, "grad_norm": 6.930813978427386, "learning_rate": 4.843929424467075e-06, "loss": 0.9179, "step": 3867 }, { "epoch": 0.27944443441038885, "grad_norm": 6.913338316152623, "learning_rate": 4.843827678474846e-06, "loss": 0.9896, "step": 3868 }, { "epoch": 0.2795166796105984, "grad_norm": 5.866564274385651, "learning_rate": 4.8437259003974366e-06, "loss": 0.9855, "step": 3869 }, { "epoch": 0.2795889248108079, "grad_norm": 7.224714977093846, "learning_rate": 4.843624090236239e-06, "loss": 1.015, "step": 3870 }, { "epoch": 0.27966117001101737, "grad_norm": 6.260358237841768, "learning_rate": 4.8435222479926474e-06, "loss": 0.8875, "step": 3871 }, { "epoch": 0.2797334152112269, "grad_norm": 7.946471903332272, "learning_rate": 4.843420373668056e-06, "loss": 1.0021, "step": 3872 }, { "epoch": 0.2798056604114364, "grad_norm": 10.18191206566764, "learning_rate": 4.843318467263859e-06, "loss": 0.9636, "step": 3873 }, { "epoch": 0.27987790561164594, "grad_norm": 8.41183206930613, "learning_rate": 4.843216528781452e-06, "loss": 0.9618, "step": 3874 }, { "epoch": 0.2799501508118554, "grad_norm": 7.626094051730535, "learning_rate": 4.84311455822223e-06, "loss": 0.9882, "step": 3875 }, { "epoch": 0.28002239601206497, "grad_norm": 10.874273166264091, "learning_rate": 4.843012555587588e-06, "loss": 0.8766, "step": 3876 }, { "epoch": 0.28009464121227445, "grad_norm": 6.96393203116722, "learning_rate": 4.842910520878925e-06, "loss": 0.9655, "step": 3877 }, { "epoch": 0.280166886412484, "grad_norm": 6.579273694427934, "learning_rate": 4.842808454097635e-06, "loss": 0.9288, "step": 3878 }, { "epoch": 0.2802391316126935, "grad_norm": 6.515148703458259, "learning_rate": 4.842706355245117e-06, "loss": 0.9023, "step": 3879 }, { "epoch": 0.28031137681290297, "grad_norm": 8.112758393720524, "learning_rate": 4.842604224322768e-06, "loss": 0.9429, "step": 3880 }, { "epoch": 0.2803836220131125, "grad_norm": 6.376322889511524, "learning_rate": 4.842502061331986e-06, "loss": 0.8521, "step": 3881 }, { "epoch": 0.280455867213322, "grad_norm": 6.213650707003297, "learning_rate": 4.842399866274169e-06, "loss": 0.8794, "step": 3882 }, { "epoch": 0.28052811241353154, "grad_norm": 6.876904033213729, "learning_rate": 4.8422976391507175e-06, "loss": 0.8596, "step": 3883 }, { "epoch": 0.280600357613741, "grad_norm": 6.474289738056576, "learning_rate": 4.842195379963029e-06, "loss": 0.9859, "step": 3884 }, { "epoch": 0.28067260281395057, "grad_norm": 6.835783759431898, "learning_rate": 4.842093088712505e-06, "loss": 0.8786, "step": 3885 }, { "epoch": 0.28074484801416005, "grad_norm": 6.826530916433862, "learning_rate": 4.841990765400545e-06, "loss": 0.8285, "step": 3886 }, { "epoch": 0.2808170932143696, "grad_norm": 6.273259028536231, "learning_rate": 4.84188841002855e-06, "loss": 0.9921, "step": 3887 }, { "epoch": 0.2808893384145791, "grad_norm": 7.558601067906665, "learning_rate": 4.841786022597921e-06, "loss": 0.9485, "step": 3888 }, { "epoch": 0.28096158361478857, "grad_norm": 7.090776668992703, "learning_rate": 4.841683603110059e-06, "loss": 0.9286, "step": 3889 }, { "epoch": 0.2810338288149981, "grad_norm": 8.346689228031432, "learning_rate": 4.841581151566367e-06, "loss": 0.9544, "step": 3890 }, { "epoch": 0.2811060740152076, "grad_norm": 7.058873688762694, "learning_rate": 4.8414786679682475e-06, "loss": 1.0053, "step": 3891 }, { "epoch": 0.28117831921541714, "grad_norm": 6.4627873310724, "learning_rate": 4.8413761523171035e-06, "loss": 0.8743, "step": 3892 }, { "epoch": 0.2812505644156266, "grad_norm": 8.448912185983758, "learning_rate": 4.841273604614337e-06, "loss": 0.9126, "step": 3893 }, { "epoch": 0.28132280961583617, "grad_norm": 7.885388500679001, "learning_rate": 4.841171024861353e-06, "loss": 0.9243, "step": 3894 }, { "epoch": 0.28139505481604565, "grad_norm": 5.856167090611748, "learning_rate": 4.8410684130595555e-06, "loss": 0.8908, "step": 3895 }, { "epoch": 0.2814673000162552, "grad_norm": 7.2982226154541605, "learning_rate": 4.840965769210349e-06, "loss": 0.9774, "step": 3896 }, { "epoch": 0.2815395452164647, "grad_norm": 6.325744331801465, "learning_rate": 4.840863093315139e-06, "loss": 0.9247, "step": 3897 }, { "epoch": 0.28161179041667417, "grad_norm": 6.41638483526729, "learning_rate": 4.8407603853753305e-06, "loss": 0.9083, "step": 3898 }, { "epoch": 0.2816840356168837, "grad_norm": 6.701558390193861, "learning_rate": 4.84065764539233e-06, "loss": 0.9781, "step": 3899 }, { "epoch": 0.2817562808170932, "grad_norm": 7.68270327397507, "learning_rate": 4.8405548733675445e-06, "loss": 0.9103, "step": 3900 }, { "epoch": 0.28182852601730274, "grad_norm": 7.349585239878655, "learning_rate": 4.840452069302379e-06, "loss": 0.9792, "step": 3901 }, { "epoch": 0.2819007712175122, "grad_norm": 6.751354222968466, "learning_rate": 4.840349233198242e-06, "loss": 0.8866, "step": 3902 }, { "epoch": 0.28197301641772177, "grad_norm": 9.36082831866515, "learning_rate": 4.840246365056542e-06, "loss": 1.019, "step": 3903 }, { "epoch": 0.28204526161793125, "grad_norm": 5.612011473194665, "learning_rate": 4.840143464878686e-06, "loss": 0.935, "step": 3904 }, { "epoch": 0.2821175068181408, "grad_norm": 6.47386431657732, "learning_rate": 4.8400405326660825e-06, "loss": 0.9679, "step": 3905 }, { "epoch": 0.2821897520183503, "grad_norm": 7.388655353702121, "learning_rate": 4.839937568420141e-06, "loss": 0.92, "step": 3906 }, { "epoch": 0.28226199721855977, "grad_norm": 6.150875940978536, "learning_rate": 4.839834572142272e-06, "loss": 0.8136, "step": 3907 }, { "epoch": 0.2823342424187693, "grad_norm": 7.332927894944227, "learning_rate": 4.839731543833883e-06, "loss": 0.9043, "step": 3908 }, { "epoch": 0.2824064876189788, "grad_norm": 7.597940426883377, "learning_rate": 4.839628483496388e-06, "loss": 0.9552, "step": 3909 }, { "epoch": 0.28247873281918834, "grad_norm": 8.051418051598214, "learning_rate": 4.839525391131194e-06, "loss": 0.8599, "step": 3910 }, { "epoch": 0.2825509780193978, "grad_norm": 6.749203387690151, "learning_rate": 4.839422266739714e-06, "loss": 0.8835, "step": 3911 }, { "epoch": 0.28262322321960737, "grad_norm": 6.09410336765901, "learning_rate": 4.83931911032336e-06, "loss": 0.9571, "step": 3912 }, { "epoch": 0.28269546841981685, "grad_norm": 7.395170656702119, "learning_rate": 4.839215921883543e-06, "loss": 0.9421, "step": 3913 }, { "epoch": 0.2827677136200264, "grad_norm": 6.245079850479671, "learning_rate": 4.839112701421678e-06, "loss": 0.8654, "step": 3914 }, { "epoch": 0.2828399588202359, "grad_norm": 8.058861673018328, "learning_rate": 4.839009448939175e-06, "loss": 0.9328, "step": 3915 }, { "epoch": 0.28291220402044537, "grad_norm": 6.439444535272554, "learning_rate": 4.838906164437449e-06, "loss": 0.8851, "step": 3916 }, { "epoch": 0.2829844492206549, "grad_norm": 6.112646295399891, "learning_rate": 4.8388028479179135e-06, "loss": 0.881, "step": 3917 }, { "epoch": 0.2830566944208644, "grad_norm": 9.485420282571127, "learning_rate": 4.838699499381983e-06, "loss": 0.95, "step": 3918 }, { "epoch": 0.28312893962107394, "grad_norm": 7.206997374581634, "learning_rate": 4.8385961188310726e-06, "loss": 0.8776, "step": 3919 }, { "epoch": 0.2832011848212834, "grad_norm": 6.911557466839992, "learning_rate": 4.838492706266597e-06, "loss": 0.9371, "step": 3920 }, { "epoch": 0.28327343002149297, "grad_norm": 8.260609134261406, "learning_rate": 4.838389261689972e-06, "loss": 0.9814, "step": 3921 }, { "epoch": 0.28334567522170245, "grad_norm": 8.878831063391514, "learning_rate": 4.838285785102613e-06, "loss": 0.9985, "step": 3922 }, { "epoch": 0.283417920421912, "grad_norm": 6.122343148854385, "learning_rate": 4.838182276505938e-06, "loss": 0.8884, "step": 3923 }, { "epoch": 0.2834901656221215, "grad_norm": 9.28875079967468, "learning_rate": 4.8380787359013624e-06, "loss": 0.9448, "step": 3924 }, { "epoch": 0.28356241082233097, "grad_norm": 6.523735140103609, "learning_rate": 4.837975163290305e-06, "loss": 0.9652, "step": 3925 }, { "epoch": 0.2836346560225405, "grad_norm": 6.373371290202949, "learning_rate": 4.837871558674183e-06, "loss": 0.9431, "step": 3926 }, { "epoch": 0.28370690122275, "grad_norm": 8.335585124639852, "learning_rate": 4.837767922054414e-06, "loss": 1.0193, "step": 3927 }, { "epoch": 0.28377914642295954, "grad_norm": 7.785785624725963, "learning_rate": 4.837664253432418e-06, "loss": 0.949, "step": 3928 }, { "epoch": 0.283851391623169, "grad_norm": 7.011014175080778, "learning_rate": 4.837560552809613e-06, "loss": 0.8532, "step": 3929 }, { "epoch": 0.28392363682337857, "grad_norm": 6.984182197247969, "learning_rate": 4.837456820187419e-06, "loss": 0.9409, "step": 3930 }, { "epoch": 0.28399588202358805, "grad_norm": 7.752872303938821, "learning_rate": 4.837353055567256e-06, "loss": 0.812, "step": 3931 }, { "epoch": 0.2840681272237976, "grad_norm": 8.77832112729791, "learning_rate": 4.837249258950545e-06, "loss": 0.9598, "step": 3932 }, { "epoch": 0.2841403724240071, "grad_norm": 7.160393593907931, "learning_rate": 4.837145430338705e-06, "loss": 0.8653, "step": 3933 }, { "epoch": 0.28421261762421657, "grad_norm": 6.034070240070907, "learning_rate": 4.837041569733161e-06, "loss": 0.9029, "step": 3934 }, { "epoch": 0.2842848628244261, "grad_norm": 9.655043838377747, "learning_rate": 4.836937677135331e-06, "loss": 0.8711, "step": 3935 }, { "epoch": 0.2843571080246356, "grad_norm": 7.951606050845439, "learning_rate": 4.836833752546638e-06, "loss": 0.897, "step": 3936 }, { "epoch": 0.28442935322484514, "grad_norm": 6.544970251238703, "learning_rate": 4.836729795968506e-06, "loss": 0.9034, "step": 3937 }, { "epoch": 0.2845015984250546, "grad_norm": 6.214465328182751, "learning_rate": 4.836625807402359e-06, "loss": 0.8897, "step": 3938 }, { "epoch": 0.28457384362526417, "grad_norm": 7.236911930313189, "learning_rate": 4.8365217868496175e-06, "loss": 0.9798, "step": 3939 }, { "epoch": 0.28464608882547365, "grad_norm": 8.682444466793054, "learning_rate": 4.8364177343117066e-06, "loss": 0.9832, "step": 3940 }, { "epoch": 0.2847183340256832, "grad_norm": 6.6798482596700355, "learning_rate": 4.836313649790052e-06, "loss": 1.025, "step": 3941 }, { "epoch": 0.2847905792258927, "grad_norm": 7.863686078827021, "learning_rate": 4.836209533286077e-06, "loss": 1.0105, "step": 3942 }, { "epoch": 0.28486282442610217, "grad_norm": 6.677676392242418, "learning_rate": 4.836105384801208e-06, "loss": 0.9376, "step": 3943 }, { "epoch": 0.2849350696263117, "grad_norm": 6.2997934095630335, "learning_rate": 4.83600120433687e-06, "loss": 0.9118, "step": 3944 }, { "epoch": 0.2850073148265212, "grad_norm": 6.06139371550917, "learning_rate": 4.835896991894488e-06, "loss": 0.8706, "step": 3945 }, { "epoch": 0.28507956002673074, "grad_norm": 6.454128676865612, "learning_rate": 4.835792747475492e-06, "loss": 0.8921, "step": 3946 }, { "epoch": 0.2851518052269402, "grad_norm": 7.233202713638132, "learning_rate": 4.835688471081305e-06, "loss": 0.8455, "step": 3947 }, { "epoch": 0.28522405042714977, "grad_norm": 7.514986197432928, "learning_rate": 4.835584162713358e-06, "loss": 0.9537, "step": 3948 }, { "epoch": 0.28529629562735925, "grad_norm": 6.444443892701809, "learning_rate": 4.835479822373076e-06, "loss": 0.8929, "step": 3949 }, { "epoch": 0.2853685408275688, "grad_norm": 5.686258369149494, "learning_rate": 4.83537545006189e-06, "loss": 0.9945, "step": 3950 }, { "epoch": 0.2854407860277783, "grad_norm": 5.963726706042559, "learning_rate": 4.835271045781226e-06, "loss": 0.9059, "step": 3951 }, { "epoch": 0.28551303122798777, "grad_norm": 6.910858135901832, "learning_rate": 4.835166609532515e-06, "loss": 1.0059, "step": 3952 }, { "epoch": 0.2855852764281973, "grad_norm": 7.051900556065487, "learning_rate": 4.835062141317187e-06, "loss": 0.8689, "step": 3953 }, { "epoch": 0.2856575216284068, "grad_norm": 6.21153323597335, "learning_rate": 4.834957641136671e-06, "loss": 0.8517, "step": 3954 }, { "epoch": 0.28572976682861634, "grad_norm": 6.180422347367994, "learning_rate": 4.834853108992396e-06, "loss": 0.9637, "step": 3955 }, { "epoch": 0.2858020120288258, "grad_norm": 6.362396047526134, "learning_rate": 4.834748544885798e-06, "loss": 0.9024, "step": 3956 }, { "epoch": 0.28587425722903537, "grad_norm": 7.913646510075225, "learning_rate": 4.8346439488183025e-06, "loss": 0.9109, "step": 3957 }, { "epoch": 0.28594650242924485, "grad_norm": 6.8382618816131515, "learning_rate": 4.834539320791346e-06, "loss": 0.945, "step": 3958 }, { "epoch": 0.2860187476294544, "grad_norm": 6.851657427453143, "learning_rate": 4.834434660806358e-06, "loss": 0.9158, "step": 3959 }, { "epoch": 0.2860909928296639, "grad_norm": 7.726934749943396, "learning_rate": 4.834329968864772e-06, "loss": 0.8787, "step": 3960 }, { "epoch": 0.28616323802987337, "grad_norm": 7.893742132598267, "learning_rate": 4.834225244968021e-06, "loss": 0.9904, "step": 3961 }, { "epoch": 0.2862354832300829, "grad_norm": 6.546659115529933, "learning_rate": 4.8341204891175395e-06, "loss": 0.9018, "step": 3962 }, { "epoch": 0.2863077284302924, "grad_norm": 6.543051830162838, "learning_rate": 4.834015701314761e-06, "loss": 0.9077, "step": 3963 }, { "epoch": 0.28637997363050194, "grad_norm": 8.223604922263743, "learning_rate": 4.833910881561119e-06, "loss": 1.0087, "step": 3964 }, { "epoch": 0.2864522188307114, "grad_norm": 6.7556676735413825, "learning_rate": 4.833806029858049e-06, "loss": 0.8724, "step": 3965 }, { "epoch": 0.28652446403092097, "grad_norm": 6.56640770235335, "learning_rate": 4.8337011462069874e-06, "loss": 0.9308, "step": 3966 }, { "epoch": 0.28659670923113045, "grad_norm": 7.774191477645989, "learning_rate": 4.833596230609369e-06, "loss": 0.9498, "step": 3967 }, { "epoch": 0.28666895443134, "grad_norm": 5.779325917677955, "learning_rate": 4.8334912830666295e-06, "loss": 0.9375, "step": 3968 }, { "epoch": 0.2867411996315495, "grad_norm": 6.112782652390897, "learning_rate": 4.833386303580207e-06, "loss": 0.9138, "step": 3969 }, { "epoch": 0.28681344483175897, "grad_norm": 6.316976338682658, "learning_rate": 4.833281292151537e-06, "loss": 0.8497, "step": 3970 }, { "epoch": 0.2868856900319685, "grad_norm": 7.532732172697746, "learning_rate": 4.833176248782058e-06, "loss": 0.8852, "step": 3971 }, { "epoch": 0.286957935232178, "grad_norm": 7.016149148710646, "learning_rate": 4.833071173473208e-06, "loss": 1.0117, "step": 3972 }, { "epoch": 0.28703018043238754, "grad_norm": 6.6675207862487085, "learning_rate": 4.832966066226425e-06, "loss": 0.9806, "step": 3973 }, { "epoch": 0.287102425632597, "grad_norm": 6.187497533932589, "learning_rate": 4.832860927043148e-06, "loss": 0.9323, "step": 3974 }, { "epoch": 0.28717467083280657, "grad_norm": 7.8337194739867435, "learning_rate": 4.832755755924816e-06, "loss": 1.0534, "step": 3975 }, { "epoch": 0.28724691603301605, "grad_norm": 6.931506067745126, "learning_rate": 4.83265055287287e-06, "loss": 0.8683, "step": 3976 }, { "epoch": 0.2873191612332256, "grad_norm": 5.969231231622882, "learning_rate": 4.832545317888748e-06, "loss": 0.9802, "step": 3977 }, { "epoch": 0.2873914064334351, "grad_norm": 8.065156247729673, "learning_rate": 4.832440050973892e-06, "loss": 0.8274, "step": 3978 }, { "epoch": 0.28746365163364457, "grad_norm": 5.793818488019722, "learning_rate": 4.832334752129743e-06, "loss": 0.8849, "step": 3979 }, { "epoch": 0.2875358968338541, "grad_norm": 6.025818587921563, "learning_rate": 4.832229421357742e-06, "loss": 0.9004, "step": 3980 }, { "epoch": 0.2876081420340636, "grad_norm": 5.460223018856411, "learning_rate": 4.832124058659331e-06, "loss": 0.9289, "step": 3981 }, { "epoch": 0.28768038723427314, "grad_norm": 6.360786899853788, "learning_rate": 4.832018664035952e-06, "loss": 0.9054, "step": 3982 }, { "epoch": 0.2877526324344826, "grad_norm": 6.629763348298592, "learning_rate": 4.831913237489049e-06, "loss": 0.8959, "step": 3983 }, { "epoch": 0.28782487763469217, "grad_norm": 6.513115195946105, "learning_rate": 4.831807779020063e-06, "loss": 0.9414, "step": 3984 }, { "epoch": 0.28789712283490165, "grad_norm": 6.649714310324583, "learning_rate": 4.831702288630441e-06, "loss": 0.903, "step": 3985 }, { "epoch": 0.28796936803511114, "grad_norm": 8.069410098234945, "learning_rate": 4.831596766321624e-06, "loss": 1.0241, "step": 3986 }, { "epoch": 0.2880416132353207, "grad_norm": 7.111116502018051, "learning_rate": 4.8314912120950576e-06, "loss": 0.9516, "step": 3987 }, { "epoch": 0.28811385843553017, "grad_norm": 6.129115687646043, "learning_rate": 4.831385625952188e-06, "loss": 0.9374, "step": 3988 }, { "epoch": 0.2881861036357397, "grad_norm": 7.500431557001221, "learning_rate": 4.831280007894458e-06, "loss": 0.9887, "step": 3989 }, { "epoch": 0.2882583488359492, "grad_norm": 5.964364402911084, "learning_rate": 4.831174357923315e-06, "loss": 0.9425, "step": 3990 }, { "epoch": 0.28833059403615874, "grad_norm": 5.729724463849952, "learning_rate": 4.831068676040205e-06, "loss": 0.8829, "step": 3991 }, { "epoch": 0.2884028392363682, "grad_norm": 7.586451158507509, "learning_rate": 4.830962962246575e-06, "loss": 0.9314, "step": 3992 }, { "epoch": 0.28847508443657777, "grad_norm": 7.256179018026663, "learning_rate": 4.830857216543872e-06, "loss": 0.9891, "step": 3993 }, { "epoch": 0.28854732963678725, "grad_norm": 6.764000677829485, "learning_rate": 4.830751438933543e-06, "loss": 0.84, "step": 3994 }, { "epoch": 0.28861957483699674, "grad_norm": 8.215271884647025, "learning_rate": 4.830645629417038e-06, "loss": 0.9421, "step": 3995 }, { "epoch": 0.2886918200372063, "grad_norm": 7.104100410696886, "learning_rate": 4.830539787995803e-06, "loss": 0.8859, "step": 3996 }, { "epoch": 0.28876406523741577, "grad_norm": 6.683947894347001, "learning_rate": 4.8304339146712875e-06, "loss": 0.9022, "step": 3997 }, { "epoch": 0.2888363104376253, "grad_norm": 6.962590664559208, "learning_rate": 4.830328009444941e-06, "loss": 1.0064, "step": 3998 }, { "epoch": 0.2889085556378348, "grad_norm": 7.782972520501586, "learning_rate": 4.8302220723182146e-06, "loss": 0.9284, "step": 3999 }, { "epoch": 0.28898080083804434, "grad_norm": 6.876570920932557, "learning_rate": 4.830116103292556e-06, "loss": 0.9257, "step": 4000 }, { "epoch": 0.2890530460382538, "grad_norm": 7.496989854105353, "learning_rate": 4.830010102369418e-06, "loss": 0.9969, "step": 4001 }, { "epoch": 0.28912529123846337, "grad_norm": 6.5100671130118135, "learning_rate": 4.829904069550251e-06, "loss": 0.9333, "step": 4002 }, { "epoch": 0.28919753643867285, "grad_norm": 5.64106643090695, "learning_rate": 4.829798004836506e-06, "loss": 0.8665, "step": 4003 }, { "epoch": 0.28926978163888234, "grad_norm": 8.208132772044786, "learning_rate": 4.829691908229634e-06, "loss": 0.8693, "step": 4004 }, { "epoch": 0.2893420268390919, "grad_norm": 7.641360631685092, "learning_rate": 4.829585779731091e-06, "loss": 0.9784, "step": 4005 }, { "epoch": 0.28941427203930137, "grad_norm": 6.35968451719133, "learning_rate": 4.829479619342326e-06, "loss": 0.8544, "step": 4006 }, { "epoch": 0.2894865172395109, "grad_norm": 7.269855276816012, "learning_rate": 4.829373427064794e-06, "loss": 0.978, "step": 4007 }, { "epoch": 0.2895587624397204, "grad_norm": 6.9765020797180695, "learning_rate": 4.829267202899949e-06, "loss": 0.9441, "step": 4008 }, { "epoch": 0.28963100763992994, "grad_norm": 6.729857733074443, "learning_rate": 4.8291609468492436e-06, "loss": 0.8894, "step": 4009 }, { "epoch": 0.2897032528401394, "grad_norm": 6.598006045530382, "learning_rate": 4.829054658914134e-06, "loss": 0.8939, "step": 4010 }, { "epoch": 0.28977549804034897, "grad_norm": 7.286942119411966, "learning_rate": 4.8289483390960745e-06, "loss": 0.9604, "step": 4011 }, { "epoch": 0.28984774324055845, "grad_norm": 6.886391651956115, "learning_rate": 4.828841987396521e-06, "loss": 0.9083, "step": 4012 }, { "epoch": 0.28991998844076794, "grad_norm": 8.373534572556142, "learning_rate": 4.828735603816927e-06, "loss": 0.8917, "step": 4013 }, { "epoch": 0.2899922336409775, "grad_norm": 7.004478112111342, "learning_rate": 4.8286291883587526e-06, "loss": 1.021, "step": 4014 }, { "epoch": 0.29006447884118697, "grad_norm": 6.745882261812341, "learning_rate": 4.8285227410234525e-06, "loss": 0.8346, "step": 4015 }, { "epoch": 0.2901367240413965, "grad_norm": 7.140648238730648, "learning_rate": 4.828416261812484e-06, "loss": 0.9282, "step": 4016 }, { "epoch": 0.290208969241606, "grad_norm": 7.831129399837673, "learning_rate": 4.828309750727304e-06, "loss": 0.9028, "step": 4017 }, { "epoch": 0.29028121444181554, "grad_norm": 6.846173697717317, "learning_rate": 4.828203207769372e-06, "loss": 0.9284, "step": 4018 }, { "epoch": 0.290353459642025, "grad_norm": 7.023321810608645, "learning_rate": 4.828096632940146e-06, "loss": 0.9947, "step": 4019 }, { "epoch": 0.29042570484223457, "grad_norm": 7.5885509385309975, "learning_rate": 4.827990026241084e-06, "loss": 0.9637, "step": 4020 }, { "epoch": 0.29049795004244405, "grad_norm": 7.248592897868071, "learning_rate": 4.827883387673646e-06, "loss": 0.9357, "step": 4021 }, { "epoch": 0.29057019524265354, "grad_norm": 7.883479079766943, "learning_rate": 4.827776717239293e-06, "loss": 0.9157, "step": 4022 }, { "epoch": 0.2906424404428631, "grad_norm": 6.790379720191262, "learning_rate": 4.827670014939483e-06, "loss": 0.9244, "step": 4023 }, { "epoch": 0.29071468564307257, "grad_norm": 7.505092672021112, "learning_rate": 4.827563280775678e-06, "loss": 0.968, "step": 4024 }, { "epoch": 0.2907869308432821, "grad_norm": 7.2337081964397925, "learning_rate": 4.82745651474934e-06, "loss": 0.8778, "step": 4025 }, { "epoch": 0.2908591760434916, "grad_norm": 8.198019244381536, "learning_rate": 4.827349716861929e-06, "loss": 0.9492, "step": 4026 }, { "epoch": 0.29093142124370114, "grad_norm": 6.2646718049301136, "learning_rate": 4.827242887114907e-06, "loss": 0.875, "step": 4027 }, { "epoch": 0.2910036664439106, "grad_norm": 6.424901986766085, "learning_rate": 4.8271360255097364e-06, "loss": 0.8516, "step": 4028 }, { "epoch": 0.29107591164412017, "grad_norm": 7.6947659525930705, "learning_rate": 4.827029132047881e-06, "loss": 0.9411, "step": 4029 }, { "epoch": 0.29114815684432965, "grad_norm": 6.119331695369129, "learning_rate": 4.8269222067308046e-06, "loss": 0.9626, "step": 4030 }, { "epoch": 0.29122040204453914, "grad_norm": 6.294104030315482, "learning_rate": 4.826815249559968e-06, "loss": 0.8731, "step": 4031 }, { "epoch": 0.2912926472447487, "grad_norm": 6.543891317887414, "learning_rate": 4.826708260536839e-06, "loss": 0.8623, "step": 4032 }, { "epoch": 0.29136489244495817, "grad_norm": 8.049589006819598, "learning_rate": 4.82660123966288e-06, "loss": 1.0111, "step": 4033 }, { "epoch": 0.2914371376451677, "grad_norm": 6.123839599302735, "learning_rate": 4.826494186939556e-06, "loss": 0.8675, "step": 4034 }, { "epoch": 0.2915093828453772, "grad_norm": 6.845556012149672, "learning_rate": 4.826387102368333e-06, "loss": 0.8496, "step": 4035 }, { "epoch": 0.29158162804558674, "grad_norm": 6.591031494917331, "learning_rate": 4.826279985950678e-06, "loss": 0.9923, "step": 4036 }, { "epoch": 0.2916538732457962, "grad_norm": 7.161114634271384, "learning_rate": 4.826172837688055e-06, "loss": 0.8888, "step": 4037 }, { "epoch": 0.29172611844600577, "grad_norm": 6.71459008892247, "learning_rate": 4.8260656575819325e-06, "loss": 0.8525, "step": 4038 }, { "epoch": 0.29179836364621525, "grad_norm": 5.507620647146197, "learning_rate": 4.825958445633777e-06, "loss": 0.942, "step": 4039 }, { "epoch": 0.29187060884642474, "grad_norm": 6.40323981298054, "learning_rate": 4.825851201845056e-06, "loss": 0.8745, "step": 4040 }, { "epoch": 0.2919428540466343, "grad_norm": 5.993935381078033, "learning_rate": 4.82574392621724e-06, "loss": 0.9007, "step": 4041 }, { "epoch": 0.29201509924684377, "grad_norm": 6.386971994214693, "learning_rate": 4.825636618751793e-06, "loss": 0.8643, "step": 4042 }, { "epoch": 0.2920873444470533, "grad_norm": 5.983528097200701, "learning_rate": 4.825529279450188e-06, "loss": 0.8686, "step": 4043 }, { "epoch": 0.2921595896472628, "grad_norm": 7.143487602748061, "learning_rate": 4.825421908313892e-06, "loss": 0.9686, "step": 4044 }, { "epoch": 0.29223183484747234, "grad_norm": 7.160056621817786, "learning_rate": 4.825314505344376e-06, "loss": 0.909, "step": 4045 }, { "epoch": 0.2923040800476818, "grad_norm": 5.126034329874294, "learning_rate": 4.8252070705431095e-06, "loss": 0.8166, "step": 4046 }, { "epoch": 0.29237632524789137, "grad_norm": 5.878490304014674, "learning_rate": 4.825099603911564e-06, "loss": 0.8223, "step": 4047 }, { "epoch": 0.29244857044810085, "grad_norm": 5.492903552850529, "learning_rate": 4.82499210545121e-06, "loss": 0.8966, "step": 4048 }, { "epoch": 0.29252081564831034, "grad_norm": 6.182137371431344, "learning_rate": 4.8248845751635195e-06, "loss": 0.9099, "step": 4049 }, { "epoch": 0.2925930608485199, "grad_norm": 5.882238271380957, "learning_rate": 4.824777013049965e-06, "loss": 0.966, "step": 4050 }, { "epoch": 0.29266530604872937, "grad_norm": 6.377224702295227, "learning_rate": 4.824669419112017e-06, "loss": 1.0012, "step": 4051 }, { "epoch": 0.2927375512489389, "grad_norm": 6.739241609552504, "learning_rate": 4.82456179335115e-06, "loss": 0.9939, "step": 4052 }, { "epoch": 0.2928097964491484, "grad_norm": 5.533583154362562, "learning_rate": 4.824454135768838e-06, "loss": 0.8438, "step": 4053 }, { "epoch": 0.29288204164935794, "grad_norm": 5.993411579462889, "learning_rate": 4.8243464463665525e-06, "loss": 0.8762, "step": 4054 }, { "epoch": 0.2929542868495674, "grad_norm": 7.327438354857602, "learning_rate": 4.824238725145769e-06, "loss": 1.0086, "step": 4055 }, { "epoch": 0.29302653204977697, "grad_norm": 5.96520699205047, "learning_rate": 4.824130972107963e-06, "loss": 0.8994, "step": 4056 }, { "epoch": 0.29309877724998645, "grad_norm": 7.088148686832171, "learning_rate": 4.824023187254607e-06, "loss": 0.8437, "step": 4057 }, { "epoch": 0.29317102245019594, "grad_norm": 6.332828836257612, "learning_rate": 4.823915370587179e-06, "loss": 0.8843, "step": 4058 }, { "epoch": 0.2932432676504055, "grad_norm": 8.089967764470744, "learning_rate": 4.823807522107154e-06, "loss": 0.8942, "step": 4059 }, { "epoch": 0.29331551285061497, "grad_norm": 7.242423472423181, "learning_rate": 4.823699641816009e-06, "loss": 0.9288, "step": 4060 }, { "epoch": 0.2933877580508245, "grad_norm": 5.8794097583384435, "learning_rate": 4.823591729715219e-06, "loss": 0.9519, "step": 4061 }, { "epoch": 0.293460003251034, "grad_norm": 7.646850795419141, "learning_rate": 4.823483785806262e-06, "loss": 1.0168, "step": 4062 }, { "epoch": 0.29353224845124354, "grad_norm": 7.319130003044409, "learning_rate": 4.823375810090617e-06, "loss": 0.9296, "step": 4063 }, { "epoch": 0.293604493651453, "grad_norm": 9.889664589774629, "learning_rate": 4.823267802569761e-06, "loss": 0.9184, "step": 4064 }, { "epoch": 0.29367673885166257, "grad_norm": 7.09357899195033, "learning_rate": 4.8231597632451725e-06, "loss": 0.9118, "step": 4065 }, { "epoch": 0.29374898405187205, "grad_norm": 7.370180268483357, "learning_rate": 4.8230516921183315e-06, "loss": 0.9908, "step": 4066 }, { "epoch": 0.29382122925208154, "grad_norm": 6.315404544778504, "learning_rate": 4.822943589190715e-06, "loss": 0.9967, "step": 4067 }, { "epoch": 0.2938934744522911, "grad_norm": 6.999168619103456, "learning_rate": 4.8228354544638055e-06, "loss": 1.0304, "step": 4068 }, { "epoch": 0.29396571965250057, "grad_norm": 5.918608798927844, "learning_rate": 4.822727287939082e-06, "loss": 0.905, "step": 4069 }, { "epoch": 0.2940379648527101, "grad_norm": 5.73353940608188, "learning_rate": 4.822619089618025e-06, "loss": 0.8502, "step": 4070 }, { "epoch": 0.2941102100529196, "grad_norm": 7.112953308355517, "learning_rate": 4.8225108595021166e-06, "loss": 0.9576, "step": 4071 }, { "epoch": 0.29418245525312914, "grad_norm": 7.398453999453619, "learning_rate": 4.822402597592838e-06, "loss": 0.9106, "step": 4072 }, { "epoch": 0.2942547004533386, "grad_norm": 6.819703379694143, "learning_rate": 4.8222943038916705e-06, "loss": 0.821, "step": 4073 }, { "epoch": 0.29432694565354817, "grad_norm": 6.125573267675934, "learning_rate": 4.822185978400097e-06, "loss": 0.9015, "step": 4074 }, { "epoch": 0.29439919085375765, "grad_norm": 6.119048048793006, "learning_rate": 4.822077621119601e-06, "loss": 0.9153, "step": 4075 }, { "epoch": 0.29447143605396714, "grad_norm": 5.799532957017423, "learning_rate": 4.8219692320516656e-06, "loss": 0.9534, "step": 4076 }, { "epoch": 0.2945436812541767, "grad_norm": 6.458450742905388, "learning_rate": 4.8218608111977735e-06, "loss": 0.8755, "step": 4077 }, { "epoch": 0.29461592645438617, "grad_norm": 5.524420018198796, "learning_rate": 4.82175235855941e-06, "loss": 0.8652, "step": 4078 }, { "epoch": 0.2946881716545957, "grad_norm": 5.882695778224523, "learning_rate": 4.82164387413806e-06, "loss": 0.9111, "step": 4079 }, { "epoch": 0.2947604168548052, "grad_norm": 5.834418531928686, "learning_rate": 4.821535357935207e-06, "loss": 0.854, "step": 4080 }, { "epoch": 0.29483266205501474, "grad_norm": 6.754770359340269, "learning_rate": 4.821426809952338e-06, "loss": 0.8295, "step": 4081 }, { "epoch": 0.2949049072552242, "grad_norm": 6.844746556295703, "learning_rate": 4.821318230190939e-06, "loss": 0.8738, "step": 4082 }, { "epoch": 0.29497715245543377, "grad_norm": 6.537884276786366, "learning_rate": 4.8212096186524945e-06, "loss": 0.9387, "step": 4083 }, { "epoch": 0.29504939765564325, "grad_norm": 6.416197854622936, "learning_rate": 4.821100975338494e-06, "loss": 0.8931, "step": 4084 }, { "epoch": 0.29512164285585274, "grad_norm": 6.67796572975946, "learning_rate": 4.8209923002504224e-06, "loss": 0.9614, "step": 4085 }, { "epoch": 0.2951938880560623, "grad_norm": 6.672860351985467, "learning_rate": 4.820883593389769e-06, "loss": 0.928, "step": 4086 }, { "epoch": 0.29526613325627177, "grad_norm": 7.805833825897737, "learning_rate": 4.820774854758021e-06, "loss": 0.9141, "step": 4087 }, { "epoch": 0.2953383784564813, "grad_norm": 6.619253293246622, "learning_rate": 4.8206660843566674e-06, "loss": 0.9651, "step": 4088 }, { "epoch": 0.2954106236566908, "grad_norm": 6.43150480041823, "learning_rate": 4.820557282187197e-06, "loss": 1.074, "step": 4089 }, { "epoch": 0.29548286885690034, "grad_norm": 6.772434956803988, "learning_rate": 4.820448448251098e-06, "loss": 0.9018, "step": 4090 }, { "epoch": 0.2955551140571098, "grad_norm": 7.6445344935640795, "learning_rate": 4.820339582549863e-06, "loss": 0.909, "step": 4091 }, { "epoch": 0.29562735925731937, "grad_norm": 8.350105294117647, "learning_rate": 4.82023068508498e-06, "loss": 0.9156, "step": 4092 }, { "epoch": 0.29569960445752885, "grad_norm": 6.396478411256762, "learning_rate": 4.82012175585794e-06, "loss": 0.962, "step": 4093 }, { "epoch": 0.29577184965773834, "grad_norm": 7.665643402587598, "learning_rate": 4.820012794870236e-06, "loss": 0.9348, "step": 4094 }, { "epoch": 0.2958440948579479, "grad_norm": 6.29725725324718, "learning_rate": 4.819903802123357e-06, "loss": 0.8994, "step": 4095 }, { "epoch": 0.29591634005815737, "grad_norm": 6.96971582126204, "learning_rate": 4.819794777618797e-06, "loss": 0.9322, "step": 4096 }, { "epoch": 0.2959885852583669, "grad_norm": 6.765025052787591, "learning_rate": 4.8196857213580476e-06, "loss": 0.9078, "step": 4097 }, { "epoch": 0.2960608304585764, "grad_norm": 7.785719970261085, "learning_rate": 4.819576633342602e-06, "loss": 0.8824, "step": 4098 }, { "epoch": 0.29613307565878594, "grad_norm": 6.256863297041985, "learning_rate": 4.8194675135739525e-06, "loss": 0.8527, "step": 4099 }, { "epoch": 0.2962053208589954, "grad_norm": 5.832544037235301, "learning_rate": 4.819358362053595e-06, "loss": 0.9995, "step": 4100 }, { "epoch": 0.29627756605920497, "grad_norm": 6.99205901811077, "learning_rate": 4.819249178783021e-06, "loss": 0.9343, "step": 4101 }, { "epoch": 0.29634981125941445, "grad_norm": 7.129130989608214, "learning_rate": 4.819139963763727e-06, "loss": 0.8944, "step": 4102 }, { "epoch": 0.29642205645962394, "grad_norm": 7.525065181614631, "learning_rate": 4.819030716997208e-06, "loss": 0.8713, "step": 4103 }, { "epoch": 0.2964943016598335, "grad_norm": 7.734701993561175, "learning_rate": 4.818921438484958e-06, "loss": 0.9504, "step": 4104 }, { "epoch": 0.29656654686004297, "grad_norm": 8.17215321287882, "learning_rate": 4.818812128228475e-06, "loss": 0.8336, "step": 4105 }, { "epoch": 0.2966387920602525, "grad_norm": 5.852805236130798, "learning_rate": 4.818702786229254e-06, "loss": 0.8384, "step": 4106 }, { "epoch": 0.296711037260462, "grad_norm": 7.716499525639797, "learning_rate": 4.818593412488792e-06, "loss": 0.9274, "step": 4107 }, { "epoch": 0.29678328246067154, "grad_norm": 7.335071906717001, "learning_rate": 4.818484007008587e-06, "loss": 0.9138, "step": 4108 }, { "epoch": 0.296855527660881, "grad_norm": 9.387909812464788, "learning_rate": 4.818374569790136e-06, "loss": 0.8593, "step": 4109 }, { "epoch": 0.29692777286109057, "grad_norm": 6.915249169881225, "learning_rate": 4.8182651008349374e-06, "loss": 0.8849, "step": 4110 }, { "epoch": 0.29700001806130005, "grad_norm": 5.7445543458625545, "learning_rate": 4.818155600144489e-06, "loss": 0.8924, "step": 4111 }, { "epoch": 0.29707226326150954, "grad_norm": 6.5218695486545855, "learning_rate": 4.818046067720291e-06, "loss": 0.8996, "step": 4112 }, { "epoch": 0.2971445084617191, "grad_norm": 6.130264511002238, "learning_rate": 4.817936503563842e-06, "loss": 0.9916, "step": 4113 }, { "epoch": 0.29721675366192857, "grad_norm": 6.478960170910162, "learning_rate": 4.817826907676642e-06, "loss": 0.8346, "step": 4114 }, { "epoch": 0.2972889988621381, "grad_norm": 9.607187012719347, "learning_rate": 4.8177172800601915e-06, "loss": 0.987, "step": 4115 }, { "epoch": 0.2973612440623476, "grad_norm": 6.527420600516327, "learning_rate": 4.8176076207159905e-06, "loss": 0.9241, "step": 4116 }, { "epoch": 0.29743348926255714, "grad_norm": 5.506048864356335, "learning_rate": 4.817497929645541e-06, "loss": 0.8394, "step": 4117 }, { "epoch": 0.2975057344627666, "grad_norm": 6.667423809289878, "learning_rate": 4.8173882068503444e-06, "loss": 1.0013, "step": 4118 }, { "epoch": 0.29757797966297617, "grad_norm": 7.2532362456462325, "learning_rate": 4.817278452331902e-06, "loss": 0.9559, "step": 4119 }, { "epoch": 0.29765022486318565, "grad_norm": 7.437791321763051, "learning_rate": 4.8171686660917174e-06, "loss": 0.8554, "step": 4120 }, { "epoch": 0.29772247006339514, "grad_norm": 5.817711903278709, "learning_rate": 4.817058848131293e-06, "loss": 0.8647, "step": 4121 }, { "epoch": 0.2977947152636047, "grad_norm": 7.03814684535963, "learning_rate": 4.8169489984521314e-06, "loss": 1.0031, "step": 4122 }, { "epoch": 0.29786696046381417, "grad_norm": 7.184557768632009, "learning_rate": 4.816839117055738e-06, "loss": 0.9952, "step": 4123 }, { "epoch": 0.2979392056640237, "grad_norm": 7.364009102786197, "learning_rate": 4.816729203943615e-06, "loss": 0.9382, "step": 4124 }, { "epoch": 0.2980114508642332, "grad_norm": 8.696131966664295, "learning_rate": 4.816619259117269e-06, "loss": 0.9148, "step": 4125 }, { "epoch": 0.29808369606444274, "grad_norm": 5.671424028107209, "learning_rate": 4.816509282578203e-06, "loss": 0.9516, "step": 4126 }, { "epoch": 0.2981559412646522, "grad_norm": 6.667825725252854, "learning_rate": 4.8163992743279244e-06, "loss": 0.9515, "step": 4127 }, { "epoch": 0.29822818646486177, "grad_norm": 7.333025174457573, "learning_rate": 4.816289234367938e-06, "loss": 0.8993, "step": 4128 }, { "epoch": 0.29830043166507125, "grad_norm": 7.071999701573292, "learning_rate": 4.81617916269975e-06, "loss": 0.9322, "step": 4129 }, { "epoch": 0.29837267686528074, "grad_norm": 7.928358926956966, "learning_rate": 4.8160690593248685e-06, "loss": 0.9504, "step": 4130 }, { "epoch": 0.2984449220654903, "grad_norm": 7.159328548125848, "learning_rate": 4.8159589242448e-06, "loss": 0.9217, "step": 4131 }, { "epoch": 0.29851716726569977, "grad_norm": 7.298578818996176, "learning_rate": 4.815848757461051e-06, "loss": 0.928, "step": 4132 }, { "epoch": 0.2985894124659093, "grad_norm": 8.831691325418959, "learning_rate": 4.815738558975131e-06, "loss": 0.9168, "step": 4133 }, { "epoch": 0.2986616576661188, "grad_norm": 6.377316521517887, "learning_rate": 4.815628328788548e-06, "loss": 0.9028, "step": 4134 }, { "epoch": 0.29873390286632834, "grad_norm": 5.9675592163369675, "learning_rate": 4.815518066902813e-06, "loss": 0.8826, "step": 4135 }, { "epoch": 0.2988061480665378, "grad_norm": 6.374676565305557, "learning_rate": 4.815407773319431e-06, "loss": 0.8533, "step": 4136 }, { "epoch": 0.29887839326674737, "grad_norm": 7.255425626335585, "learning_rate": 4.815297448039916e-06, "loss": 0.9825, "step": 4137 }, { "epoch": 0.29895063846695685, "grad_norm": 7.20315836404285, "learning_rate": 4.815187091065776e-06, "loss": 1.0064, "step": 4138 }, { "epoch": 0.29902288366716634, "grad_norm": 6.472386614122028, "learning_rate": 4.8150767023985225e-06, "loss": 0.9237, "step": 4139 }, { "epoch": 0.2990951288673759, "grad_norm": 7.070581184466579, "learning_rate": 4.814966282039667e-06, "loss": 0.8429, "step": 4140 }, { "epoch": 0.29916737406758537, "grad_norm": 7.117920420121156, "learning_rate": 4.81485582999072e-06, "loss": 0.942, "step": 4141 }, { "epoch": 0.2992396192677949, "grad_norm": 6.830412387713531, "learning_rate": 4.814745346253193e-06, "loss": 0.9948, "step": 4142 }, { "epoch": 0.2993118644680044, "grad_norm": 8.275956905852007, "learning_rate": 4.8146348308286015e-06, "loss": 0.8983, "step": 4143 }, { "epoch": 0.29938410966821394, "grad_norm": 6.137259534176777, "learning_rate": 4.814524283718455e-06, "loss": 0.8774, "step": 4144 }, { "epoch": 0.2994563548684234, "grad_norm": 7.690700206055611, "learning_rate": 4.8144137049242686e-06, "loss": 0.8557, "step": 4145 }, { "epoch": 0.29952860006863297, "grad_norm": 6.223150948438482, "learning_rate": 4.8143030944475555e-06, "loss": 0.9111, "step": 4146 }, { "epoch": 0.29960084526884245, "grad_norm": 7.846513360188784, "learning_rate": 4.814192452289831e-06, "loss": 0.8984, "step": 4147 }, { "epoch": 0.29967309046905194, "grad_norm": 6.4823053573062515, "learning_rate": 4.814081778452607e-06, "loss": 0.9412, "step": 4148 }, { "epoch": 0.2997453356692615, "grad_norm": 5.692211170131612, "learning_rate": 4.813971072937401e-06, "loss": 0.8749, "step": 4149 }, { "epoch": 0.29981758086947097, "grad_norm": 7.322813927773994, "learning_rate": 4.813860335745728e-06, "loss": 0.9349, "step": 4150 }, { "epoch": 0.2998898260696805, "grad_norm": 6.029303834625073, "learning_rate": 4.813749566879103e-06, "loss": 0.8732, "step": 4151 }, { "epoch": 0.29996207126989, "grad_norm": 7.67793385846485, "learning_rate": 4.813638766339044e-06, "loss": 0.9184, "step": 4152 }, { "epoch": 0.30003431647009954, "grad_norm": 5.812415665865848, "learning_rate": 4.813527934127066e-06, "loss": 0.9353, "step": 4153 }, { "epoch": 0.300106561670309, "grad_norm": 8.056123326195733, "learning_rate": 4.8134170702446865e-06, "loss": 1.007, "step": 4154 }, { "epoch": 0.30017880687051857, "grad_norm": 6.274715839654271, "learning_rate": 4.813306174693424e-06, "loss": 0.862, "step": 4155 }, { "epoch": 0.30025105207072805, "grad_norm": 8.39732051074948, "learning_rate": 4.813195247474796e-06, "loss": 0.9868, "step": 4156 }, { "epoch": 0.30032329727093754, "grad_norm": 5.577496319559714, "learning_rate": 4.813084288590321e-06, "loss": 0.8387, "step": 4157 }, { "epoch": 0.3003955424711471, "grad_norm": 8.206310300467475, "learning_rate": 4.812973298041518e-06, "loss": 0.9131, "step": 4158 }, { "epoch": 0.30046778767135657, "grad_norm": 6.828235057930245, "learning_rate": 4.812862275829907e-06, "loss": 0.9195, "step": 4159 }, { "epoch": 0.3005400328715661, "grad_norm": 8.007154603310005, "learning_rate": 4.812751221957007e-06, "loss": 0.9504, "step": 4160 }, { "epoch": 0.3006122780717756, "grad_norm": 6.907290833350956, "learning_rate": 4.812640136424338e-06, "loss": 0.9497, "step": 4161 }, { "epoch": 0.30068452327198514, "grad_norm": 6.573085712207722, "learning_rate": 4.812529019233422e-06, "loss": 0.8145, "step": 4162 }, { "epoch": 0.3007567684721946, "grad_norm": 6.382694462743483, "learning_rate": 4.812417870385779e-06, "loss": 0.9311, "step": 4163 }, { "epoch": 0.30082901367240417, "grad_norm": 7.803896874446891, "learning_rate": 4.8123066898829316e-06, "loss": 0.9332, "step": 4164 }, { "epoch": 0.30090125887261365, "grad_norm": 6.896028575457427, "learning_rate": 4.8121954777264e-06, "loss": 0.8035, "step": 4165 }, { "epoch": 0.30097350407282314, "grad_norm": 7.188843178673366, "learning_rate": 4.812084233917708e-06, "loss": 0.9455, "step": 4166 }, { "epoch": 0.3010457492730327, "grad_norm": 7.463486097350339, "learning_rate": 4.811972958458377e-06, "loss": 0.8545, "step": 4167 }, { "epoch": 0.30111799447324217, "grad_norm": 6.052436262423038, "learning_rate": 4.8118616513499326e-06, "loss": 0.9347, "step": 4168 }, { "epoch": 0.3011902396734517, "grad_norm": 7.066944985337105, "learning_rate": 4.811750312593897e-06, "loss": 0.8448, "step": 4169 }, { "epoch": 0.3012624848736612, "grad_norm": 7.515275530710118, "learning_rate": 4.811638942191794e-06, "loss": 0.9517, "step": 4170 }, { "epoch": 0.30133473007387074, "grad_norm": 6.384011575043015, "learning_rate": 4.81152754014515e-06, "loss": 0.8664, "step": 4171 }, { "epoch": 0.3014069752740802, "grad_norm": 6.3052734707750595, "learning_rate": 4.811416106455488e-06, "loss": 0.911, "step": 4172 }, { "epoch": 0.30147922047428977, "grad_norm": 6.626390994892129, "learning_rate": 4.811304641124334e-06, "loss": 0.9455, "step": 4173 }, { "epoch": 0.30155146567449925, "grad_norm": 6.581024183436687, "learning_rate": 4.811193144153214e-06, "loss": 0.9522, "step": 4174 }, { "epoch": 0.30162371087470874, "grad_norm": 7.861376407072229, "learning_rate": 4.811081615543655e-06, "loss": 0.8681, "step": 4175 }, { "epoch": 0.3016959560749183, "grad_norm": 6.226143433984386, "learning_rate": 4.810970055297182e-06, "loss": 0.8966, "step": 4176 }, { "epoch": 0.30176820127512777, "grad_norm": 7.2341537936111875, "learning_rate": 4.8108584634153246e-06, "loss": 0.9367, "step": 4177 }, { "epoch": 0.3018404464753373, "grad_norm": 6.955808383191121, "learning_rate": 4.810746839899608e-06, "loss": 0.9133, "step": 4178 }, { "epoch": 0.3019126916755468, "grad_norm": 6.407001432682721, "learning_rate": 4.810635184751562e-06, "loss": 0.9386, "step": 4179 }, { "epoch": 0.30198493687575634, "grad_norm": 5.801457570527165, "learning_rate": 4.810523497972715e-06, "loss": 0.9408, "step": 4180 }, { "epoch": 0.3020571820759658, "grad_norm": 5.847588079408086, "learning_rate": 4.810411779564594e-06, "loss": 0.8928, "step": 4181 }, { "epoch": 0.30212942727617537, "grad_norm": 8.25210035926342, "learning_rate": 4.81030002952873e-06, "loss": 0.9551, "step": 4182 }, { "epoch": 0.30220167247638485, "grad_norm": 7.155516740971421, "learning_rate": 4.810188247866653e-06, "loss": 0.8699, "step": 4183 }, { "epoch": 0.30227391767659434, "grad_norm": 6.707168319981138, "learning_rate": 4.810076434579892e-06, "loss": 0.8889, "step": 4184 }, { "epoch": 0.3023461628768039, "grad_norm": 5.237984486024188, "learning_rate": 4.809964589669978e-06, "loss": 0.8742, "step": 4185 }, { "epoch": 0.30241840807701337, "grad_norm": 6.8706695790051775, "learning_rate": 4.8098527131384435e-06, "loss": 0.9148, "step": 4186 }, { "epoch": 0.3024906532772229, "grad_norm": 6.216577505063767, "learning_rate": 4.809740804986819e-06, "loss": 0.8905, "step": 4187 }, { "epoch": 0.3025628984774324, "grad_norm": 9.162231574608764, "learning_rate": 4.809628865216635e-06, "loss": 0.9219, "step": 4188 }, { "epoch": 0.30263514367764194, "grad_norm": 6.664554483872561, "learning_rate": 4.809516893829425e-06, "loss": 0.9186, "step": 4189 }, { "epoch": 0.3027073888778514, "grad_norm": 6.081287022445597, "learning_rate": 4.8094048908267234e-06, "loss": 0.9791, "step": 4190 }, { "epoch": 0.30277963407806097, "grad_norm": 6.152656862072055, "learning_rate": 4.809292856210062e-06, "loss": 0.8453, "step": 4191 }, { "epoch": 0.30285187927827045, "grad_norm": 6.10565556236113, "learning_rate": 4.809180789980973e-06, "loss": 0.8973, "step": 4192 }, { "epoch": 0.30292412447847994, "grad_norm": 7.882076172327755, "learning_rate": 4.809068692140993e-06, "loss": 0.9464, "step": 4193 }, { "epoch": 0.3029963696786895, "grad_norm": 6.04972988818267, "learning_rate": 4.808956562691655e-06, "loss": 0.8423, "step": 4194 }, { "epoch": 0.30306861487889897, "grad_norm": 6.039501810812109, "learning_rate": 4.808844401634495e-06, "loss": 0.9763, "step": 4195 }, { "epoch": 0.3031408600791085, "grad_norm": 5.63585327863889, "learning_rate": 4.808732208971046e-06, "loss": 0.888, "step": 4196 }, { "epoch": 0.303213105279318, "grad_norm": 7.161273642268068, "learning_rate": 4.808619984702848e-06, "loss": 0.8663, "step": 4197 }, { "epoch": 0.30328535047952754, "grad_norm": 8.503538404997853, "learning_rate": 4.808507728831434e-06, "loss": 1.0018, "step": 4198 }, { "epoch": 0.303357595679737, "grad_norm": 6.255017212280693, "learning_rate": 4.808395441358341e-06, "loss": 0.9043, "step": 4199 }, { "epoch": 0.30342984087994657, "grad_norm": 7.127453297997589, "learning_rate": 4.808283122285108e-06, "loss": 0.9725, "step": 4200 }, { "epoch": 0.30350208608015605, "grad_norm": 7.252207452869307, "learning_rate": 4.80817077161327e-06, "loss": 0.9715, "step": 4201 }, { "epoch": 0.30357433128036554, "grad_norm": 6.221728661050583, "learning_rate": 4.8080583893443675e-06, "loss": 0.8512, "step": 4202 }, { "epoch": 0.3036465764805751, "grad_norm": 9.035977825541178, "learning_rate": 4.807945975479937e-06, "loss": 1.0117, "step": 4203 }, { "epoch": 0.30371882168078457, "grad_norm": 5.279529539591935, "learning_rate": 4.807833530021518e-06, "loss": 0.8448, "step": 4204 }, { "epoch": 0.3037910668809941, "grad_norm": 6.1913554192511215, "learning_rate": 4.807721052970651e-06, "loss": 0.9157, "step": 4205 }, { "epoch": 0.3038633120812036, "grad_norm": 6.620882878279197, "learning_rate": 4.807608544328873e-06, "loss": 0.8626, "step": 4206 }, { "epoch": 0.30393555728141314, "grad_norm": 5.9110488368859535, "learning_rate": 4.807496004097728e-06, "loss": 0.962, "step": 4207 }, { "epoch": 0.3040078024816226, "grad_norm": 6.641260884076991, "learning_rate": 4.8073834322787526e-06, "loss": 0.9156, "step": 4208 }, { "epoch": 0.3040800476818321, "grad_norm": 7.487987307578521, "learning_rate": 4.80727082887349e-06, "loss": 0.8965, "step": 4209 }, { "epoch": 0.30415229288204165, "grad_norm": 7.79779695552655, "learning_rate": 4.807158193883481e-06, "loss": 0.9325, "step": 4210 }, { "epoch": 0.30422453808225114, "grad_norm": 5.473269268285117, "learning_rate": 4.807045527310268e-06, "loss": 0.9394, "step": 4211 }, { "epoch": 0.3042967832824607, "grad_norm": 6.2773337230590425, "learning_rate": 4.806932829155393e-06, "loss": 0.956, "step": 4212 }, { "epoch": 0.30436902848267017, "grad_norm": 10.167428868806809, "learning_rate": 4.806820099420398e-06, "loss": 0.99, "step": 4213 }, { "epoch": 0.3044412736828797, "grad_norm": 5.823197232369867, "learning_rate": 4.806707338106829e-06, "loss": 0.8073, "step": 4214 }, { "epoch": 0.3045135188830892, "grad_norm": 7.2987358775445665, "learning_rate": 4.806594545216225e-06, "loss": 0.9003, "step": 4215 }, { "epoch": 0.30458576408329874, "grad_norm": 8.275006859537376, "learning_rate": 4.806481720750134e-06, "loss": 0.9037, "step": 4216 }, { "epoch": 0.3046580092835082, "grad_norm": 7.117699882077646, "learning_rate": 4.8063688647101e-06, "loss": 0.8696, "step": 4217 }, { "epoch": 0.3047302544837177, "grad_norm": 8.784334231634542, "learning_rate": 4.806255977097666e-06, "loss": 0.8815, "step": 4218 }, { "epoch": 0.30480249968392725, "grad_norm": 7.108873018421487, "learning_rate": 4.806143057914378e-06, "loss": 0.9526, "step": 4219 }, { "epoch": 0.30487474488413674, "grad_norm": 7.842411489379399, "learning_rate": 4.806030107161784e-06, "loss": 0.8845, "step": 4220 }, { "epoch": 0.3049469900843463, "grad_norm": 8.292137043197252, "learning_rate": 4.805917124841426e-06, "loss": 0.9844, "step": 4221 }, { "epoch": 0.30501923528455577, "grad_norm": 5.214892577896406, "learning_rate": 4.805804110954854e-06, "loss": 0.7989, "step": 4222 }, { "epoch": 0.3050914804847653, "grad_norm": 6.248509343719883, "learning_rate": 4.805691065503614e-06, "loss": 0.8924, "step": 4223 }, { "epoch": 0.3051637256849748, "grad_norm": 7.283101775491536, "learning_rate": 4.805577988489253e-06, "loss": 1.0139, "step": 4224 }, { "epoch": 0.30523597088518434, "grad_norm": 7.235833369135752, "learning_rate": 4.805464879913321e-06, "loss": 0.9561, "step": 4225 }, { "epoch": 0.3053082160853938, "grad_norm": 6.420434052242064, "learning_rate": 4.805351739777363e-06, "loss": 1.0233, "step": 4226 }, { "epoch": 0.3053804612856033, "grad_norm": 5.612024898003442, "learning_rate": 4.805238568082931e-06, "loss": 0.8432, "step": 4227 }, { "epoch": 0.30545270648581285, "grad_norm": 7.405245040764656, "learning_rate": 4.805125364831572e-06, "loss": 0.9266, "step": 4228 }, { "epoch": 0.30552495168602234, "grad_norm": 7.217950421592976, "learning_rate": 4.805012130024838e-06, "loss": 0.9654, "step": 4229 }, { "epoch": 0.3055971968862319, "grad_norm": 6.721601581909458, "learning_rate": 4.8048988636642764e-06, "loss": 0.9542, "step": 4230 }, { "epoch": 0.30566944208644137, "grad_norm": 6.098201123757791, "learning_rate": 4.8047855657514395e-06, "loss": 0.9087, "step": 4231 }, { "epoch": 0.3057416872866509, "grad_norm": 7.641876804647672, "learning_rate": 4.804672236287877e-06, "loss": 0.7996, "step": 4232 }, { "epoch": 0.3058139324868604, "grad_norm": 6.14429502039147, "learning_rate": 4.804558875275141e-06, "loss": 0.8706, "step": 4233 }, { "epoch": 0.30588617768706994, "grad_norm": 7.388458386008366, "learning_rate": 4.8044454827147846e-06, "loss": 0.9359, "step": 4234 }, { "epoch": 0.3059584228872794, "grad_norm": 6.273874688027664, "learning_rate": 4.8043320586083585e-06, "loss": 1.0096, "step": 4235 }, { "epoch": 0.3060306680874889, "grad_norm": 6.4113594334708965, "learning_rate": 4.804218602957416e-06, "loss": 0.8319, "step": 4236 }, { "epoch": 0.30610291328769845, "grad_norm": 7.457914401032957, "learning_rate": 4.804105115763509e-06, "loss": 0.9501, "step": 4237 }, { "epoch": 0.30617515848790794, "grad_norm": 6.519874120327928, "learning_rate": 4.803991597028193e-06, "loss": 0.897, "step": 4238 }, { "epoch": 0.3062474036881175, "grad_norm": 5.953300253223357, "learning_rate": 4.80387804675302e-06, "loss": 0.9234, "step": 4239 }, { "epoch": 0.30631964888832697, "grad_norm": 5.855472658854599, "learning_rate": 4.803764464939545e-06, "loss": 0.905, "step": 4240 }, { "epoch": 0.3063918940885365, "grad_norm": 7.484515672608056, "learning_rate": 4.803650851589324e-06, "loss": 0.9019, "step": 4241 }, { "epoch": 0.306464139288746, "grad_norm": 5.9204512120032655, "learning_rate": 4.803537206703912e-06, "loss": 0.9142, "step": 4242 }, { "epoch": 0.30653638448895554, "grad_norm": 7.8822349132579905, "learning_rate": 4.803423530284864e-06, "loss": 0.9244, "step": 4243 }, { "epoch": 0.306608629689165, "grad_norm": 7.1504314372469535, "learning_rate": 4.803309822333736e-06, "loss": 0.8779, "step": 4244 }, { "epoch": 0.3066808748893745, "grad_norm": 6.366114688665925, "learning_rate": 4.803196082852085e-06, "loss": 0.9106, "step": 4245 }, { "epoch": 0.30675312008958405, "grad_norm": 8.05891563523408, "learning_rate": 4.803082311841468e-06, "loss": 0.971, "step": 4246 }, { "epoch": 0.30682536528979354, "grad_norm": 7.23441085641762, "learning_rate": 4.8029685093034415e-06, "loss": 0.8967, "step": 4247 }, { "epoch": 0.3068976104900031, "grad_norm": 6.426011287200827, "learning_rate": 4.802854675239566e-06, "loss": 0.9262, "step": 4248 }, { "epoch": 0.30696985569021257, "grad_norm": 5.932519369692555, "learning_rate": 4.802740809651397e-06, "loss": 0.9095, "step": 4249 }, { "epoch": 0.3070421008904221, "grad_norm": 7.306425000159721, "learning_rate": 4.802626912540494e-06, "loss": 0.9321, "step": 4250 }, { "epoch": 0.3071143460906316, "grad_norm": 6.131741608831108, "learning_rate": 4.802512983908417e-06, "loss": 0.9014, "step": 4251 }, { "epoch": 0.30718659129084114, "grad_norm": 6.485952603647194, "learning_rate": 4.802399023756724e-06, "loss": 0.8687, "step": 4252 }, { "epoch": 0.3072588364910506, "grad_norm": 6.992914428431764, "learning_rate": 4.802285032086977e-06, "loss": 0.9631, "step": 4253 }, { "epoch": 0.3073310816912601, "grad_norm": 6.109868880892553, "learning_rate": 4.802171008900736e-06, "loss": 0.9342, "step": 4254 }, { "epoch": 0.30740332689146965, "grad_norm": 6.1470326778200475, "learning_rate": 4.802056954199561e-06, "loss": 0.8696, "step": 4255 }, { "epoch": 0.30747557209167914, "grad_norm": 6.351305141138285, "learning_rate": 4.801942867985013e-06, "loss": 0.9277, "step": 4256 }, { "epoch": 0.3075478172918887, "grad_norm": 7.14790757513251, "learning_rate": 4.801828750258656e-06, "loss": 0.8974, "step": 4257 }, { "epoch": 0.30762006249209817, "grad_norm": 7.785605318599316, "learning_rate": 4.801714601022049e-06, "loss": 0.9658, "step": 4258 }, { "epoch": 0.3076923076923077, "grad_norm": 6.997661472432321, "learning_rate": 4.801600420276757e-06, "loss": 0.903, "step": 4259 }, { "epoch": 0.3077645528925172, "grad_norm": 6.66066713583076, "learning_rate": 4.801486208024343e-06, "loss": 0.9772, "step": 4260 }, { "epoch": 0.30783679809272674, "grad_norm": 6.474980546487578, "learning_rate": 4.801371964266369e-06, "loss": 0.7631, "step": 4261 }, { "epoch": 0.3079090432929362, "grad_norm": 5.634744638175822, "learning_rate": 4.8012576890044e-06, "loss": 0.8155, "step": 4262 }, { "epoch": 0.3079812884931457, "grad_norm": 7.481783041770916, "learning_rate": 4.80114338224e-06, "loss": 0.9829, "step": 4263 }, { "epoch": 0.30805353369335525, "grad_norm": 6.857350669845354, "learning_rate": 4.801029043974733e-06, "loss": 0.9063, "step": 4264 }, { "epoch": 0.30812577889356474, "grad_norm": 6.446787236587549, "learning_rate": 4.800914674210166e-06, "loss": 0.8139, "step": 4265 }, { "epoch": 0.3081980240937743, "grad_norm": 7.897659640774201, "learning_rate": 4.800800272947863e-06, "loss": 0.9484, "step": 4266 }, { "epoch": 0.30827026929398377, "grad_norm": 7.621979662437402, "learning_rate": 4.800685840189392e-06, "loss": 0.9624, "step": 4267 }, { "epoch": 0.3083425144941933, "grad_norm": 6.749974568636988, "learning_rate": 4.800571375936317e-06, "loss": 0.903, "step": 4268 }, { "epoch": 0.3084147596944028, "grad_norm": 6.419996743364786, "learning_rate": 4.800456880190207e-06, "loss": 0.8461, "step": 4269 }, { "epoch": 0.30848700489461234, "grad_norm": 7.559118097930276, "learning_rate": 4.800342352952627e-06, "loss": 1.0204, "step": 4270 }, { "epoch": 0.3085592500948218, "grad_norm": 6.958584474459126, "learning_rate": 4.800227794225147e-06, "loss": 0.9452, "step": 4271 }, { "epoch": 0.3086314952950313, "grad_norm": 5.9739711753576366, "learning_rate": 4.800113204009336e-06, "loss": 0.8881, "step": 4272 }, { "epoch": 0.30870374049524085, "grad_norm": 6.503191531206253, "learning_rate": 4.79999858230676e-06, "loss": 0.9188, "step": 4273 }, { "epoch": 0.30877598569545034, "grad_norm": 7.183779342270177, "learning_rate": 4.799883929118988e-06, "loss": 0.9652, "step": 4274 }, { "epoch": 0.3088482308956599, "grad_norm": 7.2537187707160715, "learning_rate": 4.7997692444475925e-06, "loss": 1.0026, "step": 4275 }, { "epoch": 0.30892047609586937, "grad_norm": 7.928650255437358, "learning_rate": 4.799654528294141e-06, "loss": 0.9672, "step": 4276 }, { "epoch": 0.3089927212960789, "grad_norm": 5.259989906852525, "learning_rate": 4.799539780660205e-06, "loss": 0.8666, "step": 4277 }, { "epoch": 0.3090649664962884, "grad_norm": 7.49081086846017, "learning_rate": 4.799425001547354e-06, "loss": 0.9368, "step": 4278 }, { "epoch": 0.30913721169649794, "grad_norm": 5.990939610935417, "learning_rate": 4.799310190957161e-06, "loss": 0.8665, "step": 4279 }, { "epoch": 0.3092094568967074, "grad_norm": 8.11766119888258, "learning_rate": 4.799195348891197e-06, "loss": 0.9205, "step": 4280 }, { "epoch": 0.3092817020969169, "grad_norm": 7.1683061483944295, "learning_rate": 4.799080475351032e-06, "loss": 0.9352, "step": 4281 }, { "epoch": 0.30935394729712645, "grad_norm": 6.221264814712149, "learning_rate": 4.798965570338243e-06, "loss": 0.8914, "step": 4282 }, { "epoch": 0.30942619249733594, "grad_norm": 8.497990538985295, "learning_rate": 4.798850633854399e-06, "loss": 0.8713, "step": 4283 }, { "epoch": 0.3094984376975455, "grad_norm": 7.662824524919452, "learning_rate": 4.798735665901074e-06, "loss": 0.8973, "step": 4284 }, { "epoch": 0.30957068289775497, "grad_norm": 6.392078635389732, "learning_rate": 4.798620666479844e-06, "loss": 0.826, "step": 4285 }, { "epoch": 0.3096429280979645, "grad_norm": 7.30740857365016, "learning_rate": 4.798505635592281e-06, "loss": 0.99, "step": 4286 }, { "epoch": 0.309715173298174, "grad_norm": 7.020701994832436, "learning_rate": 4.798390573239962e-06, "loss": 0.9204, "step": 4287 }, { "epoch": 0.30978741849838354, "grad_norm": 7.331076997372641, "learning_rate": 4.798275479424459e-06, "loss": 0.9785, "step": 4288 }, { "epoch": 0.309859663698593, "grad_norm": 8.78285110375567, "learning_rate": 4.798160354147349e-06, "loss": 0.9907, "step": 4289 }, { "epoch": 0.3099319088988025, "grad_norm": 7.614982904125718, "learning_rate": 4.79804519741021e-06, "loss": 0.946, "step": 4290 }, { "epoch": 0.31000415409901205, "grad_norm": 7.048783477362756, "learning_rate": 4.797930009214615e-06, "loss": 1.0755, "step": 4291 }, { "epoch": 0.31007639929922154, "grad_norm": 7.73511799027567, "learning_rate": 4.797814789562142e-06, "loss": 0.9444, "step": 4292 }, { "epoch": 0.3101486444994311, "grad_norm": 7.622621008793574, "learning_rate": 4.79769953845437e-06, "loss": 0.9546, "step": 4293 }, { "epoch": 0.31022088969964057, "grad_norm": 6.667442975939303, "learning_rate": 4.797584255892875e-06, "loss": 0.8289, "step": 4294 }, { "epoch": 0.3102931348998501, "grad_norm": 5.547412427199836, "learning_rate": 4.7974689418792356e-06, "loss": 0.8791, "step": 4295 }, { "epoch": 0.3103653801000596, "grad_norm": 6.424980062609661, "learning_rate": 4.79735359641503e-06, "loss": 0.9233, "step": 4296 }, { "epoch": 0.31043762530026914, "grad_norm": 9.008670868550537, "learning_rate": 4.797238219501837e-06, "loss": 0.9512, "step": 4297 }, { "epoch": 0.3105098705004786, "grad_norm": 6.135029891235854, "learning_rate": 4.797122811141237e-06, "loss": 0.9532, "step": 4298 }, { "epoch": 0.3105821157006881, "grad_norm": 6.927153974665259, "learning_rate": 4.797007371334809e-06, "loss": 0.9871, "step": 4299 }, { "epoch": 0.31065436090089765, "grad_norm": 7.2573389200645435, "learning_rate": 4.796891900084134e-06, "loss": 0.9623, "step": 4300 }, { "epoch": 0.31072660610110714, "grad_norm": 7.665922073473762, "learning_rate": 4.796776397390792e-06, "loss": 1.0481, "step": 4301 }, { "epoch": 0.3107988513013167, "grad_norm": 6.87019013431866, "learning_rate": 4.796660863256365e-06, "loss": 0.8725, "step": 4302 }, { "epoch": 0.31087109650152617, "grad_norm": 7.98201470471822, "learning_rate": 4.796545297682433e-06, "loss": 1.0227, "step": 4303 }, { "epoch": 0.3109433417017357, "grad_norm": 7.70088407717376, "learning_rate": 4.796429700670579e-06, "loss": 0.8846, "step": 4304 }, { "epoch": 0.3110155869019452, "grad_norm": 6.861585812571409, "learning_rate": 4.796314072222386e-06, "loss": 0.9203, "step": 4305 }, { "epoch": 0.31108783210215474, "grad_norm": 6.405505923346976, "learning_rate": 4.796198412339437e-06, "loss": 0.8288, "step": 4306 }, { "epoch": 0.3111600773023642, "grad_norm": 6.267724196840988, "learning_rate": 4.796082721023314e-06, "loss": 0.9118, "step": 4307 }, { "epoch": 0.3112323225025737, "grad_norm": 6.495679446443402, "learning_rate": 4.795966998275602e-06, "loss": 0.9283, "step": 4308 }, { "epoch": 0.31130456770278325, "grad_norm": 6.812522118208791, "learning_rate": 4.795851244097883e-06, "loss": 0.8473, "step": 4309 }, { "epoch": 0.31137681290299274, "grad_norm": 5.90436180651941, "learning_rate": 4.795735458491745e-06, "loss": 0.8978, "step": 4310 }, { "epoch": 0.3114490581032023, "grad_norm": 6.222308650249105, "learning_rate": 4.79561964145877e-06, "loss": 0.8457, "step": 4311 }, { "epoch": 0.31152130330341177, "grad_norm": 6.208900203896287, "learning_rate": 4.795503793000544e-06, "loss": 0.8992, "step": 4312 }, { "epoch": 0.3115935485036213, "grad_norm": 7.205400550186162, "learning_rate": 4.7953879131186544e-06, "loss": 0.9245, "step": 4313 }, { "epoch": 0.3116657937038308, "grad_norm": 6.415976086731295, "learning_rate": 4.795272001814686e-06, "loss": 0.9259, "step": 4314 }, { "epoch": 0.31173803890404034, "grad_norm": 6.552285228179996, "learning_rate": 4.795156059090225e-06, "loss": 0.9431, "step": 4315 }, { "epoch": 0.3118102841042498, "grad_norm": 6.230614773465157, "learning_rate": 4.795040084946862e-06, "loss": 0.8699, "step": 4316 }, { "epoch": 0.3118825293044593, "grad_norm": 8.992926466975568, "learning_rate": 4.79492407938618e-06, "loss": 1.0185, "step": 4317 }, { "epoch": 0.31195477450466885, "grad_norm": 7.039822248468996, "learning_rate": 4.794808042409771e-06, "loss": 0.9162, "step": 4318 }, { "epoch": 0.31202701970487834, "grad_norm": 5.758169590139612, "learning_rate": 4.794691974019221e-06, "loss": 0.8987, "step": 4319 }, { "epoch": 0.3120992649050879, "grad_norm": 6.823800050657087, "learning_rate": 4.79457587421612e-06, "loss": 0.9665, "step": 4320 }, { "epoch": 0.31217151010529737, "grad_norm": 7.722757758484998, "learning_rate": 4.794459743002056e-06, "loss": 0.8884, "step": 4321 }, { "epoch": 0.3122437553055069, "grad_norm": 5.929783032518487, "learning_rate": 4.7943435803786204e-06, "loss": 0.8389, "step": 4322 }, { "epoch": 0.3123160005057164, "grad_norm": 6.7958672138104745, "learning_rate": 4.794227386347402e-06, "loss": 0.9172, "step": 4323 }, { "epoch": 0.31238824570592594, "grad_norm": 6.243369590924195, "learning_rate": 4.794111160909993e-06, "loss": 0.9618, "step": 4324 }, { "epoch": 0.3124604909061354, "grad_norm": 6.193480404742214, "learning_rate": 4.793994904067982e-06, "loss": 0.8618, "step": 4325 }, { "epoch": 0.3125327361063449, "grad_norm": 6.783285512294805, "learning_rate": 4.793878615822964e-06, "loss": 0.9588, "step": 4326 }, { "epoch": 0.31260498130655445, "grad_norm": 7.523803068253666, "learning_rate": 4.793762296176527e-06, "loss": 0.828, "step": 4327 }, { "epoch": 0.31267722650676394, "grad_norm": 5.920725366125673, "learning_rate": 4.7936459451302655e-06, "loss": 0.9042, "step": 4328 }, { "epoch": 0.3127494717069735, "grad_norm": 6.770580423962465, "learning_rate": 4.7935295626857725e-06, "loss": 0.985, "step": 4329 }, { "epoch": 0.31282171690718297, "grad_norm": 6.453046673537207, "learning_rate": 4.79341314884464e-06, "loss": 0.8557, "step": 4330 }, { "epoch": 0.3128939621073925, "grad_norm": 5.863304346535955, "learning_rate": 4.793296703608463e-06, "loss": 0.8685, "step": 4331 }, { "epoch": 0.312966207307602, "grad_norm": 6.522762642862086, "learning_rate": 4.793180226978834e-06, "loss": 0.8483, "step": 4332 }, { "epoch": 0.31303845250781154, "grad_norm": 6.422761804835997, "learning_rate": 4.793063718957348e-06, "loss": 0.9671, "step": 4333 }, { "epoch": 0.313110697708021, "grad_norm": 6.072903545315299, "learning_rate": 4.7929471795456015e-06, "loss": 1.005, "step": 4334 }, { "epoch": 0.3131829429082305, "grad_norm": 6.419485422588037, "learning_rate": 4.792830608745187e-06, "loss": 0.8755, "step": 4335 }, { "epoch": 0.31325518810844005, "grad_norm": 7.0060210899233795, "learning_rate": 4.792714006557703e-06, "loss": 0.8056, "step": 4336 }, { "epoch": 0.31332743330864954, "grad_norm": 7.737462392786485, "learning_rate": 4.792597372984743e-06, "loss": 0.922, "step": 4337 }, { "epoch": 0.3133996785088591, "grad_norm": 8.24826493947174, "learning_rate": 4.792480708027906e-06, "loss": 0.9316, "step": 4338 }, { "epoch": 0.31347192370906857, "grad_norm": 7.204417346529733, "learning_rate": 4.792364011688788e-06, "loss": 0.9202, "step": 4339 }, { "epoch": 0.3135441689092781, "grad_norm": 7.306736166175676, "learning_rate": 4.792247283968986e-06, "loss": 0.9152, "step": 4340 }, { "epoch": 0.3136164141094876, "grad_norm": 7.149517773992507, "learning_rate": 4.7921305248701e-06, "loss": 0.9663, "step": 4341 }, { "epoch": 0.31368865930969714, "grad_norm": 7.6137071375876495, "learning_rate": 4.7920137343937256e-06, "loss": 1.0001, "step": 4342 }, { "epoch": 0.3137609045099066, "grad_norm": 8.099795642676733, "learning_rate": 4.791896912541463e-06, "loss": 0.9373, "step": 4343 }, { "epoch": 0.3138331497101161, "grad_norm": 7.223991835850425, "learning_rate": 4.791780059314911e-06, "loss": 0.9701, "step": 4344 }, { "epoch": 0.31390539491032565, "grad_norm": 8.547524380466427, "learning_rate": 4.79166317471567e-06, "loss": 0.855, "step": 4345 }, { "epoch": 0.31397764011053514, "grad_norm": 8.562867985078528, "learning_rate": 4.791546258745339e-06, "loss": 0.9034, "step": 4346 }, { "epoch": 0.3140498853107447, "grad_norm": 6.618685322014161, "learning_rate": 4.791429311405518e-06, "loss": 0.8505, "step": 4347 }, { "epoch": 0.31412213051095417, "grad_norm": 7.263732269848409, "learning_rate": 4.791312332697811e-06, "loss": 0.9553, "step": 4348 }, { "epoch": 0.3141943757111637, "grad_norm": 6.733855260632097, "learning_rate": 4.791195322623816e-06, "loss": 0.919, "step": 4349 }, { "epoch": 0.3142666209113732, "grad_norm": 6.060143081436318, "learning_rate": 4.791078281185137e-06, "loss": 0.9405, "step": 4350 }, { "epoch": 0.31433886611158274, "grad_norm": 7.304225837684006, "learning_rate": 4.790961208383374e-06, "loss": 0.9026, "step": 4351 }, { "epoch": 0.3144111113117922, "grad_norm": 7.845773139624116, "learning_rate": 4.790844104220132e-06, "loss": 0.9337, "step": 4352 }, { "epoch": 0.3144833565120017, "grad_norm": 6.242038386047512, "learning_rate": 4.7907269686970125e-06, "loss": 0.9252, "step": 4353 }, { "epoch": 0.31455560171221125, "grad_norm": 8.039887173748246, "learning_rate": 4.79060980181562e-06, "loss": 0.8475, "step": 4354 }, { "epoch": 0.31462784691242074, "grad_norm": 6.256695022532301, "learning_rate": 4.790492603577557e-06, "loss": 0.8585, "step": 4355 }, { "epoch": 0.3147000921126303, "grad_norm": 6.346978930209816, "learning_rate": 4.790375373984429e-06, "loss": 0.9449, "step": 4356 }, { "epoch": 0.31477233731283977, "grad_norm": 6.1641011786848905, "learning_rate": 4.79025811303784e-06, "loss": 0.979, "step": 4357 }, { "epoch": 0.3148445825130493, "grad_norm": 7.970064660627295, "learning_rate": 4.790140820739397e-06, "loss": 0.8178, "step": 4358 }, { "epoch": 0.3149168277132588, "grad_norm": 7.1398877320425616, "learning_rate": 4.790023497090702e-06, "loss": 0.8821, "step": 4359 }, { "epoch": 0.31498907291346834, "grad_norm": 6.491272275384374, "learning_rate": 4.789906142093366e-06, "loss": 0.9122, "step": 4360 }, { "epoch": 0.3150613181136778, "grad_norm": 6.5561117414037025, "learning_rate": 4.789788755748991e-06, "loss": 0.9251, "step": 4361 }, { "epoch": 0.3151335633138873, "grad_norm": 7.508802207789411, "learning_rate": 4.7896713380591865e-06, "loss": 1.0746, "step": 4362 }, { "epoch": 0.31520580851409685, "grad_norm": 6.379116449671287, "learning_rate": 4.78955388902556e-06, "loss": 0.8595, "step": 4363 }, { "epoch": 0.31527805371430634, "grad_norm": 6.809683401223756, "learning_rate": 4.789436408649718e-06, "loss": 1.014, "step": 4364 }, { "epoch": 0.3153502989145159, "grad_norm": 6.090558434049723, "learning_rate": 4.7893188969332685e-06, "loss": 0.9111, "step": 4365 }, { "epoch": 0.31542254411472537, "grad_norm": 7.374583087063825, "learning_rate": 4.789201353877822e-06, "loss": 0.9684, "step": 4366 }, { "epoch": 0.3154947893149349, "grad_norm": 6.95887336995735, "learning_rate": 4.789083779484985e-06, "loss": 0.9622, "step": 4367 }, { "epoch": 0.3155670345151444, "grad_norm": 7.4654002319071004, "learning_rate": 4.788966173756369e-06, "loss": 0.8949, "step": 4368 }, { "epoch": 0.31563927971535394, "grad_norm": 6.371729591585102, "learning_rate": 4.788848536693584e-06, "loss": 0.9534, "step": 4369 }, { "epoch": 0.3157115249155634, "grad_norm": 7.078911541822186, "learning_rate": 4.78873086829824e-06, "loss": 0.8916, "step": 4370 }, { "epoch": 0.3157837701157729, "grad_norm": 6.917929874873126, "learning_rate": 4.788613168571946e-06, "loss": 0.8797, "step": 4371 }, { "epoch": 0.31585601531598245, "grad_norm": 6.014903632174476, "learning_rate": 4.788495437516315e-06, "loss": 0.8881, "step": 4372 }, { "epoch": 0.31592826051619194, "grad_norm": 6.8737642044500555, "learning_rate": 4.788377675132959e-06, "loss": 0.9434, "step": 4373 }, { "epoch": 0.3160005057164015, "grad_norm": 7.04712103099127, "learning_rate": 4.788259881423489e-06, "loss": 0.9124, "step": 4374 }, { "epoch": 0.31607275091661097, "grad_norm": 7.036591666704524, "learning_rate": 4.788142056389518e-06, "loss": 0.8038, "step": 4375 }, { "epoch": 0.3161449961168205, "grad_norm": 7.70400964945885, "learning_rate": 4.788024200032659e-06, "loss": 0.8363, "step": 4376 }, { "epoch": 0.31621724131703, "grad_norm": 6.1433906830408125, "learning_rate": 4.787906312354525e-06, "loss": 0.9214, "step": 4377 }, { "epoch": 0.31628948651723954, "grad_norm": 7.298322186940281, "learning_rate": 4.787788393356731e-06, "loss": 0.8739, "step": 4378 }, { "epoch": 0.316361731717449, "grad_norm": 8.240524861924754, "learning_rate": 4.787670443040889e-06, "loss": 0.9824, "step": 4379 }, { "epoch": 0.3164339769176585, "grad_norm": 6.1379749122124325, "learning_rate": 4.787552461408616e-06, "loss": 0.8883, "step": 4380 }, { "epoch": 0.31650622211786805, "grad_norm": 6.583862701894123, "learning_rate": 4.787434448461525e-06, "loss": 0.8615, "step": 4381 }, { "epoch": 0.31657846731807754, "grad_norm": 8.276373582685583, "learning_rate": 4.7873164042012335e-06, "loss": 0.9503, "step": 4382 }, { "epoch": 0.3166507125182871, "grad_norm": 6.6752955224843795, "learning_rate": 4.787198328629356e-06, "loss": 0.9234, "step": 4383 }, { "epoch": 0.31672295771849657, "grad_norm": 5.899488989473077, "learning_rate": 4.787080221747509e-06, "loss": 0.897, "step": 4384 }, { "epoch": 0.3167952029187061, "grad_norm": 6.7292254025127045, "learning_rate": 4.786962083557309e-06, "loss": 0.8015, "step": 4385 }, { "epoch": 0.3168674481189156, "grad_norm": 7.262796355879905, "learning_rate": 4.786843914060375e-06, "loss": 0.8814, "step": 4386 }, { "epoch": 0.31693969331912514, "grad_norm": 7.984946631705289, "learning_rate": 4.786725713258324e-06, "loss": 0.9795, "step": 4387 }, { "epoch": 0.3170119385193346, "grad_norm": 6.742084241586892, "learning_rate": 4.786607481152772e-06, "loss": 0.9014, "step": 4388 }, { "epoch": 0.3170841837195441, "grad_norm": 6.677238506194272, "learning_rate": 4.786489217745339e-06, "loss": 0.9821, "step": 4389 }, { "epoch": 0.31715642891975365, "grad_norm": 5.932083068163147, "learning_rate": 4.786370923037644e-06, "loss": 0.8552, "step": 4390 }, { "epoch": 0.31722867411996314, "grad_norm": 6.886710164221109, "learning_rate": 4.786252597031307e-06, "loss": 0.8668, "step": 4391 }, { "epoch": 0.3173009193201727, "grad_norm": 6.3634302204976345, "learning_rate": 4.786134239727947e-06, "loss": 0.8804, "step": 4392 }, { "epoch": 0.31737316452038217, "grad_norm": 7.967202189600843, "learning_rate": 4.786015851129184e-06, "loss": 0.8521, "step": 4393 }, { "epoch": 0.3174454097205917, "grad_norm": 6.653180341369469, "learning_rate": 4.785897431236639e-06, "loss": 0.9712, "step": 4394 }, { "epoch": 0.3175176549208012, "grad_norm": 6.765921006984482, "learning_rate": 4.785778980051934e-06, "loss": 1.0132, "step": 4395 }, { "epoch": 0.31758990012101074, "grad_norm": 5.715585635692597, "learning_rate": 4.785660497576689e-06, "loss": 0.8951, "step": 4396 }, { "epoch": 0.3176621453212202, "grad_norm": 6.735979566130826, "learning_rate": 4.785541983812525e-06, "loss": 0.9498, "step": 4397 }, { "epoch": 0.3177343905214297, "grad_norm": 5.572635631664857, "learning_rate": 4.785423438761067e-06, "loss": 0.8084, "step": 4398 }, { "epoch": 0.31780663572163925, "grad_norm": 8.661107211968169, "learning_rate": 4.785304862423937e-06, "loss": 0.9183, "step": 4399 }, { "epoch": 0.31787888092184874, "grad_norm": 6.50812697366318, "learning_rate": 4.7851862548027575e-06, "loss": 0.8426, "step": 4400 }, { "epoch": 0.3179511261220583, "grad_norm": 7.5381511552472515, "learning_rate": 4.785067615899153e-06, "loss": 0.9728, "step": 4401 }, { "epoch": 0.31802337132226777, "grad_norm": 6.672711143540304, "learning_rate": 4.784948945714747e-06, "loss": 0.9388, "step": 4402 }, { "epoch": 0.3180956165224773, "grad_norm": 7.552450264972475, "learning_rate": 4.7848302442511626e-06, "loss": 0.8919, "step": 4403 }, { "epoch": 0.3181678617226868, "grad_norm": 8.239154970115687, "learning_rate": 4.784711511510028e-06, "loss": 0.9715, "step": 4404 }, { "epoch": 0.31824010692289634, "grad_norm": 7.455335501189919, "learning_rate": 4.784592747492966e-06, "loss": 1.0081, "step": 4405 }, { "epoch": 0.3183123521231058, "grad_norm": 7.238230656191233, "learning_rate": 4.784473952201604e-06, "loss": 0.9194, "step": 4406 }, { "epoch": 0.3183845973233153, "grad_norm": 7.157216102653873, "learning_rate": 4.784355125637567e-06, "loss": 0.9227, "step": 4407 }, { "epoch": 0.31845684252352485, "grad_norm": 6.080860454005172, "learning_rate": 4.784236267802481e-06, "loss": 0.9383, "step": 4408 }, { "epoch": 0.31852908772373434, "grad_norm": 9.564204444324265, "learning_rate": 4.7841173786979744e-06, "loss": 0.9048, "step": 4409 }, { "epoch": 0.3186013329239439, "grad_norm": 6.314733129134398, "learning_rate": 4.783998458325675e-06, "loss": 0.9295, "step": 4410 }, { "epoch": 0.31867357812415337, "grad_norm": 6.735040830900469, "learning_rate": 4.78387950668721e-06, "loss": 0.993, "step": 4411 }, { "epoch": 0.3187458233243629, "grad_norm": 9.477961626415084, "learning_rate": 4.7837605237842076e-06, "loss": 1.0559, "step": 4412 }, { "epoch": 0.3188180685245724, "grad_norm": 7.869922757187745, "learning_rate": 4.783641509618297e-06, "loss": 0.9046, "step": 4413 }, { "epoch": 0.31889031372478194, "grad_norm": 6.866428927901594, "learning_rate": 4.783522464191107e-06, "loss": 0.9316, "step": 4414 }, { "epoch": 0.3189625589249914, "grad_norm": 7.139047794907158, "learning_rate": 4.783403387504268e-06, "loss": 0.9148, "step": 4415 }, { "epoch": 0.3190348041252009, "grad_norm": 7.344852186238693, "learning_rate": 4.783284279559409e-06, "loss": 0.8412, "step": 4416 }, { "epoch": 0.31910704932541045, "grad_norm": 6.022296485740154, "learning_rate": 4.7831651403581615e-06, "loss": 1.02, "step": 4417 }, { "epoch": 0.31917929452561994, "grad_norm": 6.26193714772362, "learning_rate": 4.783045969902156e-06, "loss": 0.8657, "step": 4418 }, { "epoch": 0.3192515397258295, "grad_norm": 7.385179638701989, "learning_rate": 4.782926768193024e-06, "loss": 0.8774, "step": 4419 }, { "epoch": 0.31932378492603897, "grad_norm": 6.432732159027387, "learning_rate": 4.782807535232396e-06, "loss": 0.904, "step": 4420 }, { "epoch": 0.3193960301262485, "grad_norm": 6.25816483285598, "learning_rate": 4.782688271021907e-06, "loss": 0.9265, "step": 4421 }, { "epoch": 0.319468275326458, "grad_norm": 7.090358377069752, "learning_rate": 4.782568975563187e-06, "loss": 0.8973, "step": 4422 }, { "epoch": 0.3195405205266675, "grad_norm": 7.486716905506269, "learning_rate": 4.78244964885787e-06, "loss": 1.0153, "step": 4423 }, { "epoch": 0.319612765726877, "grad_norm": 6.217996676262832, "learning_rate": 4.782330290907589e-06, "loss": 0.9349, "step": 4424 }, { "epoch": 0.3196850109270865, "grad_norm": 6.046021078372697, "learning_rate": 4.7822109017139795e-06, "loss": 0.8841, "step": 4425 }, { "epoch": 0.31975725612729605, "grad_norm": 6.529910009444641, "learning_rate": 4.782091481278674e-06, "loss": 0.8892, "step": 4426 }, { "epoch": 0.31982950132750554, "grad_norm": 7.327995380483445, "learning_rate": 4.781972029603309e-06, "loss": 0.9197, "step": 4427 }, { "epoch": 0.3199017465277151, "grad_norm": 5.623955014967311, "learning_rate": 4.781852546689518e-06, "loss": 0.9416, "step": 4428 }, { "epoch": 0.31997399172792457, "grad_norm": 7.078633473302967, "learning_rate": 4.781733032538938e-06, "loss": 1.022, "step": 4429 }, { "epoch": 0.3200462369281341, "grad_norm": 5.682478480031877, "learning_rate": 4.781613487153204e-06, "loss": 0.77, "step": 4430 }, { "epoch": 0.3201184821283436, "grad_norm": 8.947412502546346, "learning_rate": 4.781493910533953e-06, "loss": 0.9351, "step": 4431 }, { "epoch": 0.3201907273285531, "grad_norm": 7.0119184120249525, "learning_rate": 4.781374302682822e-06, "loss": 0.9445, "step": 4432 }, { "epoch": 0.3202629725287626, "grad_norm": 7.412847614339443, "learning_rate": 4.7812546636014474e-06, "loss": 0.8986, "step": 4433 }, { "epoch": 0.3203352177289721, "grad_norm": 6.406645674232813, "learning_rate": 4.781134993291468e-06, "loss": 0.8933, "step": 4434 }, { "epoch": 0.32040746292918165, "grad_norm": 6.672965539075813, "learning_rate": 4.781015291754523e-06, "loss": 0.8576, "step": 4435 }, { "epoch": 0.32047970812939114, "grad_norm": 8.43512833783124, "learning_rate": 4.780895558992248e-06, "loss": 0.95, "step": 4436 }, { "epoch": 0.3205519533296007, "grad_norm": 6.559566459751176, "learning_rate": 4.7807757950062846e-06, "loss": 0.8535, "step": 4437 }, { "epoch": 0.32062419852981017, "grad_norm": 7.653453580906327, "learning_rate": 4.780655999798272e-06, "loss": 0.9002, "step": 4438 }, { "epoch": 0.3206964437300197, "grad_norm": 6.3945175291926715, "learning_rate": 4.7805361733698485e-06, "loss": 0.9394, "step": 4439 }, { "epoch": 0.3207686889302292, "grad_norm": 5.8840661403898595, "learning_rate": 4.7804163157226555e-06, "loss": 0.8854, "step": 4440 }, { "epoch": 0.3208409341304387, "grad_norm": 8.057177302200913, "learning_rate": 4.780296426858335e-06, "loss": 0.8932, "step": 4441 }, { "epoch": 0.3209131793306482, "grad_norm": 7.594408548315693, "learning_rate": 4.780176506778526e-06, "loss": 0.9099, "step": 4442 }, { "epoch": 0.3209854245308577, "grad_norm": 8.094199149729903, "learning_rate": 4.780056555484871e-06, "loss": 0.9489, "step": 4443 }, { "epoch": 0.32105766973106725, "grad_norm": 6.350614394854932, "learning_rate": 4.779936572979012e-06, "loss": 0.8506, "step": 4444 }, { "epoch": 0.32112991493127674, "grad_norm": 5.644639211499963, "learning_rate": 4.779816559262593e-06, "loss": 0.8372, "step": 4445 }, { "epoch": 0.3212021601314863, "grad_norm": 5.5544991548479015, "learning_rate": 4.779696514337255e-06, "loss": 0.983, "step": 4446 }, { "epoch": 0.32127440533169577, "grad_norm": 6.245640874861774, "learning_rate": 4.779576438204641e-06, "loss": 0.9362, "step": 4447 }, { "epoch": 0.3213466505319053, "grad_norm": 6.4046810554942875, "learning_rate": 4.779456330866396e-06, "loss": 0.8656, "step": 4448 }, { "epoch": 0.3214188957321148, "grad_norm": 7.692929715169576, "learning_rate": 4.779336192324163e-06, "loss": 0.9145, "step": 4449 }, { "epoch": 0.3214911409323243, "grad_norm": 7.003754834880314, "learning_rate": 4.779216022579587e-06, "loss": 0.8829, "step": 4450 }, { "epoch": 0.3215633861325338, "grad_norm": 6.8978665246257655, "learning_rate": 4.779095821634314e-06, "loss": 0.9175, "step": 4451 }, { "epoch": 0.3216356313327433, "grad_norm": 9.00507593066765, "learning_rate": 4.778975589489989e-06, "loss": 1.0036, "step": 4452 }, { "epoch": 0.32170787653295285, "grad_norm": 5.629203243216786, "learning_rate": 4.778855326148259e-06, "loss": 0.8968, "step": 4453 }, { "epoch": 0.32178012173316234, "grad_norm": 6.9777221399398695, "learning_rate": 4.778735031610767e-06, "loss": 0.9308, "step": 4454 }, { "epoch": 0.3218523669333719, "grad_norm": 6.2030542787608995, "learning_rate": 4.778614705879162e-06, "loss": 0.8868, "step": 4455 }, { "epoch": 0.32192461213358137, "grad_norm": 5.833963777897186, "learning_rate": 4.778494348955092e-06, "loss": 0.8668, "step": 4456 }, { "epoch": 0.3219968573337909, "grad_norm": 7.066678321876751, "learning_rate": 4.778373960840203e-06, "loss": 0.9945, "step": 4457 }, { "epoch": 0.3220691025340004, "grad_norm": 7.94976915708116, "learning_rate": 4.778253541536143e-06, "loss": 0.9241, "step": 4458 }, { "epoch": 0.3221413477342099, "grad_norm": 5.859873025709807, "learning_rate": 4.7781330910445615e-06, "loss": 0.9077, "step": 4459 }, { "epoch": 0.3222135929344194, "grad_norm": 6.985346852389891, "learning_rate": 4.778012609367106e-06, "loss": 0.8862, "step": 4460 }, { "epoch": 0.3222858381346289, "grad_norm": 7.259220705268764, "learning_rate": 4.777892096505427e-06, "loss": 1.0466, "step": 4461 }, { "epoch": 0.32235808333483845, "grad_norm": 6.437073924954725, "learning_rate": 4.777771552461174e-06, "loss": 0.9445, "step": 4462 }, { "epoch": 0.32243032853504794, "grad_norm": 7.162126685195198, "learning_rate": 4.777650977235997e-06, "loss": 0.9816, "step": 4463 }, { "epoch": 0.3225025737352575, "grad_norm": 6.702197315249609, "learning_rate": 4.777530370831547e-06, "loss": 0.8693, "step": 4464 }, { "epoch": 0.32257481893546697, "grad_norm": 6.025637213856125, "learning_rate": 4.777409733249475e-06, "loss": 0.8574, "step": 4465 }, { "epoch": 0.3226470641356765, "grad_norm": 6.2627266056689335, "learning_rate": 4.777289064491431e-06, "loss": 0.9002, "step": 4466 }, { "epoch": 0.322719309335886, "grad_norm": 7.539317768103906, "learning_rate": 4.777168364559069e-06, "loss": 0.9931, "step": 4467 }, { "epoch": 0.3227915545360955, "grad_norm": 7.327980804651432, "learning_rate": 4.777047633454038e-06, "loss": 0.9416, "step": 4468 }, { "epoch": 0.322863799736305, "grad_norm": 8.72352780926185, "learning_rate": 4.776926871177995e-06, "loss": 0.9503, "step": 4469 }, { "epoch": 0.3229360449365145, "grad_norm": 7.183165993330806, "learning_rate": 4.776806077732591e-06, "loss": 0.8904, "step": 4470 }, { "epoch": 0.32300829013672405, "grad_norm": 8.320208882864192, "learning_rate": 4.776685253119478e-06, "loss": 0.9694, "step": 4471 }, { "epoch": 0.32308053533693354, "grad_norm": 6.306199054493331, "learning_rate": 4.776564397340313e-06, "loss": 0.8731, "step": 4472 }, { "epoch": 0.3231527805371431, "grad_norm": 5.3968074120650185, "learning_rate": 4.776443510396749e-06, "loss": 0.8529, "step": 4473 }, { "epoch": 0.32322502573735257, "grad_norm": 6.966870520619299, "learning_rate": 4.776322592290441e-06, "loss": 0.8997, "step": 4474 }, { "epoch": 0.3232972709375621, "grad_norm": 7.795978135259823, "learning_rate": 4.776201643023044e-06, "loss": 0.9264, "step": 4475 }, { "epoch": 0.3233695161377716, "grad_norm": 8.381435825185278, "learning_rate": 4.776080662596213e-06, "loss": 0.8795, "step": 4476 }, { "epoch": 0.3234417613379811, "grad_norm": 6.909541799808492, "learning_rate": 4.775959651011606e-06, "loss": 0.856, "step": 4477 }, { "epoch": 0.3235140065381906, "grad_norm": 6.067451264225367, "learning_rate": 4.775838608270878e-06, "loss": 0.9753, "step": 4478 }, { "epoch": 0.3235862517384001, "grad_norm": 6.930652159903299, "learning_rate": 4.775717534375686e-06, "loss": 0.9508, "step": 4479 }, { "epoch": 0.32365849693860965, "grad_norm": 6.582688151103448, "learning_rate": 4.775596429327689e-06, "loss": 0.9438, "step": 4480 }, { "epoch": 0.32373074213881914, "grad_norm": 5.376350410621779, "learning_rate": 4.775475293128544e-06, "loss": 0.9571, "step": 4481 }, { "epoch": 0.3238029873390287, "grad_norm": 8.429596993258338, "learning_rate": 4.7753541257799085e-06, "loss": 0.9121, "step": 4482 }, { "epoch": 0.32387523253923817, "grad_norm": 6.692273584240139, "learning_rate": 4.775232927283442e-06, "loss": 0.8995, "step": 4483 }, { "epoch": 0.3239474777394477, "grad_norm": 6.448731633348924, "learning_rate": 4.775111697640803e-06, "loss": 0.9495, "step": 4484 }, { "epoch": 0.3240197229396572, "grad_norm": 8.307280084647752, "learning_rate": 4.774990436853651e-06, "loss": 0.9413, "step": 4485 }, { "epoch": 0.3240919681398667, "grad_norm": 7.292416372359135, "learning_rate": 4.774869144923646e-06, "loss": 0.8757, "step": 4486 }, { "epoch": 0.3241642133400762, "grad_norm": 8.375286495945089, "learning_rate": 4.7747478218524505e-06, "loss": 0.977, "step": 4487 }, { "epoch": 0.3242364585402857, "grad_norm": 6.412759590471605, "learning_rate": 4.774626467641722e-06, "loss": 0.9213, "step": 4488 }, { "epoch": 0.32430870374049525, "grad_norm": 6.512406466758755, "learning_rate": 4.774505082293124e-06, "loss": 0.904, "step": 4489 }, { "epoch": 0.32438094894070474, "grad_norm": 6.413459108246836, "learning_rate": 4.774383665808317e-06, "loss": 0.9579, "step": 4490 }, { "epoch": 0.3244531941409143, "grad_norm": 6.203187111031929, "learning_rate": 4.774262218188964e-06, "loss": 0.9124, "step": 4491 }, { "epoch": 0.32452543934112377, "grad_norm": 6.085360185029558, "learning_rate": 4.774140739436727e-06, "loss": 0.848, "step": 4492 }, { "epoch": 0.3245976845413333, "grad_norm": 6.965707431201072, "learning_rate": 4.774019229553268e-06, "loss": 0.9196, "step": 4493 }, { "epoch": 0.3246699297415428, "grad_norm": 7.074802182940126, "learning_rate": 4.773897688540253e-06, "loss": 0.902, "step": 4494 }, { "epoch": 0.3247421749417523, "grad_norm": 5.584103583350674, "learning_rate": 4.773776116399343e-06, "loss": 0.8599, "step": 4495 }, { "epoch": 0.3248144201419618, "grad_norm": 32.23463629923176, "learning_rate": 4.773654513132204e-06, "loss": 0.9569, "step": 4496 }, { "epoch": 0.3248866653421713, "grad_norm": 6.267827966643117, "learning_rate": 4.773532878740501e-06, "loss": 0.9222, "step": 4497 }, { "epoch": 0.32495891054238085, "grad_norm": 6.070513276421441, "learning_rate": 4.773411213225897e-06, "loss": 0.972, "step": 4498 }, { "epoch": 0.32503115574259034, "grad_norm": 8.01980142932606, "learning_rate": 4.773289516590059e-06, "loss": 0.9311, "step": 4499 }, { "epoch": 0.3251034009427999, "grad_norm": 6.577840549725071, "learning_rate": 4.773167788834653e-06, "loss": 0.9293, "step": 4500 }, { "epoch": 0.32517564614300937, "grad_norm": 7.457140978190202, "learning_rate": 4.773046029961343e-06, "loss": 0.9481, "step": 4501 }, { "epoch": 0.3252478913432189, "grad_norm": 6.55479312852539, "learning_rate": 4.7729242399718e-06, "loss": 0.9891, "step": 4502 }, { "epoch": 0.3253201365434284, "grad_norm": 6.125988316110397, "learning_rate": 4.772802418867688e-06, "loss": 0.8283, "step": 4503 }, { "epoch": 0.3253923817436379, "grad_norm": 7.981916015366686, "learning_rate": 4.7726805666506755e-06, "loss": 0.9402, "step": 4504 }, { "epoch": 0.3254646269438474, "grad_norm": 8.516978193086084, "learning_rate": 4.7725586833224316e-06, "loss": 0.9394, "step": 4505 }, { "epoch": 0.3255368721440569, "grad_norm": 9.60040032823644, "learning_rate": 4.772436768884623e-06, "loss": 0.8856, "step": 4506 }, { "epoch": 0.32560911734426645, "grad_norm": 6.247839897712742, "learning_rate": 4.77231482333892e-06, "loss": 0.9325, "step": 4507 }, { "epoch": 0.32568136254447594, "grad_norm": 6.45682388221919, "learning_rate": 4.772192846686991e-06, "loss": 0.9791, "step": 4508 }, { "epoch": 0.3257536077446855, "grad_norm": 8.183157897850458, "learning_rate": 4.772070838930506e-06, "loss": 0.9308, "step": 4509 }, { "epoch": 0.32582585294489497, "grad_norm": 8.478696717931829, "learning_rate": 4.7719488000711355e-06, "loss": 0.9333, "step": 4510 }, { "epoch": 0.3258980981451045, "grad_norm": 8.832953342925615, "learning_rate": 4.7718267301105505e-06, "loss": 0.8417, "step": 4511 }, { "epoch": 0.325970343345314, "grad_norm": 6.55535761575667, "learning_rate": 4.771704629050421e-06, "loss": 1.007, "step": 4512 }, { "epoch": 0.3260425885455235, "grad_norm": 8.182628317904024, "learning_rate": 4.77158249689242e-06, "loss": 0.8943, "step": 4513 }, { "epoch": 0.326114833745733, "grad_norm": 7.148988729600644, "learning_rate": 4.771460333638217e-06, "loss": 0.9332, "step": 4514 }, { "epoch": 0.3261870789459425, "grad_norm": 6.875526269363887, "learning_rate": 4.771338139289486e-06, "loss": 0.9713, "step": 4515 }, { "epoch": 0.32625932414615205, "grad_norm": 6.710204185414996, "learning_rate": 4.7712159138479e-06, "loss": 0.942, "step": 4516 }, { "epoch": 0.32633156934636154, "grad_norm": 8.691327685740422, "learning_rate": 4.771093657315131e-06, "loss": 0.8681, "step": 4517 }, { "epoch": 0.3264038145465711, "grad_norm": 7.96503460605381, "learning_rate": 4.770971369692854e-06, "loss": 0.8776, "step": 4518 }, { "epoch": 0.32647605974678057, "grad_norm": 6.7203662436455405, "learning_rate": 4.770849050982741e-06, "loss": 0.9634, "step": 4519 }, { "epoch": 0.3265483049469901, "grad_norm": 5.691384466649287, "learning_rate": 4.7707267011864685e-06, "loss": 0.8189, "step": 4520 }, { "epoch": 0.3266205501471996, "grad_norm": 6.220540311350511, "learning_rate": 4.770604320305711e-06, "loss": 1.0182, "step": 4521 }, { "epoch": 0.3266927953474091, "grad_norm": 6.9153994889342965, "learning_rate": 4.770481908342142e-06, "loss": 0.8471, "step": 4522 }, { "epoch": 0.3267650405476186, "grad_norm": 6.916163721144532, "learning_rate": 4.770359465297441e-06, "loss": 0.9513, "step": 4523 }, { "epoch": 0.3268372857478281, "grad_norm": 8.631936642503542, "learning_rate": 4.77023699117328e-06, "loss": 0.8723, "step": 4524 }, { "epoch": 0.32690953094803765, "grad_norm": 7.328905011209388, "learning_rate": 4.770114485971338e-06, "loss": 0.9335, "step": 4525 }, { "epoch": 0.32698177614824714, "grad_norm": 6.283032216648998, "learning_rate": 4.76999194969329e-06, "loss": 0.8017, "step": 4526 }, { "epoch": 0.3270540213484567, "grad_norm": 8.702844913160266, "learning_rate": 4.769869382340816e-06, "loss": 0.9257, "step": 4527 }, { "epoch": 0.32712626654866617, "grad_norm": 7.126904149513143, "learning_rate": 4.769746783915592e-06, "loss": 0.8975, "step": 4528 }, { "epoch": 0.3271985117488757, "grad_norm": 6.943244274274534, "learning_rate": 4.769624154419298e-06, "loss": 0.9148, "step": 4529 }, { "epoch": 0.3272707569490852, "grad_norm": 5.95917417911011, "learning_rate": 4.76950149385361e-06, "loss": 0.8842, "step": 4530 }, { "epoch": 0.3273430021492947, "grad_norm": 7.108893141293934, "learning_rate": 4.7693788022202095e-06, "loss": 0.8635, "step": 4531 }, { "epoch": 0.3274152473495042, "grad_norm": 5.625714066004546, "learning_rate": 4.769256079520775e-06, "loss": 0.8527, "step": 4532 }, { "epoch": 0.3274874925497137, "grad_norm": 5.528390608682419, "learning_rate": 4.769133325756987e-06, "loss": 0.8587, "step": 4533 }, { "epoch": 0.32755973774992325, "grad_norm": 5.675984892896961, "learning_rate": 4.769010540930525e-06, "loss": 0.886, "step": 4534 }, { "epoch": 0.32763198295013274, "grad_norm": 8.236545573442694, "learning_rate": 4.768887725043071e-06, "loss": 0.8884, "step": 4535 }, { "epoch": 0.3277042281503423, "grad_norm": 7.376388677687504, "learning_rate": 4.768764878096306e-06, "loss": 0.9797, "step": 4536 }, { "epoch": 0.32777647335055177, "grad_norm": 6.049255060409395, "learning_rate": 4.768642000091911e-06, "loss": 0.8931, "step": 4537 }, { "epoch": 0.3278487185507613, "grad_norm": 6.294098272603118, "learning_rate": 4.768519091031569e-06, "loss": 0.9396, "step": 4538 }, { "epoch": 0.3279209637509708, "grad_norm": 9.876957180330885, "learning_rate": 4.768396150916961e-06, "loss": 0.9466, "step": 4539 }, { "epoch": 0.3279932089511803, "grad_norm": 7.470656147728116, "learning_rate": 4.7682731797497715e-06, "loss": 0.9287, "step": 4540 }, { "epoch": 0.3280654541513898, "grad_norm": 7.159669550510677, "learning_rate": 4.768150177531684e-06, "loss": 0.9516, "step": 4541 }, { "epoch": 0.3281376993515993, "grad_norm": 6.503581600850105, "learning_rate": 4.768027144264381e-06, "loss": 0.8792, "step": 4542 }, { "epoch": 0.32820994455180885, "grad_norm": 5.988933848674472, "learning_rate": 4.767904079949548e-06, "loss": 0.9939, "step": 4543 }, { "epoch": 0.32828218975201834, "grad_norm": 5.321456539705975, "learning_rate": 4.767780984588868e-06, "loss": 0.9366, "step": 4544 }, { "epoch": 0.3283544349522279, "grad_norm": 7.869188374374241, "learning_rate": 4.767657858184028e-06, "loss": 0.895, "step": 4545 }, { "epoch": 0.32842668015243737, "grad_norm": 6.980947451409152, "learning_rate": 4.767534700736713e-06, "loss": 0.9036, "step": 4546 }, { "epoch": 0.3284989253526469, "grad_norm": 5.636990461984645, "learning_rate": 4.767411512248607e-06, "loss": 0.8743, "step": 4547 }, { "epoch": 0.3285711705528564, "grad_norm": 6.704229232783803, "learning_rate": 4.767288292721399e-06, "loss": 0.9043, "step": 4548 }, { "epoch": 0.3286434157530659, "grad_norm": 6.13892572007367, "learning_rate": 4.767165042156775e-06, "loss": 0.842, "step": 4549 }, { "epoch": 0.3287156609532754, "grad_norm": 6.293138176160448, "learning_rate": 4.767041760556421e-06, "loss": 0.9651, "step": 4550 }, { "epoch": 0.3287879061534849, "grad_norm": 6.5989885537755955, "learning_rate": 4.7669184479220264e-06, "loss": 0.9518, "step": 4551 }, { "epoch": 0.32886015135369445, "grad_norm": 6.0093999980182415, "learning_rate": 4.766795104255279e-06, "loss": 0.9077, "step": 4552 }, { "epoch": 0.32893239655390394, "grad_norm": 10.430072631583045, "learning_rate": 4.766671729557865e-06, "loss": 0.9961, "step": 4553 }, { "epoch": 0.3290046417541135, "grad_norm": 6.6506060101869195, "learning_rate": 4.7665483238314756e-06, "loss": 0.9559, "step": 4554 }, { "epoch": 0.32907688695432297, "grad_norm": 5.379474108494246, "learning_rate": 4.7664248870778e-06, "loss": 0.848, "step": 4555 }, { "epoch": 0.3291491321545325, "grad_norm": 6.887043339982468, "learning_rate": 4.766301419298527e-06, "loss": 0.9566, "step": 4556 }, { "epoch": 0.329221377354742, "grad_norm": 7.310631146405149, "learning_rate": 4.766177920495347e-06, "loss": 0.8964, "step": 4557 }, { "epoch": 0.3292936225549515, "grad_norm": 7.597839510161291, "learning_rate": 4.766054390669952e-06, "loss": 0.8927, "step": 4558 }, { "epoch": 0.329365867755161, "grad_norm": 7.663353190131538, "learning_rate": 4.76593082982403e-06, "loss": 0.9656, "step": 4559 }, { "epoch": 0.3294381129553705, "grad_norm": 6.721099868411576, "learning_rate": 4.765807237959276e-06, "loss": 0.9115, "step": 4560 }, { "epoch": 0.32951035815558005, "grad_norm": 5.765048362615097, "learning_rate": 4.765683615077379e-06, "loss": 0.9201, "step": 4561 }, { "epoch": 0.32958260335578954, "grad_norm": 6.1681719137099345, "learning_rate": 4.765559961180033e-06, "loss": 0.9141, "step": 4562 }, { "epoch": 0.3296548485559991, "grad_norm": 7.636233200676704, "learning_rate": 4.765436276268931e-06, "loss": 1.0125, "step": 4563 }, { "epoch": 0.32972709375620857, "grad_norm": 7.7780698842494065, "learning_rate": 4.7653125603457645e-06, "loss": 0.9935, "step": 4564 }, { "epoch": 0.3297993389564181, "grad_norm": 6.962303293075891, "learning_rate": 4.765188813412228e-06, "loss": 0.9501, "step": 4565 }, { "epoch": 0.3298715841566276, "grad_norm": 6.671179706159548, "learning_rate": 4.765065035470016e-06, "loss": 0.9106, "step": 4566 }, { "epoch": 0.3299438293568371, "grad_norm": 8.511046916607805, "learning_rate": 4.764941226520822e-06, "loss": 1.0572, "step": 4567 }, { "epoch": 0.3300160745570466, "grad_norm": 6.662059558960627, "learning_rate": 4.7648173865663405e-06, "loss": 0.9862, "step": 4568 }, { "epoch": 0.3300883197572561, "grad_norm": 6.3439252857364155, "learning_rate": 4.7646935156082685e-06, "loss": 0.8924, "step": 4569 }, { "epoch": 0.33016056495746565, "grad_norm": 7.619011700636349, "learning_rate": 4.7645696136483e-06, "loss": 0.8756, "step": 4570 }, { "epoch": 0.33023281015767514, "grad_norm": 9.099840569409547, "learning_rate": 4.764445680688132e-06, "loss": 0.9397, "step": 4571 }, { "epoch": 0.3303050553578847, "grad_norm": 7.4647775723943885, "learning_rate": 4.764321716729462e-06, "loss": 0.9566, "step": 4572 }, { "epoch": 0.33037730055809417, "grad_norm": 6.724941505830064, "learning_rate": 4.7641977217739846e-06, "loss": 0.9446, "step": 4573 }, { "epoch": 0.3304495457583037, "grad_norm": 5.682255265503632, "learning_rate": 4.764073695823399e-06, "loss": 0.9142, "step": 4574 }, { "epoch": 0.3305217909585132, "grad_norm": 5.786242169984508, "learning_rate": 4.7639496388794016e-06, "loss": 0.8468, "step": 4575 }, { "epoch": 0.3305940361587227, "grad_norm": 6.94421886162472, "learning_rate": 4.763825550943692e-06, "loss": 0.9181, "step": 4576 }, { "epoch": 0.3306662813589322, "grad_norm": 7.0148313621431155, "learning_rate": 4.763701432017969e-06, "loss": 0.924, "step": 4577 }, { "epoch": 0.3307385265591417, "grad_norm": 8.175697264260458, "learning_rate": 4.76357728210393e-06, "loss": 0.8624, "step": 4578 }, { "epoch": 0.33081077175935125, "grad_norm": 12.823833235524356, "learning_rate": 4.763453101203276e-06, "loss": 0.8512, "step": 4579 }, { "epoch": 0.33088301695956074, "grad_norm": 7.232174499550143, "learning_rate": 4.763328889317707e-06, "loss": 0.8609, "step": 4580 }, { "epoch": 0.3309552621597703, "grad_norm": 6.4668290239120845, "learning_rate": 4.763204646448922e-06, "loss": 0.8468, "step": 4581 }, { "epoch": 0.33102750735997977, "grad_norm": 6.92764682246621, "learning_rate": 4.763080372598623e-06, "loss": 0.8925, "step": 4582 }, { "epoch": 0.3310997525601893, "grad_norm": 8.159491468553087, "learning_rate": 4.76295606776851e-06, "loss": 0.9046, "step": 4583 }, { "epoch": 0.3311719977603988, "grad_norm": 6.051961331825747, "learning_rate": 4.762831731960287e-06, "loss": 0.8321, "step": 4584 }, { "epoch": 0.3312442429606083, "grad_norm": 7.095002588939665, "learning_rate": 4.762707365175654e-06, "loss": 0.9224, "step": 4585 }, { "epoch": 0.3313164881608178, "grad_norm": 6.820716033218987, "learning_rate": 4.762582967416314e-06, "loss": 0.8255, "step": 4586 }, { "epoch": 0.3313887333610273, "grad_norm": 6.335385676012026, "learning_rate": 4.76245853868397e-06, "loss": 0.9201, "step": 4587 }, { "epoch": 0.33146097856123685, "grad_norm": 6.450798742236638, "learning_rate": 4.762334078980325e-06, "loss": 0.9333, "step": 4588 }, { "epoch": 0.33153322376144634, "grad_norm": 6.803681151367446, "learning_rate": 4.762209588307084e-06, "loss": 0.828, "step": 4589 }, { "epoch": 0.3316054689616559, "grad_norm": 6.073122451811528, "learning_rate": 4.762085066665949e-06, "loss": 0.9103, "step": 4590 }, { "epoch": 0.33167771416186537, "grad_norm": 6.348368950374429, "learning_rate": 4.7619605140586266e-06, "loss": 0.8629, "step": 4591 }, { "epoch": 0.3317499593620749, "grad_norm": 5.905782691189766, "learning_rate": 4.76183593048682e-06, "loss": 0.8705, "step": 4592 }, { "epoch": 0.3318222045622844, "grad_norm": 7.975331898064014, "learning_rate": 4.761711315952236e-06, "loss": 0.9896, "step": 4593 }, { "epoch": 0.3318944497624939, "grad_norm": 6.904683345354034, "learning_rate": 4.761586670456582e-06, "loss": 0.8771, "step": 4594 }, { "epoch": 0.3319666949627034, "grad_norm": 6.400778913782681, "learning_rate": 4.761461994001561e-06, "loss": 0.9009, "step": 4595 }, { "epoch": 0.3320389401629129, "grad_norm": 9.017416737983492, "learning_rate": 4.7613372865888814e-06, "loss": 1.0099, "step": 4596 }, { "epoch": 0.33211118536312245, "grad_norm": 9.53451412153191, "learning_rate": 4.76121254822025e-06, "loss": 0.911, "step": 4597 }, { "epoch": 0.33218343056333194, "grad_norm": 7.146466760022019, "learning_rate": 4.761087778897375e-06, "loss": 0.8891, "step": 4598 }, { "epoch": 0.3322556757635415, "grad_norm": 6.923483771408653, "learning_rate": 4.7609629786219636e-06, "loss": 1.0629, "step": 4599 }, { "epoch": 0.33232792096375097, "grad_norm": 9.095520057007288, "learning_rate": 4.760838147395724e-06, "loss": 0.9414, "step": 4600 }, { "epoch": 0.3324001661639605, "grad_norm": 7.7375010944914155, "learning_rate": 4.7607132852203664e-06, "loss": 0.8828, "step": 4601 }, { "epoch": 0.33247241136417, "grad_norm": 7.436322030779664, "learning_rate": 4.760588392097599e-06, "loss": 0.9731, "step": 4602 }, { "epoch": 0.3325446565643795, "grad_norm": 8.40670468827235, "learning_rate": 4.760463468029132e-06, "loss": 0.9671, "step": 4603 }, { "epoch": 0.332616901764589, "grad_norm": 6.147884358556335, "learning_rate": 4.760338513016675e-06, "loss": 0.8898, "step": 4604 }, { "epoch": 0.3326891469647985, "grad_norm": 6.673310529661644, "learning_rate": 4.760213527061939e-06, "loss": 0.9019, "step": 4605 }, { "epoch": 0.33276139216500805, "grad_norm": 8.07564307330155, "learning_rate": 4.760088510166635e-06, "loss": 0.874, "step": 4606 }, { "epoch": 0.33283363736521754, "grad_norm": 7.948656546842253, "learning_rate": 4.759963462332473e-06, "loss": 0.8461, "step": 4607 }, { "epoch": 0.3329058825654271, "grad_norm": 7.5483154438615685, "learning_rate": 4.7598383835611675e-06, "loss": 0.8723, "step": 4608 }, { "epoch": 0.33297812776563657, "grad_norm": 7.269199599500816, "learning_rate": 4.759713273854428e-06, "loss": 0.8442, "step": 4609 }, { "epoch": 0.3330503729658461, "grad_norm": 6.997603687473545, "learning_rate": 4.759588133213968e-06, "loss": 0.8175, "step": 4610 }, { "epoch": 0.3331226181660556, "grad_norm": 6.909865869746723, "learning_rate": 4.759462961641503e-06, "loss": 0.8517, "step": 4611 }, { "epoch": 0.3331948633662651, "grad_norm": 7.552928573838356, "learning_rate": 4.759337759138742e-06, "loss": 0.9204, "step": 4612 }, { "epoch": 0.3332671085664746, "grad_norm": 9.006056125521047, "learning_rate": 4.759212525707403e-06, "loss": 0.8718, "step": 4613 }, { "epoch": 0.3333393537666841, "grad_norm": 8.78090376527716, "learning_rate": 4.759087261349198e-06, "loss": 0.904, "step": 4614 }, { "epoch": 0.33341159896689365, "grad_norm": 7.293927465272214, "learning_rate": 4.758961966065842e-06, "loss": 0.9572, "step": 4615 }, { "epoch": 0.33348384416710314, "grad_norm": 7.014592627879927, "learning_rate": 4.758836639859051e-06, "loss": 1.0019, "step": 4616 }, { "epoch": 0.3335560893673127, "grad_norm": 6.893316111048146, "learning_rate": 4.75871128273054e-06, "loss": 0.8535, "step": 4617 }, { "epoch": 0.33362833456752217, "grad_norm": 8.852052491540494, "learning_rate": 4.758585894682026e-06, "loss": 0.935, "step": 4618 }, { "epoch": 0.3337005797677317, "grad_norm": 9.88984896491489, "learning_rate": 4.758460475715225e-06, "loss": 0.8213, "step": 4619 }, { "epoch": 0.3337728249679412, "grad_norm": 8.541322679493755, "learning_rate": 4.7583350258318526e-06, "loss": 0.8586, "step": 4620 }, { "epoch": 0.3338450701681507, "grad_norm": 6.28264818714276, "learning_rate": 4.758209545033629e-06, "loss": 0.9701, "step": 4621 }, { "epoch": 0.3339173153683602, "grad_norm": 6.57091251858555, "learning_rate": 4.758084033322268e-06, "loss": 0.9482, "step": 4622 }, { "epoch": 0.3339895605685697, "grad_norm": 7.047404402578056, "learning_rate": 4.757958490699492e-06, "loss": 0.8956, "step": 4623 }, { "epoch": 0.33406180576877925, "grad_norm": 8.766227279888211, "learning_rate": 4.757832917167015e-06, "loss": 0.8761, "step": 4624 }, { "epoch": 0.33413405096898874, "grad_norm": 8.699786163859144, "learning_rate": 4.75770731272656e-06, "loss": 0.9168, "step": 4625 }, { "epoch": 0.3342062961691983, "grad_norm": 5.887906770361994, "learning_rate": 4.757581677379845e-06, "loss": 0.903, "step": 4626 }, { "epoch": 0.33427854136940777, "grad_norm": 6.325491651643354, "learning_rate": 4.7574560111285885e-06, "loss": 0.8813, "step": 4627 }, { "epoch": 0.3343507865696173, "grad_norm": 6.506562368203225, "learning_rate": 4.7573303139745134e-06, "loss": 1.0088, "step": 4628 }, { "epoch": 0.3344230317698268, "grad_norm": 9.460490738316148, "learning_rate": 4.757204585919338e-06, "loss": 0.8936, "step": 4629 }, { "epoch": 0.3344952769700363, "grad_norm": 7.448751449930505, "learning_rate": 4.757078826964785e-06, "loss": 0.9217, "step": 4630 }, { "epoch": 0.3345675221702458, "grad_norm": 6.258743883560023, "learning_rate": 4.756953037112575e-06, "loss": 0.9815, "step": 4631 }, { "epoch": 0.3346397673704553, "grad_norm": 8.193029967677985, "learning_rate": 4.75682721636443e-06, "loss": 0.9477, "step": 4632 }, { "epoch": 0.33471201257066485, "grad_norm": 6.395993242118276, "learning_rate": 4.7567013647220736e-06, "loss": 0.862, "step": 4633 }, { "epoch": 0.33478425777087434, "grad_norm": 6.437868570683813, "learning_rate": 4.756575482187228e-06, "loss": 0.9263, "step": 4634 }, { "epoch": 0.3348565029710839, "grad_norm": 6.908680759922932, "learning_rate": 4.756449568761615e-06, "loss": 0.9042, "step": 4635 }, { "epoch": 0.33492874817129337, "grad_norm": 8.868417367215025, "learning_rate": 4.7563236244469605e-06, "loss": 0.8425, "step": 4636 }, { "epoch": 0.33500099337150285, "grad_norm": 7.622948542553719, "learning_rate": 4.756197649244987e-06, "loss": 0.9332, "step": 4637 }, { "epoch": 0.3350732385717124, "grad_norm": 6.088726144241528, "learning_rate": 4.756071643157419e-06, "loss": 0.9039, "step": 4638 }, { "epoch": 0.3351454837719219, "grad_norm": 8.073689111650388, "learning_rate": 4.755945606185982e-06, "loss": 0.9388, "step": 4639 }, { "epoch": 0.3352177289721314, "grad_norm": 6.39175755694379, "learning_rate": 4.755819538332403e-06, "loss": 0.8881, "step": 4640 }, { "epoch": 0.3352899741723409, "grad_norm": 6.350774474485868, "learning_rate": 4.755693439598405e-06, "loss": 0.8921, "step": 4641 }, { "epoch": 0.33536221937255045, "grad_norm": 8.917256938850453, "learning_rate": 4.755567309985714e-06, "loss": 0.8154, "step": 4642 }, { "epoch": 0.33543446457275994, "grad_norm": 6.56027212927606, "learning_rate": 4.755441149496059e-06, "loss": 0.9723, "step": 4643 }, { "epoch": 0.3355067097729695, "grad_norm": 6.886204138281266, "learning_rate": 4.755314958131166e-06, "loss": 0.8996, "step": 4644 }, { "epoch": 0.33557895497317897, "grad_norm": 6.041045619570703, "learning_rate": 4.755188735892762e-06, "loss": 0.8751, "step": 4645 }, { "epoch": 0.33565120017338845, "grad_norm": 7.844403121930659, "learning_rate": 4.755062482782575e-06, "loss": 0.9456, "step": 4646 }, { "epoch": 0.335723445373598, "grad_norm": 7.813785538764216, "learning_rate": 4.754936198802334e-06, "loss": 0.8417, "step": 4647 }, { "epoch": 0.3357956905738075, "grad_norm": 7.471886905802675, "learning_rate": 4.754809883953767e-06, "loss": 0.8902, "step": 4648 }, { "epoch": 0.335867935774017, "grad_norm": 7.246150606245942, "learning_rate": 4.754683538238604e-06, "loss": 0.8454, "step": 4649 }, { "epoch": 0.3359401809742265, "grad_norm": 6.766201496891236, "learning_rate": 4.754557161658574e-06, "loss": 0.9435, "step": 4650 }, { "epoch": 0.33601242617443605, "grad_norm": 8.549249573730187, "learning_rate": 4.7544307542154065e-06, "loss": 0.9082, "step": 4651 }, { "epoch": 0.33608467137464554, "grad_norm": 6.314611100811529, "learning_rate": 4.754304315910832e-06, "loss": 0.9901, "step": 4652 }, { "epoch": 0.3361569165748551, "grad_norm": 6.312663463326768, "learning_rate": 4.754177846746583e-06, "loss": 0.8354, "step": 4653 }, { "epoch": 0.33622916177506457, "grad_norm": 5.468282450716626, "learning_rate": 4.754051346724389e-06, "loss": 0.851, "step": 4654 }, { "epoch": 0.33630140697527405, "grad_norm": 6.618234887038955, "learning_rate": 4.753924815845982e-06, "loss": 0.9072, "step": 4655 }, { "epoch": 0.3363736521754836, "grad_norm": 6.746021935235876, "learning_rate": 4.753798254113094e-06, "loss": 0.9499, "step": 4656 }, { "epoch": 0.3364458973756931, "grad_norm": 5.905960318106848, "learning_rate": 4.753671661527459e-06, "loss": 0.9066, "step": 4657 }, { "epoch": 0.3365181425759026, "grad_norm": 6.733565492058412, "learning_rate": 4.7535450380908074e-06, "loss": 0.88, "step": 4658 }, { "epoch": 0.3365903877761121, "grad_norm": 6.168734367064792, "learning_rate": 4.753418383804876e-06, "loss": 0.9302, "step": 4659 }, { "epoch": 0.33666263297632165, "grad_norm": 5.705202492439364, "learning_rate": 4.753291698671395e-06, "loss": 0.8838, "step": 4660 }, { "epoch": 0.33673487817653114, "grad_norm": 5.992058584723924, "learning_rate": 4.7531649826921e-06, "loss": 0.9271, "step": 4661 }, { "epoch": 0.3368071233767407, "grad_norm": 6.91635042256569, "learning_rate": 4.753038235868726e-06, "loss": 0.9169, "step": 4662 }, { "epoch": 0.33687936857695017, "grad_norm": 5.888133202455875, "learning_rate": 4.752911458203009e-06, "loss": 0.9322, "step": 4663 }, { "epoch": 0.33695161377715965, "grad_norm": 6.13507155099197, "learning_rate": 4.752784649696682e-06, "loss": 0.8698, "step": 4664 }, { "epoch": 0.3370238589773692, "grad_norm": 6.405683092294392, "learning_rate": 4.752657810351483e-06, "loss": 0.9707, "step": 4665 }, { "epoch": 0.3370961041775787, "grad_norm": 7.611332635679957, "learning_rate": 4.752530940169148e-06, "loss": 0.9492, "step": 4666 }, { "epoch": 0.3371683493777882, "grad_norm": 5.908880061942853, "learning_rate": 4.7524040391514134e-06, "loss": 0.902, "step": 4667 }, { "epoch": 0.3372405945779977, "grad_norm": 6.9288763111210505, "learning_rate": 4.752277107300016e-06, "loss": 0.9526, "step": 4668 }, { "epoch": 0.33731283977820725, "grad_norm": 8.253871125102735, "learning_rate": 4.752150144616694e-06, "loss": 0.9096, "step": 4669 }, { "epoch": 0.33738508497841674, "grad_norm": 5.593747783639138, "learning_rate": 4.752023151103184e-06, "loss": 0.8567, "step": 4670 }, { "epoch": 0.3374573301786263, "grad_norm": 6.668302271792145, "learning_rate": 4.7518961267612276e-06, "loss": 0.9121, "step": 4671 }, { "epoch": 0.33752957537883577, "grad_norm": 6.165203573883175, "learning_rate": 4.751769071592561e-06, "loss": 0.987, "step": 4672 }, { "epoch": 0.33760182057904525, "grad_norm": 7.398327932382314, "learning_rate": 4.751641985598924e-06, "loss": 0.9055, "step": 4673 }, { "epoch": 0.3376740657792548, "grad_norm": 6.862778221285516, "learning_rate": 4.7515148687820565e-06, "loss": 0.8558, "step": 4674 }, { "epoch": 0.3377463109794643, "grad_norm": 6.729327441047722, "learning_rate": 4.751387721143698e-06, "loss": 0.7887, "step": 4675 }, { "epoch": 0.3378185561796738, "grad_norm": 6.665789864419787, "learning_rate": 4.751260542685591e-06, "loss": 0.8645, "step": 4676 }, { "epoch": 0.3378908013798833, "grad_norm": 6.490522664658773, "learning_rate": 4.751133333409474e-06, "loss": 1.0351, "step": 4677 }, { "epoch": 0.33796304658009285, "grad_norm": 6.647204059441934, "learning_rate": 4.75100609331709e-06, "loss": 0.8491, "step": 4678 }, { "epoch": 0.33803529178030234, "grad_norm": 8.049667673774062, "learning_rate": 4.750878822410181e-06, "loss": 0.9304, "step": 4679 }, { "epoch": 0.3381075369805119, "grad_norm": 6.009663746800595, "learning_rate": 4.7507515206904875e-06, "loss": 0.8, "step": 4680 }, { "epoch": 0.33817978218072137, "grad_norm": 7.036728822453794, "learning_rate": 4.750624188159754e-06, "loss": 0.9536, "step": 4681 }, { "epoch": 0.33825202738093085, "grad_norm": 6.04311741040075, "learning_rate": 4.750496824819723e-06, "loss": 0.8914, "step": 4682 }, { "epoch": 0.3383242725811404, "grad_norm": 6.572899126693448, "learning_rate": 4.750369430672137e-06, "loss": 0.8721, "step": 4683 }, { "epoch": 0.3383965177813499, "grad_norm": 7.142173014984764, "learning_rate": 4.750242005718742e-06, "loss": 0.8689, "step": 4684 }, { "epoch": 0.3384687629815594, "grad_norm": 5.724448444833667, "learning_rate": 4.75011454996128e-06, "loss": 0.9143, "step": 4685 }, { "epoch": 0.3385410081817689, "grad_norm": 6.198560904564676, "learning_rate": 4.7499870634014975e-06, "loss": 0.8788, "step": 4686 }, { "epoch": 0.33861325338197845, "grad_norm": 5.457298102984559, "learning_rate": 4.749859546041139e-06, "loss": 0.7786, "step": 4687 }, { "epoch": 0.33868549858218794, "grad_norm": 6.446283958512788, "learning_rate": 4.74973199788195e-06, "loss": 0.8519, "step": 4688 }, { "epoch": 0.3387577437823975, "grad_norm": 5.892891445307782, "learning_rate": 4.749604418925677e-06, "loss": 0.8952, "step": 4689 }, { "epoch": 0.33882998898260697, "grad_norm": 6.655723013634846, "learning_rate": 4.749476809174066e-06, "loss": 0.8691, "step": 4690 }, { "epoch": 0.33890223418281645, "grad_norm": 8.259396836839748, "learning_rate": 4.749349168628864e-06, "loss": 0.9287, "step": 4691 }, { "epoch": 0.338974479383026, "grad_norm": 6.07880811603494, "learning_rate": 4.7492214972918195e-06, "loss": 0.9129, "step": 4692 }, { "epoch": 0.3390467245832355, "grad_norm": 7.2553885593379945, "learning_rate": 4.749093795164677e-06, "loss": 0.9494, "step": 4693 }, { "epoch": 0.339118969783445, "grad_norm": 9.288093689622594, "learning_rate": 4.748966062249189e-06, "loss": 0.9486, "step": 4694 }, { "epoch": 0.3391912149836545, "grad_norm": 7.706859990811352, "learning_rate": 4.748838298547101e-06, "loss": 0.9275, "step": 4695 }, { "epoch": 0.33926346018386405, "grad_norm": 6.12590642956128, "learning_rate": 4.748710504060162e-06, "loss": 0.8555, "step": 4696 }, { "epoch": 0.33933570538407354, "grad_norm": 7.682569706082618, "learning_rate": 4.748582678790123e-06, "loss": 0.953, "step": 4697 }, { "epoch": 0.3394079505842831, "grad_norm": 6.197705724209281, "learning_rate": 4.748454822738733e-06, "loss": 0.9044, "step": 4698 }, { "epoch": 0.33948019578449257, "grad_norm": 7.293869412449017, "learning_rate": 4.748326935907742e-06, "loss": 0.8726, "step": 4699 }, { "epoch": 0.33955244098470205, "grad_norm": 5.961991723107009, "learning_rate": 4.748199018298901e-06, "loss": 0.8354, "step": 4700 }, { "epoch": 0.3396246861849116, "grad_norm": 7.015330285082026, "learning_rate": 4.748071069913962e-06, "loss": 0.8635, "step": 4701 }, { "epoch": 0.3396969313851211, "grad_norm": 5.918049001621709, "learning_rate": 4.747943090754675e-06, "loss": 0.8834, "step": 4702 }, { "epoch": 0.3397691765853306, "grad_norm": 5.810893503316402, "learning_rate": 4.747815080822792e-06, "loss": 0.8249, "step": 4703 }, { "epoch": 0.3398414217855401, "grad_norm": 8.857823473186226, "learning_rate": 4.747687040120066e-06, "loss": 0.9339, "step": 4704 }, { "epoch": 0.33991366698574965, "grad_norm": 6.383215005573057, "learning_rate": 4.74755896864825e-06, "loss": 0.864, "step": 4705 }, { "epoch": 0.33998591218595914, "grad_norm": 8.806623948268198, "learning_rate": 4.747430866409096e-06, "loss": 0.9873, "step": 4706 }, { "epoch": 0.3400581573861687, "grad_norm": 8.304719653924113, "learning_rate": 4.747302733404359e-06, "loss": 0.9586, "step": 4707 }, { "epoch": 0.34013040258637817, "grad_norm": 6.400421915452198, "learning_rate": 4.747174569635793e-06, "loss": 0.913, "step": 4708 }, { "epoch": 0.34020264778658765, "grad_norm": 6.764633141246059, "learning_rate": 4.747046375105151e-06, "loss": 0.9683, "step": 4709 }, { "epoch": 0.3402748929867972, "grad_norm": 5.634527318400954, "learning_rate": 4.74691814981419e-06, "loss": 0.8647, "step": 4710 }, { "epoch": 0.3403471381870067, "grad_norm": 5.70614786380475, "learning_rate": 4.746789893764664e-06, "loss": 0.8682, "step": 4711 }, { "epoch": 0.3404193833872162, "grad_norm": 5.736774659692827, "learning_rate": 4.746661606958328e-06, "loss": 0.91, "step": 4712 }, { "epoch": 0.3404916285874257, "grad_norm": 6.326015996941099, "learning_rate": 4.746533289396939e-06, "loss": 0.8407, "step": 4713 }, { "epoch": 0.34056387378763525, "grad_norm": 8.022983913359383, "learning_rate": 4.7464049410822545e-06, "loss": 0.8569, "step": 4714 }, { "epoch": 0.34063611898784474, "grad_norm": 9.013791854902998, "learning_rate": 4.746276562016029e-06, "loss": 0.8877, "step": 4715 }, { "epoch": 0.3407083641880543, "grad_norm": 5.875087980869647, "learning_rate": 4.746148152200023e-06, "loss": 0.7604, "step": 4716 }, { "epoch": 0.34078060938826377, "grad_norm": 6.2975513483114325, "learning_rate": 4.746019711635992e-06, "loss": 0.855, "step": 4717 }, { "epoch": 0.34085285458847325, "grad_norm": 6.266088302111875, "learning_rate": 4.745891240325696e-06, "loss": 0.8928, "step": 4718 }, { "epoch": 0.3409250997886828, "grad_norm": 6.746715452950137, "learning_rate": 4.745762738270892e-06, "loss": 0.9546, "step": 4719 }, { "epoch": 0.3409973449888923, "grad_norm": 6.097724127284761, "learning_rate": 4.74563420547334e-06, "loss": 0.8528, "step": 4720 }, { "epoch": 0.3410695901891018, "grad_norm": 6.839299117647531, "learning_rate": 4.745505641934799e-06, "loss": 0.8877, "step": 4721 }, { "epoch": 0.3411418353893113, "grad_norm": 7.194454643147366, "learning_rate": 4.7453770476570305e-06, "loss": 0.9382, "step": 4722 }, { "epoch": 0.34121408058952085, "grad_norm": 8.802052899972043, "learning_rate": 4.745248422641793e-06, "loss": 0.9408, "step": 4723 }, { "epoch": 0.34128632578973034, "grad_norm": 7.63198757496908, "learning_rate": 4.745119766890847e-06, "loss": 0.9871, "step": 4724 }, { "epoch": 0.3413585709899399, "grad_norm": 7.757157251012987, "learning_rate": 4.744991080405955e-06, "loss": 0.8698, "step": 4725 }, { "epoch": 0.34143081619014937, "grad_norm": 7.777635830386212, "learning_rate": 4.744862363188879e-06, "loss": 0.9681, "step": 4726 }, { "epoch": 0.34150306139035885, "grad_norm": 8.30135338101824, "learning_rate": 4.74473361524138e-06, "loss": 0.9716, "step": 4727 }, { "epoch": 0.3415753065905684, "grad_norm": 5.93776758494489, "learning_rate": 4.7446048365652205e-06, "loss": 0.9634, "step": 4728 }, { "epoch": 0.3416475517907779, "grad_norm": 8.78739741171892, "learning_rate": 4.744476027162164e-06, "loss": 0.9651, "step": 4729 }, { "epoch": 0.3417197969909874, "grad_norm": 7.551232132570863, "learning_rate": 4.744347187033973e-06, "loss": 0.9158, "step": 4730 }, { "epoch": 0.3417920421911969, "grad_norm": 8.599694765573668, "learning_rate": 4.744218316182411e-06, "loss": 0.9641, "step": 4731 }, { "epoch": 0.34186428739140645, "grad_norm": 8.248917942178652, "learning_rate": 4.744089414609244e-06, "loss": 0.9556, "step": 4732 }, { "epoch": 0.34193653259161594, "grad_norm": 5.723685047774995, "learning_rate": 4.743960482316234e-06, "loss": 0.9496, "step": 4733 }, { "epoch": 0.3420087777918255, "grad_norm": 7.598791136720495, "learning_rate": 4.743831519305149e-06, "loss": 0.8765, "step": 4734 }, { "epoch": 0.34208102299203497, "grad_norm": 5.9543145060176546, "learning_rate": 4.743702525577752e-06, "loss": 0.9256, "step": 4735 }, { "epoch": 0.34215326819224445, "grad_norm": 11.859226391731948, "learning_rate": 4.743573501135809e-06, "loss": 0.9687, "step": 4736 }, { "epoch": 0.342225513392454, "grad_norm": 7.232358053998726, "learning_rate": 4.743444445981087e-06, "loss": 0.941, "step": 4737 }, { "epoch": 0.3422977585926635, "grad_norm": 7.939735157967306, "learning_rate": 4.743315360115352e-06, "loss": 1.0065, "step": 4738 }, { "epoch": 0.342370003792873, "grad_norm": 5.297435089476951, "learning_rate": 4.743186243540373e-06, "loss": 0.8728, "step": 4739 }, { "epoch": 0.3424422489930825, "grad_norm": 6.219597686807579, "learning_rate": 4.743057096257915e-06, "loss": 0.9657, "step": 4740 }, { "epoch": 0.34251449419329205, "grad_norm": 7.215688873840963, "learning_rate": 4.742927918269748e-06, "loss": 0.8286, "step": 4741 }, { "epoch": 0.34258673939350154, "grad_norm": 7.171193341873666, "learning_rate": 4.742798709577638e-06, "loss": 0.8916, "step": 4742 }, { "epoch": 0.3426589845937111, "grad_norm": 7.806766450777952, "learning_rate": 4.742669470183356e-06, "loss": 0.9031, "step": 4743 }, { "epoch": 0.34273122979392057, "grad_norm": 6.930360987125074, "learning_rate": 4.74254020008867e-06, "loss": 0.8826, "step": 4744 }, { "epoch": 0.34280347499413005, "grad_norm": 7.1875615656330645, "learning_rate": 4.74241089929535e-06, "loss": 0.9451, "step": 4745 }, { "epoch": 0.3428757201943396, "grad_norm": 5.938651204187698, "learning_rate": 4.742281567805165e-06, "loss": 0.8556, "step": 4746 }, { "epoch": 0.3429479653945491, "grad_norm": 6.36124656936514, "learning_rate": 4.742152205619887e-06, "loss": 0.9079, "step": 4747 }, { "epoch": 0.3430202105947586, "grad_norm": 6.468430958127217, "learning_rate": 4.742022812741287e-06, "loss": 0.9156, "step": 4748 }, { "epoch": 0.3430924557949681, "grad_norm": 7.663848718428299, "learning_rate": 4.741893389171134e-06, "loss": 0.9361, "step": 4749 }, { "epoch": 0.34316470099517765, "grad_norm": 7.126372355755827, "learning_rate": 4.741763934911202e-06, "loss": 0.8589, "step": 4750 }, { "epoch": 0.34323694619538714, "grad_norm": 6.923181552585386, "learning_rate": 4.741634449963262e-06, "loss": 0.8968, "step": 4751 }, { "epoch": 0.3433091913955967, "grad_norm": 7.798393025287676, "learning_rate": 4.741504934329087e-06, "loss": 0.9613, "step": 4752 }, { "epoch": 0.34338143659580617, "grad_norm": 6.490537357978344, "learning_rate": 4.741375388010451e-06, "loss": 0.9241, "step": 4753 }, { "epoch": 0.34345368179601565, "grad_norm": 5.708863765083316, "learning_rate": 4.741245811009125e-06, "loss": 0.785, "step": 4754 }, { "epoch": 0.3435259269962252, "grad_norm": 7.0510652860032765, "learning_rate": 4.741116203326885e-06, "loss": 0.9242, "step": 4755 }, { "epoch": 0.3435981721964347, "grad_norm": 5.572461413270852, "learning_rate": 4.740986564965503e-06, "loss": 0.9212, "step": 4756 }, { "epoch": 0.3436704173966442, "grad_norm": 5.513802116388157, "learning_rate": 4.7408568959267555e-06, "loss": 0.837, "step": 4757 }, { "epoch": 0.3437426625968537, "grad_norm": 7.283550373861911, "learning_rate": 4.740727196212418e-06, "loss": 0.9051, "step": 4758 }, { "epoch": 0.34381490779706325, "grad_norm": 6.470470844570698, "learning_rate": 4.7405974658242634e-06, "loss": 0.9448, "step": 4759 }, { "epoch": 0.34388715299727274, "grad_norm": 7.869287992694313, "learning_rate": 4.74046770476407e-06, "loss": 0.9147, "step": 4760 }, { "epoch": 0.3439593981974823, "grad_norm": 6.825514933964289, "learning_rate": 4.740337913033614e-06, "loss": 0.9089, "step": 4761 }, { "epoch": 0.34403164339769177, "grad_norm": 7.1371209626842065, "learning_rate": 4.74020809063467e-06, "loss": 0.9267, "step": 4762 }, { "epoch": 0.34410388859790125, "grad_norm": 7.247300533353916, "learning_rate": 4.7400782375690176e-06, "loss": 1.001, "step": 4763 }, { "epoch": 0.3441761337981108, "grad_norm": 7.396895154209182, "learning_rate": 4.7399483538384335e-06, "loss": 0.919, "step": 4764 }, { "epoch": 0.3442483789983203, "grad_norm": 6.620262611458682, "learning_rate": 4.739818439444695e-06, "loss": 0.9751, "step": 4765 }, { "epoch": 0.3443206241985298, "grad_norm": 5.551886215671014, "learning_rate": 4.739688494389582e-06, "loss": 0.8822, "step": 4766 }, { "epoch": 0.3443928693987393, "grad_norm": 5.974918712877045, "learning_rate": 4.739558518674872e-06, "loss": 0.8527, "step": 4767 }, { "epoch": 0.34446511459894885, "grad_norm": 7.593633203432431, "learning_rate": 4.739428512302345e-06, "loss": 0.9646, "step": 4768 }, { "epoch": 0.34453735979915834, "grad_norm": 6.184839158183312, "learning_rate": 4.739298475273781e-06, "loss": 0.8128, "step": 4769 }, { "epoch": 0.3446096049993679, "grad_norm": 5.537779149582069, "learning_rate": 4.7391684075909585e-06, "loss": 0.8214, "step": 4770 }, { "epoch": 0.34468185019957737, "grad_norm": 7.2764533619788105, "learning_rate": 4.739038309255659e-06, "loss": 0.9267, "step": 4771 }, { "epoch": 0.34475409539978685, "grad_norm": 6.2085337083811885, "learning_rate": 4.738908180269665e-06, "loss": 0.9266, "step": 4772 }, { "epoch": 0.3448263405999964, "grad_norm": 6.3424985055166125, "learning_rate": 4.738778020634755e-06, "loss": 0.8218, "step": 4773 }, { "epoch": 0.3448985858002059, "grad_norm": 5.143467450637894, "learning_rate": 4.738647830352713e-06, "loss": 0.9342, "step": 4774 }, { "epoch": 0.3449708310004154, "grad_norm": 5.329779672846147, "learning_rate": 4.738517609425319e-06, "loss": 0.8666, "step": 4775 }, { "epoch": 0.3450430762006249, "grad_norm": 6.605101446026752, "learning_rate": 4.738387357854359e-06, "loss": 0.8882, "step": 4776 }, { "epoch": 0.34511532140083445, "grad_norm": 7.093025876363604, "learning_rate": 4.738257075641613e-06, "loss": 0.8746, "step": 4777 }, { "epoch": 0.34518756660104394, "grad_norm": 6.329158444260092, "learning_rate": 4.738126762788866e-06, "loss": 0.894, "step": 4778 }, { "epoch": 0.3452598118012535, "grad_norm": 7.466997140428408, "learning_rate": 4.7379964192979015e-06, "loss": 0.8719, "step": 4779 }, { "epoch": 0.34533205700146297, "grad_norm": 6.0327775829206765, "learning_rate": 4.737866045170503e-06, "loss": 0.9509, "step": 4780 }, { "epoch": 0.34540430220167245, "grad_norm": 6.984185747488387, "learning_rate": 4.737735640408456e-06, "loss": 0.9864, "step": 4781 }, { "epoch": 0.345476547401882, "grad_norm": 6.133565091163463, "learning_rate": 4.737605205013546e-06, "loss": 0.8378, "step": 4782 }, { "epoch": 0.3455487926020915, "grad_norm": 8.150637979636787, "learning_rate": 4.737474738987558e-06, "loss": 0.8201, "step": 4783 }, { "epoch": 0.345621037802301, "grad_norm": 5.63423399111414, "learning_rate": 4.737344242332278e-06, "loss": 0.9026, "step": 4784 }, { "epoch": 0.3456932830025105, "grad_norm": 6.743355765853028, "learning_rate": 4.737213715049492e-06, "loss": 0.8988, "step": 4785 }, { "epoch": 0.34576552820272005, "grad_norm": 7.829797744964475, "learning_rate": 4.737083157140988e-06, "loss": 0.9473, "step": 4786 }, { "epoch": 0.34583777340292954, "grad_norm": 7.5027183692623565, "learning_rate": 4.736952568608553e-06, "loss": 1.0386, "step": 4787 }, { "epoch": 0.3459100186031391, "grad_norm": 7.166156159190252, "learning_rate": 4.736821949453973e-06, "loss": 0.9248, "step": 4788 }, { "epoch": 0.34598226380334857, "grad_norm": 6.480498476517867, "learning_rate": 4.736691299679038e-06, "loss": 0.921, "step": 4789 }, { "epoch": 0.34605450900355805, "grad_norm": 8.547341844825484, "learning_rate": 4.736560619285537e-06, "loss": 0.9479, "step": 4790 }, { "epoch": 0.3461267542037676, "grad_norm": 7.3208953397408605, "learning_rate": 4.736429908275255e-06, "loss": 0.9253, "step": 4791 }, { "epoch": 0.3461989994039771, "grad_norm": 7.282421885476452, "learning_rate": 4.7362991666499856e-06, "loss": 0.9073, "step": 4792 }, { "epoch": 0.3462712446041866, "grad_norm": 7.866684171150689, "learning_rate": 4.736168394411517e-06, "loss": 0.9267, "step": 4793 }, { "epoch": 0.3463434898043961, "grad_norm": 6.391222489833286, "learning_rate": 4.736037591561639e-06, "loss": 0.8795, "step": 4794 }, { "epoch": 0.34641573500460565, "grad_norm": 8.767495674804456, "learning_rate": 4.735906758102144e-06, "loss": 0.8775, "step": 4795 }, { "epoch": 0.34648798020481514, "grad_norm": 7.6276968267371, "learning_rate": 4.7357758940348195e-06, "loss": 0.9336, "step": 4796 }, { "epoch": 0.3465602254050247, "grad_norm": 7.446086895308853, "learning_rate": 4.73564499936146e-06, "loss": 0.8151, "step": 4797 }, { "epoch": 0.34663247060523417, "grad_norm": 7.0886666658037285, "learning_rate": 4.735514074083855e-06, "loss": 0.9652, "step": 4798 }, { "epoch": 0.34670471580544365, "grad_norm": 7.789517709789422, "learning_rate": 4.7353831182038e-06, "loss": 0.9289, "step": 4799 }, { "epoch": 0.3467769610056532, "grad_norm": 7.102468636325011, "learning_rate": 4.735252131723085e-06, "loss": 0.9773, "step": 4800 }, { "epoch": 0.3468492062058627, "grad_norm": 6.688518981308947, "learning_rate": 4.7351211146435036e-06, "loss": 0.8854, "step": 4801 }, { "epoch": 0.3469214514060722, "grad_norm": 6.512356091335268, "learning_rate": 4.7349900669668506e-06, "loss": 0.9496, "step": 4802 }, { "epoch": 0.3469936966062817, "grad_norm": 8.53447106445957, "learning_rate": 4.734858988694918e-06, "loss": 0.9007, "step": 4803 }, { "epoch": 0.34706594180649125, "grad_norm": 7.611924764512946, "learning_rate": 4.734727879829502e-06, "loss": 1.0246, "step": 4804 }, { "epoch": 0.34713818700670074, "grad_norm": 5.731649889457857, "learning_rate": 4.7345967403723955e-06, "loss": 0.8358, "step": 4805 }, { "epoch": 0.3472104322069103, "grad_norm": 5.734139502572373, "learning_rate": 4.734465570325394e-06, "loss": 0.9071, "step": 4806 }, { "epoch": 0.34728267740711977, "grad_norm": 7.836994803419747, "learning_rate": 4.734334369690296e-06, "loss": 0.9454, "step": 4807 }, { "epoch": 0.34735492260732925, "grad_norm": 6.87729869907075, "learning_rate": 4.734203138468893e-06, "loss": 0.8749, "step": 4808 }, { "epoch": 0.3474271678075388, "grad_norm": 6.687434971573438, "learning_rate": 4.7340718766629856e-06, "loss": 0.8804, "step": 4809 }, { "epoch": 0.3474994130077483, "grad_norm": 6.478084003559828, "learning_rate": 4.733940584274368e-06, "loss": 0.939, "step": 4810 }, { "epoch": 0.3475716582079578, "grad_norm": 7.367200703775946, "learning_rate": 4.733809261304838e-06, "loss": 0.9452, "step": 4811 }, { "epoch": 0.3476439034081673, "grad_norm": 6.609927323442475, "learning_rate": 4.733677907756194e-06, "loss": 1.0468, "step": 4812 }, { "epoch": 0.34771614860837685, "grad_norm": 6.919729481519643, "learning_rate": 4.733546523630234e-06, "loss": 0.8875, "step": 4813 }, { "epoch": 0.34778839380858634, "grad_norm": 5.820319709517347, "learning_rate": 4.733415108928756e-06, "loss": 0.8658, "step": 4814 }, { "epoch": 0.3478606390087959, "grad_norm": 7.105193868377313, "learning_rate": 4.73328366365356e-06, "loss": 0.8159, "step": 4815 }, { "epoch": 0.34793288420900537, "grad_norm": 6.5540068390071085, "learning_rate": 4.733152187806444e-06, "loss": 0.8662, "step": 4816 }, { "epoch": 0.34800512940921485, "grad_norm": 7.057643875023051, "learning_rate": 4.7330206813892085e-06, "loss": 0.9895, "step": 4817 }, { "epoch": 0.3480773746094244, "grad_norm": 5.906394351224589, "learning_rate": 4.732889144403654e-06, "loss": 0.8879, "step": 4818 }, { "epoch": 0.3481496198096339, "grad_norm": 5.732092795601168, "learning_rate": 4.732757576851581e-06, "loss": 0.9363, "step": 4819 }, { "epoch": 0.3482218650098434, "grad_norm": 5.664929890480773, "learning_rate": 4.73262597873479e-06, "loss": 0.8782, "step": 4820 }, { "epoch": 0.3482941102100529, "grad_norm": 6.024748623617686, "learning_rate": 4.732494350055083e-06, "loss": 0.9051, "step": 4821 }, { "epoch": 0.34836635541026245, "grad_norm": 6.895924024955538, "learning_rate": 4.732362690814262e-06, "loss": 0.9158, "step": 4822 }, { "epoch": 0.34843860061047194, "grad_norm": 5.5850359849445015, "learning_rate": 4.732231001014129e-06, "loss": 1.0064, "step": 4823 }, { "epoch": 0.3485108458106815, "grad_norm": 7.8584129396885265, "learning_rate": 4.732099280656486e-06, "loss": 0.9082, "step": 4824 }, { "epoch": 0.34858309101089097, "grad_norm": 7.697970075684421, "learning_rate": 4.731967529743138e-06, "loss": 0.9528, "step": 4825 }, { "epoch": 0.34865533621110045, "grad_norm": 8.030673826456507, "learning_rate": 4.731835748275887e-06, "loss": 0.8399, "step": 4826 }, { "epoch": 0.34872758141131, "grad_norm": 5.638044702304652, "learning_rate": 4.731703936256537e-06, "loss": 0.8677, "step": 4827 }, { "epoch": 0.3487998266115195, "grad_norm": 6.656935105862315, "learning_rate": 4.731572093686894e-06, "loss": 0.9461, "step": 4828 }, { "epoch": 0.348872071811729, "grad_norm": 7.309242240153499, "learning_rate": 4.731440220568761e-06, "loss": 0.8635, "step": 4829 }, { "epoch": 0.3489443170119385, "grad_norm": 6.656515053499895, "learning_rate": 4.731308316903945e-06, "loss": 0.9509, "step": 4830 }, { "epoch": 0.34901656221214805, "grad_norm": 6.465496922252779, "learning_rate": 4.73117638269425e-06, "loss": 0.9387, "step": 4831 }, { "epoch": 0.34908880741235754, "grad_norm": 6.052854120104628, "learning_rate": 4.731044417941483e-06, "loss": 0.902, "step": 4832 }, { "epoch": 0.3491610526125671, "grad_norm": 5.137125882451521, "learning_rate": 4.730912422647449e-06, "loss": 0.9325, "step": 4833 }, { "epoch": 0.34923329781277657, "grad_norm": 7.37473878559517, "learning_rate": 4.730780396813957e-06, "loss": 0.9161, "step": 4834 }, { "epoch": 0.34930554301298605, "grad_norm": 6.348134597360939, "learning_rate": 4.730648340442814e-06, "loss": 0.9087, "step": 4835 }, { "epoch": 0.3493777882131956, "grad_norm": 6.084119179886959, "learning_rate": 4.7305162535358265e-06, "loss": 0.9036, "step": 4836 }, { "epoch": 0.3494500334134051, "grad_norm": 6.644831503173456, "learning_rate": 4.730384136094803e-06, "loss": 0.8998, "step": 4837 }, { "epoch": 0.3495222786136146, "grad_norm": 6.313926611110418, "learning_rate": 4.730251988121554e-06, "loss": 0.8668, "step": 4838 }, { "epoch": 0.3495945238138241, "grad_norm": 5.982793613980866, "learning_rate": 4.730119809617886e-06, "loss": 0.8702, "step": 4839 }, { "epoch": 0.34966676901403365, "grad_norm": 6.363790792597847, "learning_rate": 4.7299876005856085e-06, "loss": 0.9335, "step": 4840 }, { "epoch": 0.34973901421424314, "grad_norm": 5.777259232816549, "learning_rate": 4.729855361026533e-06, "loss": 0.9321, "step": 4841 }, { "epoch": 0.3498112594144527, "grad_norm": 7.729196251935109, "learning_rate": 4.729723090942469e-06, "loss": 0.9696, "step": 4842 }, { "epoch": 0.34988350461466217, "grad_norm": 7.4130467642597235, "learning_rate": 4.729590790335228e-06, "loss": 0.9163, "step": 4843 }, { "epoch": 0.34995574981487165, "grad_norm": 6.715547872023216, "learning_rate": 4.729458459206619e-06, "loss": 0.9378, "step": 4844 }, { "epoch": 0.3500279950150812, "grad_norm": 7.241359165395393, "learning_rate": 4.7293260975584555e-06, "loss": 0.8755, "step": 4845 }, { "epoch": 0.3501002402152907, "grad_norm": 6.153964320295394, "learning_rate": 4.729193705392548e-06, "loss": 0.9362, "step": 4846 }, { "epoch": 0.3501724854155002, "grad_norm": 6.541357660790818, "learning_rate": 4.72906128271071e-06, "loss": 0.9262, "step": 4847 }, { "epoch": 0.3502447306157097, "grad_norm": 6.240764661056168, "learning_rate": 4.728928829514754e-06, "loss": 0.8284, "step": 4848 }, { "epoch": 0.35031697581591925, "grad_norm": 9.163450983299896, "learning_rate": 4.728796345806492e-06, "loss": 1.0413, "step": 4849 }, { "epoch": 0.35038922101612874, "grad_norm": 8.588068967402812, "learning_rate": 4.72866383158774e-06, "loss": 0.8553, "step": 4850 }, { "epoch": 0.3504614662163382, "grad_norm": 5.773281234670593, "learning_rate": 4.728531286860309e-06, "loss": 0.9089, "step": 4851 }, { "epoch": 0.35053371141654777, "grad_norm": 7.070594402615584, "learning_rate": 4.728398711626016e-06, "loss": 0.9063, "step": 4852 }, { "epoch": 0.35060595661675725, "grad_norm": 6.371737075218412, "learning_rate": 4.728266105886675e-06, "loss": 0.9184, "step": 4853 }, { "epoch": 0.3506782018169668, "grad_norm": 8.023578230265317, "learning_rate": 4.728133469644101e-06, "loss": 0.9957, "step": 4854 }, { "epoch": 0.3507504470171763, "grad_norm": 8.93158873228693, "learning_rate": 4.728000802900109e-06, "loss": 0.9389, "step": 4855 }, { "epoch": 0.3508226922173858, "grad_norm": 7.792955533353454, "learning_rate": 4.7278681056565165e-06, "loss": 0.791, "step": 4856 }, { "epoch": 0.3508949374175953, "grad_norm": 6.759743334169402, "learning_rate": 4.72773537791514e-06, "loss": 0.8517, "step": 4857 }, { "epoch": 0.35096718261780485, "grad_norm": 9.683335719762127, "learning_rate": 4.7276026196777955e-06, "loss": 0.9379, "step": 4858 }, { "epoch": 0.35103942781801434, "grad_norm": 8.912872315236815, "learning_rate": 4.727469830946301e-06, "loss": 1.0097, "step": 4859 }, { "epoch": 0.3511116730182238, "grad_norm": 6.490038060339724, "learning_rate": 4.7273370117224735e-06, "loss": 0.9321, "step": 4860 }, { "epoch": 0.35118391821843337, "grad_norm": 6.323096116194099, "learning_rate": 4.727204162008132e-06, "loss": 0.9648, "step": 4861 }, { "epoch": 0.35125616341864285, "grad_norm": 7.922158621802766, "learning_rate": 4.727071281805095e-06, "loss": 0.9139, "step": 4862 }, { "epoch": 0.3513284086188524, "grad_norm": 7.620837717140178, "learning_rate": 4.726938371115182e-06, "loss": 0.8704, "step": 4863 }, { "epoch": 0.3514006538190619, "grad_norm": 6.160224677112821, "learning_rate": 4.72680542994021e-06, "loss": 0.9316, "step": 4864 }, { "epoch": 0.3514728990192714, "grad_norm": 8.205823588859584, "learning_rate": 4.7266724582820025e-06, "loss": 0.9798, "step": 4865 }, { "epoch": 0.3515451442194809, "grad_norm": 9.939693184920326, "learning_rate": 4.726539456142377e-06, "loss": 0.9463, "step": 4866 }, { "epoch": 0.35161738941969045, "grad_norm": 5.252251823332373, "learning_rate": 4.726406423523156e-06, "loss": 0.8108, "step": 4867 }, { "epoch": 0.35168963461989994, "grad_norm": 7.858255416575531, "learning_rate": 4.726273360426158e-06, "loss": 0.9343, "step": 4868 }, { "epoch": 0.3517618798201094, "grad_norm": 5.891706048743006, "learning_rate": 4.7261402668532075e-06, "loss": 0.8706, "step": 4869 }, { "epoch": 0.35183412502031897, "grad_norm": 6.899268302268688, "learning_rate": 4.726007142806125e-06, "loss": 0.9625, "step": 4870 }, { "epoch": 0.35190637022052845, "grad_norm": 9.246484036703661, "learning_rate": 4.725873988286733e-06, "loss": 0.8858, "step": 4871 }, { "epoch": 0.351978615420738, "grad_norm": 10.018311329243488, "learning_rate": 4.725740803296855e-06, "loss": 0.9347, "step": 4872 }, { "epoch": 0.3520508606209475, "grad_norm": 9.381572809834738, "learning_rate": 4.7256075878383125e-06, "loss": 0.8859, "step": 4873 }, { "epoch": 0.352123105821157, "grad_norm": 6.214186637852806, "learning_rate": 4.725474341912931e-06, "loss": 0.9023, "step": 4874 }, { "epoch": 0.3521953510213665, "grad_norm": 7.045548612352585, "learning_rate": 4.725341065522534e-06, "loss": 0.85, "step": 4875 }, { "epoch": 0.35226759622157605, "grad_norm": 7.861548667325699, "learning_rate": 4.725207758668945e-06, "loss": 0.911, "step": 4876 }, { "epoch": 0.35233984142178554, "grad_norm": 6.849743313400339, "learning_rate": 4.7250744213539905e-06, "loss": 0.7814, "step": 4877 }, { "epoch": 0.352412086621995, "grad_norm": 8.005900591120424, "learning_rate": 4.724941053579493e-06, "loss": 1.021, "step": 4878 }, { "epoch": 0.35248433182220457, "grad_norm": 8.794414169292613, "learning_rate": 4.724807655347281e-06, "loss": 0.9711, "step": 4879 }, { "epoch": 0.35255657702241405, "grad_norm": 8.534585042385078, "learning_rate": 4.724674226659181e-06, "loss": 0.9037, "step": 4880 }, { "epoch": 0.3526288222226236, "grad_norm": 6.61368068833947, "learning_rate": 4.724540767517017e-06, "loss": 0.8663, "step": 4881 }, { "epoch": 0.3527010674228331, "grad_norm": 8.228293791885772, "learning_rate": 4.724407277922616e-06, "loss": 0.9321, "step": 4882 }, { "epoch": 0.3527733126230426, "grad_norm": 7.757025702800414, "learning_rate": 4.724273757877808e-06, "loss": 0.9164, "step": 4883 }, { "epoch": 0.3528455578232521, "grad_norm": 8.151308162992391, "learning_rate": 4.724140207384419e-06, "loss": 0.8744, "step": 4884 }, { "epoch": 0.35291780302346165, "grad_norm": 5.991667365585661, "learning_rate": 4.724006626444277e-06, "loss": 0.9936, "step": 4885 }, { "epoch": 0.35299004822367114, "grad_norm": 7.456715868379862, "learning_rate": 4.723873015059212e-06, "loss": 0.9514, "step": 4886 }, { "epoch": 0.3530622934238806, "grad_norm": 7.361815742340331, "learning_rate": 4.723739373231051e-06, "loss": 0.9352, "step": 4887 }, { "epoch": 0.35313453862409017, "grad_norm": 7.040250315984422, "learning_rate": 4.723605700961625e-06, "loss": 0.8654, "step": 4888 }, { "epoch": 0.35320678382429965, "grad_norm": 7.982055805008355, "learning_rate": 4.723471998252764e-06, "loss": 0.8885, "step": 4889 }, { "epoch": 0.3532790290245092, "grad_norm": 7.045594904779572, "learning_rate": 4.723338265106298e-06, "loss": 0.9284, "step": 4890 }, { "epoch": 0.3533512742247187, "grad_norm": 6.9757819171554445, "learning_rate": 4.723204501524057e-06, "loss": 0.9254, "step": 4891 }, { "epoch": 0.3534235194249282, "grad_norm": 6.671180563885735, "learning_rate": 4.723070707507873e-06, "loss": 0.9172, "step": 4892 }, { "epoch": 0.3534957646251377, "grad_norm": 7.615900832836879, "learning_rate": 4.722936883059575e-06, "loss": 0.8363, "step": 4893 }, { "epoch": 0.35356800982534725, "grad_norm": 8.518594934363396, "learning_rate": 4.722803028181e-06, "loss": 0.9305, "step": 4894 }, { "epoch": 0.35364025502555674, "grad_norm": 7.708305922450911, "learning_rate": 4.722669142873976e-06, "loss": 0.9412, "step": 4895 }, { "epoch": 0.3537125002257662, "grad_norm": 6.018964677668047, "learning_rate": 4.722535227140337e-06, "loss": 0.9591, "step": 4896 }, { "epoch": 0.35378474542597577, "grad_norm": 6.310936158102884, "learning_rate": 4.722401280981917e-06, "loss": 0.8832, "step": 4897 }, { "epoch": 0.35385699062618525, "grad_norm": 5.459292009724019, "learning_rate": 4.722267304400549e-06, "loss": 0.8813, "step": 4898 }, { "epoch": 0.3539292358263948, "grad_norm": 6.946128359613895, "learning_rate": 4.722133297398067e-06, "loss": 0.817, "step": 4899 }, { "epoch": 0.3540014810266043, "grad_norm": 9.00629501596249, "learning_rate": 4.721999259976305e-06, "loss": 0.8911, "step": 4900 }, { "epoch": 0.3540737262268138, "grad_norm": 7.3039210932335275, "learning_rate": 4.7218651921370995e-06, "loss": 0.8221, "step": 4901 }, { "epoch": 0.3541459714270233, "grad_norm": 6.351632468576023, "learning_rate": 4.721731093882284e-06, "loss": 0.8819, "step": 4902 }, { "epoch": 0.35421821662723285, "grad_norm": 7.255725047124695, "learning_rate": 4.721596965213695e-06, "loss": 0.9731, "step": 4903 }, { "epoch": 0.35429046182744234, "grad_norm": 6.738085651599497, "learning_rate": 4.721462806133168e-06, "loss": 0.9219, "step": 4904 }, { "epoch": 0.3543627070276518, "grad_norm": 6.936539867048155, "learning_rate": 4.721328616642541e-06, "loss": 0.8716, "step": 4905 }, { "epoch": 0.35443495222786137, "grad_norm": 8.337714251692192, "learning_rate": 4.721194396743649e-06, "loss": 1.0583, "step": 4906 }, { "epoch": 0.35450719742807085, "grad_norm": 7.4452289844804405, "learning_rate": 4.721060146438331e-06, "loss": 0.948, "step": 4907 }, { "epoch": 0.3545794426282804, "grad_norm": 6.721874273593638, "learning_rate": 4.720925865728424e-06, "loss": 0.873, "step": 4908 }, { "epoch": 0.3546516878284899, "grad_norm": 5.672519951924406, "learning_rate": 4.720791554615767e-06, "loss": 0.8823, "step": 4909 }, { "epoch": 0.3547239330286994, "grad_norm": 9.500782181512577, "learning_rate": 4.720657213102196e-06, "loss": 0.9975, "step": 4910 }, { "epoch": 0.3547961782289089, "grad_norm": 5.80890766006183, "learning_rate": 4.720522841189553e-06, "loss": 0.8909, "step": 4911 }, { "epoch": 0.35486842342911845, "grad_norm": 7.082382976132078, "learning_rate": 4.720388438879677e-06, "loss": 1.0269, "step": 4912 }, { "epoch": 0.35494066862932794, "grad_norm": 6.0012857331108185, "learning_rate": 4.720254006174407e-06, "loss": 0.8668, "step": 4913 }, { "epoch": 0.3550129138295374, "grad_norm": 6.580836373832006, "learning_rate": 4.720119543075584e-06, "loss": 0.8437, "step": 4914 }, { "epoch": 0.35508515902974697, "grad_norm": 6.576531803986631, "learning_rate": 4.719985049585047e-06, "loss": 0.8884, "step": 4915 }, { "epoch": 0.35515740422995645, "grad_norm": 6.354360642104119, "learning_rate": 4.71985052570464e-06, "loss": 0.9202, "step": 4916 }, { "epoch": 0.355229649430166, "grad_norm": 7.059490001293942, "learning_rate": 4.719715971436202e-06, "loss": 0.8587, "step": 4917 }, { "epoch": 0.3553018946303755, "grad_norm": 6.259889331850962, "learning_rate": 4.719581386781576e-06, "loss": 0.8806, "step": 4918 }, { "epoch": 0.355374139830585, "grad_norm": 6.222224965927488, "learning_rate": 4.719446771742604e-06, "loss": 0.8756, "step": 4919 }, { "epoch": 0.3554463850307945, "grad_norm": 9.801724683069573, "learning_rate": 4.71931212632113e-06, "loss": 0.9288, "step": 4920 }, { "epoch": 0.35551863023100405, "grad_norm": 5.880368316429272, "learning_rate": 4.719177450518995e-06, "loss": 0.9322, "step": 4921 }, { "epoch": 0.35559087543121354, "grad_norm": 6.541198746130326, "learning_rate": 4.719042744338044e-06, "loss": 0.883, "step": 4922 }, { "epoch": 0.355663120631423, "grad_norm": 7.341911702680111, "learning_rate": 4.7189080077801205e-06, "loss": 1.006, "step": 4923 }, { "epoch": 0.35573536583163257, "grad_norm": 7.210970034376091, "learning_rate": 4.71877324084707e-06, "loss": 0.925, "step": 4924 }, { "epoch": 0.35580761103184205, "grad_norm": 7.007113248541146, "learning_rate": 4.718638443540736e-06, "loss": 1.0031, "step": 4925 }, { "epoch": 0.3558798562320516, "grad_norm": 5.589575376539317, "learning_rate": 4.718503615862965e-06, "loss": 0.9169, "step": 4926 }, { "epoch": 0.3559521014322611, "grad_norm": 6.620192889028193, "learning_rate": 4.718368757815601e-06, "loss": 0.9676, "step": 4927 }, { "epoch": 0.3560243466324706, "grad_norm": 7.052375220189145, "learning_rate": 4.718233869400492e-06, "loss": 0.8658, "step": 4928 }, { "epoch": 0.3560965918326801, "grad_norm": 6.481501740116167, "learning_rate": 4.718098950619484e-06, "loss": 0.907, "step": 4929 }, { "epoch": 0.35616883703288965, "grad_norm": 6.52317551869151, "learning_rate": 4.717964001474422e-06, "loss": 0.9071, "step": 4930 }, { "epoch": 0.35624108223309914, "grad_norm": 5.323341168561284, "learning_rate": 4.717829021967157e-06, "loss": 0.9234, "step": 4931 }, { "epoch": 0.3563133274333086, "grad_norm": 6.301280106569244, "learning_rate": 4.717694012099533e-06, "loss": 0.9423, "step": 4932 }, { "epoch": 0.35638557263351817, "grad_norm": 7.1438605993325, "learning_rate": 4.717558971873401e-06, "loss": 0.8649, "step": 4933 }, { "epoch": 0.35645781783372765, "grad_norm": 6.237873978956593, "learning_rate": 4.717423901290608e-06, "loss": 0.8732, "step": 4934 }, { "epoch": 0.3565300630339372, "grad_norm": 7.462036690954316, "learning_rate": 4.717288800353004e-06, "loss": 0.8712, "step": 4935 }, { "epoch": 0.3566023082341467, "grad_norm": 5.857999513031538, "learning_rate": 4.717153669062437e-06, "loss": 0.8249, "step": 4936 }, { "epoch": 0.3566745534343562, "grad_norm": 5.871740532716421, "learning_rate": 4.717018507420759e-06, "loss": 0.9077, "step": 4937 }, { "epoch": 0.3567467986345657, "grad_norm": 6.929176079458273, "learning_rate": 4.716883315429819e-06, "loss": 0.9238, "step": 4938 }, { "epoch": 0.35681904383477525, "grad_norm": 7.624888434922804, "learning_rate": 4.716748093091467e-06, "loss": 0.8851, "step": 4939 }, { "epoch": 0.35689128903498474, "grad_norm": 6.647667738285555, "learning_rate": 4.716612840407555e-06, "loss": 0.8153, "step": 4940 }, { "epoch": 0.3569635342351942, "grad_norm": 6.544773246969527, "learning_rate": 4.7164775573799335e-06, "loss": 0.8422, "step": 4941 }, { "epoch": 0.35703577943540377, "grad_norm": 7.085958226351303, "learning_rate": 4.716342244010457e-06, "loss": 0.8414, "step": 4942 }, { "epoch": 0.35710802463561325, "grad_norm": 9.525553417398637, "learning_rate": 4.716206900300974e-06, "loss": 0.9852, "step": 4943 }, { "epoch": 0.3571802698358228, "grad_norm": 6.616585624192105, "learning_rate": 4.716071526253341e-06, "loss": 0.9624, "step": 4944 }, { "epoch": 0.3572525150360323, "grad_norm": 7.109905382430848, "learning_rate": 4.715936121869408e-06, "loss": 0.9158, "step": 4945 }, { "epoch": 0.3573247602362418, "grad_norm": 5.886601781663906, "learning_rate": 4.715800687151031e-06, "loss": 1.0144, "step": 4946 }, { "epoch": 0.3573970054364513, "grad_norm": 5.509353487056074, "learning_rate": 4.715665222100063e-06, "loss": 0.876, "step": 4947 }, { "epoch": 0.35746925063666085, "grad_norm": 5.679100793394427, "learning_rate": 4.715529726718359e-06, "loss": 0.7607, "step": 4948 }, { "epoch": 0.35754149583687034, "grad_norm": 7.393595124893546, "learning_rate": 4.715394201007773e-06, "loss": 0.9082, "step": 4949 }, { "epoch": 0.3576137410370798, "grad_norm": 7.3679941813790775, "learning_rate": 4.71525864497016e-06, "loss": 0.905, "step": 4950 }, { "epoch": 0.35768598623728937, "grad_norm": 7.920610616630715, "learning_rate": 4.715123058607376e-06, "loss": 0.9713, "step": 4951 }, { "epoch": 0.35775823143749885, "grad_norm": 7.555563275327042, "learning_rate": 4.714987441921277e-06, "loss": 0.894, "step": 4952 }, { "epoch": 0.3578304766377084, "grad_norm": 6.832243979857055, "learning_rate": 4.7148517949137205e-06, "loss": 0.9417, "step": 4953 }, { "epoch": 0.3579027218379179, "grad_norm": 7.254915084079018, "learning_rate": 4.714716117586563e-06, "loss": 0.8664, "step": 4954 }, { "epoch": 0.3579749670381274, "grad_norm": 6.613445931111876, "learning_rate": 4.714580409941661e-06, "loss": 0.9687, "step": 4955 }, { "epoch": 0.3580472122383369, "grad_norm": 7.129334854410234, "learning_rate": 4.714444671980873e-06, "loss": 0.9598, "step": 4956 }, { "epoch": 0.35811945743854645, "grad_norm": 6.245145209207071, "learning_rate": 4.714308903706057e-06, "loss": 0.9256, "step": 4957 }, { "epoch": 0.35819170263875594, "grad_norm": 8.629167517406128, "learning_rate": 4.714173105119071e-06, "loss": 0.9485, "step": 4958 }, { "epoch": 0.3582639478389654, "grad_norm": 8.052011217301843, "learning_rate": 4.714037276221774e-06, "loss": 0.9336, "step": 4959 }, { "epoch": 0.35833619303917497, "grad_norm": 7.75341106534365, "learning_rate": 4.713901417016026e-06, "loss": 0.9014, "step": 4960 }, { "epoch": 0.35840843823938445, "grad_norm": 6.764616505649515, "learning_rate": 4.713765527503686e-06, "loss": 0.9146, "step": 4961 }, { "epoch": 0.358480683439594, "grad_norm": 6.2252059473493775, "learning_rate": 4.713629607686616e-06, "loss": 0.8882, "step": 4962 }, { "epoch": 0.3585529286398035, "grad_norm": 8.1681396816763, "learning_rate": 4.713493657566674e-06, "loss": 0.9399, "step": 4963 }, { "epoch": 0.358625173840013, "grad_norm": 9.081185696525822, "learning_rate": 4.7133576771457246e-06, "loss": 0.8967, "step": 4964 }, { "epoch": 0.3586974190402225, "grad_norm": 7.250726334592307, "learning_rate": 4.713221666425626e-06, "loss": 0.927, "step": 4965 }, { "epoch": 0.35876966424043205, "grad_norm": 6.880336008516436, "learning_rate": 4.713085625408242e-06, "loss": 0.9904, "step": 4966 }, { "epoch": 0.35884190944064154, "grad_norm": 7.48058603857136, "learning_rate": 4.712949554095433e-06, "loss": 0.8682, "step": 4967 }, { "epoch": 0.358914154640851, "grad_norm": 6.579162370882893, "learning_rate": 4.7128134524890625e-06, "loss": 0.9681, "step": 4968 }, { "epoch": 0.35898639984106057, "grad_norm": 6.895976023815985, "learning_rate": 4.712677320590995e-06, "loss": 0.9969, "step": 4969 }, { "epoch": 0.35905864504127005, "grad_norm": 7.56386484487934, "learning_rate": 4.712541158403093e-06, "loss": 0.9243, "step": 4970 }, { "epoch": 0.3591308902414796, "grad_norm": 5.997600711487352, "learning_rate": 4.71240496592722e-06, "loss": 0.8798, "step": 4971 }, { "epoch": 0.3592031354416891, "grad_norm": 7.335537088047281, "learning_rate": 4.7122687431652404e-06, "loss": 0.9176, "step": 4972 }, { "epoch": 0.3592753806418986, "grad_norm": 6.059490912176211, "learning_rate": 4.71213249011902e-06, "loss": 0.8983, "step": 4973 }, { "epoch": 0.3593476258421081, "grad_norm": 7.2123159824743315, "learning_rate": 4.711996206790425e-06, "loss": 0.8341, "step": 4974 }, { "epoch": 0.35941987104231765, "grad_norm": 6.637187471952393, "learning_rate": 4.711859893181317e-06, "loss": 0.8325, "step": 4975 }, { "epoch": 0.35949211624252714, "grad_norm": 7.252367619618292, "learning_rate": 4.7117235492935654e-06, "loss": 0.875, "step": 4976 }, { "epoch": 0.3595643614427366, "grad_norm": 6.852940628487041, "learning_rate": 4.711587175129036e-06, "loss": 0.9612, "step": 4977 }, { "epoch": 0.35963660664294617, "grad_norm": 5.521938438893067, "learning_rate": 4.711450770689595e-06, "loss": 0.9368, "step": 4978 }, { "epoch": 0.35970885184315565, "grad_norm": 8.146482736079312, "learning_rate": 4.711314335977109e-06, "loss": 0.9944, "step": 4979 }, { "epoch": 0.3597810970433652, "grad_norm": 8.039947431401963, "learning_rate": 4.711177870993449e-06, "loss": 0.8816, "step": 4980 }, { "epoch": 0.3598533422435747, "grad_norm": 6.22656770751429, "learning_rate": 4.71104137574048e-06, "loss": 0.8752, "step": 4981 }, { "epoch": 0.3599255874437842, "grad_norm": 5.752992929147599, "learning_rate": 4.710904850220071e-06, "loss": 0.8335, "step": 4982 }, { "epoch": 0.3599978326439937, "grad_norm": 5.681863527983998, "learning_rate": 4.71076829443409e-06, "loss": 0.8607, "step": 4983 }, { "epoch": 0.36007007784420325, "grad_norm": 5.768033483442453, "learning_rate": 4.710631708384409e-06, "loss": 0.9113, "step": 4984 }, { "epoch": 0.36014232304441274, "grad_norm": 8.599244514815586, "learning_rate": 4.710495092072896e-06, "loss": 0.8845, "step": 4985 }, { "epoch": 0.3602145682446222, "grad_norm": 6.6664333620574014, "learning_rate": 4.710358445501422e-06, "loss": 1.0013, "step": 4986 }, { "epoch": 0.36028681344483177, "grad_norm": 6.990984969917284, "learning_rate": 4.710221768671857e-06, "loss": 0.8581, "step": 4987 }, { "epoch": 0.36035905864504125, "grad_norm": 9.010902582785217, "learning_rate": 4.710085061586071e-06, "loss": 0.9043, "step": 4988 }, { "epoch": 0.3604313038452508, "grad_norm": 5.49456223525628, "learning_rate": 4.709948324245938e-06, "loss": 0.9596, "step": 4989 }, { "epoch": 0.3605035490454603, "grad_norm": 6.709405122359751, "learning_rate": 4.709811556653328e-06, "loss": 0.9269, "step": 4990 }, { "epoch": 0.3605757942456698, "grad_norm": 7.094159219508558, "learning_rate": 4.7096747588101134e-06, "loss": 0.9666, "step": 4991 }, { "epoch": 0.3606480394458793, "grad_norm": 6.690142813442611, "learning_rate": 4.709537930718167e-06, "loss": 0.8876, "step": 4992 }, { "epoch": 0.36072028464608885, "grad_norm": 7.140229930007096, "learning_rate": 4.709401072379361e-06, "loss": 0.9425, "step": 4993 }, { "epoch": 0.36079252984629834, "grad_norm": 5.609426854140983, "learning_rate": 4.709264183795572e-06, "loss": 0.8603, "step": 4994 }, { "epoch": 0.3608647750465078, "grad_norm": 7.312993742095219, "learning_rate": 4.7091272649686704e-06, "loss": 0.9317, "step": 4995 }, { "epoch": 0.36093702024671737, "grad_norm": 7.883961256422651, "learning_rate": 4.708990315900531e-06, "loss": 0.9281, "step": 4996 }, { "epoch": 0.36100926544692685, "grad_norm": 6.757226254827905, "learning_rate": 4.7088533365930315e-06, "loss": 0.892, "step": 4997 }, { "epoch": 0.3610815106471364, "grad_norm": 7.787677117502665, "learning_rate": 4.708716327048043e-06, "loss": 0.9, "step": 4998 }, { "epoch": 0.3611537558473459, "grad_norm": 6.424814410071113, "learning_rate": 4.708579287267444e-06, "loss": 0.8679, "step": 4999 }, { "epoch": 0.3612260010475554, "grad_norm": 6.434901926142485, "learning_rate": 4.7084422172531085e-06, "loss": 0.8629, "step": 5000 }, { "epoch": 0.3612982462477649, "grad_norm": 7.9968300738009965, "learning_rate": 4.708305117006914e-06, "loss": 0.9542, "step": 5001 }, { "epoch": 0.36137049144797445, "grad_norm": 6.459383905752508, "learning_rate": 4.708167986530737e-06, "loss": 0.9646, "step": 5002 }, { "epoch": 0.36144273664818394, "grad_norm": 6.632019930194133, "learning_rate": 4.708030825826456e-06, "loss": 0.9413, "step": 5003 }, { "epoch": 0.3615149818483934, "grad_norm": 6.486362235494471, "learning_rate": 4.7078936348959456e-06, "loss": 0.9766, "step": 5004 }, { "epoch": 0.36158722704860297, "grad_norm": 6.505022602672438, "learning_rate": 4.707756413741087e-06, "loss": 0.8711, "step": 5005 }, { "epoch": 0.36165947224881245, "grad_norm": 5.717785696640359, "learning_rate": 4.707619162363757e-06, "loss": 1.0074, "step": 5006 }, { "epoch": 0.361731717449022, "grad_norm": 6.40044128565547, "learning_rate": 4.707481880765835e-06, "loss": 0.8916, "step": 5007 }, { "epoch": 0.3618039626492315, "grad_norm": 5.483510180155958, "learning_rate": 4.7073445689492e-06, "loss": 0.886, "step": 5008 }, { "epoch": 0.361876207849441, "grad_norm": 6.2171024459539295, "learning_rate": 4.707207226915731e-06, "loss": 0.859, "step": 5009 }, { "epoch": 0.3619484530496505, "grad_norm": 7.26783374315313, "learning_rate": 4.707069854667309e-06, "loss": 0.8519, "step": 5010 }, { "epoch": 0.36202069824986005, "grad_norm": 5.7918989102936305, "learning_rate": 4.706932452205815e-06, "loss": 0.8291, "step": 5011 }, { "epoch": 0.36209294345006954, "grad_norm": 5.779093902210485, "learning_rate": 4.706795019533129e-06, "loss": 0.8611, "step": 5012 }, { "epoch": 0.362165188650279, "grad_norm": 6.268945285117184, "learning_rate": 4.706657556651133e-06, "loss": 0.8412, "step": 5013 }, { "epoch": 0.36223743385048857, "grad_norm": 6.754185544489044, "learning_rate": 4.706520063561708e-06, "loss": 0.8861, "step": 5014 }, { "epoch": 0.36230967905069805, "grad_norm": 7.938975760172434, "learning_rate": 4.706382540266736e-06, "loss": 0.9324, "step": 5015 }, { "epoch": 0.3623819242509076, "grad_norm": 6.856251902827761, "learning_rate": 4.706244986768102e-06, "loss": 0.8514, "step": 5016 }, { "epoch": 0.3624541694511171, "grad_norm": 7.1145598903137595, "learning_rate": 4.706107403067686e-06, "loss": 0.8715, "step": 5017 }, { "epoch": 0.3625264146513266, "grad_norm": 6.519998157711588, "learning_rate": 4.705969789167372e-06, "loss": 0.9317, "step": 5018 }, { "epoch": 0.3625986598515361, "grad_norm": 6.571639608465659, "learning_rate": 4.705832145069045e-06, "loss": 0.9178, "step": 5019 }, { "epoch": 0.36267090505174565, "grad_norm": 6.5244058038835995, "learning_rate": 4.705694470774589e-06, "loss": 0.957, "step": 5020 }, { "epoch": 0.36274315025195514, "grad_norm": 7.021474324140929, "learning_rate": 4.7055567662858876e-06, "loss": 0.9329, "step": 5021 }, { "epoch": 0.3628153954521646, "grad_norm": 5.924442751147241, "learning_rate": 4.705419031604826e-06, "loss": 0.9623, "step": 5022 }, { "epoch": 0.36288764065237417, "grad_norm": 6.038704963947284, "learning_rate": 4.705281266733292e-06, "loss": 0.8708, "step": 5023 }, { "epoch": 0.36295988585258365, "grad_norm": 7.551878224360889, "learning_rate": 4.705143471673169e-06, "loss": 0.9237, "step": 5024 }, { "epoch": 0.3630321310527932, "grad_norm": 6.412808963701604, "learning_rate": 4.705005646426344e-06, "loss": 0.8918, "step": 5025 }, { "epoch": 0.3631043762530027, "grad_norm": 6.1287945262482655, "learning_rate": 4.704867790994704e-06, "loss": 0.7919, "step": 5026 }, { "epoch": 0.3631766214532122, "grad_norm": 7.174600187320572, "learning_rate": 4.704729905380135e-06, "loss": 0.8449, "step": 5027 }, { "epoch": 0.3632488666534217, "grad_norm": 6.916511196855715, "learning_rate": 4.704591989584527e-06, "loss": 0.9097, "step": 5028 }, { "epoch": 0.36332111185363125, "grad_norm": 7.9823590325853955, "learning_rate": 4.704454043609765e-06, "loss": 0.9288, "step": 5029 }, { "epoch": 0.36339335705384074, "grad_norm": 6.431356220461797, "learning_rate": 4.704316067457739e-06, "loss": 0.8204, "step": 5030 }, { "epoch": 0.3634656022540502, "grad_norm": 7.920124409787002, "learning_rate": 4.704178061130338e-06, "loss": 0.9231, "step": 5031 }, { "epoch": 0.36353784745425977, "grad_norm": 8.974143715816506, "learning_rate": 4.704040024629451e-06, "loss": 0.8762, "step": 5032 }, { "epoch": 0.36361009265446925, "grad_norm": 6.119944140110771, "learning_rate": 4.703901957956967e-06, "loss": 0.8917, "step": 5033 }, { "epoch": 0.3636823378546788, "grad_norm": 6.571206266777593, "learning_rate": 4.703763861114776e-06, "loss": 0.9474, "step": 5034 }, { "epoch": 0.3637545830548883, "grad_norm": 6.701470444292257, "learning_rate": 4.70362573410477e-06, "loss": 0.9708, "step": 5035 }, { "epoch": 0.3638268282550978, "grad_norm": 7.626118312187047, "learning_rate": 4.703487576928838e-06, "loss": 0.9127, "step": 5036 }, { "epoch": 0.3638990734553073, "grad_norm": 6.045129805087031, "learning_rate": 4.7033493895888715e-06, "loss": 0.9084, "step": 5037 }, { "epoch": 0.36397131865551685, "grad_norm": 6.783132827882214, "learning_rate": 4.703211172086764e-06, "loss": 0.7618, "step": 5038 }, { "epoch": 0.36404356385572634, "grad_norm": 7.623692134569398, "learning_rate": 4.703072924424405e-06, "loss": 0.8703, "step": 5039 }, { "epoch": 0.3641158090559358, "grad_norm": 6.008494404165058, "learning_rate": 4.702934646603689e-06, "loss": 0.9508, "step": 5040 }, { "epoch": 0.36418805425614537, "grad_norm": 6.6059339146347416, "learning_rate": 4.702796338626507e-06, "loss": 0.8222, "step": 5041 }, { "epoch": 0.36426029945635485, "grad_norm": 7.10213831548948, "learning_rate": 4.7026580004947545e-06, "loss": 0.9414, "step": 5042 }, { "epoch": 0.3643325446565644, "grad_norm": 8.03145756434722, "learning_rate": 4.702519632210324e-06, "loss": 0.9593, "step": 5043 }, { "epoch": 0.3644047898567739, "grad_norm": 5.963620523211805, "learning_rate": 4.70238123377511e-06, "loss": 0.9251, "step": 5044 }, { "epoch": 0.3644770350569834, "grad_norm": 6.666621907401829, "learning_rate": 4.7022428051910066e-06, "loss": 0.9167, "step": 5045 }, { "epoch": 0.3645492802571929, "grad_norm": 7.752112808178339, "learning_rate": 4.70210434645991e-06, "loss": 0.9508, "step": 5046 }, { "epoch": 0.36462152545740245, "grad_norm": 7.42533713451984, "learning_rate": 4.7019658575837134e-06, "loss": 0.9227, "step": 5047 }, { "epoch": 0.36469377065761194, "grad_norm": 5.92707246355916, "learning_rate": 4.701827338564316e-06, "loss": 0.8724, "step": 5048 }, { "epoch": 0.3647660158578214, "grad_norm": 7.168652311156676, "learning_rate": 4.70168878940361e-06, "loss": 0.9482, "step": 5049 }, { "epoch": 0.36483826105803097, "grad_norm": 5.7039286008114, "learning_rate": 4.7015502101034935e-06, "loss": 1.0019, "step": 5050 }, { "epoch": 0.36491050625824045, "grad_norm": 7.276956626763879, "learning_rate": 4.701411600665866e-06, "loss": 0.8795, "step": 5051 }, { "epoch": 0.36498275145845, "grad_norm": 7.253462655139403, "learning_rate": 4.701272961092622e-06, "loss": 0.8687, "step": 5052 }, { "epoch": 0.3650549966586595, "grad_norm": 6.586369872465699, "learning_rate": 4.70113429138566e-06, "loss": 1.0009, "step": 5053 }, { "epoch": 0.365127241858869, "grad_norm": 7.464548118083085, "learning_rate": 4.700995591546879e-06, "loss": 0.8832, "step": 5054 }, { "epoch": 0.3651994870590785, "grad_norm": 7.28006901802631, "learning_rate": 4.700856861578177e-06, "loss": 0.8895, "step": 5055 }, { "epoch": 0.36527173225928805, "grad_norm": 6.862759322250542, "learning_rate": 4.7007181014814544e-06, "loss": 0.8947, "step": 5056 }, { "epoch": 0.36534397745949754, "grad_norm": 7.35593290074971, "learning_rate": 4.700579311258609e-06, "loss": 0.9413, "step": 5057 }, { "epoch": 0.365416222659707, "grad_norm": 8.277262177364689, "learning_rate": 4.7004404909115405e-06, "loss": 0.864, "step": 5058 }, { "epoch": 0.36548846785991657, "grad_norm": 6.390918976753785, "learning_rate": 4.700301640442152e-06, "loss": 0.8117, "step": 5059 }, { "epoch": 0.36556071306012605, "grad_norm": 5.943007635212768, "learning_rate": 4.700162759852342e-06, "loss": 0.8244, "step": 5060 }, { "epoch": 0.3656329582603356, "grad_norm": 8.200134778659653, "learning_rate": 4.700023849144011e-06, "loss": 0.8931, "step": 5061 }, { "epoch": 0.3657052034605451, "grad_norm": 4.96972953689855, "learning_rate": 4.699884908319063e-06, "loss": 0.8491, "step": 5062 }, { "epoch": 0.3657774486607546, "grad_norm": 7.384574752475754, "learning_rate": 4.699745937379399e-06, "loss": 0.8493, "step": 5063 }, { "epoch": 0.3658496938609641, "grad_norm": 6.414743143411688, "learning_rate": 4.69960693632692e-06, "loss": 0.8667, "step": 5064 }, { "epoch": 0.3659219390611736, "grad_norm": 6.419975649588446, "learning_rate": 4.69946790516353e-06, "loss": 0.8907, "step": 5065 }, { "epoch": 0.36599418426138314, "grad_norm": 6.246172228732276, "learning_rate": 4.699328843891132e-06, "loss": 0.9023, "step": 5066 }, { "epoch": 0.3660664294615926, "grad_norm": 6.645849277712391, "learning_rate": 4.699189752511631e-06, "loss": 0.9341, "step": 5067 }, { "epoch": 0.36613867466180217, "grad_norm": 8.65545804914606, "learning_rate": 4.699050631026929e-06, "loss": 0.9438, "step": 5068 }, { "epoch": 0.36621091986201165, "grad_norm": 7.766229924486542, "learning_rate": 4.6989114794389315e-06, "loss": 0.8394, "step": 5069 }, { "epoch": 0.3662831650622212, "grad_norm": 7.421230440761344, "learning_rate": 4.698772297749543e-06, "loss": 0.8711, "step": 5070 }, { "epoch": 0.3663554102624307, "grad_norm": 7.081762999952766, "learning_rate": 4.698633085960669e-06, "loss": 0.9101, "step": 5071 }, { "epoch": 0.3664276554626402, "grad_norm": 7.229764123975974, "learning_rate": 4.6984938440742154e-06, "loss": 0.8247, "step": 5072 }, { "epoch": 0.3664999006628497, "grad_norm": 7.225530965229627, "learning_rate": 4.6983545720920875e-06, "loss": 0.8749, "step": 5073 }, { "epoch": 0.3665721458630592, "grad_norm": 7.320512343440108, "learning_rate": 4.6982152700161935e-06, "loss": 0.8496, "step": 5074 }, { "epoch": 0.36664439106326874, "grad_norm": 6.671605696592146, "learning_rate": 4.698075937848438e-06, "loss": 0.8011, "step": 5075 }, { "epoch": 0.3667166362634782, "grad_norm": 7.420610500833346, "learning_rate": 4.69793657559073e-06, "loss": 0.976, "step": 5076 }, { "epoch": 0.36678888146368777, "grad_norm": 6.78803922362451, "learning_rate": 4.697797183244978e-06, "loss": 0.8946, "step": 5077 }, { "epoch": 0.36686112666389725, "grad_norm": 6.529413430477025, "learning_rate": 4.697657760813087e-06, "loss": 1.0076, "step": 5078 }, { "epoch": 0.3669333718641068, "grad_norm": 6.971961132630625, "learning_rate": 4.697518308296969e-06, "loss": 1.0296, "step": 5079 }, { "epoch": 0.3670056170643163, "grad_norm": 10.202471818798566, "learning_rate": 4.697378825698532e-06, "loss": 0.8563, "step": 5080 }, { "epoch": 0.3670778622645258, "grad_norm": 6.511243632374442, "learning_rate": 4.6972393130196845e-06, "loss": 0.8924, "step": 5081 }, { "epoch": 0.3671501074647353, "grad_norm": 7.225060813146289, "learning_rate": 4.697099770262336e-06, "loss": 0.8592, "step": 5082 }, { "epoch": 0.3672223526649448, "grad_norm": 6.043441546832439, "learning_rate": 4.696960197428398e-06, "loss": 0.9356, "step": 5083 }, { "epoch": 0.36729459786515434, "grad_norm": 5.995754647373749, "learning_rate": 4.696820594519782e-06, "loss": 0.9047, "step": 5084 }, { "epoch": 0.3673668430653638, "grad_norm": 5.133989219544241, "learning_rate": 4.696680961538397e-06, "loss": 0.8688, "step": 5085 }, { "epoch": 0.36743908826557337, "grad_norm": 5.168807949862192, "learning_rate": 4.696541298486155e-06, "loss": 0.9125, "step": 5086 }, { "epoch": 0.36751133346578285, "grad_norm": 6.578675308223925, "learning_rate": 4.696401605364968e-06, "loss": 0.9334, "step": 5087 }, { "epoch": 0.3675835786659924, "grad_norm": 6.805889877446283, "learning_rate": 4.6962618821767485e-06, "loss": 0.8058, "step": 5088 }, { "epoch": 0.3676558238662019, "grad_norm": 6.837610007452434, "learning_rate": 4.6961221289234095e-06, "loss": 0.8759, "step": 5089 }, { "epoch": 0.3677280690664114, "grad_norm": 7.091554814617222, "learning_rate": 4.695982345606864e-06, "loss": 0.8883, "step": 5090 }, { "epoch": 0.3678003142666209, "grad_norm": 11.533417288708007, "learning_rate": 4.695842532229025e-06, "loss": 0.9535, "step": 5091 }, { "epoch": 0.3678725594668304, "grad_norm": 6.664302629301291, "learning_rate": 4.695702688791806e-06, "loss": 0.9188, "step": 5092 }, { "epoch": 0.36794480466703994, "grad_norm": 8.262444560316558, "learning_rate": 4.695562815297122e-06, "loss": 0.8967, "step": 5093 }, { "epoch": 0.3680170498672494, "grad_norm": 5.867434236136001, "learning_rate": 4.695422911746889e-06, "loss": 0.875, "step": 5094 }, { "epoch": 0.36808929506745897, "grad_norm": 7.964415085479196, "learning_rate": 4.6952829781430194e-06, "loss": 0.991, "step": 5095 }, { "epoch": 0.36816154026766845, "grad_norm": 6.394462347387637, "learning_rate": 4.695143014487432e-06, "loss": 0.8164, "step": 5096 }, { "epoch": 0.368233785467878, "grad_norm": 7.274515729693799, "learning_rate": 4.69500302078204e-06, "loss": 0.8488, "step": 5097 }, { "epoch": 0.3683060306680875, "grad_norm": 6.662723742193948, "learning_rate": 4.694862997028762e-06, "loss": 0.9688, "step": 5098 }, { "epoch": 0.368378275868297, "grad_norm": 9.377027775491749, "learning_rate": 4.694722943229513e-06, "loss": 0.9626, "step": 5099 }, { "epoch": 0.3684505210685065, "grad_norm": 7.243423636245577, "learning_rate": 4.694582859386211e-06, "loss": 0.9235, "step": 5100 }, { "epoch": 0.368522766268716, "grad_norm": 9.052727422125162, "learning_rate": 4.694442745500773e-06, "loss": 0.8921, "step": 5101 }, { "epoch": 0.36859501146892554, "grad_norm": 6.3811735292461345, "learning_rate": 4.694302601575119e-06, "loss": 0.9253, "step": 5102 }, { "epoch": 0.368667256669135, "grad_norm": 6.61165065682988, "learning_rate": 4.694162427611165e-06, "loss": 0.8607, "step": 5103 }, { "epoch": 0.36873950186934457, "grad_norm": 5.746179057511724, "learning_rate": 4.694022223610832e-06, "loss": 0.8858, "step": 5104 }, { "epoch": 0.36881174706955405, "grad_norm": 6.592532655695097, "learning_rate": 4.693881989576038e-06, "loss": 0.8893, "step": 5105 }, { "epoch": 0.3688839922697636, "grad_norm": 6.267846529388446, "learning_rate": 4.693741725508703e-06, "loss": 0.9237, "step": 5106 }, { "epoch": 0.3689562374699731, "grad_norm": 6.59901803543394, "learning_rate": 4.693601431410747e-06, "loss": 0.8916, "step": 5107 }, { "epoch": 0.3690284826701826, "grad_norm": 8.260869120023877, "learning_rate": 4.693461107284091e-06, "loss": 0.938, "step": 5108 }, { "epoch": 0.3691007278703921, "grad_norm": 8.338718454526882, "learning_rate": 4.693320753130655e-06, "loss": 0.9511, "step": 5109 }, { "epoch": 0.3691729730706016, "grad_norm": 6.723385936939159, "learning_rate": 4.693180368952361e-06, "loss": 0.9767, "step": 5110 }, { "epoch": 0.36924521827081114, "grad_norm": 6.968546637745322, "learning_rate": 4.693039954751131e-06, "loss": 0.9438, "step": 5111 }, { "epoch": 0.3693174634710206, "grad_norm": 7.601604653660099, "learning_rate": 4.692899510528886e-06, "loss": 1.011, "step": 5112 }, { "epoch": 0.36938970867123017, "grad_norm": 10.02838987652679, "learning_rate": 4.69275903628755e-06, "loss": 0.9059, "step": 5113 }, { "epoch": 0.36946195387143965, "grad_norm": 7.4327120716622925, "learning_rate": 4.692618532029046e-06, "loss": 0.9632, "step": 5114 }, { "epoch": 0.3695341990716492, "grad_norm": 6.9158631129929296, "learning_rate": 4.692477997755296e-06, "loss": 0.9647, "step": 5115 }, { "epoch": 0.3696064442718587, "grad_norm": 6.823168039908594, "learning_rate": 4.692337433468224e-06, "loss": 0.8878, "step": 5116 }, { "epoch": 0.3696786894720682, "grad_norm": 6.277819555513047, "learning_rate": 4.692196839169756e-06, "loss": 0.896, "step": 5117 }, { "epoch": 0.3697509346722777, "grad_norm": 5.803460424011385, "learning_rate": 4.692056214861815e-06, "loss": 0.9324, "step": 5118 }, { "epoch": 0.3698231798724872, "grad_norm": 5.827552910510954, "learning_rate": 4.691915560546326e-06, "loss": 0.9059, "step": 5119 }, { "epoch": 0.36989542507269674, "grad_norm": 7.751625136753079, "learning_rate": 4.691774876225215e-06, "loss": 0.8827, "step": 5120 }, { "epoch": 0.3699676702729062, "grad_norm": 5.202894784227545, "learning_rate": 4.691634161900408e-06, "loss": 0.9443, "step": 5121 }, { "epoch": 0.37003991547311577, "grad_norm": 7.302734897365587, "learning_rate": 4.69149341757383e-06, "loss": 0.9627, "step": 5122 }, { "epoch": 0.37011216067332525, "grad_norm": 8.094212817064404, "learning_rate": 4.69135264324741e-06, "loss": 0.9108, "step": 5123 }, { "epoch": 0.3701844058735348, "grad_norm": 8.001072334899915, "learning_rate": 4.691211838923073e-06, "loss": 0.9936, "step": 5124 }, { "epoch": 0.3702566510737443, "grad_norm": 7.057774946456157, "learning_rate": 4.691071004602748e-06, "loss": 0.9693, "step": 5125 }, { "epoch": 0.3703288962739538, "grad_norm": 8.28314026186061, "learning_rate": 4.690930140288362e-06, "loss": 0.9636, "step": 5126 }, { "epoch": 0.3704011414741633, "grad_norm": 6.2135374373501255, "learning_rate": 4.690789245981843e-06, "loss": 0.9795, "step": 5127 }, { "epoch": 0.3704733866743728, "grad_norm": 6.704372618905871, "learning_rate": 4.690648321685121e-06, "loss": 0.945, "step": 5128 }, { "epoch": 0.37054563187458234, "grad_norm": 6.921217773097917, "learning_rate": 4.690507367400124e-06, "loss": 0.9778, "step": 5129 }, { "epoch": 0.3706178770747918, "grad_norm": 8.479396307707528, "learning_rate": 4.690366383128782e-06, "loss": 0.8512, "step": 5130 }, { "epoch": 0.37069012227500137, "grad_norm": 5.849313627227383, "learning_rate": 4.690225368873025e-06, "loss": 0.9582, "step": 5131 }, { "epoch": 0.37076236747521085, "grad_norm": 7.8086357630810745, "learning_rate": 4.690084324634783e-06, "loss": 0.9135, "step": 5132 }, { "epoch": 0.3708346126754204, "grad_norm": 6.740914623562459, "learning_rate": 4.689943250415988e-06, "loss": 0.8988, "step": 5133 }, { "epoch": 0.3709068578756299, "grad_norm": 6.4966877053893715, "learning_rate": 4.6898021462185696e-06, "loss": 0.919, "step": 5134 }, { "epoch": 0.3709791030758394, "grad_norm": 6.945389943501024, "learning_rate": 4.68966101204446e-06, "loss": 0.8766, "step": 5135 }, { "epoch": 0.3710513482760489, "grad_norm": 6.791957014309537, "learning_rate": 4.689519847895591e-06, "loss": 1.0238, "step": 5136 }, { "epoch": 0.3711235934762584, "grad_norm": 7.453350967404564, "learning_rate": 4.689378653773896e-06, "loss": 0.9656, "step": 5137 }, { "epoch": 0.37119583867646794, "grad_norm": 6.517584563227267, "learning_rate": 4.689237429681307e-06, "loss": 0.9392, "step": 5138 }, { "epoch": 0.3712680838766774, "grad_norm": 5.981430883273412, "learning_rate": 4.689096175619757e-06, "loss": 0.8759, "step": 5139 }, { "epoch": 0.37134032907688697, "grad_norm": 5.178580279413186, "learning_rate": 4.688954891591181e-06, "loss": 0.83, "step": 5140 }, { "epoch": 0.37141257427709645, "grad_norm": 7.447807542406068, "learning_rate": 4.688813577597511e-06, "loss": 0.9338, "step": 5141 }, { "epoch": 0.371484819477306, "grad_norm": 6.980011876486919, "learning_rate": 4.688672233640683e-06, "loss": 0.9568, "step": 5142 }, { "epoch": 0.3715570646775155, "grad_norm": 6.205145134592536, "learning_rate": 4.688530859722632e-06, "loss": 0.8556, "step": 5143 }, { "epoch": 0.371629309877725, "grad_norm": 6.674487993698643, "learning_rate": 4.6883894558452926e-06, "loss": 0.8532, "step": 5144 }, { "epoch": 0.3717015550779345, "grad_norm": 5.766907091953759, "learning_rate": 4.688248022010601e-06, "loss": 0.9644, "step": 5145 }, { "epoch": 0.371773800278144, "grad_norm": 7.451814100353716, "learning_rate": 4.688106558220492e-06, "loss": 0.9803, "step": 5146 }, { "epoch": 0.37184604547835354, "grad_norm": 7.183477453687505, "learning_rate": 4.687965064476904e-06, "loss": 0.886, "step": 5147 }, { "epoch": 0.371918290678563, "grad_norm": 5.773376051558987, "learning_rate": 4.687823540781773e-06, "loss": 0.8708, "step": 5148 }, { "epoch": 0.37199053587877257, "grad_norm": 5.524377378706426, "learning_rate": 4.687681987137036e-06, "loss": 0.889, "step": 5149 }, { "epoch": 0.37206278107898205, "grad_norm": 10.651660098501262, "learning_rate": 4.687540403544633e-06, "loss": 0.9234, "step": 5150 }, { "epoch": 0.3721350262791916, "grad_norm": 6.7471324339709655, "learning_rate": 4.687398790006498e-06, "loss": 1.0042, "step": 5151 }, { "epoch": 0.3722072714794011, "grad_norm": 6.832052188271946, "learning_rate": 4.687257146524573e-06, "loss": 0.9489, "step": 5152 }, { "epoch": 0.3722795166796106, "grad_norm": 7.208090041213159, "learning_rate": 4.687115473100796e-06, "loss": 0.9724, "step": 5153 }, { "epoch": 0.3723517618798201, "grad_norm": 6.55278647753131, "learning_rate": 4.686973769737106e-06, "loss": 0.8545, "step": 5154 }, { "epoch": 0.3724240070800296, "grad_norm": 7.946023764792948, "learning_rate": 4.686832036435444e-06, "loss": 0.9161, "step": 5155 }, { "epoch": 0.37249625228023914, "grad_norm": 6.953969016979536, "learning_rate": 4.686690273197749e-06, "loss": 0.9249, "step": 5156 }, { "epoch": 0.3725684974804486, "grad_norm": 6.706705910862999, "learning_rate": 4.686548480025962e-06, "loss": 0.9076, "step": 5157 }, { "epoch": 0.37264074268065817, "grad_norm": 7.413722907791714, "learning_rate": 4.6864066569220235e-06, "loss": 0.9018, "step": 5158 }, { "epoch": 0.37271298788086765, "grad_norm": 6.420228473246232, "learning_rate": 4.686264803887877e-06, "loss": 0.8885, "step": 5159 }, { "epoch": 0.3727852330810772, "grad_norm": 7.977526091864991, "learning_rate": 4.6861229209254615e-06, "loss": 0.9132, "step": 5160 }, { "epoch": 0.3728574782812867, "grad_norm": 8.086695126040269, "learning_rate": 4.685981008036721e-06, "loss": 0.9636, "step": 5161 }, { "epoch": 0.3729297234814962, "grad_norm": 5.537066488755875, "learning_rate": 4.685839065223597e-06, "loss": 0.7833, "step": 5162 }, { "epoch": 0.3730019686817057, "grad_norm": 8.636725816935146, "learning_rate": 4.685697092488034e-06, "loss": 0.9737, "step": 5163 }, { "epoch": 0.3730742138819152, "grad_norm": 7.015888166695513, "learning_rate": 4.685555089831976e-06, "loss": 0.9113, "step": 5164 }, { "epoch": 0.37314645908212474, "grad_norm": 7.070644577405248, "learning_rate": 4.6854130572573645e-06, "loss": 0.8787, "step": 5165 }, { "epoch": 0.3732187042823342, "grad_norm": 6.5957421642500265, "learning_rate": 4.685270994766146e-06, "loss": 0.9693, "step": 5166 }, { "epoch": 0.37329094948254377, "grad_norm": 5.848307907702943, "learning_rate": 4.685128902360263e-06, "loss": 0.8612, "step": 5167 }, { "epoch": 0.37336319468275325, "grad_norm": 7.805717270496901, "learning_rate": 4.684986780041663e-06, "loss": 0.9352, "step": 5168 }, { "epoch": 0.3734354398829628, "grad_norm": 7.859752122692915, "learning_rate": 4.68484462781229e-06, "loss": 0.9766, "step": 5169 }, { "epoch": 0.3735076850831723, "grad_norm": 6.392596325497311, "learning_rate": 4.6847024456740905e-06, "loss": 0.8656, "step": 5170 }, { "epoch": 0.3735799302833818, "grad_norm": 6.339049152588703, "learning_rate": 4.684560233629011e-06, "loss": 0.9277, "step": 5171 }, { "epoch": 0.3736521754835913, "grad_norm": 6.063789466504572, "learning_rate": 4.684417991678999e-06, "loss": 0.8256, "step": 5172 }, { "epoch": 0.3737244206838008, "grad_norm": 7.572334389599429, "learning_rate": 4.684275719825999e-06, "loss": 0.8787, "step": 5173 }, { "epoch": 0.37379666588401034, "grad_norm": 6.023949825204559, "learning_rate": 4.684133418071962e-06, "loss": 0.9036, "step": 5174 }, { "epoch": 0.3738689110842198, "grad_norm": 7.1291034326325935, "learning_rate": 4.683991086418833e-06, "loss": 1.0101, "step": 5175 }, { "epoch": 0.37394115628442937, "grad_norm": 6.404376691138799, "learning_rate": 4.683848724868563e-06, "loss": 0.8408, "step": 5176 }, { "epoch": 0.37401340148463885, "grad_norm": 6.152167346181337, "learning_rate": 4.6837063334230995e-06, "loss": 0.9046, "step": 5177 }, { "epoch": 0.3740856466848484, "grad_norm": 10.09554877064485, "learning_rate": 4.6835639120843915e-06, "loss": 1.0101, "step": 5178 }, { "epoch": 0.3741578918850579, "grad_norm": 6.303982871385725, "learning_rate": 4.683421460854388e-06, "loss": 0.9325, "step": 5179 }, { "epoch": 0.3742301370852674, "grad_norm": 5.2066898448654495, "learning_rate": 4.683278979735041e-06, "loss": 0.9144, "step": 5180 }, { "epoch": 0.3743023822854769, "grad_norm": 6.49747241834457, "learning_rate": 4.6831364687283e-06, "loss": 0.8527, "step": 5181 }, { "epoch": 0.3743746274856864, "grad_norm": 7.981557569200936, "learning_rate": 4.682993927836116e-06, "loss": 0.9736, "step": 5182 }, { "epoch": 0.37444687268589594, "grad_norm": 7.689783765037047, "learning_rate": 4.682851357060439e-06, "loss": 0.9701, "step": 5183 }, { "epoch": 0.3745191178861054, "grad_norm": 7.064428555649147, "learning_rate": 4.682708756403223e-06, "loss": 0.8188, "step": 5184 }, { "epoch": 0.37459136308631497, "grad_norm": 6.158399797148886, "learning_rate": 4.682566125866419e-06, "loss": 0.9297, "step": 5185 }, { "epoch": 0.37466360828652445, "grad_norm": 6.214285776924421, "learning_rate": 4.682423465451979e-06, "loss": 0.8724, "step": 5186 }, { "epoch": 0.374735853486734, "grad_norm": 8.720623293657559, "learning_rate": 4.682280775161856e-06, "loss": 0.9322, "step": 5187 }, { "epoch": 0.3748080986869435, "grad_norm": 7.08422777101162, "learning_rate": 4.682138054998004e-06, "loss": 0.9724, "step": 5188 }, { "epoch": 0.374880343887153, "grad_norm": 7.39011295794929, "learning_rate": 4.681995304962375e-06, "loss": 0.8678, "step": 5189 }, { "epoch": 0.3749525890873625, "grad_norm": 5.602174009944292, "learning_rate": 4.681852525056927e-06, "loss": 0.864, "step": 5190 }, { "epoch": 0.375024834287572, "grad_norm": 8.76544308941145, "learning_rate": 4.68170971528361e-06, "loss": 0.8856, "step": 5191 }, { "epoch": 0.37509707948778154, "grad_norm": 7.705475918517907, "learning_rate": 4.6815668756443824e-06, "loss": 0.9837, "step": 5192 }, { "epoch": 0.375169324687991, "grad_norm": 6.387727187683236, "learning_rate": 4.681424006141197e-06, "loss": 0.9513, "step": 5193 }, { "epoch": 0.37524156988820057, "grad_norm": 8.688880865554596, "learning_rate": 4.681281106776011e-06, "loss": 0.9268, "step": 5194 }, { "epoch": 0.37531381508841005, "grad_norm": 7.763398432684578, "learning_rate": 4.6811381775507805e-06, "loss": 0.9226, "step": 5195 }, { "epoch": 0.3753860602886196, "grad_norm": 5.467472297727353, "learning_rate": 4.680995218467462e-06, "loss": 0.8596, "step": 5196 }, { "epoch": 0.3754583054888291, "grad_norm": 8.207812555771701, "learning_rate": 4.680852229528012e-06, "loss": 0.9231, "step": 5197 }, { "epoch": 0.3755305506890386, "grad_norm": 8.9850033747646, "learning_rate": 4.680709210734389e-06, "loss": 0.9454, "step": 5198 }, { "epoch": 0.3756027958892481, "grad_norm": 7.227321275621066, "learning_rate": 4.680566162088549e-06, "loss": 0.9379, "step": 5199 }, { "epoch": 0.3756750410894576, "grad_norm": 7.9634467934911, "learning_rate": 4.680423083592452e-06, "loss": 0.8719, "step": 5200 }, { "epoch": 0.37574728628966714, "grad_norm": 7.294698059652239, "learning_rate": 4.680279975248057e-06, "loss": 0.9425, "step": 5201 }, { "epoch": 0.3758195314898766, "grad_norm": 6.788698105421446, "learning_rate": 4.68013683705732e-06, "loss": 0.9084, "step": 5202 }, { "epoch": 0.37589177669008617, "grad_norm": 6.136327739572049, "learning_rate": 4.679993669022204e-06, "loss": 1.0004, "step": 5203 }, { "epoch": 0.37596402189029565, "grad_norm": 6.032596734255846, "learning_rate": 4.679850471144667e-06, "loss": 0.8435, "step": 5204 }, { "epoch": 0.3760362670905052, "grad_norm": 7.674036409110242, "learning_rate": 4.679707243426669e-06, "loss": 0.9929, "step": 5205 }, { "epoch": 0.3761085122907147, "grad_norm": 7.77518113100094, "learning_rate": 4.6795639858701715e-06, "loss": 0.852, "step": 5206 }, { "epoch": 0.3761807574909242, "grad_norm": 8.1826217911768, "learning_rate": 4.679420698477135e-06, "loss": 1.0343, "step": 5207 }, { "epoch": 0.3762530026911337, "grad_norm": 6.965133481249379, "learning_rate": 4.679277381249523e-06, "loss": 0.8963, "step": 5208 }, { "epoch": 0.3763252478913432, "grad_norm": 6.234743647563232, "learning_rate": 4.679134034189294e-06, "loss": 0.9972, "step": 5209 }, { "epoch": 0.37639749309155274, "grad_norm": 6.87425810538939, "learning_rate": 4.678990657298413e-06, "loss": 0.8532, "step": 5210 }, { "epoch": 0.3764697382917622, "grad_norm": 6.23533160296825, "learning_rate": 4.678847250578841e-06, "loss": 0.8139, "step": 5211 }, { "epoch": 0.37654198349197177, "grad_norm": 7.27191323146106, "learning_rate": 4.6787038140325424e-06, "loss": 0.8056, "step": 5212 }, { "epoch": 0.37661422869218125, "grad_norm": 7.137676272912936, "learning_rate": 4.678560347661481e-06, "loss": 0.9202, "step": 5213 }, { "epoch": 0.3766864738923908, "grad_norm": 6.236934585783608, "learning_rate": 4.678416851467618e-06, "loss": 0.8631, "step": 5214 }, { "epoch": 0.3767587190926003, "grad_norm": 9.12725258810826, "learning_rate": 4.6782733254529215e-06, "loss": 0.9854, "step": 5215 }, { "epoch": 0.3768309642928098, "grad_norm": 6.477478330716541, "learning_rate": 4.678129769619354e-06, "loss": 0.9014, "step": 5216 }, { "epoch": 0.3769032094930193, "grad_norm": 7.813736962621977, "learning_rate": 4.6779861839688815e-06, "loss": 0.9765, "step": 5217 }, { "epoch": 0.3769754546932288, "grad_norm": 6.84066633313946, "learning_rate": 4.6778425685034685e-06, "loss": 0.8257, "step": 5218 }, { "epoch": 0.37704769989343834, "grad_norm": 6.065330237999914, "learning_rate": 4.677698923225082e-06, "loss": 0.9208, "step": 5219 }, { "epoch": 0.3771199450936478, "grad_norm": 6.236672190927614, "learning_rate": 4.677555248135688e-06, "loss": 0.9505, "step": 5220 }, { "epoch": 0.37719219029385737, "grad_norm": 8.981060444975782, "learning_rate": 4.6774115432372534e-06, "loss": 0.9996, "step": 5221 }, { "epoch": 0.37726443549406685, "grad_norm": 6.894792676081173, "learning_rate": 4.6772678085317455e-06, "loss": 1.0062, "step": 5222 }, { "epoch": 0.3773366806942764, "grad_norm": 5.651377734432659, "learning_rate": 4.677124044021132e-06, "loss": 0.9562, "step": 5223 }, { "epoch": 0.3774089258944859, "grad_norm": 6.2394350402566126, "learning_rate": 4.676980249707381e-06, "loss": 0.9509, "step": 5224 }, { "epoch": 0.3774811710946954, "grad_norm": 7.718039290724301, "learning_rate": 4.676836425592461e-06, "loss": 0.9725, "step": 5225 }, { "epoch": 0.3775534162949049, "grad_norm": 6.788129419750857, "learning_rate": 4.6766925716783394e-06, "loss": 0.8483, "step": 5226 }, { "epoch": 0.3776256614951144, "grad_norm": 5.179892603523089, "learning_rate": 4.6765486879669865e-06, "loss": 0.9069, "step": 5227 }, { "epoch": 0.37769790669532394, "grad_norm": 6.379885129462498, "learning_rate": 4.676404774460373e-06, "loss": 0.9014, "step": 5228 }, { "epoch": 0.3777701518955334, "grad_norm": 7.919990751858331, "learning_rate": 4.676260831160467e-06, "loss": 0.904, "step": 5229 }, { "epoch": 0.37784239709574297, "grad_norm": 6.18926483570892, "learning_rate": 4.6761168580692415e-06, "loss": 0.8886, "step": 5230 }, { "epoch": 0.37791464229595245, "grad_norm": 5.997025706279517, "learning_rate": 4.675972855188665e-06, "loss": 0.9297, "step": 5231 }, { "epoch": 0.377986887496162, "grad_norm": 7.5248844581427585, "learning_rate": 4.675828822520709e-06, "loss": 0.9755, "step": 5232 }, { "epoch": 0.3780591326963715, "grad_norm": 7.152870946316465, "learning_rate": 4.675684760067347e-06, "loss": 0.8706, "step": 5233 }, { "epoch": 0.378131377896581, "grad_norm": 6.5987622343059416, "learning_rate": 4.6755406678305495e-06, "loss": 0.8689, "step": 5234 }, { "epoch": 0.3782036230967905, "grad_norm": 6.649839080928658, "learning_rate": 4.67539654581229e-06, "loss": 0.8952, "step": 5235 }, { "epoch": 0.378275868297, "grad_norm": 5.754754795713634, "learning_rate": 4.675252394014539e-06, "loss": 1.0599, "step": 5236 }, { "epoch": 0.37834811349720954, "grad_norm": 5.384715149552601, "learning_rate": 4.675108212439273e-06, "loss": 0.9237, "step": 5237 }, { "epoch": 0.378420358697419, "grad_norm": 6.296313693868167, "learning_rate": 4.6749640010884644e-06, "loss": 0.9189, "step": 5238 }, { "epoch": 0.37849260389762857, "grad_norm": 7.818078086761677, "learning_rate": 4.674819759964088e-06, "loss": 0.9619, "step": 5239 }, { "epoch": 0.37856484909783805, "grad_norm": 7.330105446841567, "learning_rate": 4.6746754890681165e-06, "loss": 0.8886, "step": 5240 }, { "epoch": 0.3786370942980476, "grad_norm": 6.538299114919882, "learning_rate": 4.674531188402527e-06, "loss": 0.8499, "step": 5241 }, { "epoch": 0.3787093394982571, "grad_norm": 8.482710364150735, "learning_rate": 4.674386857969293e-06, "loss": 0.8991, "step": 5242 }, { "epoch": 0.3787815846984666, "grad_norm": 7.74690430863758, "learning_rate": 4.674242497770393e-06, "loss": 0.9199, "step": 5243 }, { "epoch": 0.3788538298986761, "grad_norm": 8.44429838600778, "learning_rate": 4.6740981078078e-06, "loss": 1.0164, "step": 5244 }, { "epoch": 0.3789260750988856, "grad_norm": 7.340236131575411, "learning_rate": 4.673953688083492e-06, "loss": 0.8953, "step": 5245 }, { "epoch": 0.37899832029909514, "grad_norm": 5.460622797508534, "learning_rate": 4.673809238599446e-06, "loss": 0.9007, "step": 5246 }, { "epoch": 0.3790705654993046, "grad_norm": 7.939321954636003, "learning_rate": 4.67366475935764e-06, "loss": 0.8335, "step": 5247 }, { "epoch": 0.37914281069951417, "grad_norm": 5.618651304131356, "learning_rate": 4.673520250360051e-06, "loss": 0.8569, "step": 5248 }, { "epoch": 0.37921505589972365, "grad_norm": 6.728843027683952, "learning_rate": 4.673375711608656e-06, "loss": 0.8603, "step": 5249 }, { "epoch": 0.3792873010999332, "grad_norm": 6.681867277287038, "learning_rate": 4.6732311431054365e-06, "loss": 0.9007, "step": 5250 }, { "epoch": 0.3793595463001427, "grad_norm": 5.777967686262714, "learning_rate": 4.67308654485237e-06, "loss": 0.8322, "step": 5251 }, { "epoch": 0.3794317915003522, "grad_norm": 5.537389246950336, "learning_rate": 4.672941916851436e-06, "loss": 0.9093, "step": 5252 }, { "epoch": 0.3795040367005617, "grad_norm": 6.999095040816409, "learning_rate": 4.6727972591046135e-06, "loss": 0.8817, "step": 5253 }, { "epoch": 0.3795762819007712, "grad_norm": 6.767179033145967, "learning_rate": 4.672652571613885e-06, "loss": 0.8812, "step": 5254 }, { "epoch": 0.37964852710098074, "grad_norm": 6.796395681734202, "learning_rate": 4.672507854381229e-06, "loss": 0.9239, "step": 5255 }, { "epoch": 0.3797207723011902, "grad_norm": 7.088823801032163, "learning_rate": 4.672363107408627e-06, "loss": 0.9581, "step": 5256 }, { "epoch": 0.37979301750139977, "grad_norm": 6.376696790885593, "learning_rate": 4.67221833069806e-06, "loss": 0.8994, "step": 5257 }, { "epoch": 0.37986526270160925, "grad_norm": 8.602248549928449, "learning_rate": 4.672073524251513e-06, "loss": 0.8846, "step": 5258 }, { "epoch": 0.3799375079018188, "grad_norm": 5.956914061347313, "learning_rate": 4.671928688070964e-06, "loss": 0.8963, "step": 5259 }, { "epoch": 0.3800097531020283, "grad_norm": 7.609100274173148, "learning_rate": 4.671783822158398e-06, "loss": 0.9859, "step": 5260 }, { "epoch": 0.3800819983022378, "grad_norm": 7.458661915735435, "learning_rate": 4.671638926515798e-06, "loss": 0.8197, "step": 5261 }, { "epoch": 0.3801542435024473, "grad_norm": 8.7500762936127, "learning_rate": 4.671494001145147e-06, "loss": 0.9667, "step": 5262 }, { "epoch": 0.3802264887026568, "grad_norm": 5.85459082029613, "learning_rate": 4.67134904604843e-06, "loss": 0.8026, "step": 5263 }, { "epoch": 0.38029873390286634, "grad_norm": 5.757862189193667, "learning_rate": 4.67120406122763e-06, "loss": 0.9325, "step": 5264 }, { "epoch": 0.3803709791030758, "grad_norm": 7.325228055456917, "learning_rate": 4.671059046684733e-06, "loss": 0.8296, "step": 5265 }, { "epoch": 0.38044322430328537, "grad_norm": 6.316151686370755, "learning_rate": 4.670914002421722e-06, "loss": 0.9203, "step": 5266 }, { "epoch": 0.38051546950349485, "grad_norm": 6.912438015375346, "learning_rate": 4.670768928440584e-06, "loss": 0.8814, "step": 5267 }, { "epoch": 0.3805877147037044, "grad_norm": 7.514442779126188, "learning_rate": 4.670623824743306e-06, "loss": 0.8536, "step": 5268 }, { "epoch": 0.3806599599039139, "grad_norm": 6.007059077331013, "learning_rate": 4.670478691331872e-06, "loss": 0.8721, "step": 5269 }, { "epoch": 0.3807322051041234, "grad_norm": 7.491724662287539, "learning_rate": 4.6703335282082715e-06, "loss": 0.87, "step": 5270 }, { "epoch": 0.3808044503043329, "grad_norm": 8.457503054102686, "learning_rate": 4.670188335374489e-06, "loss": 0.8924, "step": 5271 }, { "epoch": 0.3808766955045424, "grad_norm": 7.70937875067995, "learning_rate": 4.670043112832513e-06, "loss": 0.9104, "step": 5272 }, { "epoch": 0.38094894070475194, "grad_norm": 8.094087925046212, "learning_rate": 4.669897860584333e-06, "loss": 0.9031, "step": 5273 }, { "epoch": 0.3810211859049614, "grad_norm": 8.740633065278278, "learning_rate": 4.669752578631935e-06, "loss": 0.9094, "step": 5274 }, { "epoch": 0.38109343110517097, "grad_norm": 8.063126414013325, "learning_rate": 4.669607266977309e-06, "loss": 0.8882, "step": 5275 }, { "epoch": 0.38116567630538045, "grad_norm": 7.878681835937013, "learning_rate": 4.6694619256224445e-06, "loss": 0.854, "step": 5276 }, { "epoch": 0.38123792150559, "grad_norm": 7.753062227756895, "learning_rate": 4.669316554569331e-06, "loss": 0.9796, "step": 5277 }, { "epoch": 0.3813101667057995, "grad_norm": 7.7244022076360075, "learning_rate": 4.669171153819957e-06, "loss": 0.9053, "step": 5278 }, { "epoch": 0.38138241190600897, "grad_norm": 6.622597888699732, "learning_rate": 4.669025723376315e-06, "loss": 0.9277, "step": 5279 }, { "epoch": 0.3814546571062185, "grad_norm": 5.399036512251063, "learning_rate": 4.668880263240395e-06, "loss": 0.8608, "step": 5280 }, { "epoch": 0.381526902306428, "grad_norm": 7.258471570923072, "learning_rate": 4.668734773414188e-06, "loss": 0.8997, "step": 5281 }, { "epoch": 0.38159914750663754, "grad_norm": 6.977178702315354, "learning_rate": 4.668589253899686e-06, "loss": 0.853, "step": 5282 }, { "epoch": 0.381671392706847, "grad_norm": 6.9911257486472405, "learning_rate": 4.66844370469888e-06, "loss": 0.9587, "step": 5283 }, { "epoch": 0.38174363790705657, "grad_norm": 6.159866742265672, "learning_rate": 4.668298125813765e-06, "loss": 0.8299, "step": 5284 }, { "epoch": 0.38181588310726605, "grad_norm": 6.270549104486571, "learning_rate": 4.668152517246332e-06, "loss": 0.8714, "step": 5285 }, { "epoch": 0.3818881283074756, "grad_norm": 5.831314318793861, "learning_rate": 4.668006878998574e-06, "loss": 0.8402, "step": 5286 }, { "epoch": 0.3819603735076851, "grad_norm": 6.487775898911888, "learning_rate": 4.6678612110724855e-06, "loss": 0.8742, "step": 5287 }, { "epoch": 0.38203261870789457, "grad_norm": 6.858721795403867, "learning_rate": 4.667715513470059e-06, "loss": 0.8911, "step": 5288 }, { "epoch": 0.3821048639081041, "grad_norm": 7.387530269657609, "learning_rate": 4.6675697861932915e-06, "loss": 0.9566, "step": 5289 }, { "epoch": 0.3821771091083136, "grad_norm": 7.7016223734105, "learning_rate": 4.667424029244176e-06, "loss": 0.9973, "step": 5290 }, { "epoch": 0.38224935430852314, "grad_norm": 7.762902133618123, "learning_rate": 4.667278242624709e-06, "loss": 0.8858, "step": 5291 }, { "epoch": 0.3823215995087326, "grad_norm": 5.79128075647287, "learning_rate": 4.667132426336886e-06, "loss": 0.861, "step": 5292 }, { "epoch": 0.38239384470894217, "grad_norm": 8.592887918407891, "learning_rate": 4.666986580382702e-06, "loss": 0.9645, "step": 5293 }, { "epoch": 0.38246608990915165, "grad_norm": 5.9884513338125585, "learning_rate": 4.666840704764154e-06, "loss": 0.9016, "step": 5294 }, { "epoch": 0.3825383351093612, "grad_norm": 5.702560104193849, "learning_rate": 4.66669479948324e-06, "loss": 0.8322, "step": 5295 }, { "epoch": 0.3826105803095707, "grad_norm": 6.47259142054136, "learning_rate": 4.666548864541956e-06, "loss": 0.8425, "step": 5296 }, { "epoch": 0.38268282550978017, "grad_norm": 7.7901195547309765, "learning_rate": 4.6664028999423e-06, "loss": 0.8839, "step": 5297 }, { "epoch": 0.3827550707099897, "grad_norm": 8.547896134155614, "learning_rate": 4.666256905686271e-06, "loss": 0.8388, "step": 5298 }, { "epoch": 0.3828273159101992, "grad_norm": 5.064805812596523, "learning_rate": 4.666110881775867e-06, "loss": 0.809, "step": 5299 }, { "epoch": 0.38289956111040874, "grad_norm": 5.94027869806983, "learning_rate": 4.665964828213086e-06, "loss": 0.8777, "step": 5300 }, { "epoch": 0.3829718063106182, "grad_norm": 8.467201232054283, "learning_rate": 4.665818744999929e-06, "loss": 0.9039, "step": 5301 }, { "epoch": 0.38304405151082777, "grad_norm": 8.16075413304552, "learning_rate": 4.665672632138395e-06, "loss": 0.9486, "step": 5302 }, { "epoch": 0.38311629671103725, "grad_norm": 7.824605855064482, "learning_rate": 4.665526489630484e-06, "loss": 0.9313, "step": 5303 }, { "epoch": 0.3831885419112468, "grad_norm": 5.581113383467594, "learning_rate": 4.6653803174781974e-06, "loss": 0.9061, "step": 5304 }, { "epoch": 0.3832607871114563, "grad_norm": 5.795127913846406, "learning_rate": 4.665234115683535e-06, "loss": 0.8592, "step": 5305 }, { "epoch": 0.38333303231166577, "grad_norm": 6.805796273401009, "learning_rate": 4.665087884248499e-06, "loss": 0.9627, "step": 5306 }, { "epoch": 0.3834052775118753, "grad_norm": 8.110773395957159, "learning_rate": 4.66494162317509e-06, "loss": 0.8043, "step": 5307 }, { "epoch": 0.3834775227120848, "grad_norm": 7.518235227192382, "learning_rate": 4.664795332465313e-06, "loss": 0.9383, "step": 5308 }, { "epoch": 0.38354976791229434, "grad_norm": 7.681522681763637, "learning_rate": 4.664649012121168e-06, "loss": 0.9536, "step": 5309 }, { "epoch": 0.3836220131125038, "grad_norm": 6.630927547053831, "learning_rate": 4.664502662144658e-06, "loss": 0.8503, "step": 5310 }, { "epoch": 0.38369425831271337, "grad_norm": 6.562735417094396, "learning_rate": 4.664356282537787e-06, "loss": 0.8419, "step": 5311 }, { "epoch": 0.38376650351292285, "grad_norm": 6.9281393599028, "learning_rate": 4.66420987330256e-06, "loss": 0.8683, "step": 5312 }, { "epoch": 0.3838387487131324, "grad_norm": 6.031197819459346, "learning_rate": 4.66406343444098e-06, "loss": 0.8993, "step": 5313 }, { "epoch": 0.3839109939133419, "grad_norm": 6.532367227448123, "learning_rate": 4.663916965955052e-06, "loss": 0.8755, "step": 5314 }, { "epoch": 0.38398323911355137, "grad_norm": 6.318095229698819, "learning_rate": 4.66377046784678e-06, "loss": 0.8935, "step": 5315 }, { "epoch": 0.3840554843137609, "grad_norm": 5.787537491019902, "learning_rate": 4.663623940118172e-06, "loss": 0.9163, "step": 5316 }, { "epoch": 0.3841277295139704, "grad_norm": 5.410911534880761, "learning_rate": 4.66347738277123e-06, "loss": 0.8645, "step": 5317 }, { "epoch": 0.38419997471417994, "grad_norm": 6.507170903153059, "learning_rate": 4.663330795807964e-06, "loss": 0.8351, "step": 5318 }, { "epoch": 0.3842722199143894, "grad_norm": 6.471244589007722, "learning_rate": 4.6631841792303785e-06, "loss": 0.9064, "step": 5319 }, { "epoch": 0.38434446511459897, "grad_norm": 5.86102060225009, "learning_rate": 4.663037533040482e-06, "loss": 0.925, "step": 5320 }, { "epoch": 0.38441671031480845, "grad_norm": 6.815366369462466, "learning_rate": 4.662890857240281e-06, "loss": 0.8939, "step": 5321 }, { "epoch": 0.384488955515018, "grad_norm": 6.3613101348763115, "learning_rate": 4.662744151831783e-06, "loss": 0.9133, "step": 5322 }, { "epoch": 0.3845612007152275, "grad_norm": 6.008624236550697, "learning_rate": 4.662597416816997e-06, "loss": 0.8555, "step": 5323 }, { "epoch": 0.38463344591543697, "grad_norm": 6.166314123575401, "learning_rate": 4.662450652197932e-06, "loss": 0.9034, "step": 5324 }, { "epoch": 0.3847056911156465, "grad_norm": 7.329202991574133, "learning_rate": 4.662303857976595e-06, "loss": 0.877, "step": 5325 }, { "epoch": 0.384777936315856, "grad_norm": 7.52133805403313, "learning_rate": 4.662157034154998e-06, "loss": 0.7772, "step": 5326 }, { "epoch": 0.38485018151606554, "grad_norm": 6.472393391992109, "learning_rate": 4.662010180735151e-06, "loss": 0.9287, "step": 5327 }, { "epoch": 0.384922426716275, "grad_norm": 6.4271934001118565, "learning_rate": 4.661863297719063e-06, "loss": 0.9184, "step": 5328 }, { "epoch": 0.38499467191648457, "grad_norm": 6.860303950568769, "learning_rate": 4.661716385108744e-06, "loss": 0.9197, "step": 5329 }, { "epoch": 0.38506691711669405, "grad_norm": 6.344296286226838, "learning_rate": 4.661569442906208e-06, "loss": 0.8996, "step": 5330 }, { "epoch": 0.3851391623169036, "grad_norm": 6.327791030924986, "learning_rate": 4.6614224711134624e-06, "loss": 0.9865, "step": 5331 }, { "epoch": 0.3852114075171131, "grad_norm": 5.3622435964183035, "learning_rate": 4.661275469732522e-06, "loss": 0.8947, "step": 5332 }, { "epoch": 0.38528365271732257, "grad_norm": 7.7788481778081175, "learning_rate": 4.6611284387653995e-06, "loss": 0.994, "step": 5333 }, { "epoch": 0.3853558979175321, "grad_norm": 8.029318491898048, "learning_rate": 4.660981378214106e-06, "loss": 0.9198, "step": 5334 }, { "epoch": 0.3854281431177416, "grad_norm": 5.866793805969968, "learning_rate": 4.6608342880806555e-06, "loss": 0.814, "step": 5335 }, { "epoch": 0.38550038831795114, "grad_norm": 5.943539088023998, "learning_rate": 4.660687168367062e-06, "loss": 0.9264, "step": 5336 }, { "epoch": 0.3855726335181606, "grad_norm": 6.8892826494188775, "learning_rate": 4.660540019075338e-06, "loss": 0.953, "step": 5337 }, { "epoch": 0.38564487871837017, "grad_norm": 6.1608163386830785, "learning_rate": 4.660392840207498e-06, "loss": 0.9115, "step": 5338 }, { "epoch": 0.38571712391857965, "grad_norm": 9.23372228287518, "learning_rate": 4.6602456317655584e-06, "loss": 0.9477, "step": 5339 }, { "epoch": 0.3857893691187892, "grad_norm": 7.302657848037782, "learning_rate": 4.660098393751534e-06, "loss": 1.0181, "step": 5340 }, { "epoch": 0.3858616143189987, "grad_norm": 7.534534545373702, "learning_rate": 4.659951126167439e-06, "loss": 0.9636, "step": 5341 }, { "epoch": 0.38593385951920817, "grad_norm": 5.9968565017709095, "learning_rate": 4.65980382901529e-06, "loss": 0.8788, "step": 5342 }, { "epoch": 0.3860061047194177, "grad_norm": 6.843273128808941, "learning_rate": 4.659656502297104e-06, "loss": 0.8988, "step": 5343 }, { "epoch": 0.3860783499196272, "grad_norm": 6.661932440482471, "learning_rate": 4.6595091460148976e-06, "loss": 0.9442, "step": 5344 }, { "epoch": 0.38615059511983674, "grad_norm": 8.468702252805898, "learning_rate": 4.659361760170687e-06, "loss": 0.925, "step": 5345 }, { "epoch": 0.3862228403200462, "grad_norm": 7.59064460962306, "learning_rate": 4.659214344766492e-06, "loss": 0.9951, "step": 5346 }, { "epoch": 0.38629508552025577, "grad_norm": 6.198030085020115, "learning_rate": 4.6590668998043275e-06, "loss": 1.0044, "step": 5347 }, { "epoch": 0.38636733072046525, "grad_norm": 5.778117883078534, "learning_rate": 4.658919425286214e-06, "loss": 0.9512, "step": 5348 }, { "epoch": 0.3864395759206748, "grad_norm": 8.185005565121763, "learning_rate": 4.65877192121417e-06, "loss": 0.9772, "step": 5349 }, { "epoch": 0.3865118211208843, "grad_norm": 7.5840814521858, "learning_rate": 4.6586243875902145e-06, "loss": 0.9522, "step": 5350 }, { "epoch": 0.38658406632109377, "grad_norm": 6.195329432807093, "learning_rate": 4.658476824416367e-06, "loss": 0.9954, "step": 5351 }, { "epoch": 0.3866563115213033, "grad_norm": 7.163867344584944, "learning_rate": 4.658329231694648e-06, "loss": 0.9203, "step": 5352 }, { "epoch": 0.3867285567215128, "grad_norm": 7.872058621847324, "learning_rate": 4.6581816094270785e-06, "loss": 0.9247, "step": 5353 }, { "epoch": 0.38680080192172234, "grad_norm": 6.727424734680501, "learning_rate": 4.658033957615677e-06, "loss": 0.9036, "step": 5354 }, { "epoch": 0.3868730471219318, "grad_norm": 6.109693435994636, "learning_rate": 4.657886276262466e-06, "loss": 0.9675, "step": 5355 }, { "epoch": 0.38694529232214137, "grad_norm": 6.0243702919517474, "learning_rate": 4.657738565369469e-06, "loss": 0.8901, "step": 5356 }, { "epoch": 0.38701753752235085, "grad_norm": 5.9982214516885, "learning_rate": 4.6575908249387055e-06, "loss": 0.9202, "step": 5357 }, { "epoch": 0.3870897827225604, "grad_norm": 7.117761783481973, "learning_rate": 4.657443054972199e-06, "loss": 0.9406, "step": 5358 }, { "epoch": 0.3871620279227699, "grad_norm": 5.743297153092525, "learning_rate": 4.6572952554719715e-06, "loss": 0.8916, "step": 5359 }, { "epoch": 0.38723427312297937, "grad_norm": 7.320967507548578, "learning_rate": 4.657147426440049e-06, "loss": 0.9024, "step": 5360 }, { "epoch": 0.3873065183231889, "grad_norm": 7.04047924014468, "learning_rate": 4.656999567878451e-06, "loss": 0.9487, "step": 5361 }, { "epoch": 0.3873787635233984, "grad_norm": 6.554062423636937, "learning_rate": 4.656851679789205e-06, "loss": 0.937, "step": 5362 }, { "epoch": 0.38745100872360794, "grad_norm": 5.479716961087754, "learning_rate": 4.6567037621743335e-06, "loss": 0.8606, "step": 5363 }, { "epoch": 0.3875232539238174, "grad_norm": 7.690059328879684, "learning_rate": 4.6565558150358625e-06, "loss": 1.0513, "step": 5364 }, { "epoch": 0.38759549912402697, "grad_norm": 6.83853856764813, "learning_rate": 4.656407838375817e-06, "loss": 0.9258, "step": 5365 }, { "epoch": 0.38766774432423645, "grad_norm": 6.895854323740319, "learning_rate": 4.656259832196222e-06, "loss": 0.9639, "step": 5366 }, { "epoch": 0.387739989524446, "grad_norm": 7.278346452828578, "learning_rate": 4.656111796499104e-06, "loss": 1.0007, "step": 5367 }, { "epoch": 0.3878122347246555, "grad_norm": 5.491362812317087, "learning_rate": 4.65596373128649e-06, "loss": 0.8086, "step": 5368 }, { "epoch": 0.38788447992486497, "grad_norm": 6.573785868472691, "learning_rate": 4.655815636560407e-06, "loss": 1.0255, "step": 5369 }, { "epoch": 0.3879567251250745, "grad_norm": 6.349719916947648, "learning_rate": 4.655667512322881e-06, "loss": 0.8685, "step": 5370 }, { "epoch": 0.388028970325284, "grad_norm": 6.5107582267173445, "learning_rate": 4.655519358575941e-06, "loss": 0.8321, "step": 5371 }, { "epoch": 0.38810121552549354, "grad_norm": 6.989757128249455, "learning_rate": 4.655371175321615e-06, "loss": 0.9576, "step": 5372 }, { "epoch": 0.388173460725703, "grad_norm": 6.29066391926687, "learning_rate": 4.655222962561929e-06, "loss": 0.8825, "step": 5373 }, { "epoch": 0.38824570592591257, "grad_norm": 7.221366903406396, "learning_rate": 4.6550747202989166e-06, "loss": 0.9607, "step": 5374 }, { "epoch": 0.38831795112612205, "grad_norm": 7.155207795458592, "learning_rate": 4.6549264485346035e-06, "loss": 0.8403, "step": 5375 }, { "epoch": 0.3883901963263316, "grad_norm": 5.873524257286058, "learning_rate": 4.65477814727102e-06, "loss": 0.9241, "step": 5376 }, { "epoch": 0.3884624415265411, "grad_norm": 7.325577738505056, "learning_rate": 4.654629816510198e-06, "loss": 0.9468, "step": 5377 }, { "epoch": 0.38853468672675057, "grad_norm": 7.114753717380832, "learning_rate": 4.654481456254166e-06, "loss": 0.8085, "step": 5378 }, { "epoch": 0.3886069319269601, "grad_norm": 6.809219550449997, "learning_rate": 4.654333066504956e-06, "loss": 1.0039, "step": 5379 }, { "epoch": 0.3886791771271696, "grad_norm": 7.61792965134567, "learning_rate": 4.654184647264599e-06, "loss": 0.9537, "step": 5380 }, { "epoch": 0.38875142232737914, "grad_norm": 6.0460734464504355, "learning_rate": 4.654036198535127e-06, "loss": 0.8863, "step": 5381 }, { "epoch": 0.3888236675275886, "grad_norm": 5.938572515695158, "learning_rate": 4.653887720318572e-06, "loss": 0.9434, "step": 5382 }, { "epoch": 0.38889591272779817, "grad_norm": 6.359037873051114, "learning_rate": 4.653739212616966e-06, "loss": 0.9238, "step": 5383 }, { "epoch": 0.38896815792800765, "grad_norm": 5.6649517755568555, "learning_rate": 4.6535906754323425e-06, "loss": 0.8632, "step": 5384 }, { "epoch": 0.3890404031282172, "grad_norm": 6.48468472763678, "learning_rate": 4.653442108766735e-06, "loss": 0.877, "step": 5385 }, { "epoch": 0.3891126483284267, "grad_norm": 6.488755991342682, "learning_rate": 4.653293512622176e-06, "loss": 0.8993, "step": 5386 }, { "epoch": 0.38918489352863617, "grad_norm": 7.235435589356359, "learning_rate": 4.6531448870007025e-06, "loss": 0.9405, "step": 5387 }, { "epoch": 0.3892571387288457, "grad_norm": 7.95080964386143, "learning_rate": 4.652996231904346e-06, "loss": 0.8874, "step": 5388 }, { "epoch": 0.3893293839290552, "grad_norm": 6.525655147942103, "learning_rate": 4.652847547335144e-06, "loss": 0.9027, "step": 5389 }, { "epoch": 0.38940162912926474, "grad_norm": 6.030187009122539, "learning_rate": 4.65269883329513e-06, "loss": 0.8401, "step": 5390 }, { "epoch": 0.3894738743294742, "grad_norm": 6.384571444737618, "learning_rate": 4.65255008978634e-06, "loss": 0.9341, "step": 5391 }, { "epoch": 0.38954611952968377, "grad_norm": 5.661552851875542, "learning_rate": 4.652401316810811e-06, "loss": 0.8544, "step": 5392 }, { "epoch": 0.38961836472989325, "grad_norm": 7.102634328064015, "learning_rate": 4.652252514370579e-06, "loss": 0.9567, "step": 5393 }, { "epoch": 0.3896906099301028, "grad_norm": 7.187888756894763, "learning_rate": 4.652103682467682e-06, "loss": 0.8806, "step": 5394 }, { "epoch": 0.3897628551303123, "grad_norm": 8.329896548797702, "learning_rate": 4.651954821104156e-06, "loss": 0.9739, "step": 5395 }, { "epoch": 0.38983510033052177, "grad_norm": 5.85167984278206, "learning_rate": 4.651805930282039e-06, "loss": 0.8509, "step": 5396 }, { "epoch": 0.3899073455307313, "grad_norm": 7.5454550092348125, "learning_rate": 4.651657010003371e-06, "loss": 0.9159, "step": 5397 }, { "epoch": 0.3899795907309408, "grad_norm": 6.054661038248627, "learning_rate": 4.651508060270188e-06, "loss": 0.8189, "step": 5398 }, { "epoch": 0.39005183593115034, "grad_norm": 6.492642860647578, "learning_rate": 4.65135908108453e-06, "loss": 0.9586, "step": 5399 }, { "epoch": 0.3901240811313598, "grad_norm": 7.630617917538972, "learning_rate": 4.651210072448437e-06, "loss": 0.926, "step": 5400 }, { "epoch": 0.39019632633156937, "grad_norm": 6.0301522160133745, "learning_rate": 4.651061034363948e-06, "loss": 0.8986, "step": 5401 }, { "epoch": 0.39026857153177885, "grad_norm": 8.387800313061636, "learning_rate": 4.650911966833104e-06, "loss": 0.9049, "step": 5402 }, { "epoch": 0.3903408167319884, "grad_norm": 6.855989007146473, "learning_rate": 4.650762869857946e-06, "loss": 0.8529, "step": 5403 }, { "epoch": 0.3904130619321979, "grad_norm": 6.37455718054571, "learning_rate": 4.650613743440513e-06, "loss": 0.9229, "step": 5404 }, { "epoch": 0.39048530713240737, "grad_norm": 7.312490870804264, "learning_rate": 4.650464587582848e-06, "loss": 0.8041, "step": 5405 }, { "epoch": 0.3905575523326169, "grad_norm": 5.636749204049748, "learning_rate": 4.6503154022869925e-06, "loss": 0.9189, "step": 5406 }, { "epoch": 0.3906297975328264, "grad_norm": 7.21063410326322, "learning_rate": 4.65016618755499e-06, "loss": 0.908, "step": 5407 }, { "epoch": 0.39070204273303594, "grad_norm": 6.344234654746953, "learning_rate": 4.65001694338888e-06, "loss": 0.8697, "step": 5408 }, { "epoch": 0.3907742879332454, "grad_norm": 7.878407059204757, "learning_rate": 4.649867669790708e-06, "loss": 0.8492, "step": 5409 }, { "epoch": 0.39084653313345497, "grad_norm": 8.25508977923218, "learning_rate": 4.649718366762518e-06, "loss": 0.918, "step": 5410 }, { "epoch": 0.39091877833366445, "grad_norm": 6.339138215035714, "learning_rate": 4.649569034306352e-06, "loss": 0.8712, "step": 5411 }, { "epoch": 0.390991023533874, "grad_norm": 6.066433918159699, "learning_rate": 4.649419672424254e-06, "loss": 0.9045, "step": 5412 }, { "epoch": 0.3910632687340835, "grad_norm": 5.088075624935569, "learning_rate": 4.649270281118271e-06, "loss": 0.9452, "step": 5413 }, { "epoch": 0.39113551393429297, "grad_norm": 8.197601842267986, "learning_rate": 4.649120860390446e-06, "loss": 0.8363, "step": 5414 }, { "epoch": 0.3912077591345025, "grad_norm": 8.000304693141722, "learning_rate": 4.648971410242825e-06, "loss": 0.9318, "step": 5415 }, { "epoch": 0.391280004334712, "grad_norm": 7.108924264557807, "learning_rate": 4.648821930677454e-06, "loss": 0.8719, "step": 5416 }, { "epoch": 0.39135224953492154, "grad_norm": 7.098718726372552, "learning_rate": 4.6486724216963795e-06, "loss": 0.8669, "step": 5417 }, { "epoch": 0.391424494735131, "grad_norm": 6.608068170154289, "learning_rate": 4.6485228833016485e-06, "loss": 0.8829, "step": 5418 }, { "epoch": 0.39149673993534057, "grad_norm": 7.031555169158799, "learning_rate": 4.648373315495306e-06, "loss": 0.9997, "step": 5419 }, { "epoch": 0.39156898513555005, "grad_norm": 4.6371101400602734, "learning_rate": 4.648223718279402e-06, "loss": 0.7336, "step": 5420 }, { "epoch": 0.3916412303357596, "grad_norm": 6.987321543589906, "learning_rate": 4.648074091655983e-06, "loss": 0.8773, "step": 5421 }, { "epoch": 0.3917134755359691, "grad_norm": 7.684623117928401, "learning_rate": 4.647924435627097e-06, "loss": 0.919, "step": 5422 }, { "epoch": 0.39178572073617857, "grad_norm": 6.8070511331555705, "learning_rate": 4.647774750194794e-06, "loss": 0.9248, "step": 5423 }, { "epoch": 0.3918579659363881, "grad_norm": 5.989237669769854, "learning_rate": 4.647625035361122e-06, "loss": 0.9633, "step": 5424 }, { "epoch": 0.3919302111365976, "grad_norm": 7.603835202313077, "learning_rate": 4.647475291128131e-06, "loss": 0.9416, "step": 5425 }, { "epoch": 0.39200245633680714, "grad_norm": 5.84857826889014, "learning_rate": 4.64732551749787e-06, "loss": 0.8919, "step": 5426 }, { "epoch": 0.3920747015370166, "grad_norm": 7.913283284304088, "learning_rate": 4.64717571447239e-06, "loss": 0.9226, "step": 5427 }, { "epoch": 0.39214694673722617, "grad_norm": 8.450211507644397, "learning_rate": 4.647025882053743e-06, "loss": 1.0459, "step": 5428 }, { "epoch": 0.39221919193743565, "grad_norm": 7.110302673850779, "learning_rate": 4.646876020243978e-06, "loss": 0.922, "step": 5429 }, { "epoch": 0.3922914371376452, "grad_norm": 6.095315428935787, "learning_rate": 4.646726129045146e-06, "loss": 0.9363, "step": 5430 }, { "epoch": 0.3923636823378547, "grad_norm": 5.7580655791565265, "learning_rate": 4.646576208459302e-06, "loss": 0.8129, "step": 5431 }, { "epoch": 0.39243592753806417, "grad_norm": 8.32649968016742, "learning_rate": 4.646426258488495e-06, "loss": 0.8697, "step": 5432 }, { "epoch": 0.3925081727382737, "grad_norm": 7.759136352064683, "learning_rate": 4.64627627913478e-06, "loss": 0.9647, "step": 5433 }, { "epoch": 0.3925804179384832, "grad_norm": 6.813514283994227, "learning_rate": 4.646126270400209e-06, "loss": 0.9098, "step": 5434 }, { "epoch": 0.39265266313869274, "grad_norm": 5.47423779481711, "learning_rate": 4.645976232286835e-06, "loss": 0.9386, "step": 5435 }, { "epoch": 0.3927249083389022, "grad_norm": 7.5562291904664605, "learning_rate": 4.645826164796714e-06, "loss": 0.8995, "step": 5436 }, { "epoch": 0.39279715353911177, "grad_norm": 7.097073083327919, "learning_rate": 4.645676067931898e-06, "loss": 0.9003, "step": 5437 }, { "epoch": 0.39286939873932125, "grad_norm": 8.017406600251107, "learning_rate": 4.645525941694442e-06, "loss": 0.8731, "step": 5438 }, { "epoch": 0.3929416439395308, "grad_norm": 7.478111879852939, "learning_rate": 4.645375786086404e-06, "loss": 0.87, "step": 5439 }, { "epoch": 0.3930138891397403, "grad_norm": 6.7101902573470245, "learning_rate": 4.645225601109835e-06, "loss": 0.8424, "step": 5440 }, { "epoch": 0.39308613433994977, "grad_norm": 7.264725036668521, "learning_rate": 4.6450753867667944e-06, "loss": 0.8467, "step": 5441 }, { "epoch": 0.3931583795401593, "grad_norm": 5.521103512852642, "learning_rate": 4.644925143059337e-06, "loss": 0.8625, "step": 5442 }, { "epoch": 0.3932306247403688, "grad_norm": 6.547916739165738, "learning_rate": 4.644774869989519e-06, "loss": 0.8882, "step": 5443 }, { "epoch": 0.39330286994057834, "grad_norm": 7.325407455621096, "learning_rate": 4.6446245675593994e-06, "loss": 0.8846, "step": 5444 }, { "epoch": 0.3933751151407878, "grad_norm": 6.144614129983793, "learning_rate": 4.644474235771035e-06, "loss": 0.8964, "step": 5445 }, { "epoch": 0.39344736034099737, "grad_norm": 5.672547523834089, "learning_rate": 4.644323874626482e-06, "loss": 0.8125, "step": 5446 }, { "epoch": 0.39351960554120685, "grad_norm": 5.382637275756039, "learning_rate": 4.644173484127801e-06, "loss": 0.8375, "step": 5447 }, { "epoch": 0.3935918507414164, "grad_norm": 9.218907655563994, "learning_rate": 4.64402306427705e-06, "loss": 0.9101, "step": 5448 }, { "epoch": 0.3936640959416259, "grad_norm": 6.195100989946392, "learning_rate": 4.643872615076287e-06, "loss": 0.8902, "step": 5449 }, { "epoch": 0.39373634114183537, "grad_norm": 6.110752896685929, "learning_rate": 4.643722136527573e-06, "loss": 0.8366, "step": 5450 }, { "epoch": 0.3938085863420449, "grad_norm": 7.129826834824256, "learning_rate": 4.643571628632968e-06, "loss": 0.8957, "step": 5451 }, { "epoch": 0.3938808315422544, "grad_norm": 6.951785389299425, "learning_rate": 4.643421091394531e-06, "loss": 0.9546, "step": 5452 }, { "epoch": 0.39395307674246394, "grad_norm": 10.147748470041543, "learning_rate": 4.643270524814324e-06, "loss": 0.9208, "step": 5453 }, { "epoch": 0.3940253219426734, "grad_norm": 8.477941276114738, "learning_rate": 4.6431199288944074e-06, "loss": 0.8326, "step": 5454 }, { "epoch": 0.39409756714288297, "grad_norm": 6.889411663742396, "learning_rate": 4.642969303636843e-06, "loss": 0.906, "step": 5455 }, { "epoch": 0.39416981234309245, "grad_norm": 8.363908853728415, "learning_rate": 4.642818649043693e-06, "loss": 0.8627, "step": 5456 }, { "epoch": 0.394242057543302, "grad_norm": 7.55940448050331, "learning_rate": 4.6426679651170195e-06, "loss": 0.9284, "step": 5457 }, { "epoch": 0.3943143027435115, "grad_norm": 8.48514425538352, "learning_rate": 4.6425172518588855e-06, "loss": 0.9706, "step": 5458 }, { "epoch": 0.39438654794372097, "grad_norm": 8.56335489502248, "learning_rate": 4.642366509271353e-06, "loss": 0.9906, "step": 5459 }, { "epoch": 0.3944587931439305, "grad_norm": 6.095702186298389, "learning_rate": 4.6422157373564865e-06, "loss": 0.8714, "step": 5460 }, { "epoch": 0.39453103834414, "grad_norm": 5.977757552599972, "learning_rate": 4.642064936116351e-06, "loss": 0.8376, "step": 5461 }, { "epoch": 0.39460328354434954, "grad_norm": 6.106393071727004, "learning_rate": 4.641914105553009e-06, "loss": 0.9621, "step": 5462 }, { "epoch": 0.394675528744559, "grad_norm": 7.106585078865537, "learning_rate": 4.6417632456685256e-06, "loss": 0.9677, "step": 5463 }, { "epoch": 0.39474777394476857, "grad_norm": 5.49539807003279, "learning_rate": 4.641612356464967e-06, "loss": 0.8622, "step": 5464 }, { "epoch": 0.39482001914497805, "grad_norm": 6.626020244996945, "learning_rate": 4.641461437944398e-06, "loss": 0.9309, "step": 5465 }, { "epoch": 0.3948922643451876, "grad_norm": 6.291953006653773, "learning_rate": 4.641310490108885e-06, "loss": 0.9521, "step": 5466 }, { "epoch": 0.3949645095453971, "grad_norm": 5.377253038028371, "learning_rate": 4.641159512960493e-06, "loss": 0.8483, "step": 5467 }, { "epoch": 0.39503675474560657, "grad_norm": 6.0356318709227805, "learning_rate": 4.64100850650129e-06, "loss": 0.8133, "step": 5468 }, { "epoch": 0.3951089999458161, "grad_norm": 5.443484290872491, "learning_rate": 4.640857470733343e-06, "loss": 0.9509, "step": 5469 }, { "epoch": 0.3951812451460256, "grad_norm": 6.028023444744606, "learning_rate": 4.64070640565872e-06, "loss": 0.8208, "step": 5470 }, { "epoch": 0.39525349034623514, "grad_norm": 6.513253711277544, "learning_rate": 4.640555311279489e-06, "loss": 0.9199, "step": 5471 }, { "epoch": 0.3953257355464446, "grad_norm": 6.721526951559956, "learning_rate": 4.640404187597717e-06, "loss": 0.8222, "step": 5472 }, { "epoch": 0.39539798074665417, "grad_norm": 6.111697952915849, "learning_rate": 4.640253034615473e-06, "loss": 0.8855, "step": 5473 }, { "epoch": 0.39547022594686365, "grad_norm": 7.047840128441778, "learning_rate": 4.640101852334827e-06, "loss": 0.9382, "step": 5474 }, { "epoch": 0.3955424711470732, "grad_norm": 6.714048364032711, "learning_rate": 4.639950640757849e-06, "loss": 0.905, "step": 5475 }, { "epoch": 0.3956147163472827, "grad_norm": 6.8464759727796904, "learning_rate": 4.639799399886607e-06, "loss": 0.8658, "step": 5476 }, { "epoch": 0.39568696154749217, "grad_norm": 6.764960487628137, "learning_rate": 4.639648129723175e-06, "loss": 0.8674, "step": 5477 }, { "epoch": 0.3957592067477017, "grad_norm": 5.742067919673245, "learning_rate": 4.63949683026962e-06, "loss": 0.8564, "step": 5478 }, { "epoch": 0.3958314519479112, "grad_norm": 9.191216579933164, "learning_rate": 4.6393455015280145e-06, "loss": 0.8443, "step": 5479 }, { "epoch": 0.39590369714812074, "grad_norm": 7.3592669242163815, "learning_rate": 4.6391941435004305e-06, "loss": 0.9716, "step": 5480 }, { "epoch": 0.3959759423483302, "grad_norm": 6.776921028783727, "learning_rate": 4.639042756188939e-06, "loss": 0.9247, "step": 5481 }, { "epoch": 0.39604818754853977, "grad_norm": 7.056156516659594, "learning_rate": 4.638891339595614e-06, "loss": 0.9134, "step": 5482 }, { "epoch": 0.39612043274874925, "grad_norm": 6.527836687981032, "learning_rate": 4.638739893722527e-06, "loss": 0.9092, "step": 5483 }, { "epoch": 0.3961926779489588, "grad_norm": 5.3372248915946034, "learning_rate": 4.638588418571751e-06, "loss": 0.8256, "step": 5484 }, { "epoch": 0.3962649231491683, "grad_norm": 7.3330792758744785, "learning_rate": 4.63843691414536e-06, "loss": 0.8914, "step": 5485 }, { "epoch": 0.39633716834937777, "grad_norm": 5.742910243631381, "learning_rate": 4.638285380445428e-06, "loss": 0.7993, "step": 5486 }, { "epoch": 0.3964094135495873, "grad_norm": 6.81222981608362, "learning_rate": 4.63813381747403e-06, "loss": 0.944, "step": 5487 }, { "epoch": 0.3964816587497968, "grad_norm": 8.098543728502115, "learning_rate": 4.63798222523324e-06, "loss": 0.9342, "step": 5488 }, { "epoch": 0.39655390395000634, "grad_norm": 6.531870835288059, "learning_rate": 4.637830603725133e-06, "loss": 0.8763, "step": 5489 }, { "epoch": 0.3966261491502158, "grad_norm": 6.204331134215682, "learning_rate": 4.637678952951786e-06, "loss": 0.9467, "step": 5490 }, { "epoch": 0.39669839435042537, "grad_norm": 8.322282355577528, "learning_rate": 4.637527272915273e-06, "loss": 0.97, "step": 5491 }, { "epoch": 0.39677063955063485, "grad_norm": 6.449210764777764, "learning_rate": 4.637375563617671e-06, "loss": 0.9058, "step": 5492 }, { "epoch": 0.3968428847508444, "grad_norm": 5.956077664454983, "learning_rate": 4.637223825061058e-06, "loss": 0.8645, "step": 5493 }, { "epoch": 0.3969151299510539, "grad_norm": 6.552287556953213, "learning_rate": 4.6370720572475104e-06, "loss": 0.9296, "step": 5494 }, { "epoch": 0.39698737515126337, "grad_norm": 7.065603738893681, "learning_rate": 4.636920260179105e-06, "loss": 0.9559, "step": 5495 }, { "epoch": 0.3970596203514729, "grad_norm": 6.126263546526864, "learning_rate": 4.63676843385792e-06, "loss": 0.8418, "step": 5496 }, { "epoch": 0.3971318655516824, "grad_norm": 6.492005934397017, "learning_rate": 4.636616578286036e-06, "loss": 0.8432, "step": 5497 }, { "epoch": 0.39720411075189194, "grad_norm": 6.90642537028119, "learning_rate": 4.636464693465529e-06, "loss": 0.9155, "step": 5498 }, { "epoch": 0.3972763559521014, "grad_norm": 6.483976420826195, "learning_rate": 4.636312779398479e-06, "loss": 0.9359, "step": 5499 }, { "epoch": 0.39734860115231097, "grad_norm": 8.571883135050141, "learning_rate": 4.636160836086966e-06, "loss": 0.8205, "step": 5500 }, { "epoch": 0.39742084635252045, "grad_norm": 6.6812732264344294, "learning_rate": 4.636008863533069e-06, "loss": 0.8874, "step": 5501 }, { "epoch": 0.39749309155272994, "grad_norm": 5.885037228216167, "learning_rate": 4.63585686173887e-06, "loss": 0.8865, "step": 5502 }, { "epoch": 0.3975653367529395, "grad_norm": 8.029636324845438, "learning_rate": 4.635704830706449e-06, "loss": 0.959, "step": 5503 }, { "epoch": 0.39763758195314897, "grad_norm": 6.489787074565187, "learning_rate": 4.635552770437887e-06, "loss": 0.8183, "step": 5504 }, { "epoch": 0.3977098271533585, "grad_norm": 7.6528462220177635, "learning_rate": 4.6354006809352655e-06, "loss": 0.8803, "step": 5505 }, { "epoch": 0.397782072353568, "grad_norm": 6.638771570393667, "learning_rate": 4.635248562200666e-06, "loss": 0.9064, "step": 5506 }, { "epoch": 0.39785431755377754, "grad_norm": 6.299958110851716, "learning_rate": 4.635096414236173e-06, "loss": 0.8671, "step": 5507 }, { "epoch": 0.397926562753987, "grad_norm": 10.309982454893648, "learning_rate": 4.6349442370438676e-06, "loss": 0.9873, "step": 5508 }, { "epoch": 0.39799880795419657, "grad_norm": 8.089849880037514, "learning_rate": 4.634792030625833e-06, "loss": 0.8818, "step": 5509 }, { "epoch": 0.39807105315440605, "grad_norm": 6.712638026688445, "learning_rate": 4.634639794984153e-06, "loss": 0.9052, "step": 5510 }, { "epoch": 0.39814329835461554, "grad_norm": 7.819170492133971, "learning_rate": 4.634487530120911e-06, "loss": 0.8318, "step": 5511 }, { "epoch": 0.3982155435548251, "grad_norm": 10.202889455789977, "learning_rate": 4.634335236038193e-06, "loss": 0.9469, "step": 5512 }, { "epoch": 0.39828778875503457, "grad_norm": 5.805467843220054, "learning_rate": 4.634182912738084e-06, "loss": 0.9502, "step": 5513 }, { "epoch": 0.3983600339552441, "grad_norm": 7.5900430304673305, "learning_rate": 4.634030560222665e-06, "loss": 0.9215, "step": 5514 }, { "epoch": 0.3984322791554536, "grad_norm": 6.692724736016703, "learning_rate": 4.633878178494027e-06, "loss": 0.9141, "step": 5515 }, { "epoch": 0.39850452435566314, "grad_norm": 7.795924310307969, "learning_rate": 4.633725767554253e-06, "loss": 0.8769, "step": 5516 }, { "epoch": 0.3985767695558726, "grad_norm": 6.3086521016070725, "learning_rate": 4.633573327405429e-06, "loss": 0.8888, "step": 5517 }, { "epoch": 0.39864901475608217, "grad_norm": 7.02412752319875, "learning_rate": 4.633420858049644e-06, "loss": 0.9647, "step": 5518 }, { "epoch": 0.39872125995629165, "grad_norm": 6.146740690159342, "learning_rate": 4.633268359488983e-06, "loss": 0.8807, "step": 5519 }, { "epoch": 0.39879350515650114, "grad_norm": 7.806168821354075, "learning_rate": 4.6331158317255355e-06, "loss": 0.9375, "step": 5520 }, { "epoch": 0.3988657503567107, "grad_norm": 5.707069349847156, "learning_rate": 4.632963274761388e-06, "loss": 0.9119, "step": 5521 }, { "epoch": 0.39893799555692017, "grad_norm": 8.263458833448004, "learning_rate": 4.632810688598629e-06, "loss": 0.9217, "step": 5522 }, { "epoch": 0.3990102407571297, "grad_norm": 10.448967943835822, "learning_rate": 4.632658073239348e-06, "loss": 0.9123, "step": 5523 }, { "epoch": 0.3990824859573392, "grad_norm": 8.137029106239037, "learning_rate": 4.632505428685634e-06, "loss": 0.9093, "step": 5524 }, { "epoch": 0.39915473115754874, "grad_norm": 6.345999182748246, "learning_rate": 4.632352754939577e-06, "loss": 0.9565, "step": 5525 }, { "epoch": 0.3992269763577582, "grad_norm": 7.181042764327857, "learning_rate": 4.632200052003265e-06, "loss": 0.9087, "step": 5526 }, { "epoch": 0.39929922155796777, "grad_norm": 6.561009991747948, "learning_rate": 4.632047319878792e-06, "loss": 0.8092, "step": 5527 }, { "epoch": 0.39937146675817725, "grad_norm": 7.678729504957362, "learning_rate": 4.631894558568245e-06, "loss": 0.8661, "step": 5528 }, { "epoch": 0.39944371195838674, "grad_norm": 7.670379928071803, "learning_rate": 4.631741768073717e-06, "loss": 0.9024, "step": 5529 }, { "epoch": 0.3995159571585963, "grad_norm": 5.688559632574785, "learning_rate": 4.6315889483973e-06, "loss": 0.8961, "step": 5530 }, { "epoch": 0.39958820235880577, "grad_norm": 6.793696219223508, "learning_rate": 4.631436099541085e-06, "loss": 1.0175, "step": 5531 }, { "epoch": 0.3996604475590153, "grad_norm": 6.596136013981322, "learning_rate": 4.631283221507164e-06, "loss": 0.9409, "step": 5532 }, { "epoch": 0.3997326927592248, "grad_norm": 7.079141101787255, "learning_rate": 4.631130314297631e-06, "loss": 0.9124, "step": 5533 }, { "epoch": 0.39980493795943434, "grad_norm": 6.968460419116476, "learning_rate": 4.630977377914579e-06, "loss": 0.8944, "step": 5534 }, { "epoch": 0.3998771831596438, "grad_norm": 9.234028404689541, "learning_rate": 4.630824412360101e-06, "loss": 0.9817, "step": 5535 }, { "epoch": 0.39994942835985337, "grad_norm": 6.403883185117668, "learning_rate": 4.630671417636292e-06, "loss": 0.9314, "step": 5536 }, { "epoch": 0.40002167356006285, "grad_norm": 6.116741295687528, "learning_rate": 4.630518393745245e-06, "loss": 0.7783, "step": 5537 }, { "epoch": 0.40009391876027234, "grad_norm": 6.617422124493077, "learning_rate": 4.630365340689056e-06, "loss": 0.7966, "step": 5538 }, { "epoch": 0.4001661639604819, "grad_norm": 6.501428593854302, "learning_rate": 4.630212258469818e-06, "loss": 0.9144, "step": 5539 }, { "epoch": 0.40023840916069137, "grad_norm": 8.723117185817236, "learning_rate": 4.63005914708963e-06, "loss": 0.9708, "step": 5540 }, { "epoch": 0.4003106543609009, "grad_norm": 5.57062579928452, "learning_rate": 4.629906006550585e-06, "loss": 0.8318, "step": 5541 }, { "epoch": 0.4003828995611104, "grad_norm": 6.803217452073343, "learning_rate": 4.629752836854781e-06, "loss": 0.8456, "step": 5542 }, { "epoch": 0.40045514476131994, "grad_norm": 6.723085220189439, "learning_rate": 4.629599638004315e-06, "loss": 0.8344, "step": 5543 }, { "epoch": 0.4005273899615294, "grad_norm": 4.997738517500355, "learning_rate": 4.629446410001283e-06, "loss": 0.8527, "step": 5544 }, { "epoch": 0.40059963516173897, "grad_norm": 6.417070582504126, "learning_rate": 4.629293152847782e-06, "loss": 0.8677, "step": 5545 }, { "epoch": 0.40067188036194845, "grad_norm": 6.7729770807508975, "learning_rate": 4.629139866545913e-06, "loss": 0.8178, "step": 5546 }, { "epoch": 0.40074412556215794, "grad_norm": 5.469681142827177, "learning_rate": 4.6289865510977706e-06, "loss": 0.8566, "step": 5547 }, { "epoch": 0.4008163707623675, "grad_norm": 6.062310913912562, "learning_rate": 4.628833206505457e-06, "loss": 0.915, "step": 5548 }, { "epoch": 0.40088861596257697, "grad_norm": 7.414869998093226, "learning_rate": 4.6286798327710684e-06, "loss": 0.941, "step": 5549 }, { "epoch": 0.4009608611627865, "grad_norm": 7.946228754706168, "learning_rate": 4.628526429896706e-06, "loss": 0.9561, "step": 5550 }, { "epoch": 0.401033106362996, "grad_norm": 6.7076031147256865, "learning_rate": 4.628372997884469e-06, "loss": 0.9674, "step": 5551 }, { "epoch": 0.40110535156320554, "grad_norm": 6.509033381690775, "learning_rate": 4.628219536736459e-06, "loss": 0.8085, "step": 5552 }, { "epoch": 0.401177596763415, "grad_norm": 5.869169873055243, "learning_rate": 4.628066046454776e-06, "loss": 0.9059, "step": 5553 }, { "epoch": 0.40124984196362457, "grad_norm": 5.918323589300099, "learning_rate": 4.627912527041521e-06, "loss": 0.8669, "step": 5554 }, { "epoch": 0.40132208716383405, "grad_norm": 6.812178857915897, "learning_rate": 4.627758978498796e-06, "loss": 0.8827, "step": 5555 }, { "epoch": 0.40139433236404354, "grad_norm": 5.8602502601486455, "learning_rate": 4.627605400828702e-06, "loss": 0.8602, "step": 5556 }, { "epoch": 0.4014665775642531, "grad_norm": 6.490771565001591, "learning_rate": 4.627451794033342e-06, "loss": 0.83, "step": 5557 }, { "epoch": 0.40153882276446257, "grad_norm": 7.349024659922461, "learning_rate": 4.62729815811482e-06, "loss": 0.9682, "step": 5558 }, { "epoch": 0.4016110679646721, "grad_norm": 7.400995764879722, "learning_rate": 4.627144493075237e-06, "loss": 0.9374, "step": 5559 }, { "epoch": 0.4016833131648816, "grad_norm": 7.29701275256416, "learning_rate": 4.626990798916697e-06, "loss": 0.9461, "step": 5560 }, { "epoch": 0.40175555836509114, "grad_norm": 7.187670896405807, "learning_rate": 4.626837075641306e-06, "loss": 0.9592, "step": 5561 }, { "epoch": 0.4018278035653006, "grad_norm": 7.538586094484501, "learning_rate": 4.626683323251166e-06, "loss": 0.916, "step": 5562 }, { "epoch": 0.40190004876551016, "grad_norm": 6.172441751074704, "learning_rate": 4.626529541748382e-06, "loss": 0.9277, "step": 5563 }, { "epoch": 0.40197229396571965, "grad_norm": 8.197631158829417, "learning_rate": 4.626375731135061e-06, "loss": 0.9218, "step": 5564 }, { "epoch": 0.40204453916592914, "grad_norm": 7.173403242590685, "learning_rate": 4.626221891413306e-06, "loss": 0.9231, "step": 5565 }, { "epoch": 0.4021167843661387, "grad_norm": 7.634251719481501, "learning_rate": 4.626068022585225e-06, "loss": 0.8548, "step": 5566 }, { "epoch": 0.40218902956634817, "grad_norm": 7.4103202097866365, "learning_rate": 4.6259141246529235e-06, "loss": 0.9286, "step": 5567 }, { "epoch": 0.4022612747665577, "grad_norm": 7.441882469177983, "learning_rate": 4.625760197618508e-06, "loss": 0.878, "step": 5568 }, { "epoch": 0.4023335199667672, "grad_norm": 7.465341468559057, "learning_rate": 4.625606241484086e-06, "loss": 0.9622, "step": 5569 }, { "epoch": 0.40240576516697674, "grad_norm": 6.153285208224934, "learning_rate": 4.625452256251765e-06, "loss": 0.8877, "step": 5570 }, { "epoch": 0.4024780103671862, "grad_norm": 6.630019679435879, "learning_rate": 4.6252982419236524e-06, "loss": 0.8446, "step": 5571 }, { "epoch": 0.40255025556739576, "grad_norm": 6.126404581756353, "learning_rate": 4.625144198501857e-06, "loss": 0.8953, "step": 5572 }, { "epoch": 0.40262250076760525, "grad_norm": 8.05419446766399, "learning_rate": 4.6249901259884886e-06, "loss": 1.0324, "step": 5573 }, { "epoch": 0.40269474596781474, "grad_norm": 6.99474600755489, "learning_rate": 4.624836024385655e-06, "loss": 0.9612, "step": 5574 }, { "epoch": 0.4027669911680243, "grad_norm": 6.0767431576103705, "learning_rate": 4.624681893695466e-06, "loss": 0.9443, "step": 5575 }, { "epoch": 0.40283923636823377, "grad_norm": 8.835896486006714, "learning_rate": 4.624527733920032e-06, "loss": 0.9713, "step": 5576 }, { "epoch": 0.4029114815684433, "grad_norm": 8.091403930463363, "learning_rate": 4.624373545061463e-06, "loss": 0.9119, "step": 5577 }, { "epoch": 0.4029837267686528, "grad_norm": 5.963162827987002, "learning_rate": 4.624219327121869e-06, "loss": 0.9103, "step": 5578 }, { "epoch": 0.40305597196886234, "grad_norm": 6.315556758642176, "learning_rate": 4.624065080103362e-06, "loss": 0.9285, "step": 5579 }, { "epoch": 0.4031282171690718, "grad_norm": 6.237798759323389, "learning_rate": 4.6239108040080524e-06, "loss": 0.905, "step": 5580 }, { "epoch": 0.40320046236928136, "grad_norm": 5.534730495104742, "learning_rate": 4.623756498838054e-06, "loss": 0.9302, "step": 5581 }, { "epoch": 0.40327270756949085, "grad_norm": 5.529668032182635, "learning_rate": 4.623602164595478e-06, "loss": 0.8555, "step": 5582 }, { "epoch": 0.40334495276970034, "grad_norm": 8.157475288112852, "learning_rate": 4.623447801282437e-06, "loss": 0.9006, "step": 5583 }, { "epoch": 0.4034171979699099, "grad_norm": 6.6751723706566946, "learning_rate": 4.623293408901044e-06, "loss": 0.8753, "step": 5584 }, { "epoch": 0.40348944317011937, "grad_norm": 7.313075898796791, "learning_rate": 4.623138987453414e-06, "loss": 0.7452, "step": 5585 }, { "epoch": 0.4035616883703289, "grad_norm": 6.418016300036879, "learning_rate": 4.622984536941658e-06, "loss": 0.9538, "step": 5586 }, { "epoch": 0.4036339335705384, "grad_norm": 7.515944761583848, "learning_rate": 4.622830057367894e-06, "loss": 0.9723, "step": 5587 }, { "epoch": 0.40370617877074794, "grad_norm": 7.090206387075635, "learning_rate": 4.622675548734233e-06, "loss": 0.9453, "step": 5588 }, { "epoch": 0.4037784239709574, "grad_norm": 9.00426424518783, "learning_rate": 4.622521011042793e-06, "loss": 0.9632, "step": 5589 }, { "epoch": 0.40385066917116696, "grad_norm": 5.671026833750753, "learning_rate": 4.622366444295688e-06, "loss": 0.8742, "step": 5590 }, { "epoch": 0.40392291437137645, "grad_norm": 6.435321920859722, "learning_rate": 4.622211848495035e-06, "loss": 0.9493, "step": 5591 }, { "epoch": 0.40399515957158594, "grad_norm": 8.117544656483265, "learning_rate": 4.622057223642949e-06, "loss": 0.8736, "step": 5592 }, { "epoch": 0.4040674047717955, "grad_norm": 7.0815222126421755, "learning_rate": 4.621902569741548e-06, "loss": 0.9734, "step": 5593 }, { "epoch": 0.40413964997200497, "grad_norm": 5.959196583918108, "learning_rate": 4.621747886792948e-06, "loss": 0.8622, "step": 5594 }, { "epoch": 0.4042118951722145, "grad_norm": 7.082149750801196, "learning_rate": 4.621593174799266e-06, "loss": 0.9013, "step": 5595 }, { "epoch": 0.404284140372424, "grad_norm": 7.772356826577665, "learning_rate": 4.621438433762621e-06, "loss": 0.8985, "step": 5596 }, { "epoch": 0.40435638557263354, "grad_norm": 8.497358023750033, "learning_rate": 4.621283663685132e-06, "loss": 0.937, "step": 5597 }, { "epoch": 0.404428630772843, "grad_norm": 7.7951868710437076, "learning_rate": 4.621128864568916e-06, "loss": 0.8825, "step": 5598 }, { "epoch": 0.40450087597305256, "grad_norm": 9.242016207864266, "learning_rate": 4.620974036416093e-06, "loss": 0.9692, "step": 5599 }, { "epoch": 0.40457312117326205, "grad_norm": 6.379384048050371, "learning_rate": 4.620819179228782e-06, "loss": 0.8563, "step": 5600 }, { "epoch": 0.40464536637347154, "grad_norm": 6.636669605560392, "learning_rate": 4.6206642930091035e-06, "loss": 0.8873, "step": 5601 }, { "epoch": 0.4047176115736811, "grad_norm": 7.533459098013695, "learning_rate": 4.620509377759177e-06, "loss": 1.0536, "step": 5602 }, { "epoch": 0.40478985677389057, "grad_norm": 6.238078229320084, "learning_rate": 4.620354433481123e-06, "loss": 0.9183, "step": 5603 }, { "epoch": 0.4048621019741001, "grad_norm": 6.342563762532238, "learning_rate": 4.620199460177065e-06, "loss": 0.8271, "step": 5604 }, { "epoch": 0.4049343471743096, "grad_norm": 7.4911619882746034, "learning_rate": 4.620044457849121e-06, "loss": 0.8676, "step": 5605 }, { "epoch": 0.40500659237451914, "grad_norm": 6.955758202674405, "learning_rate": 4.619889426499416e-06, "loss": 0.9174, "step": 5606 }, { "epoch": 0.4050788375747286, "grad_norm": 5.602389009968743, "learning_rate": 4.61973436613007e-06, "loss": 0.8616, "step": 5607 }, { "epoch": 0.40515108277493816, "grad_norm": 6.212934218646179, "learning_rate": 4.619579276743206e-06, "loss": 0.9593, "step": 5608 }, { "epoch": 0.40522332797514765, "grad_norm": 8.840229101880249, "learning_rate": 4.619424158340947e-06, "loss": 0.9436, "step": 5609 }, { "epoch": 0.40529557317535714, "grad_norm": 6.4986755782420955, "learning_rate": 4.619269010925418e-06, "loss": 0.8593, "step": 5610 }, { "epoch": 0.4053678183755667, "grad_norm": 6.364003889196564, "learning_rate": 4.619113834498741e-06, "loss": 0.7917, "step": 5611 }, { "epoch": 0.40544006357577617, "grad_norm": 5.983026975193131, "learning_rate": 4.618958629063042e-06, "loss": 0.9161, "step": 5612 }, { "epoch": 0.4055123087759857, "grad_norm": 8.499910241943367, "learning_rate": 4.618803394620444e-06, "loss": 0.916, "step": 5613 }, { "epoch": 0.4055845539761952, "grad_norm": 9.858988468676099, "learning_rate": 4.618648131173072e-06, "loss": 0.9655, "step": 5614 }, { "epoch": 0.40565679917640474, "grad_norm": 7.1429594468556115, "learning_rate": 4.618492838723052e-06, "loss": 0.9562, "step": 5615 }, { "epoch": 0.4057290443766142, "grad_norm": 5.988261502128234, "learning_rate": 4.618337517272511e-06, "loss": 0.9303, "step": 5616 }, { "epoch": 0.40580128957682376, "grad_norm": 6.103498124974912, "learning_rate": 4.6181821668235735e-06, "loss": 0.9027, "step": 5617 }, { "epoch": 0.40587353477703325, "grad_norm": 8.297048333435407, "learning_rate": 4.618026787378368e-06, "loss": 0.9848, "step": 5618 }, { "epoch": 0.40594577997724274, "grad_norm": 8.245688496673864, "learning_rate": 4.6178713789390195e-06, "loss": 0.9003, "step": 5619 }, { "epoch": 0.4060180251774523, "grad_norm": 9.320195442737257, "learning_rate": 4.617715941507656e-06, "loss": 0.9867, "step": 5620 }, { "epoch": 0.40609027037766177, "grad_norm": 6.474226397987066, "learning_rate": 4.617560475086405e-06, "loss": 0.8244, "step": 5621 }, { "epoch": 0.4061625155778713, "grad_norm": 7.794613313314623, "learning_rate": 4.617404979677396e-06, "loss": 0.9339, "step": 5622 }, { "epoch": 0.4062347607780808, "grad_norm": 6.668217319711498, "learning_rate": 4.617249455282757e-06, "loss": 0.817, "step": 5623 }, { "epoch": 0.40630700597829034, "grad_norm": 7.567796097990598, "learning_rate": 4.617093901904618e-06, "loss": 0.933, "step": 5624 }, { "epoch": 0.4063792511784998, "grad_norm": 6.442326190278117, "learning_rate": 4.616938319545106e-06, "loss": 0.9007, "step": 5625 }, { "epoch": 0.40645149637870936, "grad_norm": 5.743985555009026, "learning_rate": 4.616782708206352e-06, "loss": 0.9312, "step": 5626 }, { "epoch": 0.40652374157891885, "grad_norm": 6.367015550784039, "learning_rate": 4.616627067890487e-06, "loss": 0.9469, "step": 5627 }, { "epoch": 0.40659598677912834, "grad_norm": 6.693292978679291, "learning_rate": 4.61647139859964e-06, "loss": 0.8868, "step": 5628 }, { "epoch": 0.4066682319793379, "grad_norm": 5.956098799972938, "learning_rate": 4.6163157003359425e-06, "loss": 0.9342, "step": 5629 }, { "epoch": 0.40674047717954737, "grad_norm": 7.152450685727765, "learning_rate": 4.616159973101527e-06, "loss": 0.8848, "step": 5630 }, { "epoch": 0.4068127223797569, "grad_norm": 5.4510250912363345, "learning_rate": 4.616004216898525e-06, "loss": 0.8754, "step": 5631 }, { "epoch": 0.4068849675799664, "grad_norm": 7.897028796468699, "learning_rate": 4.6158484317290675e-06, "loss": 0.9678, "step": 5632 }, { "epoch": 0.40695721278017594, "grad_norm": 6.4364095708758695, "learning_rate": 4.615692617595289e-06, "loss": 0.9087, "step": 5633 }, { "epoch": 0.4070294579803854, "grad_norm": 7.17787335112055, "learning_rate": 4.61553677449932e-06, "loss": 0.9296, "step": 5634 }, { "epoch": 0.40710170318059496, "grad_norm": 7.859793134301543, "learning_rate": 4.615380902443296e-06, "loss": 1.0561, "step": 5635 }, { "epoch": 0.40717394838080445, "grad_norm": 7.041194681755839, "learning_rate": 4.615225001429349e-06, "loss": 0.905, "step": 5636 }, { "epoch": 0.40724619358101394, "grad_norm": 6.4257649244663435, "learning_rate": 4.615069071459615e-06, "loss": 0.9216, "step": 5637 }, { "epoch": 0.4073184387812235, "grad_norm": 6.370332093497584, "learning_rate": 4.6149131125362275e-06, "loss": 0.8841, "step": 5638 }, { "epoch": 0.40739068398143297, "grad_norm": 7.543755875958902, "learning_rate": 4.614757124661321e-06, "loss": 0.9115, "step": 5639 }, { "epoch": 0.4074629291816425, "grad_norm": 6.668563986045746, "learning_rate": 4.614601107837033e-06, "loss": 0.96, "step": 5640 }, { "epoch": 0.407535174381852, "grad_norm": 7.835378609804658, "learning_rate": 4.6144450620654956e-06, "loss": 0.8862, "step": 5641 }, { "epoch": 0.40760741958206154, "grad_norm": 8.066726404814826, "learning_rate": 4.614288987348848e-06, "loss": 0.9638, "step": 5642 }, { "epoch": 0.407679664782271, "grad_norm": 5.806985187283676, "learning_rate": 4.614132883689226e-06, "loss": 0.8306, "step": 5643 }, { "epoch": 0.40775190998248056, "grad_norm": 7.663365634730115, "learning_rate": 4.613976751088767e-06, "loss": 0.9867, "step": 5644 }, { "epoch": 0.40782415518269005, "grad_norm": 5.525772572352422, "learning_rate": 4.6138205895496065e-06, "loss": 0.943, "step": 5645 }, { "epoch": 0.40789640038289954, "grad_norm": 7.2149235876402935, "learning_rate": 4.613664399073884e-06, "loss": 0.8635, "step": 5646 }, { "epoch": 0.4079686455831091, "grad_norm": 8.41347218942492, "learning_rate": 4.613508179663737e-06, "loss": 0.8695, "step": 5647 }, { "epoch": 0.40804089078331857, "grad_norm": 6.434853315245088, "learning_rate": 4.613351931321303e-06, "loss": 0.9537, "step": 5648 }, { "epoch": 0.4081131359835281, "grad_norm": 5.79342211275955, "learning_rate": 4.613195654048723e-06, "loss": 0.881, "step": 5649 }, { "epoch": 0.4081853811837376, "grad_norm": 5.939288703125339, "learning_rate": 4.613039347848135e-06, "loss": 1.0039, "step": 5650 }, { "epoch": 0.40825762638394714, "grad_norm": 6.6776232647417535, "learning_rate": 4.61288301272168e-06, "loss": 0.9311, "step": 5651 }, { "epoch": 0.4083298715841566, "grad_norm": 6.871145104004368, "learning_rate": 4.612726648671496e-06, "loss": 0.8714, "step": 5652 }, { "epoch": 0.40840211678436616, "grad_norm": 5.9055298058343295, "learning_rate": 4.6125702556997245e-06, "loss": 0.8291, "step": 5653 }, { "epoch": 0.40847436198457565, "grad_norm": 6.189506744797721, "learning_rate": 4.612413833808507e-06, "loss": 0.8848, "step": 5654 }, { "epoch": 0.40854660718478514, "grad_norm": 8.352032497445093, "learning_rate": 4.612257382999984e-06, "loss": 0.8819, "step": 5655 }, { "epoch": 0.4086188523849947, "grad_norm": 5.786835153180699, "learning_rate": 4.612100903276298e-06, "loss": 0.9971, "step": 5656 }, { "epoch": 0.40869109758520417, "grad_norm": 6.067091628620597, "learning_rate": 4.61194439463959e-06, "loss": 0.993, "step": 5657 }, { "epoch": 0.4087633427854137, "grad_norm": 6.210889592975616, "learning_rate": 4.611787857092004e-06, "loss": 0.8889, "step": 5658 }, { "epoch": 0.4088355879856232, "grad_norm": 5.805061092137383, "learning_rate": 4.611631290635681e-06, "loss": 0.871, "step": 5659 }, { "epoch": 0.40890783318583274, "grad_norm": 6.50292257482435, "learning_rate": 4.611474695272765e-06, "loss": 0.9146, "step": 5660 }, { "epoch": 0.4089800783860422, "grad_norm": 7.0957265108512235, "learning_rate": 4.611318071005401e-06, "loss": 0.9087, "step": 5661 }, { "epoch": 0.40905232358625176, "grad_norm": 6.499163647177456, "learning_rate": 4.61116141783573e-06, "loss": 0.9171, "step": 5662 }, { "epoch": 0.40912456878646125, "grad_norm": 4.8832027187824805, "learning_rate": 4.6110047357659e-06, "loss": 0.8315, "step": 5663 }, { "epoch": 0.40919681398667074, "grad_norm": 8.078252027682344, "learning_rate": 4.610848024798054e-06, "loss": 0.8823, "step": 5664 }, { "epoch": 0.4092690591868803, "grad_norm": 6.371749348358015, "learning_rate": 4.610691284934337e-06, "loss": 0.9087, "step": 5665 }, { "epoch": 0.40934130438708977, "grad_norm": 7.4012666185318405, "learning_rate": 4.6105345161768965e-06, "loss": 0.9569, "step": 5666 }, { "epoch": 0.4094135495872993, "grad_norm": 6.826755272816775, "learning_rate": 4.610377718527876e-06, "loss": 0.8434, "step": 5667 }, { "epoch": 0.4094857947875088, "grad_norm": 5.165793724558454, "learning_rate": 4.610220891989423e-06, "loss": 0.8206, "step": 5668 }, { "epoch": 0.40955803998771834, "grad_norm": 5.820709010193488, "learning_rate": 4.610064036563685e-06, "loss": 0.8519, "step": 5669 }, { "epoch": 0.4096302851879278, "grad_norm": 6.4499574881638635, "learning_rate": 4.609907152252808e-06, "loss": 0.9419, "step": 5670 }, { "epoch": 0.40970253038813736, "grad_norm": 7.367186723306561, "learning_rate": 4.6097502390589415e-06, "loss": 0.9374, "step": 5671 }, { "epoch": 0.40977477558834685, "grad_norm": 7.023249843241781, "learning_rate": 4.609593296984231e-06, "loss": 0.9079, "step": 5672 }, { "epoch": 0.40984702078855634, "grad_norm": 7.620905042352304, "learning_rate": 4.609436326030828e-06, "loss": 0.9444, "step": 5673 }, { "epoch": 0.4099192659887659, "grad_norm": 6.164499090903336, "learning_rate": 4.609279326200879e-06, "loss": 0.8966, "step": 5674 }, { "epoch": 0.40999151118897537, "grad_norm": 5.940321723625395, "learning_rate": 4.609122297496533e-06, "loss": 0.8615, "step": 5675 }, { "epoch": 0.4100637563891849, "grad_norm": 6.709422747686848, "learning_rate": 4.6089652399199405e-06, "loss": 0.8882, "step": 5676 }, { "epoch": 0.4101360015893944, "grad_norm": 7.599219593335514, "learning_rate": 4.608808153473252e-06, "loss": 0.9666, "step": 5677 }, { "epoch": 0.41020824678960394, "grad_norm": 6.54826190979764, "learning_rate": 4.608651038158616e-06, "loss": 0.8825, "step": 5678 }, { "epoch": 0.4102804919898134, "grad_norm": 7.318968430671098, "learning_rate": 4.608493893978186e-06, "loss": 0.8835, "step": 5679 }, { "epoch": 0.41035273719002296, "grad_norm": 5.729147542285789, "learning_rate": 4.60833672093411e-06, "loss": 0.8695, "step": 5680 }, { "epoch": 0.41042498239023245, "grad_norm": 6.714585543955756, "learning_rate": 4.608179519028543e-06, "loss": 0.8074, "step": 5681 }, { "epoch": 0.41049722759044194, "grad_norm": 8.184991117273357, "learning_rate": 4.608022288263635e-06, "loss": 0.9151, "step": 5682 }, { "epoch": 0.4105694727906515, "grad_norm": 6.2188484529149175, "learning_rate": 4.607865028641539e-06, "loss": 0.9211, "step": 5683 }, { "epoch": 0.41064171799086097, "grad_norm": 6.547714871452619, "learning_rate": 4.607707740164406e-06, "loss": 0.8554, "step": 5684 }, { "epoch": 0.4107139631910705, "grad_norm": 8.75973361720741, "learning_rate": 4.6075504228343915e-06, "loss": 0.9122, "step": 5685 }, { "epoch": 0.41078620839128, "grad_norm": 8.24992185613441, "learning_rate": 4.607393076653648e-06, "loss": 0.9138, "step": 5686 }, { "epoch": 0.41085845359148954, "grad_norm": 8.0287093956966, "learning_rate": 4.60723570162433e-06, "loss": 0.8847, "step": 5687 }, { "epoch": 0.410930698791699, "grad_norm": 8.039701178771839, "learning_rate": 4.60707829774859e-06, "loss": 0.9224, "step": 5688 }, { "epoch": 0.41100294399190856, "grad_norm": 6.451564794176451, "learning_rate": 4.606920865028585e-06, "loss": 0.885, "step": 5689 }, { "epoch": 0.41107518919211805, "grad_norm": 7.6941544179351675, "learning_rate": 4.6067634034664695e-06, "loss": 0.9924, "step": 5690 }, { "epoch": 0.41114743439232754, "grad_norm": 7.423125151701423, "learning_rate": 4.606605913064399e-06, "loss": 0.9151, "step": 5691 }, { "epoch": 0.4112196795925371, "grad_norm": 8.558405210297789, "learning_rate": 4.606448393824528e-06, "loss": 0.8031, "step": 5692 }, { "epoch": 0.41129192479274657, "grad_norm": 9.292144033260746, "learning_rate": 4.606290845749015e-06, "loss": 0.9703, "step": 5693 }, { "epoch": 0.4113641699929561, "grad_norm": 9.014964061290938, "learning_rate": 4.606133268840016e-06, "loss": 0.9491, "step": 5694 }, { "epoch": 0.4114364151931656, "grad_norm": 7.556086066576254, "learning_rate": 4.605975663099688e-06, "loss": 1.0175, "step": 5695 }, { "epoch": 0.41150866039337514, "grad_norm": 6.622601344772109, "learning_rate": 4.605818028530188e-06, "loss": 0.9073, "step": 5696 }, { "epoch": 0.4115809055935846, "grad_norm": 7.133836854412978, "learning_rate": 4.6056603651336736e-06, "loss": 0.889, "step": 5697 }, { "epoch": 0.41165315079379416, "grad_norm": 6.3784182490921335, "learning_rate": 4.605502672912304e-06, "loss": 0.9724, "step": 5698 }, { "epoch": 0.41172539599400365, "grad_norm": 7.238275452810685, "learning_rate": 4.605344951868238e-06, "loss": 0.8679, "step": 5699 }, { "epoch": 0.41179764119421314, "grad_norm": 5.4504592632006315, "learning_rate": 4.605187202003635e-06, "loss": 0.859, "step": 5700 }, { "epoch": 0.4118698863944227, "grad_norm": 8.783433527317696, "learning_rate": 4.6050294233206524e-06, "loss": 0.8966, "step": 5701 }, { "epoch": 0.41194213159463217, "grad_norm": 9.75404000333804, "learning_rate": 4.604871615821452e-06, "loss": 0.8976, "step": 5702 }, { "epoch": 0.4120143767948417, "grad_norm": 8.178268703062537, "learning_rate": 4.604713779508194e-06, "loss": 0.9137, "step": 5703 }, { "epoch": 0.4120866219950512, "grad_norm": 7.0825930339440415, "learning_rate": 4.6045559143830375e-06, "loss": 0.8668, "step": 5704 }, { "epoch": 0.41215886719526074, "grad_norm": 7.887944315684774, "learning_rate": 4.604398020448145e-06, "loss": 0.9372, "step": 5705 }, { "epoch": 0.4122311123954702, "grad_norm": 8.880745760558169, "learning_rate": 4.6042400977056775e-06, "loss": 0.8793, "step": 5706 }, { "epoch": 0.41230335759567976, "grad_norm": 7.572948205629462, "learning_rate": 4.604082146157798e-06, "loss": 0.9512, "step": 5707 }, { "epoch": 0.41237560279588925, "grad_norm": 7.846526972786621, "learning_rate": 4.603924165806667e-06, "loss": 0.8988, "step": 5708 }, { "epoch": 0.41244784799609874, "grad_norm": 6.374854291858885, "learning_rate": 4.603766156654448e-06, "loss": 0.8962, "step": 5709 }, { "epoch": 0.4125200931963083, "grad_norm": 7.3645926281673875, "learning_rate": 4.603608118703302e-06, "loss": 0.9129, "step": 5710 }, { "epoch": 0.41259233839651777, "grad_norm": 6.905307342959647, "learning_rate": 4.603450051955396e-06, "loss": 0.8882, "step": 5711 }, { "epoch": 0.4126645835967273, "grad_norm": 7.905933238080767, "learning_rate": 4.603291956412892e-06, "loss": 0.9315, "step": 5712 }, { "epoch": 0.4127368287969368, "grad_norm": 6.428993308135784, "learning_rate": 4.603133832077953e-06, "loss": 0.8942, "step": 5713 }, { "epoch": 0.41280907399714634, "grad_norm": 8.562372137075656, "learning_rate": 4.602975678952746e-06, "loss": 0.8836, "step": 5714 }, { "epoch": 0.4128813191973558, "grad_norm": 7.5289903471975155, "learning_rate": 4.602817497039435e-06, "loss": 0.8982, "step": 5715 }, { "epoch": 0.4129535643975653, "grad_norm": 8.018929973197114, "learning_rate": 4.6026592863401844e-06, "loss": 0.9593, "step": 5716 }, { "epoch": 0.41302580959777485, "grad_norm": 6.864700648260955, "learning_rate": 4.602501046857161e-06, "loss": 0.8924, "step": 5717 }, { "epoch": 0.41309805479798434, "grad_norm": 5.432770875865136, "learning_rate": 4.60234277859253e-06, "loss": 0.8489, "step": 5718 }, { "epoch": 0.4131702999981939, "grad_norm": 6.930790311328111, "learning_rate": 4.6021844815484594e-06, "loss": 0.8626, "step": 5719 }, { "epoch": 0.41324254519840337, "grad_norm": 10.60004429358099, "learning_rate": 4.602026155727116e-06, "loss": 0.9441, "step": 5720 }, { "epoch": 0.4133147903986129, "grad_norm": 5.981451291451093, "learning_rate": 4.601867801130666e-06, "loss": 0.905, "step": 5721 }, { "epoch": 0.4133870355988224, "grad_norm": 7.501425544046351, "learning_rate": 4.601709417761278e-06, "loss": 0.9701, "step": 5722 }, { "epoch": 0.41345928079903194, "grad_norm": 6.73221222385346, "learning_rate": 4.60155100562112e-06, "loss": 0.9316, "step": 5723 }, { "epoch": 0.4135315259992414, "grad_norm": 6.609860954726681, "learning_rate": 4.60139256471236e-06, "loss": 0.8143, "step": 5724 }, { "epoch": 0.4136037711994509, "grad_norm": 7.012746649499635, "learning_rate": 4.6012340950371684e-06, "loss": 0.8702, "step": 5725 }, { "epoch": 0.41367601639966045, "grad_norm": 5.634675753453576, "learning_rate": 4.601075596597713e-06, "loss": 0.8777, "step": 5726 }, { "epoch": 0.41374826159986994, "grad_norm": 7.247567393531384, "learning_rate": 4.6009170693961635e-06, "loss": 0.9442, "step": 5727 }, { "epoch": 0.4138205068000795, "grad_norm": 8.934756411416467, "learning_rate": 4.600758513434691e-06, "loss": 0.8589, "step": 5728 }, { "epoch": 0.41389275200028897, "grad_norm": 6.517096703170559, "learning_rate": 4.600599928715466e-06, "loss": 0.8288, "step": 5729 }, { "epoch": 0.4139649972004985, "grad_norm": 5.519498939716559, "learning_rate": 4.600441315240659e-06, "loss": 0.8202, "step": 5730 }, { "epoch": 0.414037242400708, "grad_norm": 7.0642646677931635, "learning_rate": 4.60028267301244e-06, "loss": 0.927, "step": 5731 }, { "epoch": 0.41410948760091754, "grad_norm": 9.581440437350336, "learning_rate": 4.600124002032983e-06, "loss": 0.936, "step": 5732 }, { "epoch": 0.414181732801127, "grad_norm": 7.419951179528865, "learning_rate": 4.59996530230446e-06, "loss": 0.9257, "step": 5733 }, { "epoch": 0.4142539780013365, "grad_norm": 7.407887981861405, "learning_rate": 4.599806573829041e-06, "loss": 0.9422, "step": 5734 }, { "epoch": 0.41432622320154605, "grad_norm": 7.369303167933058, "learning_rate": 4.599647816608901e-06, "loss": 0.8648, "step": 5735 }, { "epoch": 0.41439846840175554, "grad_norm": 6.331092873192917, "learning_rate": 4.5994890306462124e-06, "loss": 0.9296, "step": 5736 }, { "epoch": 0.4144707136019651, "grad_norm": 6.5896718218161885, "learning_rate": 4.599330215943149e-06, "loss": 0.9285, "step": 5737 }, { "epoch": 0.41454295880217457, "grad_norm": 7.778973717553535, "learning_rate": 4.5991713725018855e-06, "loss": 0.8403, "step": 5738 }, { "epoch": 0.4146152040023841, "grad_norm": 5.966526436179014, "learning_rate": 4.599012500324595e-06, "loss": 0.8787, "step": 5739 }, { "epoch": 0.4146874492025936, "grad_norm": 7.411428451924639, "learning_rate": 4.598853599413455e-06, "loss": 0.9196, "step": 5740 }, { "epoch": 0.41475969440280314, "grad_norm": 7.3218175751489305, "learning_rate": 4.598694669770637e-06, "loss": 0.876, "step": 5741 }, { "epoch": 0.4148319396030126, "grad_norm": 7.230292270182158, "learning_rate": 4.5985357113983195e-06, "loss": 0.8706, "step": 5742 }, { "epoch": 0.4149041848032221, "grad_norm": 6.207448533605678, "learning_rate": 4.598376724298676e-06, "loss": 0.8566, "step": 5743 }, { "epoch": 0.41497643000343165, "grad_norm": 6.224988712162676, "learning_rate": 4.598217708473887e-06, "loss": 0.9033, "step": 5744 }, { "epoch": 0.41504867520364114, "grad_norm": 7.155066246509767, "learning_rate": 4.598058663926125e-06, "loss": 0.9444, "step": 5745 }, { "epoch": 0.4151209204038507, "grad_norm": 6.053237603616749, "learning_rate": 4.597899590657569e-06, "loss": 0.8554, "step": 5746 }, { "epoch": 0.41519316560406017, "grad_norm": 7.436312284120597, "learning_rate": 4.597740488670397e-06, "loss": 0.8965, "step": 5747 }, { "epoch": 0.4152654108042697, "grad_norm": 6.596550658550294, "learning_rate": 4.597581357966786e-06, "loss": 0.8864, "step": 5748 }, { "epoch": 0.4153376560044792, "grad_norm": 6.1325723977681, "learning_rate": 4.597422198548915e-06, "loss": 0.926, "step": 5749 }, { "epoch": 0.41540990120468874, "grad_norm": 6.721094760276136, "learning_rate": 4.597263010418962e-06, "loss": 0.8972, "step": 5750 }, { "epoch": 0.4154821464048982, "grad_norm": 6.742182124943541, "learning_rate": 4.597103793579109e-06, "loss": 0.902, "step": 5751 }, { "epoch": 0.4155543916051077, "grad_norm": 5.777308754753713, "learning_rate": 4.596944548031531e-06, "loss": 0.8902, "step": 5752 }, { "epoch": 0.41562663680531725, "grad_norm": 7.258790310629013, "learning_rate": 4.5967852737784114e-06, "loss": 1.0126, "step": 5753 }, { "epoch": 0.41569888200552674, "grad_norm": 9.541943364247343, "learning_rate": 4.59662597082193e-06, "loss": 0.9309, "step": 5754 }, { "epoch": 0.4157711272057363, "grad_norm": 6.658199114475178, "learning_rate": 4.596466639164266e-06, "loss": 0.8019, "step": 5755 }, { "epoch": 0.41584337240594577, "grad_norm": 5.962814815375448, "learning_rate": 4.596307278807601e-06, "loss": 0.9559, "step": 5756 }, { "epoch": 0.4159156176061553, "grad_norm": 6.916682446320087, "learning_rate": 4.596147889754118e-06, "loss": 0.9581, "step": 5757 }, { "epoch": 0.4159878628063648, "grad_norm": 7.560390461925071, "learning_rate": 4.595988472005998e-06, "loss": 0.9539, "step": 5758 }, { "epoch": 0.41606010800657434, "grad_norm": 7.988243524513608, "learning_rate": 4.595829025565422e-06, "loss": 0.8257, "step": 5759 }, { "epoch": 0.4161323532067838, "grad_norm": 6.809365767691079, "learning_rate": 4.595669550434576e-06, "loss": 0.8793, "step": 5760 }, { "epoch": 0.4162045984069933, "grad_norm": 6.536452853946492, "learning_rate": 4.59551004661564e-06, "loss": 0.9553, "step": 5761 }, { "epoch": 0.41627684360720285, "grad_norm": 6.635965736783579, "learning_rate": 4.595350514110798e-06, "loss": 0.812, "step": 5762 }, { "epoch": 0.41634908880741234, "grad_norm": 5.929047038555217, "learning_rate": 4.595190952922235e-06, "loss": 0.8482, "step": 5763 }, { "epoch": 0.4164213340076219, "grad_norm": 7.774589169555591, "learning_rate": 4.5950313630521345e-06, "loss": 0.9062, "step": 5764 }, { "epoch": 0.41649357920783137, "grad_norm": 9.115041655143436, "learning_rate": 4.594871744502682e-06, "loss": 0.9507, "step": 5765 }, { "epoch": 0.4165658244080409, "grad_norm": 8.177005477502236, "learning_rate": 4.59471209727606e-06, "loss": 1.0295, "step": 5766 }, { "epoch": 0.4166380696082504, "grad_norm": 6.931944951406219, "learning_rate": 4.594552421374457e-06, "loss": 0.8787, "step": 5767 }, { "epoch": 0.41671031480845994, "grad_norm": 6.421483828471903, "learning_rate": 4.594392716800059e-06, "loss": 0.9063, "step": 5768 }, { "epoch": 0.4167825600086694, "grad_norm": 7.476837759023253, "learning_rate": 4.5942329835550496e-06, "loss": 0.9172, "step": 5769 }, { "epoch": 0.4168548052088789, "grad_norm": 6.133762864262034, "learning_rate": 4.594073221641616e-06, "loss": 0.9438, "step": 5770 }, { "epoch": 0.41692705040908845, "grad_norm": 7.005197502799094, "learning_rate": 4.593913431061947e-06, "loss": 0.91, "step": 5771 }, { "epoch": 0.41699929560929794, "grad_norm": 6.549501170025994, "learning_rate": 4.593753611818229e-06, "loss": 0.9825, "step": 5772 }, { "epoch": 0.4170715408095075, "grad_norm": 5.700402014760258, "learning_rate": 4.593593763912649e-06, "loss": 0.8863, "step": 5773 }, { "epoch": 0.41714378600971697, "grad_norm": 6.8739315243047105, "learning_rate": 4.593433887347397e-06, "loss": 0.9772, "step": 5774 }, { "epoch": 0.4172160312099265, "grad_norm": 8.523940983210402, "learning_rate": 4.59327398212466e-06, "loss": 0.9745, "step": 5775 }, { "epoch": 0.417288276410136, "grad_norm": 7.29952582295523, "learning_rate": 4.593114048246627e-06, "loss": 0.8881, "step": 5776 }, { "epoch": 0.41736052161034554, "grad_norm": 5.464785114052833, "learning_rate": 4.592954085715488e-06, "loss": 0.8658, "step": 5777 }, { "epoch": 0.417432766810555, "grad_norm": 6.231409423992603, "learning_rate": 4.592794094533433e-06, "loss": 0.7857, "step": 5778 }, { "epoch": 0.4175050120107645, "grad_norm": 7.924803664606372, "learning_rate": 4.5926340747026515e-06, "loss": 0.8025, "step": 5779 }, { "epoch": 0.41757725721097405, "grad_norm": 6.379053958600624, "learning_rate": 4.5924740262253346e-06, "loss": 0.8745, "step": 5780 }, { "epoch": 0.41764950241118354, "grad_norm": 6.816733068812751, "learning_rate": 4.592313949103673e-06, "loss": 0.886, "step": 5781 }, { "epoch": 0.4177217476113931, "grad_norm": 6.9881955477905855, "learning_rate": 4.592153843339859e-06, "loss": 0.8944, "step": 5782 }, { "epoch": 0.41779399281160257, "grad_norm": 6.3291045007756255, "learning_rate": 4.591993708936081e-06, "loss": 0.9153, "step": 5783 }, { "epoch": 0.4178662380118121, "grad_norm": 7.027795103886908, "learning_rate": 4.591833545894535e-06, "loss": 0.9938, "step": 5784 }, { "epoch": 0.4179384832120216, "grad_norm": 6.734669265159499, "learning_rate": 4.591673354217412e-06, "loss": 0.8341, "step": 5785 }, { "epoch": 0.41801072841223114, "grad_norm": 7.688880362288513, "learning_rate": 4.591513133906904e-06, "loss": 0.91, "step": 5786 }, { "epoch": 0.4180829736124406, "grad_norm": 5.39778594792795, "learning_rate": 4.591352884965206e-06, "loss": 0.8266, "step": 5787 }, { "epoch": 0.4181552188126501, "grad_norm": 5.797521508864969, "learning_rate": 4.59119260739451e-06, "loss": 0.8984, "step": 5788 }, { "epoch": 0.41822746401285965, "grad_norm": 5.602793282999247, "learning_rate": 4.591032301197012e-06, "loss": 0.8808, "step": 5789 }, { "epoch": 0.41829970921306914, "grad_norm": 6.983201169361895, "learning_rate": 4.590871966374905e-06, "loss": 0.8656, "step": 5790 }, { "epoch": 0.4183719544132787, "grad_norm": 8.659667294214275, "learning_rate": 4.590711602930384e-06, "loss": 0.9395, "step": 5791 }, { "epoch": 0.41844419961348817, "grad_norm": 5.3287140193460925, "learning_rate": 4.590551210865644e-06, "loss": 0.8581, "step": 5792 }, { "epoch": 0.4185164448136977, "grad_norm": 5.90617830747659, "learning_rate": 4.590390790182882e-06, "loss": 0.8789, "step": 5793 }, { "epoch": 0.4185886900139072, "grad_norm": 5.988148428382666, "learning_rate": 4.590230340884293e-06, "loss": 0.8446, "step": 5794 }, { "epoch": 0.41866093521411674, "grad_norm": 8.566072707112353, "learning_rate": 4.590069862972073e-06, "loss": 0.8625, "step": 5795 }, { "epoch": 0.4187331804143262, "grad_norm": 6.093207232438546, "learning_rate": 4.5899093564484205e-06, "loss": 0.902, "step": 5796 }, { "epoch": 0.4188054256145357, "grad_norm": 6.613169922091601, "learning_rate": 4.58974882131553e-06, "loss": 0.9502, "step": 5797 }, { "epoch": 0.41887767081474525, "grad_norm": 6.220594583040493, "learning_rate": 4.589588257575602e-06, "loss": 0.8516, "step": 5798 }, { "epoch": 0.41894991601495474, "grad_norm": 7.744209341198913, "learning_rate": 4.589427665230834e-06, "loss": 0.9553, "step": 5799 }, { "epoch": 0.4190221612151643, "grad_norm": 6.459633710651189, "learning_rate": 4.589267044283422e-06, "loss": 0.8186, "step": 5800 }, { "epoch": 0.41909440641537377, "grad_norm": 6.593743636141775, "learning_rate": 4.589106394735567e-06, "loss": 0.8798, "step": 5801 }, { "epoch": 0.4191666516155833, "grad_norm": 8.294495322135385, "learning_rate": 4.588945716589467e-06, "loss": 0.9831, "step": 5802 }, { "epoch": 0.4192388968157928, "grad_norm": 6.3772964829078, "learning_rate": 4.588785009847323e-06, "loss": 0.89, "step": 5803 }, { "epoch": 0.41931114201600234, "grad_norm": 6.622079745444727, "learning_rate": 4.588624274511333e-06, "loss": 0.8526, "step": 5804 }, { "epoch": 0.4193833872162118, "grad_norm": 7.615322287659392, "learning_rate": 4.588463510583699e-06, "loss": 0.8975, "step": 5805 }, { "epoch": 0.4194556324164213, "grad_norm": 8.20638142209597, "learning_rate": 4.588302718066621e-06, "loss": 0.9152, "step": 5806 }, { "epoch": 0.41952787761663085, "grad_norm": 5.569720093630084, "learning_rate": 4.5881418969623e-06, "loss": 0.8867, "step": 5807 }, { "epoch": 0.41960012281684034, "grad_norm": 5.946464116249239, "learning_rate": 4.587981047272939e-06, "loss": 0.8953, "step": 5808 }, { "epoch": 0.4196723680170499, "grad_norm": 6.020321446958379, "learning_rate": 4.587820169000737e-06, "loss": 0.9589, "step": 5809 }, { "epoch": 0.41974461321725937, "grad_norm": 5.579396335393931, "learning_rate": 4.5876592621478995e-06, "loss": 0.9012, "step": 5810 }, { "epoch": 0.4198168584174689, "grad_norm": 7.586142163015433, "learning_rate": 4.587498326716627e-06, "loss": 0.9919, "step": 5811 }, { "epoch": 0.4198891036176784, "grad_norm": 6.259563605847019, "learning_rate": 4.587337362709123e-06, "loss": 0.9207, "step": 5812 }, { "epoch": 0.41996134881788794, "grad_norm": 5.688416124276571, "learning_rate": 4.5871763701275915e-06, "loss": 0.9768, "step": 5813 }, { "epoch": 0.4200335940180974, "grad_norm": 7.677161234887857, "learning_rate": 4.587015348974236e-06, "loss": 0.9622, "step": 5814 }, { "epoch": 0.4201058392183069, "grad_norm": 6.693025961932286, "learning_rate": 4.586854299251261e-06, "loss": 0.9955, "step": 5815 }, { "epoch": 0.42017808441851645, "grad_norm": 5.8426058868097535, "learning_rate": 4.586693220960871e-06, "loss": 0.8921, "step": 5816 }, { "epoch": 0.42025032961872594, "grad_norm": 4.940743069381034, "learning_rate": 4.58653211410527e-06, "loss": 0.8029, "step": 5817 }, { "epoch": 0.4203225748189355, "grad_norm": 6.129390155271331, "learning_rate": 4.586370978686665e-06, "loss": 0.8373, "step": 5818 }, { "epoch": 0.42039482001914497, "grad_norm": 5.847545023887929, "learning_rate": 4.586209814707262e-06, "loss": 0.895, "step": 5819 }, { "epoch": 0.4204670652193545, "grad_norm": 6.703663339760698, "learning_rate": 4.586048622169265e-06, "loss": 0.9475, "step": 5820 }, { "epoch": 0.420539310419564, "grad_norm": 6.197599241532368, "learning_rate": 4.585887401074884e-06, "loss": 0.8869, "step": 5821 }, { "epoch": 0.42061155561977354, "grad_norm": 5.540185123201283, "learning_rate": 4.585726151426323e-06, "loss": 0.861, "step": 5822 }, { "epoch": 0.420683800819983, "grad_norm": 6.075400872210544, "learning_rate": 4.585564873225791e-06, "loss": 0.9641, "step": 5823 }, { "epoch": 0.4207560460201925, "grad_norm": 7.792043041039877, "learning_rate": 4.585403566475494e-06, "loss": 0.9429, "step": 5824 }, { "epoch": 0.42082829122040205, "grad_norm": 6.359410691219715, "learning_rate": 4.585242231177643e-06, "loss": 0.8128, "step": 5825 }, { "epoch": 0.42090053642061154, "grad_norm": 6.9533299105164295, "learning_rate": 4.585080867334444e-06, "loss": 0.8864, "step": 5826 }, { "epoch": 0.4209727816208211, "grad_norm": 5.735071789559089, "learning_rate": 4.584919474948108e-06, "loss": 0.8428, "step": 5827 }, { "epoch": 0.42104502682103057, "grad_norm": 6.072627152815277, "learning_rate": 4.5847580540208415e-06, "loss": 0.8651, "step": 5828 }, { "epoch": 0.4211172720212401, "grad_norm": 7.297442983866555, "learning_rate": 4.584596604554857e-06, "loss": 0.9429, "step": 5829 }, { "epoch": 0.4211895172214496, "grad_norm": 5.201809186921285, "learning_rate": 4.584435126552363e-06, "loss": 0.8717, "step": 5830 }, { "epoch": 0.42126176242165914, "grad_norm": 6.659597495348261, "learning_rate": 4.58427362001557e-06, "loss": 0.8428, "step": 5831 }, { "epoch": 0.4213340076218686, "grad_norm": 7.640345156796926, "learning_rate": 4.5841120849466915e-06, "loss": 0.8676, "step": 5832 }, { "epoch": 0.4214062528220781, "grad_norm": 6.727059836615856, "learning_rate": 4.583950521347935e-06, "loss": 0.8832, "step": 5833 }, { "epoch": 0.42147849802228765, "grad_norm": 7.089144787343391, "learning_rate": 4.583788929221514e-06, "loss": 0.8699, "step": 5834 }, { "epoch": 0.42155074322249714, "grad_norm": 5.777192872755217, "learning_rate": 4.5836273085696415e-06, "loss": 0.9505, "step": 5835 }, { "epoch": 0.4216229884227067, "grad_norm": 5.629518855456949, "learning_rate": 4.583465659394529e-06, "loss": 0.9284, "step": 5836 }, { "epoch": 0.42169523362291617, "grad_norm": 6.135570202532531, "learning_rate": 4.5833039816983886e-06, "loss": 0.8805, "step": 5837 }, { "epoch": 0.4217674788231257, "grad_norm": 7.792043530603028, "learning_rate": 4.583142275483434e-06, "loss": 0.8546, "step": 5838 }, { "epoch": 0.4218397240233352, "grad_norm": 6.6524873943130585, "learning_rate": 4.58298054075188e-06, "loss": 0.8428, "step": 5839 }, { "epoch": 0.42191196922354474, "grad_norm": 6.485512948437132, "learning_rate": 4.58281877750594e-06, "loss": 0.9374, "step": 5840 }, { "epoch": 0.4219842144237542, "grad_norm": 6.839923781923984, "learning_rate": 4.582656985747827e-06, "loss": 0.9151, "step": 5841 }, { "epoch": 0.4220564596239637, "grad_norm": 8.560174110994874, "learning_rate": 4.582495165479758e-06, "loss": 0.9689, "step": 5842 }, { "epoch": 0.42212870482417325, "grad_norm": 5.470293099816534, "learning_rate": 4.582333316703946e-06, "loss": 0.783, "step": 5843 }, { "epoch": 0.42220095002438274, "grad_norm": 7.442475778209057, "learning_rate": 4.582171439422608e-06, "loss": 0.9724, "step": 5844 }, { "epoch": 0.4222731952245923, "grad_norm": 6.903996578953891, "learning_rate": 4.58200953363796e-06, "loss": 0.8868, "step": 5845 }, { "epoch": 0.42234544042480177, "grad_norm": 6.641419127608531, "learning_rate": 4.581847599352219e-06, "loss": 0.8183, "step": 5846 }, { "epoch": 0.4224176856250113, "grad_norm": 6.278851558859579, "learning_rate": 4.581685636567599e-06, "loss": 0.8349, "step": 5847 }, { "epoch": 0.4224899308252208, "grad_norm": 7.791326777206739, "learning_rate": 4.58152364528632e-06, "loss": 0.8718, "step": 5848 }, { "epoch": 0.42256217602543034, "grad_norm": 6.926232613164961, "learning_rate": 4.581361625510599e-06, "loss": 0.8519, "step": 5849 }, { "epoch": 0.4226344212256398, "grad_norm": 8.388217800938204, "learning_rate": 4.581199577242652e-06, "loss": 0.925, "step": 5850 }, { "epoch": 0.4227066664258493, "grad_norm": 5.45141994632154, "learning_rate": 4.581037500484699e-06, "loss": 0.8803, "step": 5851 }, { "epoch": 0.42277891162605885, "grad_norm": 6.258148679620854, "learning_rate": 4.580875395238959e-06, "loss": 0.914, "step": 5852 }, { "epoch": 0.42285115682626834, "grad_norm": 6.217387754671841, "learning_rate": 4.580713261507651e-06, "loss": 0.9347, "step": 5853 }, { "epoch": 0.4229234020264779, "grad_norm": 5.775975476072878, "learning_rate": 4.580551099292993e-06, "loss": 0.8423, "step": 5854 }, { "epoch": 0.42299564722668737, "grad_norm": 8.043174114819882, "learning_rate": 4.580388908597207e-06, "loss": 1.0322, "step": 5855 }, { "epoch": 0.4230678924268969, "grad_norm": 6.798121967151916, "learning_rate": 4.580226689422511e-06, "loss": 0.8463, "step": 5856 }, { "epoch": 0.4231401376271064, "grad_norm": 6.291939062154467, "learning_rate": 4.5800644417711274e-06, "loss": 0.8952, "step": 5857 }, { "epoch": 0.42321238282731594, "grad_norm": 5.303426556170486, "learning_rate": 4.5799021656452766e-06, "loss": 0.8556, "step": 5858 }, { "epoch": 0.4232846280275254, "grad_norm": 5.820986223590842, "learning_rate": 4.57973986104718e-06, "loss": 0.8654, "step": 5859 }, { "epoch": 0.4233568732277349, "grad_norm": 7.164770659179037, "learning_rate": 4.57957752797906e-06, "loss": 0.9833, "step": 5860 }, { "epoch": 0.42342911842794445, "grad_norm": 8.84767780764482, "learning_rate": 4.579415166443137e-06, "loss": 0.8602, "step": 5861 }, { "epoch": 0.42350136362815394, "grad_norm": 6.744182198774989, "learning_rate": 4.579252776441636e-06, "loss": 0.8993, "step": 5862 }, { "epoch": 0.4235736088283635, "grad_norm": 6.857461371290416, "learning_rate": 4.57909035797678e-06, "loss": 0.9733, "step": 5863 }, { "epoch": 0.42364585402857297, "grad_norm": 5.858130727260683, "learning_rate": 4.578927911050789e-06, "loss": 0.8843, "step": 5864 }, { "epoch": 0.4237180992287825, "grad_norm": 5.611716629106518, "learning_rate": 4.578765435665891e-06, "loss": 0.8714, "step": 5865 }, { "epoch": 0.423790344428992, "grad_norm": 7.314197832243753, "learning_rate": 4.578602931824307e-06, "loss": 0.9429, "step": 5866 }, { "epoch": 0.42386258962920154, "grad_norm": 7.669077811972179, "learning_rate": 4.578440399528264e-06, "loss": 0.8986, "step": 5867 }, { "epoch": 0.423934834829411, "grad_norm": 9.182713098460216, "learning_rate": 4.5782778387799845e-06, "loss": 0.8974, "step": 5868 }, { "epoch": 0.4240070800296205, "grad_norm": 7.105354664679918, "learning_rate": 4.578115249581695e-06, "loss": 1.0272, "step": 5869 }, { "epoch": 0.42407932522983005, "grad_norm": 6.059715024586488, "learning_rate": 4.577952631935622e-06, "loss": 0.8887, "step": 5870 }, { "epoch": 0.42415157043003954, "grad_norm": 6.3825378732420495, "learning_rate": 4.57778998584399e-06, "loss": 0.8912, "step": 5871 }, { "epoch": 0.4242238156302491, "grad_norm": 6.278521955975001, "learning_rate": 4.577627311309028e-06, "loss": 0.8794, "step": 5872 }, { "epoch": 0.42429606083045857, "grad_norm": 6.0921328232750955, "learning_rate": 4.5774646083329595e-06, "loss": 0.9806, "step": 5873 }, { "epoch": 0.4243683060306681, "grad_norm": 10.727143229568428, "learning_rate": 4.577301876918016e-06, "loss": 0.8466, "step": 5874 }, { "epoch": 0.4244405512308776, "grad_norm": 7.191729022920102, "learning_rate": 4.57713911706642e-06, "loss": 0.8938, "step": 5875 }, { "epoch": 0.42451279643108714, "grad_norm": 6.668297123212005, "learning_rate": 4.576976328780404e-06, "loss": 0.9059, "step": 5876 }, { "epoch": 0.4245850416312966, "grad_norm": 7.435437597699756, "learning_rate": 4.576813512062194e-06, "loss": 0.9032, "step": 5877 }, { "epoch": 0.4246572868315061, "grad_norm": 5.642583026401904, "learning_rate": 4.57665066691402e-06, "loss": 0.855, "step": 5878 }, { "epoch": 0.42472953203171565, "grad_norm": 5.580018649053002, "learning_rate": 4.57648779333811e-06, "loss": 0.7558, "step": 5879 }, { "epoch": 0.42480177723192514, "grad_norm": 6.152663372153959, "learning_rate": 4.576324891336695e-06, "loss": 0.792, "step": 5880 }, { "epoch": 0.4248740224321347, "grad_norm": 5.87867589353508, "learning_rate": 4.576161960912004e-06, "loss": 0.8974, "step": 5881 }, { "epoch": 0.42494626763234417, "grad_norm": 5.858804334189559, "learning_rate": 4.575999002066268e-06, "loss": 0.8967, "step": 5882 }, { "epoch": 0.4250185128325537, "grad_norm": 6.711841242625673, "learning_rate": 4.575836014801718e-06, "loss": 0.9108, "step": 5883 }, { "epoch": 0.4250907580327632, "grad_norm": 6.723446646047405, "learning_rate": 4.575672999120585e-06, "loss": 0.8467, "step": 5884 }, { "epoch": 0.42516300323297274, "grad_norm": 4.831452738269701, "learning_rate": 4.5755099550250996e-06, "loss": 0.809, "step": 5885 }, { "epoch": 0.4252352484331822, "grad_norm": 7.098089697225128, "learning_rate": 4.5753468825174944e-06, "loss": 0.8239, "step": 5886 }, { "epoch": 0.4253074936333917, "grad_norm": 6.415570879753006, "learning_rate": 4.575183781600002e-06, "loss": 0.8744, "step": 5887 }, { "epoch": 0.42537973883360125, "grad_norm": 6.573632670325822, "learning_rate": 4.575020652274855e-06, "loss": 0.9449, "step": 5888 }, { "epoch": 0.42545198403381074, "grad_norm": 8.283970108987692, "learning_rate": 4.574857494544286e-06, "loss": 0.9328, "step": 5889 }, { "epoch": 0.4255242292340203, "grad_norm": 5.748386322482163, "learning_rate": 4.574694308410529e-06, "loss": 0.8943, "step": 5890 }, { "epoch": 0.42559647443422977, "grad_norm": 5.907090868962461, "learning_rate": 4.574531093875818e-06, "loss": 0.8806, "step": 5891 }, { "epoch": 0.4256687196344393, "grad_norm": 6.30228678057027, "learning_rate": 4.5743678509423875e-06, "loss": 0.9413, "step": 5892 }, { "epoch": 0.4257409648346488, "grad_norm": 5.663860112224667, "learning_rate": 4.574204579612471e-06, "loss": 0.93, "step": 5893 }, { "epoch": 0.42581321003485834, "grad_norm": 7.074728312852981, "learning_rate": 4.574041279888305e-06, "loss": 0.9032, "step": 5894 }, { "epoch": 0.4258854552350678, "grad_norm": 6.755328300624534, "learning_rate": 4.573877951772124e-06, "loss": 0.8869, "step": 5895 }, { "epoch": 0.4259577004352773, "grad_norm": 6.353609287850962, "learning_rate": 4.573714595266164e-06, "loss": 0.8606, "step": 5896 }, { "epoch": 0.42602994563548685, "grad_norm": 7.7970061205157055, "learning_rate": 4.573551210372661e-06, "loss": 0.8324, "step": 5897 }, { "epoch": 0.42610219083569634, "grad_norm": 6.087674756058086, "learning_rate": 4.573387797093852e-06, "loss": 0.9385, "step": 5898 }, { "epoch": 0.4261744360359059, "grad_norm": 5.656495652739248, "learning_rate": 4.573224355431974e-06, "loss": 0.9198, "step": 5899 }, { "epoch": 0.42624668123611537, "grad_norm": 7.043731868148524, "learning_rate": 4.573060885389266e-06, "loss": 0.9297, "step": 5900 }, { "epoch": 0.4263189264363249, "grad_norm": 6.451869888724908, "learning_rate": 4.5728973869679625e-06, "loss": 0.9722, "step": 5901 }, { "epoch": 0.4263911716365344, "grad_norm": 8.445477818398121, "learning_rate": 4.5727338601703035e-06, "loss": 0.9034, "step": 5902 }, { "epoch": 0.42646341683674394, "grad_norm": 7.393629177263942, "learning_rate": 4.572570304998527e-06, "loss": 0.9194, "step": 5903 }, { "epoch": 0.4265356620369534, "grad_norm": 6.608952790611036, "learning_rate": 4.5724067214548725e-06, "loss": 0.9225, "step": 5904 }, { "epoch": 0.4266079072371629, "grad_norm": 7.214606082390934, "learning_rate": 4.572243109541579e-06, "loss": 0.899, "step": 5905 }, { "epoch": 0.42668015243737245, "grad_norm": 6.3477914649060105, "learning_rate": 4.572079469260886e-06, "loss": 0.9228, "step": 5906 }, { "epoch": 0.42675239763758194, "grad_norm": 5.662943720326898, "learning_rate": 4.571915800615035e-06, "loss": 0.8434, "step": 5907 }, { "epoch": 0.4268246428377915, "grad_norm": 6.188596069665281, "learning_rate": 4.571752103606265e-06, "loss": 0.893, "step": 5908 }, { "epoch": 0.42689688803800097, "grad_norm": 6.399199388018566, "learning_rate": 4.571588378236817e-06, "loss": 0.924, "step": 5909 }, { "epoch": 0.4269691332382105, "grad_norm": 6.352445008551426, "learning_rate": 4.5714246245089325e-06, "loss": 1.0217, "step": 5910 }, { "epoch": 0.42704137843842, "grad_norm": 6.045887316886276, "learning_rate": 4.571260842424854e-06, "loss": 0.8557, "step": 5911 }, { "epoch": 0.42711362363862954, "grad_norm": 6.541668189305738, "learning_rate": 4.571097031986822e-06, "loss": 0.8527, "step": 5912 }, { "epoch": 0.427185868838839, "grad_norm": 6.117646142907743, "learning_rate": 4.57093319319708e-06, "loss": 0.8592, "step": 5913 }, { "epoch": 0.4272581140390485, "grad_norm": 6.741198134517213, "learning_rate": 4.57076932605787e-06, "loss": 0.9666, "step": 5914 }, { "epoch": 0.42733035923925805, "grad_norm": 6.066180812677744, "learning_rate": 4.570605430571437e-06, "loss": 0.9324, "step": 5915 }, { "epoch": 0.42740260443946754, "grad_norm": 6.60259589960838, "learning_rate": 4.570441506740023e-06, "loss": 0.8148, "step": 5916 }, { "epoch": 0.4274748496396771, "grad_norm": 7.186416677020733, "learning_rate": 4.570277554565872e-06, "loss": 0.9751, "step": 5917 }, { "epoch": 0.42754709483988657, "grad_norm": 6.175544053794091, "learning_rate": 4.5701135740512285e-06, "loss": 0.8791, "step": 5918 }, { "epoch": 0.4276193400400961, "grad_norm": 6.566482062415567, "learning_rate": 4.569949565198338e-06, "loss": 0.7983, "step": 5919 }, { "epoch": 0.4276915852403056, "grad_norm": 7.3947566865067795, "learning_rate": 4.569785528009445e-06, "loss": 0.8925, "step": 5920 }, { "epoch": 0.42776383044051514, "grad_norm": 7.378391585164968, "learning_rate": 4.569621462486795e-06, "loss": 0.9575, "step": 5921 }, { "epoch": 0.4278360756407246, "grad_norm": 8.67635591227699, "learning_rate": 4.569457368632635e-06, "loss": 0.8661, "step": 5922 }, { "epoch": 0.4279083208409341, "grad_norm": 6.111996607734578, "learning_rate": 4.569293246449209e-06, "loss": 0.9155, "step": 5923 }, { "epoch": 0.42798056604114365, "grad_norm": 7.453858921215427, "learning_rate": 4.569129095938767e-06, "loss": 0.8795, "step": 5924 }, { "epoch": 0.42805281124135314, "grad_norm": 5.849783164676317, "learning_rate": 4.568964917103553e-06, "loss": 0.9694, "step": 5925 }, { "epoch": 0.4281250564415627, "grad_norm": 6.90883812373475, "learning_rate": 4.568800709945816e-06, "loss": 0.8678, "step": 5926 }, { "epoch": 0.42819730164177217, "grad_norm": 8.130866177858673, "learning_rate": 4.568636474467803e-06, "loss": 1.0266, "step": 5927 }, { "epoch": 0.4282695468419817, "grad_norm": 5.344638862078526, "learning_rate": 4.568472210671764e-06, "loss": 0.8706, "step": 5928 }, { "epoch": 0.4283417920421912, "grad_norm": 8.272785510487932, "learning_rate": 4.568307918559946e-06, "loss": 0.8789, "step": 5929 }, { "epoch": 0.4284140372424007, "grad_norm": 5.6185015970383825, "learning_rate": 4.568143598134598e-06, "loss": 0.913, "step": 5930 }, { "epoch": 0.4284862824426102, "grad_norm": 8.455956737454558, "learning_rate": 4.5679792493979705e-06, "loss": 0.9069, "step": 5931 }, { "epoch": 0.4285585276428197, "grad_norm": 5.862107435800472, "learning_rate": 4.567814872352313e-06, "loss": 0.8495, "step": 5932 }, { "epoch": 0.42863077284302925, "grad_norm": 6.58237955773517, "learning_rate": 4.567650466999874e-06, "loss": 1.0102, "step": 5933 }, { "epoch": 0.42870301804323874, "grad_norm": 6.147452793009895, "learning_rate": 4.567486033342907e-06, "loss": 0.9204, "step": 5934 }, { "epoch": 0.4287752632434483, "grad_norm": 6.3731644923830935, "learning_rate": 4.567321571383662e-06, "loss": 0.9532, "step": 5935 }, { "epoch": 0.42884750844365777, "grad_norm": 7.464268061837986, "learning_rate": 4.567157081124388e-06, "loss": 0.9647, "step": 5936 }, { "epoch": 0.4289197536438673, "grad_norm": 7.579364478612848, "learning_rate": 4.56699256256734e-06, "loss": 0.9151, "step": 5937 }, { "epoch": 0.4289919988440768, "grad_norm": 7.871589043039531, "learning_rate": 4.5668280157147685e-06, "loss": 0.8963, "step": 5938 }, { "epoch": 0.4290642440442863, "grad_norm": 6.430898299762991, "learning_rate": 4.566663440568926e-06, "loss": 0.8002, "step": 5939 }, { "epoch": 0.4291364892444958, "grad_norm": 6.942325322965828, "learning_rate": 4.566498837132066e-06, "loss": 0.9851, "step": 5940 }, { "epoch": 0.4292087344447053, "grad_norm": 7.599120701568815, "learning_rate": 4.56633420540644e-06, "loss": 0.9092, "step": 5941 }, { "epoch": 0.42928097964491485, "grad_norm": 5.887792093275932, "learning_rate": 4.566169545394305e-06, "loss": 0.9069, "step": 5942 }, { "epoch": 0.42935322484512434, "grad_norm": 6.132651707187513, "learning_rate": 4.566004857097913e-06, "loss": 0.8764, "step": 5943 }, { "epoch": 0.4294254700453339, "grad_norm": 6.677899180384419, "learning_rate": 4.565840140519518e-06, "loss": 0.9776, "step": 5944 }, { "epoch": 0.42949771524554337, "grad_norm": 6.629071406126165, "learning_rate": 4.565675395661376e-06, "loss": 0.9317, "step": 5945 }, { "epoch": 0.4295699604457529, "grad_norm": 6.378464000763287, "learning_rate": 4.565510622525741e-06, "loss": 0.9041, "step": 5946 }, { "epoch": 0.4296422056459624, "grad_norm": 6.531251752205208, "learning_rate": 4.565345821114871e-06, "loss": 0.8668, "step": 5947 }, { "epoch": 0.4297144508461719, "grad_norm": 6.992860150082603, "learning_rate": 4.565180991431019e-06, "loss": 0.9191, "step": 5948 }, { "epoch": 0.4297866960463814, "grad_norm": 6.536708175524792, "learning_rate": 4.565016133476442e-06, "loss": 0.9275, "step": 5949 }, { "epoch": 0.4298589412465909, "grad_norm": 6.134624471075882, "learning_rate": 4.564851247253398e-06, "loss": 0.8569, "step": 5950 }, { "epoch": 0.42993118644680045, "grad_norm": 7.107250390810484, "learning_rate": 4.5646863327641445e-06, "loss": 0.861, "step": 5951 }, { "epoch": 0.43000343164700994, "grad_norm": 10.466502571844416, "learning_rate": 4.564521390010938e-06, "loss": 0.9202, "step": 5952 }, { "epoch": 0.4300756768472195, "grad_norm": 6.266455388396667, "learning_rate": 4.564356418996036e-06, "loss": 0.9597, "step": 5953 }, { "epoch": 0.43014792204742897, "grad_norm": 6.908263590437248, "learning_rate": 4.564191419721698e-06, "loss": 0.9821, "step": 5954 }, { "epoch": 0.4302201672476385, "grad_norm": 6.943433543907662, "learning_rate": 4.5640263921901825e-06, "loss": 0.8765, "step": 5955 }, { "epoch": 0.430292412447848, "grad_norm": 6.378772591699074, "learning_rate": 4.5638613364037475e-06, "loss": 0.8461, "step": 5956 }, { "epoch": 0.4303646576480575, "grad_norm": 6.2565892389567415, "learning_rate": 4.563696252364654e-06, "loss": 0.9562, "step": 5957 }, { "epoch": 0.430436902848267, "grad_norm": 5.756243425959872, "learning_rate": 4.563531140075161e-06, "loss": 0.8601, "step": 5958 }, { "epoch": 0.4305091480484765, "grad_norm": 5.776411021204141, "learning_rate": 4.563365999537529e-06, "loss": 0.9299, "step": 5959 }, { "epoch": 0.43058139324868605, "grad_norm": 6.823454841956867, "learning_rate": 4.563200830754018e-06, "loss": 0.9929, "step": 5960 }, { "epoch": 0.43065363844889554, "grad_norm": 6.6953506735711645, "learning_rate": 4.56303563372689e-06, "loss": 0.8947, "step": 5961 }, { "epoch": 0.4307258836491051, "grad_norm": 6.241364273195617, "learning_rate": 4.562870408458406e-06, "loss": 0.875, "step": 5962 }, { "epoch": 0.43079812884931457, "grad_norm": 5.083013722528985, "learning_rate": 4.562705154950828e-06, "loss": 0.7822, "step": 5963 }, { "epoch": 0.4308703740495241, "grad_norm": 6.47849236646036, "learning_rate": 4.562539873206418e-06, "loss": 0.964, "step": 5964 }, { "epoch": 0.4309426192497336, "grad_norm": 7.856604267833394, "learning_rate": 4.562374563227438e-06, "loss": 0.877, "step": 5965 }, { "epoch": 0.4310148644499431, "grad_norm": 6.389247456684092, "learning_rate": 4.562209225016152e-06, "loss": 0.9991, "step": 5966 }, { "epoch": 0.4310871096501526, "grad_norm": 6.864369166674858, "learning_rate": 4.562043858574823e-06, "loss": 0.8891, "step": 5967 }, { "epoch": 0.4311593548503621, "grad_norm": 7.291990552702542, "learning_rate": 4.561878463905714e-06, "loss": 0.866, "step": 5968 }, { "epoch": 0.43123160005057165, "grad_norm": 7.628737487332669, "learning_rate": 4.561713041011091e-06, "loss": 0.9795, "step": 5969 }, { "epoch": 0.43130384525078114, "grad_norm": 7.856266324580917, "learning_rate": 4.561547589893217e-06, "loss": 0.9339, "step": 5970 }, { "epoch": 0.4313760904509907, "grad_norm": 5.486846280099383, "learning_rate": 4.5613821105543566e-06, "loss": 0.7987, "step": 5971 }, { "epoch": 0.43144833565120017, "grad_norm": 6.527816234863124, "learning_rate": 4.561216602996775e-06, "loss": 0.8781, "step": 5972 }, { "epoch": 0.4315205808514097, "grad_norm": 7.728313251551811, "learning_rate": 4.561051067222741e-06, "loss": 0.8695, "step": 5973 }, { "epoch": 0.4315928260516192, "grad_norm": 7.1722779004987265, "learning_rate": 4.560885503234516e-06, "loss": 0.8922, "step": 5974 }, { "epoch": 0.4316650712518287, "grad_norm": 7.079008539898719, "learning_rate": 4.560719911034369e-06, "loss": 0.8895, "step": 5975 }, { "epoch": 0.4317373164520382, "grad_norm": 7.737112096575227, "learning_rate": 4.5605542906245665e-06, "loss": 0.9311, "step": 5976 }, { "epoch": 0.4318095616522477, "grad_norm": 7.688011834698161, "learning_rate": 4.5603886420073765e-06, "loss": 0.9371, "step": 5977 }, { "epoch": 0.43188180685245725, "grad_norm": 7.889594949398617, "learning_rate": 4.560222965185065e-06, "loss": 0.8365, "step": 5978 }, { "epoch": 0.43195405205266674, "grad_norm": 6.997450773087106, "learning_rate": 4.5600572601599005e-06, "loss": 0.917, "step": 5979 }, { "epoch": 0.4320262972528763, "grad_norm": 7.6364320198729345, "learning_rate": 4.559891526934152e-06, "loss": 0.9752, "step": 5980 }, { "epoch": 0.43209854245308577, "grad_norm": 7.3364925804207255, "learning_rate": 4.559725765510088e-06, "loss": 0.9058, "step": 5981 }, { "epoch": 0.4321707876532953, "grad_norm": 6.4673197911315405, "learning_rate": 4.559559975889978e-06, "loss": 0.8693, "step": 5982 }, { "epoch": 0.4322430328535048, "grad_norm": 6.713786150079114, "learning_rate": 4.55939415807609e-06, "loss": 0.8955, "step": 5983 }, { "epoch": 0.4323152780537143, "grad_norm": 7.9589105633080415, "learning_rate": 4.559228312070696e-06, "loss": 0.8678, "step": 5984 }, { "epoch": 0.4323875232539238, "grad_norm": 8.045236483666903, "learning_rate": 4.559062437876064e-06, "loss": 0.8012, "step": 5985 }, { "epoch": 0.4324597684541333, "grad_norm": 6.260156542493442, "learning_rate": 4.558896535494467e-06, "loss": 0.886, "step": 5986 }, { "epoch": 0.43253201365434285, "grad_norm": 6.761373474720091, "learning_rate": 4.558730604928175e-06, "loss": 0.8764, "step": 5987 }, { "epoch": 0.43260425885455234, "grad_norm": 6.5116742275022625, "learning_rate": 4.55856464617946e-06, "loss": 0.8838, "step": 5988 }, { "epoch": 0.4326765040547619, "grad_norm": 6.812947389803511, "learning_rate": 4.558398659250593e-06, "loss": 0.8793, "step": 5989 }, { "epoch": 0.43274874925497137, "grad_norm": 5.960725353060577, "learning_rate": 4.558232644143847e-06, "loss": 0.9521, "step": 5990 }, { "epoch": 0.4328209944551809, "grad_norm": 5.9447106195832555, "learning_rate": 4.558066600861493e-06, "loss": 0.7735, "step": 5991 }, { "epoch": 0.4328932396553904, "grad_norm": 5.561723290554229, "learning_rate": 4.5579005294058056e-06, "loss": 0.8324, "step": 5992 }, { "epoch": 0.4329654848555999, "grad_norm": 6.984805646416577, "learning_rate": 4.5577344297790576e-06, "loss": 0.9293, "step": 5993 }, { "epoch": 0.4330377300558094, "grad_norm": 6.636153135844803, "learning_rate": 4.557568301983524e-06, "loss": 0.949, "step": 5994 }, { "epoch": 0.4331099752560189, "grad_norm": 6.837975421675018, "learning_rate": 4.557402146021477e-06, "loss": 0.8056, "step": 5995 }, { "epoch": 0.43318222045622845, "grad_norm": 7.307547693713564, "learning_rate": 4.557235961895192e-06, "loss": 0.8756, "step": 5996 }, { "epoch": 0.43325446565643794, "grad_norm": 7.054436325383941, "learning_rate": 4.557069749606945e-06, "loss": 0.9268, "step": 5997 }, { "epoch": 0.4333267108566475, "grad_norm": 5.702093161687254, "learning_rate": 4.5569035091590095e-06, "loss": 0.8716, "step": 5998 }, { "epoch": 0.43339895605685697, "grad_norm": 6.4371693350300765, "learning_rate": 4.5567372405536626e-06, "loss": 0.9662, "step": 5999 }, { "epoch": 0.4334712012570665, "grad_norm": 5.722262945102281, "learning_rate": 4.556570943793179e-06, "loss": 0.8742, "step": 6000 }, { "epoch": 0.433543446457276, "grad_norm": 6.4138599879533045, "learning_rate": 4.556404618879837e-06, "loss": 0.7794, "step": 6001 }, { "epoch": 0.4336156916574855, "grad_norm": 7.1211968695393, "learning_rate": 4.556238265815912e-06, "loss": 0.9067, "step": 6002 }, { "epoch": 0.433687936857695, "grad_norm": 7.42731092049913, "learning_rate": 4.5560718846036825e-06, "loss": 0.8693, "step": 6003 }, { "epoch": 0.4337601820579045, "grad_norm": 5.947241250976326, "learning_rate": 4.555905475245424e-06, "loss": 0.8699, "step": 6004 }, { "epoch": 0.43383242725811405, "grad_norm": 7.64555739512039, "learning_rate": 4.5557390377434166e-06, "loss": 0.965, "step": 6005 }, { "epoch": 0.43390467245832354, "grad_norm": 6.089493580575572, "learning_rate": 4.555572572099938e-06, "loss": 0.9113, "step": 6006 }, { "epoch": 0.4339769176585331, "grad_norm": 7.63478611055067, "learning_rate": 4.555406078317266e-06, "loss": 0.9329, "step": 6007 }, { "epoch": 0.43404916285874257, "grad_norm": 6.704520838241176, "learning_rate": 4.555239556397681e-06, "loss": 0.9105, "step": 6008 }, { "epoch": 0.4341214080589521, "grad_norm": 6.386014212382553, "learning_rate": 4.555073006343464e-06, "loss": 0.8303, "step": 6009 }, { "epoch": 0.4341936532591616, "grad_norm": 6.975515870190064, "learning_rate": 4.554906428156891e-06, "loss": 0.9068, "step": 6010 }, { "epoch": 0.4342658984593711, "grad_norm": 9.127314861826235, "learning_rate": 4.554739821840246e-06, "loss": 0.9624, "step": 6011 }, { "epoch": 0.4343381436595806, "grad_norm": 7.336729419283318, "learning_rate": 4.554573187395807e-06, "loss": 0.8545, "step": 6012 }, { "epoch": 0.4344103888597901, "grad_norm": 7.01039169632053, "learning_rate": 4.5544065248258575e-06, "loss": 0.8818, "step": 6013 }, { "epoch": 0.43448263405999965, "grad_norm": 5.612879941182678, "learning_rate": 4.554239834132677e-06, "loss": 0.9153, "step": 6014 }, { "epoch": 0.43455487926020914, "grad_norm": 6.609998019819786, "learning_rate": 4.554073115318549e-06, "loss": 0.9227, "step": 6015 }, { "epoch": 0.4346271244604187, "grad_norm": 7.301384718738984, "learning_rate": 4.553906368385754e-06, "loss": 0.8434, "step": 6016 }, { "epoch": 0.43469936966062817, "grad_norm": 7.697118680800506, "learning_rate": 4.553739593336577e-06, "loss": 0.8887, "step": 6017 }, { "epoch": 0.4347716148608377, "grad_norm": 6.5607832343363715, "learning_rate": 4.5535727901732975e-06, "loss": 0.8829, "step": 6018 }, { "epoch": 0.4348438600610472, "grad_norm": 8.658879180336678, "learning_rate": 4.553405958898202e-06, "loss": 0.9778, "step": 6019 }, { "epoch": 0.4349161052612567, "grad_norm": 6.008513450656389, "learning_rate": 4.553239099513574e-06, "loss": 0.9005, "step": 6020 }, { "epoch": 0.4349883504614662, "grad_norm": 6.057890722730555, "learning_rate": 4.553072212021696e-06, "loss": 0.835, "step": 6021 }, { "epoch": 0.4350605956616757, "grad_norm": 6.565443559524427, "learning_rate": 4.552905296424855e-06, "loss": 0.8997, "step": 6022 }, { "epoch": 0.43513284086188525, "grad_norm": 5.957286111351376, "learning_rate": 4.552738352725333e-06, "loss": 0.86, "step": 6023 }, { "epoch": 0.43520508606209474, "grad_norm": 5.759953384210783, "learning_rate": 4.552571380925417e-06, "loss": 0.9201, "step": 6024 }, { "epoch": 0.4352773312623043, "grad_norm": 6.908755577199723, "learning_rate": 4.5524043810273926e-06, "loss": 0.8864, "step": 6025 }, { "epoch": 0.43534957646251377, "grad_norm": 6.520532895643121, "learning_rate": 4.552237353033546e-06, "loss": 0.9671, "step": 6026 }, { "epoch": 0.4354218216627233, "grad_norm": 6.073202223522837, "learning_rate": 4.552070296946164e-06, "loss": 0.854, "step": 6027 }, { "epoch": 0.4354940668629328, "grad_norm": 6.492702495838146, "learning_rate": 4.551903212767532e-06, "loss": 1.0154, "step": 6028 }, { "epoch": 0.4355663120631423, "grad_norm": 7.159393286546376, "learning_rate": 4.551736100499938e-06, "loss": 0.8935, "step": 6029 }, { "epoch": 0.4356385572633518, "grad_norm": 6.898873234398272, "learning_rate": 4.551568960145671e-06, "loss": 0.9499, "step": 6030 }, { "epoch": 0.4357108024635613, "grad_norm": 6.05545137570482, "learning_rate": 4.551401791707017e-06, "loss": 0.9041, "step": 6031 }, { "epoch": 0.43578304766377085, "grad_norm": 7.478324084694555, "learning_rate": 4.551234595186266e-06, "loss": 0.9441, "step": 6032 }, { "epoch": 0.43585529286398034, "grad_norm": 5.477766179521073, "learning_rate": 4.551067370585706e-06, "loss": 0.7793, "step": 6033 }, { "epoch": 0.4359275380641899, "grad_norm": 5.914822931420567, "learning_rate": 4.550900117907625e-06, "loss": 0.8628, "step": 6034 }, { "epoch": 0.43599978326439937, "grad_norm": 5.789554714019127, "learning_rate": 4.550732837154315e-06, "loss": 0.9009, "step": 6035 }, { "epoch": 0.4360720284646089, "grad_norm": 6.558226729047254, "learning_rate": 4.550565528328065e-06, "loss": 0.8532, "step": 6036 }, { "epoch": 0.4361442736648184, "grad_norm": 6.469477663254706, "learning_rate": 4.550398191431163e-06, "loss": 0.8684, "step": 6037 }, { "epoch": 0.4362165188650279, "grad_norm": 5.545773163012163, "learning_rate": 4.550230826465904e-06, "loss": 0.8881, "step": 6038 }, { "epoch": 0.4362887640652374, "grad_norm": 6.920584737074415, "learning_rate": 4.550063433434576e-06, "loss": 0.8684, "step": 6039 }, { "epoch": 0.4363610092654469, "grad_norm": 5.424888630791801, "learning_rate": 4.549896012339472e-06, "loss": 0.8811, "step": 6040 }, { "epoch": 0.43643325446565645, "grad_norm": 8.57097345687963, "learning_rate": 4.549728563182882e-06, "loss": 1.0096, "step": 6041 }, { "epoch": 0.43650549966586594, "grad_norm": 7.932012628634697, "learning_rate": 4.549561085967101e-06, "loss": 0.9863, "step": 6042 }, { "epoch": 0.4365777448660755, "grad_norm": 6.6842378164430665, "learning_rate": 4.54939358069442e-06, "loss": 0.9024, "step": 6043 }, { "epoch": 0.43664999006628497, "grad_norm": 8.321727249146038, "learning_rate": 4.549226047367131e-06, "loss": 0.8562, "step": 6044 }, { "epoch": 0.4367222352664945, "grad_norm": 7.044871787751667, "learning_rate": 4.549058485987529e-06, "loss": 0.861, "step": 6045 }, { "epoch": 0.436794480466704, "grad_norm": 7.085710582864031, "learning_rate": 4.548890896557907e-06, "loss": 0.8628, "step": 6046 }, { "epoch": 0.4368667256669135, "grad_norm": 6.658072208763538, "learning_rate": 4.54872327908056e-06, "loss": 0.9425, "step": 6047 }, { "epoch": 0.436938970867123, "grad_norm": 7.189630607273434, "learning_rate": 4.5485556335577825e-06, "loss": 0.9214, "step": 6048 }, { "epoch": 0.4370112160673325, "grad_norm": 6.545131988476782, "learning_rate": 4.548387959991868e-06, "loss": 0.9027, "step": 6049 }, { "epoch": 0.43708346126754205, "grad_norm": 5.938005606556472, "learning_rate": 4.548220258385114e-06, "loss": 0.8639, "step": 6050 }, { "epoch": 0.43715570646775154, "grad_norm": 6.596850493370663, "learning_rate": 4.548052528739813e-06, "loss": 0.9846, "step": 6051 }, { "epoch": 0.4372279516679611, "grad_norm": 5.958818891609965, "learning_rate": 4.547884771058265e-06, "loss": 0.848, "step": 6052 }, { "epoch": 0.43730019686817057, "grad_norm": 6.070261912092136, "learning_rate": 4.5477169853427635e-06, "loss": 0.8548, "step": 6053 }, { "epoch": 0.4373724420683801, "grad_norm": 6.552149284609039, "learning_rate": 4.547549171595608e-06, "loss": 0.9037, "step": 6054 }, { "epoch": 0.4374446872685896, "grad_norm": 6.034943867622263, "learning_rate": 4.547381329819092e-06, "loss": 0.9019, "step": 6055 }, { "epoch": 0.4375169324687991, "grad_norm": 6.283158804630755, "learning_rate": 4.547213460015516e-06, "loss": 0.8743, "step": 6056 }, { "epoch": 0.4375891776690086, "grad_norm": 6.479613391975841, "learning_rate": 4.547045562187178e-06, "loss": 0.9086, "step": 6057 }, { "epoch": 0.4376614228692181, "grad_norm": 6.953313177755567, "learning_rate": 4.546877636336376e-06, "loss": 0.9487, "step": 6058 }, { "epoch": 0.43773366806942765, "grad_norm": 6.211280208705343, "learning_rate": 4.546709682465407e-06, "loss": 0.8995, "step": 6059 }, { "epoch": 0.43780591326963714, "grad_norm": 6.52484079217886, "learning_rate": 4.5465417005765724e-06, "loss": 0.9199, "step": 6060 }, { "epoch": 0.4378781584698467, "grad_norm": 6.182022907314603, "learning_rate": 4.54637369067217e-06, "loss": 0.8675, "step": 6061 }, { "epoch": 0.43795040367005617, "grad_norm": 6.350934249756217, "learning_rate": 4.546205652754502e-06, "loss": 0.8176, "step": 6062 }, { "epoch": 0.4380226488702657, "grad_norm": 9.63365359010488, "learning_rate": 4.546037586825866e-06, "loss": 0.9469, "step": 6063 }, { "epoch": 0.4380948940704752, "grad_norm": 5.540657792959832, "learning_rate": 4.545869492888566e-06, "loss": 0.9483, "step": 6064 }, { "epoch": 0.4381671392706847, "grad_norm": 6.767858264403928, "learning_rate": 4.5457013709449e-06, "loss": 0.965, "step": 6065 }, { "epoch": 0.4382393844708942, "grad_norm": 6.10195605074829, "learning_rate": 4.545533220997171e-06, "loss": 0.8925, "step": 6066 }, { "epoch": 0.4383116296711037, "grad_norm": 6.535839165976747, "learning_rate": 4.5453650430476796e-06, "loss": 0.9646, "step": 6067 }, { "epoch": 0.43838387487131325, "grad_norm": 6.974262332534375, "learning_rate": 4.54519683709873e-06, "loss": 0.8618, "step": 6068 }, { "epoch": 0.43845612007152274, "grad_norm": 7.1109955334542745, "learning_rate": 4.545028603152623e-06, "loss": 0.9215, "step": 6069 }, { "epoch": 0.4385283652717323, "grad_norm": 6.54730849549311, "learning_rate": 4.544860341211662e-06, "loss": 0.929, "step": 6070 }, { "epoch": 0.43860061047194177, "grad_norm": 5.633049694045919, "learning_rate": 4.544692051278152e-06, "loss": 0.8609, "step": 6071 }, { "epoch": 0.4386728556721513, "grad_norm": 7.2806068758526505, "learning_rate": 4.544523733354394e-06, "loss": 0.9521, "step": 6072 }, { "epoch": 0.4387451008723608, "grad_norm": 6.165294528927062, "learning_rate": 4.544355387442694e-06, "loss": 0.824, "step": 6073 }, { "epoch": 0.4388173460725703, "grad_norm": 8.572843443942942, "learning_rate": 4.544187013545356e-06, "loss": 0.9474, "step": 6074 }, { "epoch": 0.4388895912727798, "grad_norm": 6.939139232627842, "learning_rate": 4.544018611664685e-06, "loss": 1.039, "step": 6075 }, { "epoch": 0.4389618364729893, "grad_norm": 6.462782609024821, "learning_rate": 4.5438501818029875e-06, "loss": 0.9111, "step": 6076 }, { "epoch": 0.43903408167319885, "grad_norm": 6.738949242251973, "learning_rate": 4.543681723962567e-06, "loss": 0.9374, "step": 6077 }, { "epoch": 0.43910632687340834, "grad_norm": 6.846784084592225, "learning_rate": 4.54351323814573e-06, "loss": 0.9556, "step": 6078 }, { "epoch": 0.4391785720736179, "grad_norm": 5.7085941375778235, "learning_rate": 4.543344724354784e-06, "loss": 0.9148, "step": 6079 }, { "epoch": 0.43925081727382737, "grad_norm": 6.543371606068057, "learning_rate": 4.543176182592035e-06, "loss": 0.8541, "step": 6080 }, { "epoch": 0.4393230624740369, "grad_norm": 6.021666190897633, "learning_rate": 4.543007612859791e-06, "loss": 0.8775, "step": 6081 }, { "epoch": 0.4393953076742464, "grad_norm": 6.674723461580578, "learning_rate": 4.542839015160358e-06, "loss": 0.8819, "step": 6082 }, { "epoch": 0.4394675528744559, "grad_norm": 8.805022401710794, "learning_rate": 4.542670389496047e-06, "loss": 0.9435, "step": 6083 }, { "epoch": 0.4395397980746654, "grad_norm": 5.126743485297278, "learning_rate": 4.542501735869163e-06, "loss": 0.9085, "step": 6084 }, { "epoch": 0.4396120432748749, "grad_norm": 5.752830140588617, "learning_rate": 4.542333054282016e-06, "loss": 0.8669, "step": 6085 }, { "epoch": 0.43968428847508445, "grad_norm": 5.749508380605453, "learning_rate": 4.542164344736916e-06, "loss": 0.8152, "step": 6086 }, { "epoch": 0.43975653367529394, "grad_norm": 6.5108458191889325, "learning_rate": 4.541995607236171e-06, "loss": 0.8819, "step": 6087 }, { "epoch": 0.4398287788755035, "grad_norm": 6.553893631156645, "learning_rate": 4.541826841782092e-06, "loss": 0.8443, "step": 6088 }, { "epoch": 0.43990102407571297, "grad_norm": 6.654763499012057, "learning_rate": 4.541658048376989e-06, "loss": 0.8745, "step": 6089 }, { "epoch": 0.4399732692759225, "grad_norm": 5.2577517365661395, "learning_rate": 4.541489227023173e-06, "loss": 0.8394, "step": 6090 }, { "epoch": 0.440045514476132, "grad_norm": 6.781673295130396, "learning_rate": 4.541320377722953e-06, "loss": 0.896, "step": 6091 }, { "epoch": 0.4401177596763415, "grad_norm": 5.862612712240226, "learning_rate": 4.5411515004786436e-06, "loss": 0.8479, "step": 6092 }, { "epoch": 0.440190004876551, "grad_norm": 5.679701940211182, "learning_rate": 4.540982595292555e-06, "loss": 0.8568, "step": 6093 }, { "epoch": 0.4402622500767605, "grad_norm": 6.873082396363168, "learning_rate": 4.540813662166998e-06, "loss": 0.8422, "step": 6094 }, { "epoch": 0.44033449527697005, "grad_norm": 6.275312511671481, "learning_rate": 4.540644701104287e-06, "loss": 0.8848, "step": 6095 }, { "epoch": 0.44040674047717954, "grad_norm": 6.431378166619524, "learning_rate": 4.5404757121067354e-06, "loss": 0.8704, "step": 6096 }, { "epoch": 0.4404789856773891, "grad_norm": 5.874244154337273, "learning_rate": 4.540306695176655e-06, "loss": 0.8108, "step": 6097 }, { "epoch": 0.44055123087759857, "grad_norm": 5.5792310456979015, "learning_rate": 4.540137650316361e-06, "loss": 0.9614, "step": 6098 }, { "epoch": 0.4406234760778081, "grad_norm": 6.610726292769949, "learning_rate": 4.539968577528165e-06, "loss": 0.953, "step": 6099 }, { "epoch": 0.4406957212780176, "grad_norm": 6.343100528637888, "learning_rate": 4.539799476814384e-06, "loss": 0.7811, "step": 6100 }, { "epoch": 0.4407679664782271, "grad_norm": 8.210195520560996, "learning_rate": 4.539630348177332e-06, "loss": 0.8738, "step": 6101 }, { "epoch": 0.4408402116784366, "grad_norm": 6.323325666075411, "learning_rate": 4.539461191619324e-06, "loss": 0.9039, "step": 6102 }, { "epoch": 0.4409124568786461, "grad_norm": 6.82272970705403, "learning_rate": 4.539292007142676e-06, "loss": 0.8662, "step": 6103 }, { "epoch": 0.44098470207885565, "grad_norm": 6.414851076295902, "learning_rate": 4.539122794749704e-06, "loss": 0.9335, "step": 6104 }, { "epoch": 0.44105694727906514, "grad_norm": 9.393766617141615, "learning_rate": 4.538953554442725e-06, "loss": 0.9171, "step": 6105 }, { "epoch": 0.4411291924792747, "grad_norm": 7.438011344193262, "learning_rate": 4.538784286224054e-06, "loss": 0.9353, "step": 6106 }, { "epoch": 0.44120143767948417, "grad_norm": 7.641683119073571, "learning_rate": 4.538614990096008e-06, "loss": 0.8706, "step": 6107 }, { "epoch": 0.4412736828796937, "grad_norm": 6.72794213584665, "learning_rate": 4.5384456660609075e-06, "loss": 0.8595, "step": 6108 }, { "epoch": 0.4413459280799032, "grad_norm": 7.063835625400395, "learning_rate": 4.538276314121069e-06, "loss": 0.9184, "step": 6109 }, { "epoch": 0.4414181732801127, "grad_norm": 6.785066295460131, "learning_rate": 4.538106934278808e-06, "loss": 0.8905, "step": 6110 }, { "epoch": 0.4414904184803222, "grad_norm": 6.854674934008741, "learning_rate": 4.537937526536447e-06, "loss": 0.9945, "step": 6111 }, { "epoch": 0.4415626636805317, "grad_norm": 5.999690365749159, "learning_rate": 4.537768090896304e-06, "loss": 0.9253, "step": 6112 }, { "epoch": 0.44163490888074125, "grad_norm": 7.257678996686566, "learning_rate": 4.537598627360698e-06, "loss": 0.8992, "step": 6113 }, { "epoch": 0.44170715408095074, "grad_norm": 7.482670408705204, "learning_rate": 4.5374291359319474e-06, "loss": 0.9138, "step": 6114 }, { "epoch": 0.4417793992811603, "grad_norm": 6.596050710785057, "learning_rate": 4.537259616612375e-06, "loss": 0.8442, "step": 6115 }, { "epoch": 0.44185164448136977, "grad_norm": 7.552902563100198, "learning_rate": 4.537090069404301e-06, "loss": 0.9189, "step": 6116 }, { "epoch": 0.4419238896815793, "grad_norm": 10.57482371803334, "learning_rate": 4.536920494310044e-06, "loss": 0.9085, "step": 6117 }, { "epoch": 0.4419961348817888, "grad_norm": 6.3314833030381115, "learning_rate": 4.536750891331928e-06, "loss": 0.8833, "step": 6118 }, { "epoch": 0.4420683800819983, "grad_norm": 7.440698785267815, "learning_rate": 4.5365812604722735e-06, "loss": 0.8726, "step": 6119 }, { "epoch": 0.4421406252822078, "grad_norm": 7.299560836750649, "learning_rate": 4.536411601733403e-06, "loss": 0.9283, "step": 6120 }, { "epoch": 0.4422128704824173, "grad_norm": 5.350281232055466, "learning_rate": 4.536241915117639e-06, "loss": 0.8233, "step": 6121 }, { "epoch": 0.44228511568262685, "grad_norm": 7.36279321161155, "learning_rate": 4.536072200627304e-06, "loss": 1.0036, "step": 6122 }, { "epoch": 0.44235736088283634, "grad_norm": 6.993862185971867, "learning_rate": 4.535902458264722e-06, "loss": 0.8822, "step": 6123 }, { "epoch": 0.4424296060830459, "grad_norm": 7.214699934273189, "learning_rate": 4.535732688032215e-06, "loss": 0.9247, "step": 6124 }, { "epoch": 0.44250185128325537, "grad_norm": 6.247263500046534, "learning_rate": 4.535562889932109e-06, "loss": 0.9227, "step": 6125 }, { "epoch": 0.4425740964834649, "grad_norm": 7.719719829713225, "learning_rate": 4.535393063966727e-06, "loss": 0.9839, "step": 6126 }, { "epoch": 0.4426463416836744, "grad_norm": 7.467191014725057, "learning_rate": 4.5352232101383945e-06, "loss": 0.9508, "step": 6127 }, { "epoch": 0.4427185868838839, "grad_norm": 6.539003287609002, "learning_rate": 4.535053328449437e-06, "loss": 0.8762, "step": 6128 }, { "epoch": 0.4427908320840934, "grad_norm": 5.585866989010828, "learning_rate": 4.534883418902179e-06, "loss": 0.7791, "step": 6129 }, { "epoch": 0.4428630772843029, "grad_norm": 4.916808799801322, "learning_rate": 4.5347134814989465e-06, "loss": 0.8592, "step": 6130 }, { "epoch": 0.44293532248451245, "grad_norm": 6.176004539665965, "learning_rate": 4.534543516242068e-06, "loss": 0.8767, "step": 6131 }, { "epoch": 0.44300756768472194, "grad_norm": 5.518120652254435, "learning_rate": 4.534373523133867e-06, "loss": 0.8716, "step": 6132 }, { "epoch": 0.4430798128849315, "grad_norm": 6.949214084016843, "learning_rate": 4.534203502176673e-06, "loss": 0.8765, "step": 6133 }, { "epoch": 0.44315205808514097, "grad_norm": 5.2176466820592315, "learning_rate": 4.534033453372812e-06, "loss": 0.8502, "step": 6134 }, { "epoch": 0.4432243032853505, "grad_norm": 5.600699939581875, "learning_rate": 4.533863376724612e-06, "loss": 0.8969, "step": 6135 }, { "epoch": 0.44329654848556, "grad_norm": 5.248996457052752, "learning_rate": 4.533693272234402e-06, "loss": 0.8636, "step": 6136 }, { "epoch": 0.4433687936857695, "grad_norm": 5.98980800925981, "learning_rate": 4.53352313990451e-06, "loss": 0.8799, "step": 6137 }, { "epoch": 0.443441038885979, "grad_norm": 5.332762369746799, "learning_rate": 4.533352979737265e-06, "loss": 0.8994, "step": 6138 }, { "epoch": 0.4435132840861885, "grad_norm": 6.665461939995994, "learning_rate": 4.533182791734997e-06, "loss": 0.8881, "step": 6139 }, { "epoch": 0.44358552928639805, "grad_norm": 7.734348859646483, "learning_rate": 4.533012575900035e-06, "loss": 0.8694, "step": 6140 }, { "epoch": 0.44365777448660754, "grad_norm": 6.689479356700393, "learning_rate": 4.532842332234709e-06, "loss": 0.8748, "step": 6141 }, { "epoch": 0.4437300196868171, "grad_norm": 7.079519374503795, "learning_rate": 4.532672060741351e-06, "loss": 0.9392, "step": 6142 }, { "epoch": 0.44380226488702657, "grad_norm": 8.391563320439017, "learning_rate": 4.532501761422289e-06, "loss": 0.8815, "step": 6143 }, { "epoch": 0.44387451008723605, "grad_norm": 7.738747580282492, "learning_rate": 4.532331434279857e-06, "loss": 0.8877, "step": 6144 }, { "epoch": 0.4439467552874456, "grad_norm": 6.605070836440328, "learning_rate": 4.532161079316386e-06, "loss": 0.897, "step": 6145 }, { "epoch": 0.4440190004876551, "grad_norm": 5.977236162503033, "learning_rate": 4.531990696534208e-06, "loss": 0.7865, "step": 6146 }, { "epoch": 0.4440912456878646, "grad_norm": 6.828744541580111, "learning_rate": 4.531820285935654e-06, "loss": 0.9396, "step": 6147 }, { "epoch": 0.4441634908880741, "grad_norm": 7.935229359648931, "learning_rate": 4.531649847523059e-06, "loss": 0.8895, "step": 6148 }, { "epoch": 0.44423573608828365, "grad_norm": 7.196516932310467, "learning_rate": 4.531479381298754e-06, "loss": 0.9263, "step": 6149 }, { "epoch": 0.44430798128849314, "grad_norm": 8.09193806636716, "learning_rate": 4.531308887265074e-06, "loss": 0.9549, "step": 6150 }, { "epoch": 0.4443802264887027, "grad_norm": 7.635521055776933, "learning_rate": 4.531138365424352e-06, "loss": 0.7982, "step": 6151 }, { "epoch": 0.44445247168891217, "grad_norm": 6.599704458382078, "learning_rate": 4.530967815778924e-06, "loss": 0.8944, "step": 6152 }, { "epoch": 0.44452471688912165, "grad_norm": 6.357657385710717, "learning_rate": 4.530797238331122e-06, "loss": 0.8836, "step": 6153 }, { "epoch": 0.4445969620893312, "grad_norm": 7.1001098624312675, "learning_rate": 4.530626633083284e-06, "loss": 0.9502, "step": 6154 }, { "epoch": 0.4446692072895407, "grad_norm": 8.29332753580919, "learning_rate": 4.530456000037744e-06, "loss": 0.9841, "step": 6155 }, { "epoch": 0.4447414524897502, "grad_norm": 5.591765152830908, "learning_rate": 4.530285339196838e-06, "loss": 0.8682, "step": 6156 }, { "epoch": 0.4448136976899597, "grad_norm": 7.823062196098369, "learning_rate": 4.530114650562901e-06, "loss": 0.9151, "step": 6157 }, { "epoch": 0.44488594289016925, "grad_norm": 7.132998343947365, "learning_rate": 4.529943934138271e-06, "loss": 0.8584, "step": 6158 }, { "epoch": 0.44495818809037874, "grad_norm": 7.778855043315315, "learning_rate": 4.529773189925286e-06, "loss": 0.8731, "step": 6159 }, { "epoch": 0.4450304332905883, "grad_norm": 6.371328755341587, "learning_rate": 4.529602417926281e-06, "loss": 0.9377, "step": 6160 }, { "epoch": 0.44510267849079777, "grad_norm": 5.189021427459497, "learning_rate": 4.529431618143595e-06, "loss": 0.8654, "step": 6161 }, { "epoch": 0.44517492369100725, "grad_norm": 6.41193208614943, "learning_rate": 4.529260790579566e-06, "loss": 0.9692, "step": 6162 }, { "epoch": 0.4452471688912168, "grad_norm": 7.940047928980149, "learning_rate": 4.529089935236532e-06, "loss": 0.8753, "step": 6163 }, { "epoch": 0.4453194140914263, "grad_norm": 5.663050151987512, "learning_rate": 4.528919052116832e-06, "loss": 0.8744, "step": 6164 }, { "epoch": 0.4453916592916358, "grad_norm": 5.928933800605543, "learning_rate": 4.5287481412228065e-06, "loss": 0.8196, "step": 6165 }, { "epoch": 0.4454639044918453, "grad_norm": 6.567147489394534, "learning_rate": 4.528577202556794e-06, "loss": 0.7633, "step": 6166 }, { "epoch": 0.44553614969205485, "grad_norm": 7.020439551957238, "learning_rate": 4.528406236121134e-06, "loss": 1.0152, "step": 6167 }, { "epoch": 0.44560839489226434, "grad_norm": 7.4177990452546805, "learning_rate": 4.528235241918168e-06, "loss": 0.9086, "step": 6168 }, { "epoch": 0.4456806400924739, "grad_norm": 6.792760405984114, "learning_rate": 4.528064219950236e-06, "loss": 0.8932, "step": 6169 }, { "epoch": 0.44575288529268337, "grad_norm": 6.279488843294224, "learning_rate": 4.52789317021968e-06, "loss": 0.9091, "step": 6170 }, { "epoch": 0.44582513049289285, "grad_norm": 8.132750116085186, "learning_rate": 4.527722092728841e-06, "loss": 0.9439, "step": 6171 }, { "epoch": 0.4458973756931024, "grad_norm": 5.752039340398006, "learning_rate": 4.527550987480061e-06, "loss": 0.7933, "step": 6172 }, { "epoch": 0.4459696208933119, "grad_norm": 8.24596792004896, "learning_rate": 4.527379854475682e-06, "loss": 0.8677, "step": 6173 }, { "epoch": 0.4460418660935214, "grad_norm": 6.688589702489752, "learning_rate": 4.527208693718047e-06, "loss": 0.9229, "step": 6174 }, { "epoch": 0.4461141112937309, "grad_norm": 7.714891712667366, "learning_rate": 4.527037505209499e-06, "loss": 0.8027, "step": 6175 }, { "epoch": 0.44618635649394045, "grad_norm": 6.433972920198048, "learning_rate": 4.526866288952382e-06, "loss": 0.882, "step": 6176 }, { "epoch": 0.44625860169414994, "grad_norm": 6.302497719836551, "learning_rate": 4.526695044949039e-06, "loss": 0.8577, "step": 6177 }, { "epoch": 0.4463308468943595, "grad_norm": 6.094775304368667, "learning_rate": 4.5265237732018144e-06, "loss": 0.8627, "step": 6178 }, { "epoch": 0.44640309209456897, "grad_norm": 7.1913669964300775, "learning_rate": 4.526352473713053e-06, "loss": 0.9448, "step": 6179 }, { "epoch": 0.44647533729477845, "grad_norm": 5.494394393468362, "learning_rate": 4.526181146485098e-06, "loss": 0.7972, "step": 6180 }, { "epoch": 0.446547582494988, "grad_norm": 7.270948465596182, "learning_rate": 4.5260097915202985e-06, "loss": 0.8411, "step": 6181 }, { "epoch": 0.4466198276951975, "grad_norm": 6.113987579003652, "learning_rate": 4.525838408820997e-06, "loss": 0.8969, "step": 6182 }, { "epoch": 0.446692072895407, "grad_norm": 7.83969266600497, "learning_rate": 4.525666998389541e-06, "loss": 0.8819, "step": 6183 }, { "epoch": 0.4467643180956165, "grad_norm": 5.3890564336435665, "learning_rate": 4.525495560228276e-06, "loss": 0.8676, "step": 6184 }, { "epoch": 0.44683656329582605, "grad_norm": 7.698344947006188, "learning_rate": 4.52532409433955e-06, "loss": 0.9224, "step": 6185 }, { "epoch": 0.44690880849603554, "grad_norm": 11.138446011595274, "learning_rate": 4.525152600725709e-06, "loss": 1.0183, "step": 6186 }, { "epoch": 0.4469810536962451, "grad_norm": 7.188347011912535, "learning_rate": 4.524981079389103e-06, "loss": 0.9561, "step": 6187 }, { "epoch": 0.44705329889645457, "grad_norm": 4.940694427484992, "learning_rate": 4.5248095303320775e-06, "loss": 0.8682, "step": 6188 }, { "epoch": 0.44712554409666405, "grad_norm": 4.866996455231836, "learning_rate": 4.5246379535569815e-06, "loss": 0.9103, "step": 6189 }, { "epoch": 0.4471977892968736, "grad_norm": 7.0189035482077236, "learning_rate": 4.524466349066164e-06, "loss": 0.8297, "step": 6190 }, { "epoch": 0.4472700344970831, "grad_norm": 6.708580091530727, "learning_rate": 4.524294716861974e-06, "loss": 0.9415, "step": 6191 }, { "epoch": 0.4473422796972926, "grad_norm": 9.741275111395758, "learning_rate": 4.524123056946761e-06, "loss": 0.8434, "step": 6192 }, { "epoch": 0.4474145248975021, "grad_norm": 5.254388700548816, "learning_rate": 4.523951369322874e-06, "loss": 0.8971, "step": 6193 }, { "epoch": 0.44748677009771165, "grad_norm": 6.611444099795203, "learning_rate": 4.523779653992666e-06, "loss": 0.9844, "step": 6194 }, { "epoch": 0.44755901529792114, "grad_norm": 6.580346536526133, "learning_rate": 4.523607910958485e-06, "loss": 0.9253, "step": 6195 }, { "epoch": 0.4476312604981307, "grad_norm": 7.007730030607969, "learning_rate": 4.523436140222683e-06, "loss": 0.9449, "step": 6196 }, { "epoch": 0.44770350569834017, "grad_norm": 9.64507825187869, "learning_rate": 4.523264341787612e-06, "loss": 0.8767, "step": 6197 }, { "epoch": 0.44777575089854965, "grad_norm": 5.6915667739673665, "learning_rate": 4.523092515655623e-06, "loss": 0.9531, "step": 6198 }, { "epoch": 0.4478479960987592, "grad_norm": 6.830024786662621, "learning_rate": 4.522920661829068e-06, "loss": 0.8257, "step": 6199 }, { "epoch": 0.4479202412989687, "grad_norm": 6.4155090410741, "learning_rate": 4.522748780310299e-06, "loss": 0.8122, "step": 6200 }, { "epoch": 0.4479924864991782, "grad_norm": 6.770783534722718, "learning_rate": 4.52257687110167e-06, "loss": 0.9523, "step": 6201 }, { "epoch": 0.4480647316993877, "grad_norm": 8.18410322474379, "learning_rate": 4.5224049342055355e-06, "loss": 0.9358, "step": 6202 }, { "epoch": 0.44813697689959725, "grad_norm": 6.039141142457708, "learning_rate": 4.5222329696242465e-06, "loss": 0.8116, "step": 6203 }, { "epoch": 0.44820922209980674, "grad_norm": 5.7697828234124815, "learning_rate": 4.522060977360159e-06, "loss": 0.8405, "step": 6204 }, { "epoch": 0.4482814673000163, "grad_norm": 9.147115584201513, "learning_rate": 4.521888957415627e-06, "loss": 0.9484, "step": 6205 }, { "epoch": 0.44835371250022577, "grad_norm": 6.220564227746824, "learning_rate": 4.521716909793004e-06, "loss": 0.856, "step": 6206 }, { "epoch": 0.44842595770043525, "grad_norm": 7.019885020822379, "learning_rate": 4.5215448344946465e-06, "loss": 0.8909, "step": 6207 }, { "epoch": 0.4484982029006448, "grad_norm": 9.978050939402765, "learning_rate": 4.52137273152291e-06, "loss": 0.9277, "step": 6208 }, { "epoch": 0.4485704481008543, "grad_norm": 6.49540724255085, "learning_rate": 4.52120060088015e-06, "loss": 0.9275, "step": 6209 }, { "epoch": 0.4486426933010638, "grad_norm": 6.242095832008394, "learning_rate": 4.521028442568723e-06, "loss": 0.8101, "step": 6210 }, { "epoch": 0.4487149385012733, "grad_norm": 5.825260389017259, "learning_rate": 4.5208562565909875e-06, "loss": 0.98, "step": 6211 }, { "epoch": 0.44878718370148285, "grad_norm": 6.366666845085315, "learning_rate": 4.520684042949297e-06, "loss": 0.9113, "step": 6212 }, { "epoch": 0.44885942890169234, "grad_norm": 7.602848081488628, "learning_rate": 4.520511801646013e-06, "loss": 0.8322, "step": 6213 }, { "epoch": 0.4489316741019019, "grad_norm": 7.981629976623837, "learning_rate": 4.520339532683489e-06, "loss": 0.8635, "step": 6214 }, { "epoch": 0.44900391930211137, "grad_norm": 9.095396751402859, "learning_rate": 4.520167236064087e-06, "loss": 0.8675, "step": 6215 }, { "epoch": 0.44907616450232085, "grad_norm": 7.634751634836248, "learning_rate": 4.519994911790163e-06, "loss": 0.8403, "step": 6216 }, { "epoch": 0.4491484097025304, "grad_norm": 6.6685022052464635, "learning_rate": 4.5198225598640775e-06, "loss": 0.8453, "step": 6217 }, { "epoch": 0.4492206549027399, "grad_norm": 8.494879582782593, "learning_rate": 4.51965018028819e-06, "loss": 0.9268, "step": 6218 }, { "epoch": 0.4492929001029494, "grad_norm": 6.616573805201431, "learning_rate": 4.519477773064858e-06, "loss": 0.8786, "step": 6219 }, { "epoch": 0.4493651453031589, "grad_norm": 8.678062966784047, "learning_rate": 4.519305338196446e-06, "loss": 0.907, "step": 6220 }, { "epoch": 0.44943739050336845, "grad_norm": 7.902455501171648, "learning_rate": 4.51913287568531e-06, "loss": 0.9519, "step": 6221 }, { "epoch": 0.44950963570357794, "grad_norm": 5.139252360552015, "learning_rate": 4.518960385533813e-06, "loss": 0.8221, "step": 6222 }, { "epoch": 0.4495818809037875, "grad_norm": 8.663607668826854, "learning_rate": 4.518787867744317e-06, "loss": 0.9122, "step": 6223 }, { "epoch": 0.44965412610399697, "grad_norm": 6.9938016424241125, "learning_rate": 4.518615322319181e-06, "loss": 0.9225, "step": 6224 }, { "epoch": 0.44972637130420645, "grad_norm": 6.625418883702485, "learning_rate": 4.518442749260768e-06, "loss": 0.9358, "step": 6225 }, { "epoch": 0.449798616504416, "grad_norm": 5.760148091001953, "learning_rate": 4.518270148571443e-06, "loss": 0.8724, "step": 6226 }, { "epoch": 0.4498708617046255, "grad_norm": 5.976251332279481, "learning_rate": 4.5180975202535656e-06, "loss": 0.8015, "step": 6227 }, { "epoch": 0.449943106904835, "grad_norm": 6.018772956273556, "learning_rate": 4.517924864309501e-06, "loss": 0.874, "step": 6228 }, { "epoch": 0.4500153521050445, "grad_norm": 5.619208681863235, "learning_rate": 4.517752180741611e-06, "loss": 0.9214, "step": 6229 }, { "epoch": 0.45008759730525405, "grad_norm": 7.489845522419473, "learning_rate": 4.517579469552261e-06, "loss": 0.9379, "step": 6230 }, { "epoch": 0.45015984250546354, "grad_norm": 6.315925197648792, "learning_rate": 4.517406730743814e-06, "loss": 0.8289, "step": 6231 }, { "epoch": 0.4502320877056731, "grad_norm": 6.0214745555182665, "learning_rate": 4.517233964318635e-06, "loss": 0.8802, "step": 6232 }, { "epoch": 0.45030433290588257, "grad_norm": 6.420665172212464, "learning_rate": 4.5170611702790905e-06, "loss": 0.9613, "step": 6233 }, { "epoch": 0.45037657810609205, "grad_norm": 5.296656421213742, "learning_rate": 4.516888348627543e-06, "loss": 0.8017, "step": 6234 }, { "epoch": 0.4504488233063016, "grad_norm": 6.6893376472696975, "learning_rate": 4.516715499366361e-06, "loss": 0.8587, "step": 6235 }, { "epoch": 0.4505210685065111, "grad_norm": 5.308832855116964, "learning_rate": 4.51654262249791e-06, "loss": 0.8902, "step": 6236 }, { "epoch": 0.4505933137067206, "grad_norm": 6.961353978842765, "learning_rate": 4.516369718024556e-06, "loss": 0.8864, "step": 6237 }, { "epoch": 0.4506655589069301, "grad_norm": 5.658555714764997, "learning_rate": 4.5161967859486665e-06, "loss": 0.7665, "step": 6238 }, { "epoch": 0.45073780410713965, "grad_norm": 6.520948545110483, "learning_rate": 4.516023826272608e-06, "loss": 0.8931, "step": 6239 }, { "epoch": 0.45081004930734914, "grad_norm": 6.754400443711041, "learning_rate": 4.515850838998748e-06, "loss": 0.8645, "step": 6240 }, { "epoch": 0.4508822945075587, "grad_norm": 6.112660024845624, "learning_rate": 4.515677824129456e-06, "loss": 0.8089, "step": 6241 }, { "epoch": 0.45095453970776816, "grad_norm": 6.817880168368639, "learning_rate": 4.515504781667101e-06, "loss": 0.9194, "step": 6242 }, { "epoch": 0.45102678490797765, "grad_norm": 6.188919983349496, "learning_rate": 4.515331711614048e-06, "loss": 0.8037, "step": 6243 }, { "epoch": 0.4510990301081872, "grad_norm": 5.545496293776338, "learning_rate": 4.51515861397267e-06, "loss": 0.8148, "step": 6244 }, { "epoch": 0.4511712753083967, "grad_norm": 7.064053254943077, "learning_rate": 4.514985488745335e-06, "loss": 0.9026, "step": 6245 }, { "epoch": 0.4512435205086062, "grad_norm": 8.226725258806612, "learning_rate": 4.514812335934413e-06, "loss": 0.976, "step": 6246 }, { "epoch": 0.4513157657088157, "grad_norm": 5.311038006838494, "learning_rate": 4.514639155542275e-06, "loss": 0.8527, "step": 6247 }, { "epoch": 0.45138801090902525, "grad_norm": 7.007440155241918, "learning_rate": 4.514465947571291e-06, "loss": 0.9033, "step": 6248 }, { "epoch": 0.45146025610923474, "grad_norm": 6.585648463593783, "learning_rate": 4.514292712023832e-06, "loss": 0.7723, "step": 6249 }, { "epoch": 0.4515325013094443, "grad_norm": 6.201576942971025, "learning_rate": 4.51411944890227e-06, "loss": 0.9084, "step": 6250 }, { "epoch": 0.45160474650965376, "grad_norm": 6.106451481410805, "learning_rate": 4.5139461582089775e-06, "loss": 0.8142, "step": 6251 }, { "epoch": 0.45167699170986325, "grad_norm": 5.414363880218708, "learning_rate": 4.513772839946324e-06, "loss": 0.8422, "step": 6252 }, { "epoch": 0.4517492369100728, "grad_norm": 6.9124631249546225, "learning_rate": 4.513599494116685e-06, "loss": 0.9021, "step": 6253 }, { "epoch": 0.4518214821102823, "grad_norm": 6.403858464141737, "learning_rate": 4.5134261207224324e-06, "loss": 0.9085, "step": 6254 }, { "epoch": 0.4518937273104918, "grad_norm": 8.270666881458986, "learning_rate": 4.5132527197659395e-06, "loss": 0.9188, "step": 6255 }, { "epoch": 0.4519659725107013, "grad_norm": 6.4595584158654775, "learning_rate": 4.51307929124958e-06, "loss": 0.8419, "step": 6256 }, { "epoch": 0.45203821771091085, "grad_norm": 7.273547637793941, "learning_rate": 4.5129058351757285e-06, "loss": 0.849, "step": 6257 }, { "epoch": 0.45211046291112034, "grad_norm": 6.910324620793592, "learning_rate": 4.512732351546758e-06, "loss": 0.8812, "step": 6258 }, { "epoch": 0.4521827081113299, "grad_norm": 7.254166030956798, "learning_rate": 4.512558840365045e-06, "loss": 0.9098, "step": 6259 }, { "epoch": 0.45225495331153936, "grad_norm": 6.91431518782602, "learning_rate": 4.512385301632964e-06, "loss": 0.9458, "step": 6260 }, { "epoch": 0.45232719851174885, "grad_norm": 6.5694755175310195, "learning_rate": 4.512211735352891e-06, "loss": 0.8922, "step": 6261 }, { "epoch": 0.4523994437119584, "grad_norm": 6.9404502300464594, "learning_rate": 4.512038141527202e-06, "loss": 0.9862, "step": 6262 }, { "epoch": 0.4524716889121679, "grad_norm": 5.948895892113517, "learning_rate": 4.511864520158272e-06, "loss": 0.9084, "step": 6263 }, { "epoch": 0.4525439341123774, "grad_norm": 7.369358038304864, "learning_rate": 4.51169087124848e-06, "loss": 0.9262, "step": 6264 }, { "epoch": 0.4526161793125869, "grad_norm": 7.038122184176297, "learning_rate": 4.511517194800202e-06, "loss": 0.7959, "step": 6265 }, { "epoch": 0.45268842451279645, "grad_norm": 6.359724105512926, "learning_rate": 4.511343490815814e-06, "loss": 0.8856, "step": 6266 }, { "epoch": 0.45276066971300594, "grad_norm": 6.0978792725270035, "learning_rate": 4.511169759297696e-06, "loss": 0.9006, "step": 6267 }, { "epoch": 0.4528329149132155, "grad_norm": 6.823239601782285, "learning_rate": 4.510996000248226e-06, "loss": 0.994, "step": 6268 }, { "epoch": 0.45290516011342496, "grad_norm": 6.100651202853426, "learning_rate": 4.510822213669782e-06, "loss": 0.9572, "step": 6269 }, { "epoch": 0.45297740531363445, "grad_norm": 7.293979241724707, "learning_rate": 4.5106483995647435e-06, "loss": 0.97, "step": 6270 }, { "epoch": 0.453049650513844, "grad_norm": 8.133322810598768, "learning_rate": 4.510474557935489e-06, "loss": 1.0196, "step": 6271 }, { "epoch": 0.4531218957140535, "grad_norm": 6.927093398772841, "learning_rate": 4.510300688784399e-06, "loss": 0.8924, "step": 6272 }, { "epoch": 0.453194140914263, "grad_norm": 7.06416800727515, "learning_rate": 4.510126792113853e-06, "loss": 0.8992, "step": 6273 }, { "epoch": 0.4532663861144725, "grad_norm": 6.24845104096823, "learning_rate": 4.5099528679262325e-06, "loss": 0.9025, "step": 6274 }, { "epoch": 0.45333863131468205, "grad_norm": 6.346775179237189, "learning_rate": 4.509778916223918e-06, "loss": 1.0398, "step": 6275 }, { "epoch": 0.45341087651489154, "grad_norm": 6.214681089755393, "learning_rate": 4.509604937009291e-06, "loss": 0.8336, "step": 6276 }, { "epoch": 0.4534831217151011, "grad_norm": 7.174896866858006, "learning_rate": 4.5094309302847315e-06, "loss": 0.834, "step": 6277 }, { "epoch": 0.45355536691531056, "grad_norm": 5.822690500997813, "learning_rate": 4.509256896052624e-06, "loss": 0.8548, "step": 6278 }, { "epoch": 0.45362761211552005, "grad_norm": 5.841115102518452, "learning_rate": 4.5090828343153495e-06, "loss": 0.8776, "step": 6279 }, { "epoch": 0.4536998573157296, "grad_norm": 5.691177017947111, "learning_rate": 4.50890874507529e-06, "loss": 0.817, "step": 6280 }, { "epoch": 0.4537721025159391, "grad_norm": 7.0001465918314105, "learning_rate": 4.508734628334831e-06, "loss": 0.894, "step": 6281 }, { "epoch": 0.4538443477161486, "grad_norm": 6.202236313663592, "learning_rate": 4.508560484096353e-06, "loss": 0.9569, "step": 6282 }, { "epoch": 0.4539165929163581, "grad_norm": 6.1121978872329805, "learning_rate": 4.508386312362243e-06, "loss": 0.7981, "step": 6283 }, { "epoch": 0.45398883811656765, "grad_norm": 6.759231471329253, "learning_rate": 4.508212113134883e-06, "loss": 0.9524, "step": 6284 }, { "epoch": 0.45406108331677714, "grad_norm": 6.008154414136785, "learning_rate": 4.508037886416658e-06, "loss": 0.8478, "step": 6285 }, { "epoch": 0.4541333285169867, "grad_norm": 6.61759592572572, "learning_rate": 4.507863632209955e-06, "loss": 0.8172, "step": 6286 }, { "epoch": 0.45420557371719616, "grad_norm": 7.391817476554021, "learning_rate": 4.507689350517157e-06, "loss": 0.982, "step": 6287 }, { "epoch": 0.45427781891740565, "grad_norm": 7.31391775619168, "learning_rate": 4.50751504134065e-06, "loss": 0.9557, "step": 6288 }, { "epoch": 0.4543500641176152, "grad_norm": 4.948934135894405, "learning_rate": 4.507340704682822e-06, "loss": 0.8358, "step": 6289 }, { "epoch": 0.4544223093178247, "grad_norm": 5.998163896001094, "learning_rate": 4.507166340546058e-06, "loss": 0.7975, "step": 6290 }, { "epoch": 0.4544945545180342, "grad_norm": 7.852054895552098, "learning_rate": 4.5069919489327444e-06, "loss": 0.9864, "step": 6291 }, { "epoch": 0.4545667997182437, "grad_norm": 6.369255319900288, "learning_rate": 4.5068175298452704e-06, "loss": 0.8543, "step": 6292 }, { "epoch": 0.45463904491845325, "grad_norm": 6.317533695567354, "learning_rate": 4.506643083286022e-06, "loss": 0.9242, "step": 6293 }, { "epoch": 0.45471129011866274, "grad_norm": 6.724238651104802, "learning_rate": 4.506468609257389e-06, "loss": 0.9896, "step": 6294 }, { "epoch": 0.4547835353188723, "grad_norm": 5.941676548133255, "learning_rate": 4.506294107761757e-06, "loss": 0.9677, "step": 6295 }, { "epoch": 0.45485578051908176, "grad_norm": 5.3921985927877785, "learning_rate": 4.506119578801518e-06, "loss": 0.8729, "step": 6296 }, { "epoch": 0.45492802571929125, "grad_norm": 7.7066226470024475, "learning_rate": 4.505945022379058e-06, "loss": 0.8978, "step": 6297 }, { "epoch": 0.4550002709195008, "grad_norm": 5.884742288578024, "learning_rate": 4.505770438496769e-06, "loss": 0.9168, "step": 6298 }, { "epoch": 0.4550725161197103, "grad_norm": 7.202571293200962, "learning_rate": 4.50559582715704e-06, "loss": 0.8641, "step": 6299 }, { "epoch": 0.4551447613199198, "grad_norm": 5.2674852475474365, "learning_rate": 4.505421188362261e-06, "loss": 0.895, "step": 6300 }, { "epoch": 0.4552170065201293, "grad_norm": 6.136913624272963, "learning_rate": 4.505246522114824e-06, "loss": 0.8583, "step": 6301 }, { "epoch": 0.45528925172033885, "grad_norm": 6.443697418383155, "learning_rate": 4.505071828417119e-06, "loss": 0.902, "step": 6302 }, { "epoch": 0.45536149692054834, "grad_norm": 6.360966813599245, "learning_rate": 4.504897107271537e-06, "loss": 0.7956, "step": 6303 }, { "epoch": 0.4554337421207579, "grad_norm": 6.338452763013627, "learning_rate": 4.50472235868047e-06, "loss": 0.8242, "step": 6304 }, { "epoch": 0.45550598732096736, "grad_norm": 6.8803102272529975, "learning_rate": 4.5045475826463105e-06, "loss": 0.8627, "step": 6305 }, { "epoch": 0.45557823252117685, "grad_norm": 6.185288467526286, "learning_rate": 4.504372779171452e-06, "loss": 0.8986, "step": 6306 }, { "epoch": 0.4556504777213864, "grad_norm": 5.782616036352142, "learning_rate": 4.5041979482582855e-06, "loss": 0.9138, "step": 6307 }, { "epoch": 0.4557227229215959, "grad_norm": 5.702974500368686, "learning_rate": 4.504023089909206e-06, "loss": 0.8729, "step": 6308 }, { "epoch": 0.4557949681218054, "grad_norm": 7.443937705081156, "learning_rate": 4.503848204126606e-06, "loss": 0.8947, "step": 6309 }, { "epoch": 0.4558672133220149, "grad_norm": 5.517995179204989, "learning_rate": 4.50367329091288e-06, "loss": 0.8739, "step": 6310 }, { "epoch": 0.45593945852222445, "grad_norm": 7.073479144540681, "learning_rate": 4.503498350270422e-06, "loss": 0.8834, "step": 6311 }, { "epoch": 0.45601170372243394, "grad_norm": 5.626161074017179, "learning_rate": 4.503323382201628e-06, "loss": 0.8714, "step": 6312 }, { "epoch": 0.4560839489226435, "grad_norm": 7.20345333782585, "learning_rate": 4.503148386708892e-06, "loss": 0.9213, "step": 6313 }, { "epoch": 0.45615619412285296, "grad_norm": 8.431602847306497, "learning_rate": 4.502973363794609e-06, "loss": 0.8787, "step": 6314 }, { "epoch": 0.45622843932306245, "grad_norm": 5.939985859144442, "learning_rate": 4.502798313461178e-06, "loss": 0.8513, "step": 6315 }, { "epoch": 0.456300684523272, "grad_norm": 6.19203035596558, "learning_rate": 4.502623235710991e-06, "loss": 0.8283, "step": 6316 }, { "epoch": 0.4563729297234815, "grad_norm": 7.220985495814385, "learning_rate": 4.502448130546448e-06, "loss": 0.9485, "step": 6317 }, { "epoch": 0.456445174923691, "grad_norm": 9.072817907368881, "learning_rate": 4.5022729979699445e-06, "loss": 0.905, "step": 6318 }, { "epoch": 0.4565174201239005, "grad_norm": 6.751319191205019, "learning_rate": 4.502097837983879e-06, "loss": 0.9533, "step": 6319 }, { "epoch": 0.45658966532411005, "grad_norm": 5.957036052764457, "learning_rate": 4.501922650590648e-06, "loss": 0.9422, "step": 6320 }, { "epoch": 0.45666191052431954, "grad_norm": 6.480449619000544, "learning_rate": 4.50174743579265e-06, "loss": 0.8369, "step": 6321 }, { "epoch": 0.4567341557245291, "grad_norm": 6.1629512350841615, "learning_rate": 4.501572193592284e-06, "loss": 0.9178, "step": 6322 }, { "epoch": 0.45680640092473856, "grad_norm": 6.407944171467061, "learning_rate": 4.501396923991949e-06, "loss": 0.9337, "step": 6323 }, { "epoch": 0.45687864612494805, "grad_norm": 5.1932126498288875, "learning_rate": 4.5012216269940445e-06, "loss": 0.9388, "step": 6324 }, { "epoch": 0.4569508913251576, "grad_norm": 7.098595934373357, "learning_rate": 4.501046302600969e-06, "loss": 0.8906, "step": 6325 }, { "epoch": 0.4570231365253671, "grad_norm": 5.356492048423047, "learning_rate": 4.500870950815124e-06, "loss": 0.9426, "step": 6326 }, { "epoch": 0.4570953817255766, "grad_norm": 7.334830853696883, "learning_rate": 4.5006955716389086e-06, "loss": 0.9633, "step": 6327 }, { "epoch": 0.4571676269257861, "grad_norm": 7.012280182954601, "learning_rate": 4.500520165074725e-06, "loss": 0.9877, "step": 6328 }, { "epoch": 0.45723987212599565, "grad_norm": 6.555219408170699, "learning_rate": 4.5003447311249734e-06, "loss": 0.7926, "step": 6329 }, { "epoch": 0.45731211732620514, "grad_norm": 6.170736396160383, "learning_rate": 4.500169269792055e-06, "loss": 0.9285, "step": 6330 }, { "epoch": 0.4573843625264147, "grad_norm": 6.230943849440694, "learning_rate": 4.499993781078374e-06, "loss": 0.8224, "step": 6331 }, { "epoch": 0.45745660772662416, "grad_norm": 6.2750304445538845, "learning_rate": 4.499818264986329e-06, "loss": 0.8951, "step": 6332 }, { "epoch": 0.45752885292683365, "grad_norm": 6.3127396037640935, "learning_rate": 4.499642721518326e-06, "loss": 0.9234, "step": 6333 }, { "epoch": 0.4576010981270432, "grad_norm": 5.5155715723664995, "learning_rate": 4.499467150676766e-06, "loss": 0.7933, "step": 6334 }, { "epoch": 0.4576733433272527, "grad_norm": 6.2866090905528615, "learning_rate": 4.499291552464053e-06, "loss": 0.8262, "step": 6335 }, { "epoch": 0.4577455885274622, "grad_norm": 7.300993122845274, "learning_rate": 4.4991159268825925e-06, "loss": 0.9554, "step": 6336 }, { "epoch": 0.4578178337276717, "grad_norm": 6.020685460299785, "learning_rate": 4.498940273934786e-06, "loss": 0.9465, "step": 6337 }, { "epoch": 0.45789007892788125, "grad_norm": 6.460230132453318, "learning_rate": 4.498764593623039e-06, "loss": 0.801, "step": 6338 }, { "epoch": 0.45796232412809074, "grad_norm": 8.319210699495335, "learning_rate": 4.4985888859497575e-06, "loss": 0.939, "step": 6339 }, { "epoch": 0.4580345693283003, "grad_norm": 6.52678531470045, "learning_rate": 4.498413150917345e-06, "loss": 0.9729, "step": 6340 }, { "epoch": 0.45810681452850976, "grad_norm": 6.6748417639103765, "learning_rate": 4.498237388528209e-06, "loss": 0.8403, "step": 6341 }, { "epoch": 0.45817905972871925, "grad_norm": 7.5604177083063515, "learning_rate": 4.498061598784754e-06, "loss": 0.9156, "step": 6342 }, { "epoch": 0.4582513049289288, "grad_norm": 6.808533239834383, "learning_rate": 4.497885781689388e-06, "loss": 0.9234, "step": 6343 }, { "epoch": 0.4583235501291383, "grad_norm": 6.2837449617746985, "learning_rate": 4.497709937244516e-06, "loss": 0.8794, "step": 6344 }, { "epoch": 0.4583957953293478, "grad_norm": 7.744182987746679, "learning_rate": 4.497534065452547e-06, "loss": 1.0452, "step": 6345 }, { "epoch": 0.4584680405295573, "grad_norm": 6.481778353396434, "learning_rate": 4.497358166315887e-06, "loss": 0.9663, "step": 6346 }, { "epoch": 0.45854028572976685, "grad_norm": 6.964426658238172, "learning_rate": 4.497182239836944e-06, "loss": 0.876, "step": 6347 }, { "epoch": 0.45861253092997634, "grad_norm": 7.657868879788246, "learning_rate": 4.497006286018128e-06, "loss": 0.8959, "step": 6348 }, { "epoch": 0.4586847761301859, "grad_norm": 6.356695186962418, "learning_rate": 4.4968303048618455e-06, "loss": 0.9085, "step": 6349 }, { "epoch": 0.45875702133039536, "grad_norm": 6.071354014475832, "learning_rate": 4.496654296370507e-06, "loss": 0.8736, "step": 6350 }, { "epoch": 0.45882926653060485, "grad_norm": 6.814824538907348, "learning_rate": 4.496478260546522e-06, "loss": 0.9647, "step": 6351 }, { "epoch": 0.4589015117308144, "grad_norm": 6.24144342258379, "learning_rate": 4.496302197392299e-06, "loss": 0.9202, "step": 6352 }, { "epoch": 0.4589737569310239, "grad_norm": 5.702220270168585, "learning_rate": 4.496126106910249e-06, "loss": 1.0396, "step": 6353 }, { "epoch": 0.4590460021312334, "grad_norm": 7.16559986188283, "learning_rate": 4.495949989102783e-06, "loss": 0.8792, "step": 6354 }, { "epoch": 0.4591182473314429, "grad_norm": 5.167484136740702, "learning_rate": 4.495773843972311e-06, "loss": 0.8392, "step": 6355 }, { "epoch": 0.45919049253165245, "grad_norm": 6.620932427975709, "learning_rate": 4.495597671521245e-06, "loss": 0.9156, "step": 6356 }, { "epoch": 0.45926273773186194, "grad_norm": 5.904500066281212, "learning_rate": 4.4954214717519965e-06, "loss": 0.9036, "step": 6357 }, { "epoch": 0.4593349829320714, "grad_norm": 6.098596767932915, "learning_rate": 4.495245244666977e-06, "loss": 0.9322, "step": 6358 }, { "epoch": 0.45940722813228096, "grad_norm": 5.2860639854135085, "learning_rate": 4.495068990268599e-06, "loss": 0.9454, "step": 6359 }, { "epoch": 0.45947947333249045, "grad_norm": 6.1991799827474035, "learning_rate": 4.4948927085592765e-06, "loss": 0.8238, "step": 6360 }, { "epoch": 0.4595517185327, "grad_norm": 6.087715173321937, "learning_rate": 4.494716399541421e-06, "loss": 0.8949, "step": 6361 }, { "epoch": 0.4596239637329095, "grad_norm": 5.260935520481047, "learning_rate": 4.494540063217447e-06, "loss": 0.8627, "step": 6362 }, { "epoch": 0.459696208933119, "grad_norm": 6.717182491111618, "learning_rate": 4.494363699589768e-06, "loss": 1.0549, "step": 6363 }, { "epoch": 0.4597684541333285, "grad_norm": 7.748860675373988, "learning_rate": 4.494187308660799e-06, "loss": 0.885, "step": 6364 }, { "epoch": 0.45984069933353805, "grad_norm": 7.292919843251254, "learning_rate": 4.494010890432954e-06, "loss": 0.9774, "step": 6365 }, { "epoch": 0.45991294453374754, "grad_norm": 6.585715076315271, "learning_rate": 4.493834444908648e-06, "loss": 0.9206, "step": 6366 }, { "epoch": 0.459985189733957, "grad_norm": 6.306671473290922, "learning_rate": 4.4936579720902965e-06, "loss": 0.9827, "step": 6367 }, { "epoch": 0.46005743493416656, "grad_norm": 6.901270387326117, "learning_rate": 4.493481471980316e-06, "loss": 0.8804, "step": 6368 }, { "epoch": 0.46012968013437605, "grad_norm": 5.4949020688022765, "learning_rate": 4.493304944581121e-06, "loss": 0.8008, "step": 6369 }, { "epoch": 0.4602019253345856, "grad_norm": 8.590623099450726, "learning_rate": 4.49312838989513e-06, "loss": 0.9922, "step": 6370 }, { "epoch": 0.4602741705347951, "grad_norm": 6.479610153998675, "learning_rate": 4.492951807924758e-06, "loss": 0.8164, "step": 6371 }, { "epoch": 0.4603464157350046, "grad_norm": 6.351993410102251, "learning_rate": 4.4927751986724235e-06, "loss": 0.8335, "step": 6372 }, { "epoch": 0.4604186609352141, "grad_norm": 5.764157653833902, "learning_rate": 4.492598562140544e-06, "loss": 0.8227, "step": 6373 }, { "epoch": 0.46049090613542365, "grad_norm": 5.579403856215023, "learning_rate": 4.492421898331536e-06, "loss": 0.7902, "step": 6374 }, { "epoch": 0.46056315133563314, "grad_norm": 5.45852000764201, "learning_rate": 4.492245207247821e-06, "loss": 0.8584, "step": 6375 }, { "epoch": 0.4606353965358426, "grad_norm": 5.702090485688773, "learning_rate": 4.492068488891815e-06, "loss": 0.8968, "step": 6376 }, { "epoch": 0.46070764173605216, "grad_norm": 7.036425775150974, "learning_rate": 4.491891743265939e-06, "loss": 0.9068, "step": 6377 }, { "epoch": 0.46077988693626165, "grad_norm": 6.6534495305456085, "learning_rate": 4.491714970372611e-06, "loss": 0.8857, "step": 6378 }, { "epoch": 0.4608521321364712, "grad_norm": 6.686885342025282, "learning_rate": 4.491538170214251e-06, "loss": 0.8231, "step": 6379 }, { "epoch": 0.4609243773366807, "grad_norm": 9.5448829083382, "learning_rate": 4.4913613427932816e-06, "loss": 1.0138, "step": 6380 }, { "epoch": 0.4609966225368902, "grad_norm": 7.15730404483319, "learning_rate": 4.49118448811212e-06, "loss": 0.9243, "step": 6381 }, { "epoch": 0.4610688677370997, "grad_norm": 7.0528582358965854, "learning_rate": 4.491007606173189e-06, "loss": 0.9342, "step": 6382 }, { "epoch": 0.46114111293730925, "grad_norm": 7.930171436895591, "learning_rate": 4.49083069697891e-06, "loss": 0.8767, "step": 6383 }, { "epoch": 0.46121335813751874, "grad_norm": 7.648175649086346, "learning_rate": 4.490653760531705e-06, "loss": 0.9223, "step": 6384 }, { "epoch": 0.4612856033377282, "grad_norm": 5.786379626196919, "learning_rate": 4.490476796833995e-06, "loss": 0.9062, "step": 6385 }, { "epoch": 0.46135784853793776, "grad_norm": 6.030544417621049, "learning_rate": 4.490299805888204e-06, "loss": 0.8931, "step": 6386 }, { "epoch": 0.46143009373814725, "grad_norm": 6.929865304758083, "learning_rate": 4.4901227876967525e-06, "loss": 0.9567, "step": 6387 }, { "epoch": 0.4615023389383568, "grad_norm": 6.906259942371237, "learning_rate": 4.489945742262066e-06, "loss": 0.8744, "step": 6388 }, { "epoch": 0.4615745841385663, "grad_norm": 6.873935408962743, "learning_rate": 4.489768669586568e-06, "loss": 0.767, "step": 6389 }, { "epoch": 0.4616468293387758, "grad_norm": 6.2206298440542405, "learning_rate": 4.489591569672682e-06, "loss": 0.8997, "step": 6390 }, { "epoch": 0.4617190745389853, "grad_norm": 5.644646476440676, "learning_rate": 4.489414442522831e-06, "loss": 0.9193, "step": 6391 }, { "epoch": 0.46179131973919485, "grad_norm": 5.7766454557076, "learning_rate": 4.489237288139442e-06, "loss": 0.8163, "step": 6392 }, { "epoch": 0.46186356493940434, "grad_norm": 5.702023250814752, "learning_rate": 4.489060106524938e-06, "loss": 0.808, "step": 6393 }, { "epoch": 0.4619358101396138, "grad_norm": 7.2897273799956475, "learning_rate": 4.488882897681747e-06, "loss": 0.9502, "step": 6394 }, { "epoch": 0.46200805533982336, "grad_norm": 6.579499814580026, "learning_rate": 4.488705661612293e-06, "loss": 0.8737, "step": 6395 }, { "epoch": 0.46208030054003285, "grad_norm": 6.793499970243845, "learning_rate": 4.488528398319002e-06, "loss": 0.8723, "step": 6396 }, { "epoch": 0.4621525457402424, "grad_norm": 6.57270557127058, "learning_rate": 4.488351107804302e-06, "loss": 0.9206, "step": 6397 }, { "epoch": 0.4622247909404519, "grad_norm": 6.865054896302581, "learning_rate": 4.488173790070618e-06, "loss": 0.8634, "step": 6398 }, { "epoch": 0.4622970361406614, "grad_norm": 7.632273722348509, "learning_rate": 4.487996445120379e-06, "loss": 0.8621, "step": 6399 }, { "epoch": 0.4623692813408709, "grad_norm": 9.034359512131095, "learning_rate": 4.487819072956012e-06, "loss": 0.9834, "step": 6400 }, { "epoch": 0.46244152654108045, "grad_norm": 7.79886236453165, "learning_rate": 4.487641673579946e-06, "loss": 0.8949, "step": 6401 }, { "epoch": 0.46251377174128994, "grad_norm": 6.138755766210099, "learning_rate": 4.48746424699461e-06, "loss": 0.8522, "step": 6402 }, { "epoch": 0.4625860169414994, "grad_norm": 8.427315902487132, "learning_rate": 4.487286793202429e-06, "loss": 0.9731, "step": 6403 }, { "epoch": 0.46265826214170896, "grad_norm": 6.6074348650293535, "learning_rate": 4.487109312205836e-06, "loss": 0.7772, "step": 6404 }, { "epoch": 0.46273050734191845, "grad_norm": 5.269148284592068, "learning_rate": 4.48693180400726e-06, "loss": 0.8452, "step": 6405 }, { "epoch": 0.462802752542128, "grad_norm": 6.717464732172289, "learning_rate": 4.486754268609129e-06, "loss": 0.8279, "step": 6406 }, { "epoch": 0.4628749977423375, "grad_norm": 5.871713246487316, "learning_rate": 4.486576706013876e-06, "loss": 0.9118, "step": 6407 }, { "epoch": 0.462947242942547, "grad_norm": 7.467181052927354, "learning_rate": 4.48639911622393e-06, "loss": 0.869, "step": 6408 }, { "epoch": 0.4630194881427565, "grad_norm": 6.585908539008091, "learning_rate": 4.486221499241722e-06, "loss": 0.9367, "step": 6409 }, { "epoch": 0.46309173334296605, "grad_norm": 8.004850347717655, "learning_rate": 4.486043855069685e-06, "loss": 0.8752, "step": 6410 }, { "epoch": 0.46316397854317554, "grad_norm": 6.725528296385238, "learning_rate": 4.485866183710248e-06, "loss": 0.8256, "step": 6411 }, { "epoch": 0.463236223743385, "grad_norm": 8.467122840106498, "learning_rate": 4.485688485165845e-06, "loss": 0.925, "step": 6412 }, { "epoch": 0.46330846894359456, "grad_norm": 7.883854081722008, "learning_rate": 4.48551075943891e-06, "loss": 0.9308, "step": 6413 }, { "epoch": 0.46338071414380405, "grad_norm": 7.4062355781768865, "learning_rate": 4.485333006531874e-06, "loss": 0.9383, "step": 6414 }, { "epoch": 0.4634529593440136, "grad_norm": 5.740704860142156, "learning_rate": 4.48515522644717e-06, "loss": 0.831, "step": 6415 }, { "epoch": 0.4635252045442231, "grad_norm": 5.751063372927966, "learning_rate": 4.484977419187232e-06, "loss": 0.8431, "step": 6416 }, { "epoch": 0.4635974497444326, "grad_norm": 6.581392541339798, "learning_rate": 4.4847995847544954e-06, "loss": 0.893, "step": 6417 }, { "epoch": 0.4636696949446421, "grad_norm": 6.705619152364138, "learning_rate": 4.484621723151393e-06, "loss": 1.0039, "step": 6418 }, { "epoch": 0.46374194014485165, "grad_norm": 6.587296785698295, "learning_rate": 4.4844438343803595e-06, "loss": 0.8979, "step": 6419 }, { "epoch": 0.46381418534506114, "grad_norm": 7.334524260254892, "learning_rate": 4.484265918443832e-06, "loss": 0.9199, "step": 6420 }, { "epoch": 0.4638864305452706, "grad_norm": 5.911765777614587, "learning_rate": 4.484087975344244e-06, "loss": 0.9327, "step": 6421 }, { "epoch": 0.46395867574548016, "grad_norm": 5.9529837043394345, "learning_rate": 4.483910005084033e-06, "loss": 0.7932, "step": 6422 }, { "epoch": 0.46403092094568965, "grad_norm": 5.17805006287244, "learning_rate": 4.483732007665633e-06, "loss": 0.8756, "step": 6423 }, { "epoch": 0.4641031661458992, "grad_norm": 8.076603818180006, "learning_rate": 4.483553983091483e-06, "loss": 0.8805, "step": 6424 }, { "epoch": 0.4641754113461087, "grad_norm": 5.633307701067573, "learning_rate": 4.483375931364019e-06, "loss": 0.871, "step": 6425 }, { "epoch": 0.4642476565463182, "grad_norm": 7.134897952937248, "learning_rate": 4.4831978524856785e-06, "loss": 0.8718, "step": 6426 }, { "epoch": 0.4643199017465277, "grad_norm": 5.833293515024071, "learning_rate": 4.483019746458899e-06, "loss": 0.8519, "step": 6427 }, { "epoch": 0.46439214694673725, "grad_norm": 5.644610658502851, "learning_rate": 4.482841613286119e-06, "loss": 0.8503, "step": 6428 }, { "epoch": 0.46446439214694674, "grad_norm": 5.905762667456775, "learning_rate": 4.482663452969778e-06, "loss": 0.936, "step": 6429 }, { "epoch": 0.4645366373471562, "grad_norm": 5.587230612727544, "learning_rate": 4.482485265512312e-06, "loss": 0.9204, "step": 6430 }, { "epoch": 0.46460888254736576, "grad_norm": 5.29881228688197, "learning_rate": 4.482307050916163e-06, "loss": 0.8683, "step": 6431 }, { "epoch": 0.46468112774757525, "grad_norm": 6.55732130347461, "learning_rate": 4.4821288091837685e-06, "loss": 0.7811, "step": 6432 }, { "epoch": 0.4647533729477848, "grad_norm": 6.807106332705246, "learning_rate": 4.4819505403175715e-06, "loss": 0.849, "step": 6433 }, { "epoch": 0.4648256181479943, "grad_norm": 5.707311311360645, "learning_rate": 4.48177224432001e-06, "loss": 0.8602, "step": 6434 }, { "epoch": 0.4648978633482038, "grad_norm": 7.012449637655467, "learning_rate": 4.481593921193524e-06, "loss": 0.8725, "step": 6435 }, { "epoch": 0.4649701085484133, "grad_norm": 9.583902977861499, "learning_rate": 4.481415570940557e-06, "loss": 1.0169, "step": 6436 }, { "epoch": 0.46504235374862285, "grad_norm": 5.79543597104501, "learning_rate": 4.481237193563548e-06, "loss": 0.9231, "step": 6437 }, { "epoch": 0.46511459894883234, "grad_norm": 5.716579004325516, "learning_rate": 4.481058789064942e-06, "loss": 0.9228, "step": 6438 }, { "epoch": 0.4651868441490418, "grad_norm": 6.848075440410161, "learning_rate": 4.480880357447178e-06, "loss": 0.9324, "step": 6439 }, { "epoch": 0.46525908934925136, "grad_norm": 6.505740052099244, "learning_rate": 4.4807018987127e-06, "loss": 0.883, "step": 6440 }, { "epoch": 0.46533133454946085, "grad_norm": 6.05435325438295, "learning_rate": 4.480523412863952e-06, "loss": 0.8924, "step": 6441 }, { "epoch": 0.4654035797496704, "grad_norm": 7.646446459835756, "learning_rate": 4.480344899903375e-06, "loss": 0.8487, "step": 6442 }, { "epoch": 0.4654758249498799, "grad_norm": 7.449879065434526, "learning_rate": 4.480166359833415e-06, "loss": 0.911, "step": 6443 }, { "epoch": 0.4655480701500894, "grad_norm": 6.252109019163053, "learning_rate": 4.479987792656514e-06, "loss": 0.9483, "step": 6444 }, { "epoch": 0.4656203153502989, "grad_norm": 6.927485755952572, "learning_rate": 4.479809198375118e-06, "loss": 0.9609, "step": 6445 }, { "epoch": 0.46569256055050845, "grad_norm": 6.492093779877838, "learning_rate": 4.479630576991672e-06, "loss": 0.7989, "step": 6446 }, { "epoch": 0.46576480575071794, "grad_norm": 6.239465303742888, "learning_rate": 4.479451928508619e-06, "loss": 0.8819, "step": 6447 }, { "epoch": 0.4658370509509274, "grad_norm": 5.552526898823472, "learning_rate": 4.479273252928407e-06, "loss": 0.8776, "step": 6448 }, { "epoch": 0.46590929615113696, "grad_norm": 5.687046913292886, "learning_rate": 4.479094550253481e-06, "loss": 0.8985, "step": 6449 }, { "epoch": 0.46598154135134645, "grad_norm": 8.043790177820695, "learning_rate": 4.478915820486287e-06, "loss": 0.8419, "step": 6450 }, { "epoch": 0.466053786551556, "grad_norm": 5.498292831421276, "learning_rate": 4.478737063629271e-06, "loss": 0.8333, "step": 6451 }, { "epoch": 0.4661260317517655, "grad_norm": 6.344665160666603, "learning_rate": 4.4785582796848835e-06, "loss": 0.868, "step": 6452 }, { "epoch": 0.466198276951975, "grad_norm": 7.954771459762423, "learning_rate": 4.478379468655567e-06, "loss": 0.9126, "step": 6453 }, { "epoch": 0.4662705221521845, "grad_norm": 6.329589674247134, "learning_rate": 4.478200630543773e-06, "loss": 0.8001, "step": 6454 }, { "epoch": 0.46634276735239405, "grad_norm": 7.240383479041975, "learning_rate": 4.4780217653519475e-06, "loss": 0.9547, "step": 6455 }, { "epoch": 0.46641501255260354, "grad_norm": 7.449523695498587, "learning_rate": 4.4778428730825405e-06, "loss": 0.8696, "step": 6456 }, { "epoch": 0.466487257752813, "grad_norm": 7.68643329942146, "learning_rate": 4.477663953738001e-06, "loss": 0.8413, "step": 6457 }, { "epoch": 0.46655950295302256, "grad_norm": 7.753486341353588, "learning_rate": 4.477485007320776e-06, "loss": 0.9209, "step": 6458 }, { "epoch": 0.46663174815323205, "grad_norm": 5.8246071351472795, "learning_rate": 4.4773060338333174e-06, "loss": 0.8479, "step": 6459 }, { "epoch": 0.4667039933534416, "grad_norm": 5.996479273029717, "learning_rate": 4.477127033278074e-06, "loss": 0.892, "step": 6460 }, { "epoch": 0.4667762385536511, "grad_norm": 7.208682748194106, "learning_rate": 4.476948005657497e-06, "loss": 0.9346, "step": 6461 }, { "epoch": 0.4668484837538606, "grad_norm": 7.9179010600060495, "learning_rate": 4.476768950974037e-06, "loss": 0.9081, "step": 6462 }, { "epoch": 0.4669207289540701, "grad_norm": 8.194003485185638, "learning_rate": 4.476589869230145e-06, "loss": 0.9229, "step": 6463 }, { "epoch": 0.46699297415427965, "grad_norm": 7.491892947074185, "learning_rate": 4.476410760428272e-06, "loss": 0.8638, "step": 6464 }, { "epoch": 0.46706521935448914, "grad_norm": 8.75439517125273, "learning_rate": 4.4762316245708705e-06, "loss": 0.9064, "step": 6465 }, { "epoch": 0.4671374645546986, "grad_norm": 8.555876683185136, "learning_rate": 4.476052461660392e-06, "loss": 0.8534, "step": 6466 }, { "epoch": 0.46720970975490816, "grad_norm": 8.97683277171454, "learning_rate": 4.47587327169929e-06, "loss": 0.9391, "step": 6467 }, { "epoch": 0.46728195495511765, "grad_norm": 6.015559684101579, "learning_rate": 4.475694054690018e-06, "loss": 0.7995, "step": 6468 }, { "epoch": 0.4673542001553272, "grad_norm": 8.807624063172973, "learning_rate": 4.4755148106350264e-06, "loss": 0.9035, "step": 6469 }, { "epoch": 0.4674264453555367, "grad_norm": 5.810446755965177, "learning_rate": 4.475335539536773e-06, "loss": 0.9572, "step": 6470 }, { "epoch": 0.4674986905557462, "grad_norm": 7.291519454196617, "learning_rate": 4.475156241397708e-06, "loss": 0.8776, "step": 6471 }, { "epoch": 0.4675709357559557, "grad_norm": 7.376789716385806, "learning_rate": 4.47497691622029e-06, "loss": 0.9185, "step": 6472 }, { "epoch": 0.46764318095616525, "grad_norm": 6.624714107462694, "learning_rate": 4.4747975640069685e-06, "loss": 0.9058, "step": 6473 }, { "epoch": 0.46771542615637474, "grad_norm": 6.3269213626726355, "learning_rate": 4.474618184760203e-06, "loss": 0.8637, "step": 6474 }, { "epoch": 0.4677876713565842, "grad_norm": 7.802424411083636, "learning_rate": 4.4744387784824485e-06, "loss": 0.8511, "step": 6475 }, { "epoch": 0.46785991655679376, "grad_norm": 8.915165237033651, "learning_rate": 4.47425934517616e-06, "loss": 0.8432, "step": 6476 }, { "epoch": 0.46793216175700325, "grad_norm": 6.3023230977333, "learning_rate": 4.474079884843793e-06, "loss": 0.8072, "step": 6477 }, { "epoch": 0.4680044069572128, "grad_norm": 7.124393905827402, "learning_rate": 4.4739003974878055e-06, "loss": 0.8921, "step": 6478 }, { "epoch": 0.4680766521574223, "grad_norm": 8.671131988861761, "learning_rate": 4.473720883110655e-06, "loss": 0.8354, "step": 6479 }, { "epoch": 0.4681488973576318, "grad_norm": 7.212506918019591, "learning_rate": 4.473541341714798e-06, "loss": 0.9661, "step": 6480 }, { "epoch": 0.4682211425578413, "grad_norm": 5.986876759117179, "learning_rate": 4.473361773302691e-06, "loss": 0.8476, "step": 6481 }, { "epoch": 0.46829338775805085, "grad_norm": 5.75205724651091, "learning_rate": 4.473182177876795e-06, "loss": 0.9299, "step": 6482 }, { "epoch": 0.46836563295826034, "grad_norm": 6.750668810453356, "learning_rate": 4.473002555439567e-06, "loss": 0.8658, "step": 6483 }, { "epoch": 0.4684378781584698, "grad_norm": 6.067888519711489, "learning_rate": 4.472822905993465e-06, "loss": 0.822, "step": 6484 }, { "epoch": 0.46851012335867936, "grad_norm": 5.858056166682081, "learning_rate": 4.472643229540949e-06, "loss": 0.8561, "step": 6485 }, { "epoch": 0.46858236855888885, "grad_norm": 6.0647676297272834, "learning_rate": 4.4724635260844805e-06, "loss": 0.848, "step": 6486 }, { "epoch": 0.4686546137590984, "grad_norm": 6.761630176160195, "learning_rate": 4.4722837956265165e-06, "loss": 0.9346, "step": 6487 }, { "epoch": 0.4687268589593079, "grad_norm": 6.058622398284138, "learning_rate": 4.472104038169519e-06, "loss": 0.9212, "step": 6488 }, { "epoch": 0.4687991041595174, "grad_norm": 8.821749114902842, "learning_rate": 4.471924253715949e-06, "loss": 0.8671, "step": 6489 }, { "epoch": 0.4688713493597269, "grad_norm": 5.918622011696133, "learning_rate": 4.471744442268266e-06, "loss": 0.883, "step": 6490 }, { "epoch": 0.46894359455993645, "grad_norm": 6.436156790142307, "learning_rate": 4.471564603828934e-06, "loss": 0.9719, "step": 6491 }, { "epoch": 0.46901583976014594, "grad_norm": 6.9048466011703225, "learning_rate": 4.471384738400412e-06, "loss": 0.8932, "step": 6492 }, { "epoch": 0.4690880849603554, "grad_norm": 7.901902522593056, "learning_rate": 4.471204845985164e-06, "loss": 0.8316, "step": 6493 }, { "epoch": 0.46916033016056496, "grad_norm": 10.51274425390663, "learning_rate": 4.471024926585653e-06, "loss": 0.8985, "step": 6494 }, { "epoch": 0.46923257536077445, "grad_norm": 6.617326719222465, "learning_rate": 4.4708449802043405e-06, "loss": 0.8923, "step": 6495 }, { "epoch": 0.469304820560984, "grad_norm": 7.093310909713829, "learning_rate": 4.470665006843691e-06, "loss": 0.8834, "step": 6496 }, { "epoch": 0.4693770657611935, "grad_norm": 6.550594320342776, "learning_rate": 4.470485006506166e-06, "loss": 0.8782, "step": 6497 }, { "epoch": 0.469449310961403, "grad_norm": 8.15430412611953, "learning_rate": 4.470304979194233e-06, "loss": 0.8651, "step": 6498 }, { "epoch": 0.4695215561616125, "grad_norm": 7.9944927809986455, "learning_rate": 4.470124924910354e-06, "loss": 0.8714, "step": 6499 }, { "epoch": 0.46959380136182205, "grad_norm": 5.2192002519094105, "learning_rate": 4.469944843656995e-06, "loss": 0.8696, "step": 6500 }, { "epoch": 0.46966604656203154, "grad_norm": 5.532424026291681, "learning_rate": 4.4697647354366205e-06, "loss": 0.843, "step": 6501 }, { "epoch": 0.469738291762241, "grad_norm": 6.880340166775686, "learning_rate": 4.469584600251695e-06, "loss": 0.823, "step": 6502 }, { "epoch": 0.46981053696245056, "grad_norm": 6.877039104120291, "learning_rate": 4.4694044381046875e-06, "loss": 0.9217, "step": 6503 }, { "epoch": 0.46988278216266005, "grad_norm": 6.6564901245933354, "learning_rate": 4.469224248998061e-06, "loss": 0.8476, "step": 6504 }, { "epoch": 0.4699550273628696, "grad_norm": 8.572234252121333, "learning_rate": 4.4690440329342845e-06, "loss": 0.8972, "step": 6505 }, { "epoch": 0.4700272725630791, "grad_norm": 8.958139769362912, "learning_rate": 4.4688637899158225e-06, "loss": 0.9891, "step": 6506 }, { "epoch": 0.4700995177632886, "grad_norm": 7.012544291147311, "learning_rate": 4.468683519945146e-06, "loss": 0.847, "step": 6507 }, { "epoch": 0.4701717629634981, "grad_norm": 5.525990199773094, "learning_rate": 4.468503223024719e-06, "loss": 0.8997, "step": 6508 }, { "epoch": 0.47024400816370765, "grad_norm": 6.096151807822183, "learning_rate": 4.468322899157013e-06, "loss": 0.864, "step": 6509 }, { "epoch": 0.47031625336391714, "grad_norm": 8.16802619465094, "learning_rate": 4.468142548344493e-06, "loss": 0.9341, "step": 6510 }, { "epoch": 0.4703884985641266, "grad_norm": 8.548667705757257, "learning_rate": 4.467962170589631e-06, "loss": 0.8962, "step": 6511 }, { "epoch": 0.47046074376433616, "grad_norm": 7.4565969253755116, "learning_rate": 4.467781765894894e-06, "loss": 1.0679, "step": 6512 }, { "epoch": 0.47053298896454565, "grad_norm": 6.338162371389183, "learning_rate": 4.467601334262753e-06, "loss": 0.8375, "step": 6513 }, { "epoch": 0.4706052341647552, "grad_norm": 6.337805758291586, "learning_rate": 4.467420875695677e-06, "loss": 0.8471, "step": 6514 }, { "epoch": 0.4706774793649647, "grad_norm": 7.403685564401249, "learning_rate": 4.467240390196138e-06, "loss": 0.8915, "step": 6515 }, { "epoch": 0.4707497245651742, "grad_norm": 5.710806243682311, "learning_rate": 4.467059877766604e-06, "loss": 0.86, "step": 6516 }, { "epoch": 0.4708219697653837, "grad_norm": 6.023076503044224, "learning_rate": 4.466879338409549e-06, "loss": 0.8572, "step": 6517 }, { "epoch": 0.47089421496559325, "grad_norm": 7.9191733424014625, "learning_rate": 4.466698772127442e-06, "loss": 0.8607, "step": 6518 }, { "epoch": 0.47096646016580274, "grad_norm": 6.963389708621144, "learning_rate": 4.466518178922756e-06, "loss": 0.9057, "step": 6519 }, { "epoch": 0.4710387053660122, "grad_norm": 7.056821447569089, "learning_rate": 4.4663375587979635e-06, "loss": 0.8687, "step": 6520 }, { "epoch": 0.47111095056622176, "grad_norm": 8.47338470099261, "learning_rate": 4.466156911755536e-06, "loss": 0.865, "step": 6521 }, { "epoch": 0.47118319576643125, "grad_norm": 5.989106461711697, "learning_rate": 4.465976237797948e-06, "loss": 0.8765, "step": 6522 }, { "epoch": 0.4712554409666408, "grad_norm": 6.47513136564754, "learning_rate": 4.465795536927671e-06, "loss": 0.9902, "step": 6523 }, { "epoch": 0.4713276861668503, "grad_norm": 7.523203750100574, "learning_rate": 4.4656148091471795e-06, "loss": 0.8984, "step": 6524 }, { "epoch": 0.4713999313670598, "grad_norm": 7.636660805406767, "learning_rate": 4.465434054458947e-06, "loss": 0.894, "step": 6525 }, { "epoch": 0.4714721765672693, "grad_norm": 8.781275630384082, "learning_rate": 4.465253272865449e-06, "loss": 0.8998, "step": 6526 }, { "epoch": 0.47154442176747885, "grad_norm": 7.838434736128689, "learning_rate": 4.46507246436916e-06, "loss": 0.9479, "step": 6527 }, { "epoch": 0.47161666696768834, "grad_norm": 7.299370610526257, "learning_rate": 4.464891628972554e-06, "loss": 0.93, "step": 6528 }, { "epoch": 0.4716889121678978, "grad_norm": 7.415895772010173, "learning_rate": 4.4647107666781076e-06, "loss": 0.9104, "step": 6529 }, { "epoch": 0.47176115736810736, "grad_norm": 7.782972275434723, "learning_rate": 4.464529877488296e-06, "loss": 0.9063, "step": 6530 }, { "epoch": 0.47183340256831685, "grad_norm": 6.5329710230632125, "learning_rate": 4.464348961405596e-06, "loss": 1.0224, "step": 6531 }, { "epoch": 0.4719056477685264, "grad_norm": 6.076643030089617, "learning_rate": 4.464168018432483e-06, "loss": 0.8976, "step": 6532 }, { "epoch": 0.4719778929687359, "grad_norm": 6.020205173481261, "learning_rate": 4.463987048571437e-06, "loss": 0.8266, "step": 6533 }, { "epoch": 0.4720501381689454, "grad_norm": 6.295977128526571, "learning_rate": 4.463806051824932e-06, "loss": 0.8839, "step": 6534 }, { "epoch": 0.4721223833691549, "grad_norm": 7.051295482026939, "learning_rate": 4.463625028195447e-06, "loss": 0.875, "step": 6535 }, { "epoch": 0.47219462856936445, "grad_norm": 5.77843707960966, "learning_rate": 4.463443977685459e-06, "loss": 0.8274, "step": 6536 }, { "epoch": 0.47226687376957394, "grad_norm": 7.814303990938218, "learning_rate": 4.463262900297449e-06, "loss": 0.9985, "step": 6537 }, { "epoch": 0.4723391189697834, "grad_norm": 7.325760775172678, "learning_rate": 4.463081796033893e-06, "loss": 0.9261, "step": 6538 }, { "epoch": 0.47241136416999296, "grad_norm": 8.184818673242994, "learning_rate": 4.462900664897273e-06, "loss": 0.9211, "step": 6539 }, { "epoch": 0.47248360937020245, "grad_norm": 6.731210341199471, "learning_rate": 4.4627195068900655e-06, "loss": 0.884, "step": 6540 }, { "epoch": 0.472555854570412, "grad_norm": 9.296130238682695, "learning_rate": 4.462538322014753e-06, "loss": 0.9596, "step": 6541 }, { "epoch": 0.4726280997706215, "grad_norm": 5.66379478084968, "learning_rate": 4.462357110273814e-06, "loss": 0.8923, "step": 6542 }, { "epoch": 0.472700344970831, "grad_norm": 7.082048486603404, "learning_rate": 4.46217587166973e-06, "loss": 0.7627, "step": 6543 }, { "epoch": 0.4727725901710405, "grad_norm": 6.7454694449109, "learning_rate": 4.461994606204983e-06, "loss": 0.9484, "step": 6544 }, { "epoch": 0.47284483537125005, "grad_norm": 6.534692468506761, "learning_rate": 4.4618133138820515e-06, "loss": 0.8844, "step": 6545 }, { "epoch": 0.47291708057145954, "grad_norm": 8.986969734009346, "learning_rate": 4.461631994703419e-06, "loss": 0.9046, "step": 6546 }, { "epoch": 0.472989325771669, "grad_norm": 9.422747873642507, "learning_rate": 4.4614506486715685e-06, "loss": 1.0268, "step": 6547 }, { "epoch": 0.47306157097187856, "grad_norm": 6.174004218858489, "learning_rate": 4.461269275788981e-06, "loss": 0.8263, "step": 6548 }, { "epoch": 0.47313381617208805, "grad_norm": 6.041618645137368, "learning_rate": 4.46108787605814e-06, "loss": 0.8328, "step": 6549 }, { "epoch": 0.4732060613722976, "grad_norm": 8.738207909280959, "learning_rate": 4.460906449481529e-06, "loss": 0.8535, "step": 6550 }, { "epoch": 0.4732783065725071, "grad_norm": 6.166847054532336, "learning_rate": 4.460724996061632e-06, "loss": 0.8676, "step": 6551 }, { "epoch": 0.4733505517727166, "grad_norm": 5.986665212776897, "learning_rate": 4.460543515800931e-06, "loss": 0.8796, "step": 6552 }, { "epoch": 0.4734227969729261, "grad_norm": 7.3127394416928215, "learning_rate": 4.4603620087019116e-06, "loss": 0.9552, "step": 6553 }, { "epoch": 0.47349504217313565, "grad_norm": 6.3108185898830955, "learning_rate": 4.460180474767059e-06, "loss": 0.8109, "step": 6554 }, { "epoch": 0.47356728737334514, "grad_norm": 6.117572562812745, "learning_rate": 4.459998913998858e-06, "loss": 0.8914, "step": 6555 }, { "epoch": 0.4736395325735546, "grad_norm": 6.271088688953981, "learning_rate": 4.459817326399792e-06, "loss": 0.8432, "step": 6556 }, { "epoch": 0.47371177777376416, "grad_norm": 6.65077062745099, "learning_rate": 4.45963571197235e-06, "loss": 0.9525, "step": 6557 }, { "epoch": 0.47378402297397365, "grad_norm": 6.3415809267975956, "learning_rate": 4.4594540707190165e-06, "loss": 0.9308, "step": 6558 }, { "epoch": 0.4738562681741832, "grad_norm": 6.837803595740083, "learning_rate": 4.459272402642278e-06, "loss": 0.7895, "step": 6559 }, { "epoch": 0.4739285133743927, "grad_norm": 5.439623538623179, "learning_rate": 4.459090707744621e-06, "loss": 0.7817, "step": 6560 }, { "epoch": 0.4740007585746022, "grad_norm": 6.180370500439684, "learning_rate": 4.458908986028535e-06, "loss": 0.897, "step": 6561 }, { "epoch": 0.4740730037748117, "grad_norm": 8.710634453809357, "learning_rate": 4.458727237496504e-06, "loss": 0.8878, "step": 6562 }, { "epoch": 0.47414524897502125, "grad_norm": 6.19107075934501, "learning_rate": 4.45854546215102e-06, "loss": 0.9092, "step": 6563 }, { "epoch": 0.47421749417523074, "grad_norm": 8.032674343996476, "learning_rate": 4.458363659994567e-06, "loss": 0.9604, "step": 6564 }, { "epoch": 0.4742897393754402, "grad_norm": 7.600901811200159, "learning_rate": 4.4581818310296375e-06, "loss": 0.9959, "step": 6565 }, { "epoch": 0.47436198457564976, "grad_norm": 6.489795597660535, "learning_rate": 4.4579999752587185e-06, "loss": 0.8803, "step": 6566 }, { "epoch": 0.47443422977585925, "grad_norm": 5.760986447106953, "learning_rate": 4.457818092684301e-06, "loss": 0.8499, "step": 6567 }, { "epoch": 0.4745064749760688, "grad_norm": 6.710017717454604, "learning_rate": 4.457636183308873e-06, "loss": 0.8288, "step": 6568 }, { "epoch": 0.4745787201762783, "grad_norm": 6.758103204549324, "learning_rate": 4.457454247134927e-06, "loss": 0.9005, "step": 6569 }, { "epoch": 0.4746509653764878, "grad_norm": 6.2576454702271755, "learning_rate": 4.457272284164951e-06, "loss": 0.9116, "step": 6570 }, { "epoch": 0.4747232105766973, "grad_norm": 5.487130279946562, "learning_rate": 4.4570902944014375e-06, "loss": 0.8248, "step": 6571 }, { "epoch": 0.4747954557769068, "grad_norm": 6.180520485005787, "learning_rate": 4.456908277846878e-06, "loss": 0.8907, "step": 6572 }, { "epoch": 0.47486770097711634, "grad_norm": 5.958707499815659, "learning_rate": 4.456726234503763e-06, "loss": 0.8082, "step": 6573 }, { "epoch": 0.4749399461773258, "grad_norm": 7.1004147583074495, "learning_rate": 4.456544164374585e-06, "loss": 0.9299, "step": 6574 }, { "epoch": 0.47501219137753536, "grad_norm": 4.839352882252928, "learning_rate": 4.456362067461837e-06, "loss": 0.7849, "step": 6575 }, { "epoch": 0.47508443657774485, "grad_norm": 5.851105166645236, "learning_rate": 4.456179943768011e-06, "loss": 0.8305, "step": 6576 }, { "epoch": 0.4751566817779544, "grad_norm": 6.0796957070347455, "learning_rate": 4.4559977932956e-06, "loss": 0.9285, "step": 6577 }, { "epoch": 0.4752289269781639, "grad_norm": 8.936756449894254, "learning_rate": 4.455815616047099e-06, "loss": 0.9392, "step": 6578 }, { "epoch": 0.4753011721783734, "grad_norm": 6.642864684351143, "learning_rate": 4.455633412025e-06, "loss": 0.8639, "step": 6579 }, { "epoch": 0.4753734173785829, "grad_norm": 6.947795479864768, "learning_rate": 4.455451181231799e-06, "loss": 0.9056, "step": 6580 }, { "epoch": 0.4754456625787924, "grad_norm": 6.01772614482427, "learning_rate": 4.455268923669989e-06, "loss": 0.8102, "step": 6581 }, { "epoch": 0.47551790777900194, "grad_norm": 7.117404838116114, "learning_rate": 4.455086639342065e-06, "loss": 0.8841, "step": 6582 }, { "epoch": 0.4755901529792114, "grad_norm": 6.133730213482421, "learning_rate": 4.4549043282505235e-06, "loss": 0.9679, "step": 6583 }, { "epoch": 0.47566239817942096, "grad_norm": 10.494937948086196, "learning_rate": 4.45472199039786e-06, "loss": 0.9437, "step": 6584 }, { "epoch": 0.47573464337963045, "grad_norm": 5.771185284657195, "learning_rate": 4.4545396257865696e-06, "loss": 0.8618, "step": 6585 }, { "epoch": 0.47580688857984, "grad_norm": 6.102815272318741, "learning_rate": 4.45435723441915e-06, "loss": 0.8617, "step": 6586 }, { "epoch": 0.4758791337800495, "grad_norm": 7.205903483493776, "learning_rate": 4.454174816298097e-06, "loss": 0.865, "step": 6587 }, { "epoch": 0.475951378980259, "grad_norm": 6.983428686154073, "learning_rate": 4.453992371425908e-06, "loss": 0.892, "step": 6588 }, { "epoch": 0.4760236241804685, "grad_norm": 9.518326198989563, "learning_rate": 4.45380989980508e-06, "loss": 0.8771, "step": 6589 }, { "epoch": 0.476095869380678, "grad_norm": 7.581595532624548, "learning_rate": 4.453627401438112e-06, "loss": 0.9819, "step": 6590 }, { "epoch": 0.47616811458088754, "grad_norm": 7.641739528007744, "learning_rate": 4.453444876327502e-06, "loss": 0.8642, "step": 6591 }, { "epoch": 0.476240359781097, "grad_norm": 8.63663129619902, "learning_rate": 4.453262324475749e-06, "loss": 0.8584, "step": 6592 }, { "epoch": 0.47631260498130656, "grad_norm": 6.8632868081223934, "learning_rate": 4.4530797458853505e-06, "loss": 0.8357, "step": 6593 }, { "epoch": 0.47638485018151605, "grad_norm": 7.2145228043463545, "learning_rate": 4.452897140558807e-06, "loss": 0.9323, "step": 6594 }, { "epoch": 0.4764570953817256, "grad_norm": 7.095267381421731, "learning_rate": 4.452714508498618e-06, "loss": 0.897, "step": 6595 }, { "epoch": 0.4765293405819351, "grad_norm": 5.820005759534575, "learning_rate": 4.452531849707283e-06, "loss": 0.8404, "step": 6596 }, { "epoch": 0.4766015857821446, "grad_norm": 6.835410973026129, "learning_rate": 4.452349164187303e-06, "loss": 0.7917, "step": 6597 }, { "epoch": 0.4766738309823541, "grad_norm": 7.617863301351382, "learning_rate": 4.45216645194118e-06, "loss": 0.8767, "step": 6598 }, { "epoch": 0.4767460761825636, "grad_norm": 7.090478352711027, "learning_rate": 4.451983712971413e-06, "loss": 0.9148, "step": 6599 }, { "epoch": 0.47681832138277314, "grad_norm": 9.99812299278751, "learning_rate": 4.451800947280505e-06, "loss": 0.9217, "step": 6600 }, { "epoch": 0.4768905665829826, "grad_norm": 6.840305245264181, "learning_rate": 4.451618154870958e-06, "loss": 0.8282, "step": 6601 }, { "epoch": 0.47696281178319216, "grad_norm": 5.976048346450297, "learning_rate": 4.451435335745272e-06, "loss": 0.8965, "step": 6602 }, { "epoch": 0.47703505698340165, "grad_norm": 6.917915813603225, "learning_rate": 4.4512524899059525e-06, "loss": 0.8584, "step": 6603 }, { "epoch": 0.4771073021836112, "grad_norm": 7.31824287462316, "learning_rate": 4.451069617355502e-06, "loss": 0.8853, "step": 6604 }, { "epoch": 0.4771795473838207, "grad_norm": 6.774055285588318, "learning_rate": 4.450886718096423e-06, "loss": 0.7718, "step": 6605 }, { "epoch": 0.4772517925840302, "grad_norm": 6.093548111138836, "learning_rate": 4.4507037921312204e-06, "loss": 0.8862, "step": 6606 }, { "epoch": 0.4773240377842397, "grad_norm": 6.417371372647266, "learning_rate": 4.450520839462396e-06, "loss": 0.8045, "step": 6607 }, { "epoch": 0.4773962829844492, "grad_norm": 9.971908021284639, "learning_rate": 4.450337860092457e-06, "loss": 0.9523, "step": 6608 }, { "epoch": 0.47746852818465874, "grad_norm": 6.613459774521123, "learning_rate": 4.450154854023907e-06, "loss": 0.8481, "step": 6609 }, { "epoch": 0.4775407733848682, "grad_norm": 6.306262872416248, "learning_rate": 4.4499718212592504e-06, "loss": 0.8633, "step": 6610 }, { "epoch": 0.47761301858507776, "grad_norm": 6.236342805533926, "learning_rate": 4.449788761800994e-06, "loss": 0.9344, "step": 6611 }, { "epoch": 0.47768526378528725, "grad_norm": 6.500720644396617, "learning_rate": 4.449605675651643e-06, "loss": 0.9822, "step": 6612 }, { "epoch": 0.4777575089854968, "grad_norm": 6.9221166564228716, "learning_rate": 4.449422562813704e-06, "loss": 0.9072, "step": 6613 }, { "epoch": 0.4778297541857063, "grad_norm": 6.0574546348142, "learning_rate": 4.4492394232896845e-06, "loss": 0.8732, "step": 6614 }, { "epoch": 0.4779019993859158, "grad_norm": 6.353417157437092, "learning_rate": 4.4490562570820905e-06, "loss": 0.7824, "step": 6615 }, { "epoch": 0.4779742445861253, "grad_norm": 7.9169371976974245, "learning_rate": 4.44887306419343e-06, "loss": 0.8311, "step": 6616 }, { "epoch": 0.4780464897863348, "grad_norm": 6.287928736120293, "learning_rate": 4.448689844626209e-06, "loss": 0.8532, "step": 6617 }, { "epoch": 0.47811873498654434, "grad_norm": 6.071379775163763, "learning_rate": 4.448506598382939e-06, "loss": 0.8857, "step": 6618 }, { "epoch": 0.4781909801867538, "grad_norm": 7.57998678402516, "learning_rate": 4.448323325466125e-06, "loss": 0.805, "step": 6619 }, { "epoch": 0.47826322538696336, "grad_norm": 7.365172222788677, "learning_rate": 4.448140025878279e-06, "loss": 0.7794, "step": 6620 }, { "epoch": 0.47833547058717285, "grad_norm": 7.099287787162945, "learning_rate": 4.447956699621908e-06, "loss": 0.8928, "step": 6621 }, { "epoch": 0.4784077157873824, "grad_norm": 8.252665695935296, "learning_rate": 4.447773346699522e-06, "loss": 0.934, "step": 6622 }, { "epoch": 0.4784799609875919, "grad_norm": 7.238178217443302, "learning_rate": 4.447589967113631e-06, "loss": 0.8748, "step": 6623 }, { "epoch": 0.4785522061878014, "grad_norm": 6.505050164512359, "learning_rate": 4.447406560866746e-06, "loss": 0.8776, "step": 6624 }, { "epoch": 0.4786244513880109, "grad_norm": 5.348920495760869, "learning_rate": 4.447223127961377e-06, "loss": 0.8165, "step": 6625 }, { "epoch": 0.4786966965882204, "grad_norm": 6.05744235462779, "learning_rate": 4.447039668400036e-06, "loss": 0.8318, "step": 6626 }, { "epoch": 0.47876894178842994, "grad_norm": 6.717858543694258, "learning_rate": 4.446856182185233e-06, "loss": 0.8662, "step": 6627 }, { "epoch": 0.4788411869886394, "grad_norm": 8.442035275614913, "learning_rate": 4.4466726693194805e-06, "loss": 0.9801, "step": 6628 }, { "epoch": 0.47891343218884896, "grad_norm": 6.895792643109116, "learning_rate": 4.446489129805291e-06, "loss": 0.8517, "step": 6629 }, { "epoch": 0.47898567738905845, "grad_norm": 6.805185012937028, "learning_rate": 4.446305563645177e-06, "loss": 0.8856, "step": 6630 }, { "epoch": 0.479057922589268, "grad_norm": 6.93962078498561, "learning_rate": 4.4461219708416504e-06, "loss": 0.9403, "step": 6631 }, { "epoch": 0.4791301677894775, "grad_norm": 6.320962804056331, "learning_rate": 4.445938351397225e-06, "loss": 0.9907, "step": 6632 }, { "epoch": 0.479202412989687, "grad_norm": 6.615389204620789, "learning_rate": 4.445754705314415e-06, "loss": 0.9554, "step": 6633 }, { "epoch": 0.4792746581898965, "grad_norm": 8.276589748574647, "learning_rate": 4.445571032595734e-06, "loss": 0.8665, "step": 6634 }, { "epoch": 0.479346903390106, "grad_norm": 8.04815770268119, "learning_rate": 4.445387333243695e-06, "loss": 0.8669, "step": 6635 }, { "epoch": 0.47941914859031554, "grad_norm": 8.06811328033369, "learning_rate": 4.445203607260815e-06, "loss": 0.9599, "step": 6636 }, { "epoch": 0.479491393790525, "grad_norm": 7.1366262779654965, "learning_rate": 4.445019854649607e-06, "loss": 0.8804, "step": 6637 }, { "epoch": 0.47956363899073456, "grad_norm": 6.486441041812992, "learning_rate": 4.444836075412589e-06, "loss": 0.7644, "step": 6638 }, { "epoch": 0.47963588419094405, "grad_norm": 6.707654298649085, "learning_rate": 4.444652269552274e-06, "loss": 0.8624, "step": 6639 }, { "epoch": 0.4797081293911536, "grad_norm": 8.131296388462989, "learning_rate": 4.44446843707118e-06, "loss": 0.9165, "step": 6640 }, { "epoch": 0.4797803745913631, "grad_norm": 10.325615328355966, "learning_rate": 4.444284577971822e-06, "loss": 1.031, "step": 6641 }, { "epoch": 0.4798526197915726, "grad_norm": 6.958549937762043, "learning_rate": 4.444100692256719e-06, "loss": 0.8941, "step": 6642 }, { "epoch": 0.4799248649917821, "grad_norm": 9.480205191150063, "learning_rate": 4.443916779928385e-06, "loss": 0.8816, "step": 6643 }, { "epoch": 0.4799971101919916, "grad_norm": 6.334399919879601, "learning_rate": 4.443732840989341e-06, "loss": 0.9162, "step": 6644 }, { "epoch": 0.48006935539220114, "grad_norm": 9.0863132546711, "learning_rate": 4.443548875442104e-06, "loss": 0.9281, "step": 6645 }, { "epoch": 0.4801416005924106, "grad_norm": 8.73205863250046, "learning_rate": 4.443364883289192e-06, "loss": 0.935, "step": 6646 }, { "epoch": 0.48021384579262016, "grad_norm": 7.132873468044474, "learning_rate": 4.443180864533123e-06, "loss": 0.8088, "step": 6647 }, { "epoch": 0.48028609099282965, "grad_norm": 9.21037755401482, "learning_rate": 4.442996819176417e-06, "loss": 0.959, "step": 6648 }, { "epoch": 0.4803583361930392, "grad_norm": 7.149827498909338, "learning_rate": 4.442812747221593e-06, "loss": 0.95, "step": 6649 }, { "epoch": 0.4804305813932487, "grad_norm": 5.906643324576957, "learning_rate": 4.442628648671171e-06, "loss": 0.8374, "step": 6650 }, { "epoch": 0.4805028265934582, "grad_norm": 7.469561034024397, "learning_rate": 4.442444523527672e-06, "loss": 0.8784, "step": 6651 }, { "epoch": 0.4805750717936677, "grad_norm": 7.732699619285596, "learning_rate": 4.442260371793614e-06, "loss": 0.9121, "step": 6652 }, { "epoch": 0.4806473169938772, "grad_norm": 5.505810356124909, "learning_rate": 4.442076193471521e-06, "loss": 0.8757, "step": 6653 }, { "epoch": 0.48071956219408674, "grad_norm": 9.242563918792705, "learning_rate": 4.441891988563912e-06, "loss": 0.9538, "step": 6654 }, { "epoch": 0.4807918073942962, "grad_norm": 7.698796354074686, "learning_rate": 4.44170775707331e-06, "loss": 0.8303, "step": 6655 }, { "epoch": 0.48086405259450576, "grad_norm": 6.2024755642404585, "learning_rate": 4.441523499002236e-06, "loss": 0.8062, "step": 6656 }, { "epoch": 0.48093629779471525, "grad_norm": 6.079013318260126, "learning_rate": 4.441339214353213e-06, "loss": 0.915, "step": 6657 }, { "epoch": 0.4810085429949248, "grad_norm": 6.401122781494629, "learning_rate": 4.441154903128764e-06, "loss": 0.9039, "step": 6658 }, { "epoch": 0.4810807881951343, "grad_norm": 8.65461092968588, "learning_rate": 4.44097056533141e-06, "loss": 0.9069, "step": 6659 }, { "epoch": 0.4811530333953438, "grad_norm": 8.182988678407021, "learning_rate": 4.4407862009636785e-06, "loss": 0.8934, "step": 6660 }, { "epoch": 0.4812252785955533, "grad_norm": 9.153222322321026, "learning_rate": 4.440601810028089e-06, "loss": 0.8521, "step": 6661 }, { "epoch": 0.4812975237957628, "grad_norm": 8.619178023301485, "learning_rate": 4.440417392527167e-06, "loss": 0.8776, "step": 6662 }, { "epoch": 0.48136976899597234, "grad_norm": 5.997226073865708, "learning_rate": 4.44023294846344e-06, "loss": 0.9021, "step": 6663 }, { "epoch": 0.4814420141961818, "grad_norm": 6.807002658054981, "learning_rate": 4.4400484778394285e-06, "loss": 0.8872, "step": 6664 }, { "epoch": 0.48151425939639136, "grad_norm": 8.799891765969251, "learning_rate": 4.439863980657661e-06, "loss": 0.9801, "step": 6665 }, { "epoch": 0.48158650459660085, "grad_norm": 7.092990649748657, "learning_rate": 4.4396794569206605e-06, "loss": 0.9027, "step": 6666 }, { "epoch": 0.4816587497968104, "grad_norm": 7.550449575213505, "learning_rate": 4.439494906630954e-06, "loss": 0.847, "step": 6667 }, { "epoch": 0.4817309949970199, "grad_norm": 7.30226344722215, "learning_rate": 4.439310329791069e-06, "loss": 0.8582, "step": 6668 }, { "epoch": 0.4818032401972294, "grad_norm": 6.626509872259922, "learning_rate": 4.4391257264035315e-06, "loss": 0.8662, "step": 6669 }, { "epoch": 0.4818754853974389, "grad_norm": 7.097127639661173, "learning_rate": 4.438941096470868e-06, "loss": 0.8918, "step": 6670 }, { "epoch": 0.4819477305976484, "grad_norm": 6.051210569218968, "learning_rate": 4.4387564399956066e-06, "loss": 0.8123, "step": 6671 }, { "epoch": 0.48201997579785794, "grad_norm": 6.370805145593844, "learning_rate": 4.438571756980275e-06, "loss": 0.9307, "step": 6672 }, { "epoch": 0.4820922209980674, "grad_norm": 6.183067198147424, "learning_rate": 4.438387047427402e-06, "loss": 0.9129, "step": 6673 }, { "epoch": 0.48216446619827696, "grad_norm": 7.826597144594165, "learning_rate": 4.438202311339514e-06, "loss": 1.0619, "step": 6674 }, { "epoch": 0.48223671139848645, "grad_norm": 7.403898614322195, "learning_rate": 4.438017548719141e-06, "loss": 0.9693, "step": 6675 }, { "epoch": 0.482308956598696, "grad_norm": 7.086487131556979, "learning_rate": 4.437832759568814e-06, "loss": 0.9086, "step": 6676 }, { "epoch": 0.4823812017989055, "grad_norm": 6.210889592975616, "learning_rate": 4.4376479438910605e-06, "loss": 0.915, "step": 6677 }, { "epoch": 0.482453446999115, "grad_norm": 6.658147836837772, "learning_rate": 4.437463101688411e-06, "loss": 0.8949, "step": 6678 }, { "epoch": 0.4825256921993245, "grad_norm": 7.314161845418555, "learning_rate": 4.4372782329633955e-06, "loss": 0.9342, "step": 6679 }, { "epoch": 0.482597937399534, "grad_norm": 5.7580248354886, "learning_rate": 4.437093337718547e-06, "loss": 0.9291, "step": 6680 }, { "epoch": 0.48267018259974354, "grad_norm": 7.095100173511856, "learning_rate": 4.436908415956393e-06, "loss": 0.974, "step": 6681 }, { "epoch": 0.482742427799953, "grad_norm": 6.336926628825767, "learning_rate": 4.436723467679467e-06, "loss": 0.828, "step": 6682 }, { "epoch": 0.48281467300016256, "grad_norm": 5.2829733299724495, "learning_rate": 4.436538492890301e-06, "loss": 0.8314, "step": 6683 }, { "epoch": 0.48288691820037205, "grad_norm": 8.574197839524617, "learning_rate": 4.436353491591427e-06, "loss": 0.9484, "step": 6684 }, { "epoch": 0.4829591634005816, "grad_norm": 6.050040114088508, "learning_rate": 4.436168463785376e-06, "loss": 0.9023, "step": 6685 }, { "epoch": 0.4830314086007911, "grad_norm": 7.899989367127808, "learning_rate": 4.435983409474682e-06, "loss": 0.879, "step": 6686 }, { "epoch": 0.4831036538010006, "grad_norm": 6.048197441491123, "learning_rate": 4.43579832866188e-06, "loss": 0.9236, "step": 6687 }, { "epoch": 0.4831758990012101, "grad_norm": 6.230282312551478, "learning_rate": 4.4356132213495e-06, "loss": 0.855, "step": 6688 }, { "epoch": 0.4832481442014196, "grad_norm": 6.020582499663459, "learning_rate": 4.435428087540079e-06, "loss": 0.9903, "step": 6689 }, { "epoch": 0.48332038940162914, "grad_norm": 7.306200492958741, "learning_rate": 4.43524292723615e-06, "loss": 0.9413, "step": 6690 }, { "epoch": 0.4833926346018386, "grad_norm": 6.737738882157769, "learning_rate": 4.4350577404402485e-06, "loss": 0.8251, "step": 6691 }, { "epoch": 0.48346487980204816, "grad_norm": 5.842584993644421, "learning_rate": 4.434872527154908e-06, "loss": 0.9116, "step": 6692 }, { "epoch": 0.48353712500225765, "grad_norm": 6.464391152019546, "learning_rate": 4.434687287382665e-06, "loss": 0.8953, "step": 6693 }, { "epoch": 0.4836093702024672, "grad_norm": 5.677174344047824, "learning_rate": 4.4345020211260555e-06, "loss": 0.8363, "step": 6694 }, { "epoch": 0.4836816154026767, "grad_norm": 6.756079867300256, "learning_rate": 4.434316728387616e-06, "loss": 0.8997, "step": 6695 }, { "epoch": 0.4837538606028862, "grad_norm": 6.53949388838021, "learning_rate": 4.434131409169882e-06, "loss": 0.8465, "step": 6696 }, { "epoch": 0.4838261058030957, "grad_norm": 5.756177817688371, "learning_rate": 4.43394606347539e-06, "loss": 0.9172, "step": 6697 }, { "epoch": 0.4838983510033052, "grad_norm": 6.387709869102241, "learning_rate": 4.43376069130668e-06, "loss": 0.9277, "step": 6698 }, { "epoch": 0.48397059620351474, "grad_norm": 6.53577379591186, "learning_rate": 4.433575292666285e-06, "loss": 0.9491, "step": 6699 }, { "epoch": 0.4840428414037242, "grad_norm": 5.636307265819223, "learning_rate": 4.4333898675567475e-06, "loss": 0.9245, "step": 6700 }, { "epoch": 0.48411508660393376, "grad_norm": 5.953273020428487, "learning_rate": 4.433204415980603e-06, "loss": 0.7505, "step": 6701 }, { "epoch": 0.48418733180414325, "grad_norm": 6.237066390689234, "learning_rate": 4.433018937940392e-06, "loss": 0.9712, "step": 6702 }, { "epoch": 0.4842595770043528, "grad_norm": 8.017967551063196, "learning_rate": 4.432833433438651e-06, "loss": 0.9636, "step": 6703 }, { "epoch": 0.4843318222045623, "grad_norm": 6.234542959144947, "learning_rate": 4.432647902477922e-06, "loss": 0.942, "step": 6704 }, { "epoch": 0.4844040674047718, "grad_norm": 5.588673424448919, "learning_rate": 4.4324623450607446e-06, "loss": 0.874, "step": 6705 }, { "epoch": 0.4844763126049813, "grad_norm": 7.252938562944785, "learning_rate": 4.432276761189658e-06, "loss": 0.8436, "step": 6706 }, { "epoch": 0.4845485578051908, "grad_norm": 7.201061424854759, "learning_rate": 4.432091150867201e-06, "loss": 0.9594, "step": 6707 }, { "epoch": 0.48462080300540034, "grad_norm": 6.428863361330084, "learning_rate": 4.431905514095918e-06, "loss": 0.8874, "step": 6708 }, { "epoch": 0.4846930482056098, "grad_norm": 7.023490727342802, "learning_rate": 4.431719850878348e-06, "loss": 0.906, "step": 6709 }, { "epoch": 0.48476529340581936, "grad_norm": 5.8015210229041845, "learning_rate": 4.431534161217034e-06, "loss": 0.8662, "step": 6710 }, { "epoch": 0.48483753860602885, "grad_norm": 6.6092220496117555, "learning_rate": 4.431348445114516e-06, "loss": 0.9208, "step": 6711 }, { "epoch": 0.4849097838062384, "grad_norm": 6.204735073732405, "learning_rate": 4.431162702573338e-06, "loss": 0.8936, "step": 6712 }, { "epoch": 0.4849820290064479, "grad_norm": 5.8692309685226425, "learning_rate": 4.4309769335960415e-06, "loss": 0.9141, "step": 6713 }, { "epoch": 0.4850542742066574, "grad_norm": 5.136607910816998, "learning_rate": 4.43079113818517e-06, "loss": 0.8368, "step": 6714 }, { "epoch": 0.4851265194068669, "grad_norm": 6.70340128864966, "learning_rate": 4.4306053163432675e-06, "loss": 0.8615, "step": 6715 }, { "epoch": 0.4851987646070764, "grad_norm": 6.041806168973585, "learning_rate": 4.430419468072877e-06, "loss": 0.9302, "step": 6716 }, { "epoch": 0.48527100980728594, "grad_norm": 6.087362374819906, "learning_rate": 4.430233593376543e-06, "loss": 0.9103, "step": 6717 }, { "epoch": 0.4853432550074954, "grad_norm": 7.50788795203046, "learning_rate": 4.430047692256809e-06, "loss": 1.0285, "step": 6718 }, { "epoch": 0.48541550020770496, "grad_norm": 5.6097833596352915, "learning_rate": 4.429861764716222e-06, "loss": 0.8955, "step": 6719 }, { "epoch": 0.48548774540791445, "grad_norm": 6.72768244796122, "learning_rate": 4.429675810757325e-06, "loss": 0.8994, "step": 6720 }, { "epoch": 0.485559990608124, "grad_norm": 6.103549687404953, "learning_rate": 4.429489830382665e-06, "loss": 0.8479, "step": 6721 }, { "epoch": 0.4856322358083335, "grad_norm": 5.980088254857839, "learning_rate": 4.4293038235947875e-06, "loss": 0.8464, "step": 6722 }, { "epoch": 0.485704481008543, "grad_norm": 8.143073067156575, "learning_rate": 4.429117790396238e-06, "loss": 0.977, "step": 6723 }, { "epoch": 0.4857767262087525, "grad_norm": 5.890450472485006, "learning_rate": 4.428931730789564e-06, "loss": 0.8579, "step": 6724 }, { "epoch": 0.485848971408962, "grad_norm": 7.284483883181837, "learning_rate": 4.4287456447773124e-06, "loss": 0.8213, "step": 6725 }, { "epoch": 0.48592121660917154, "grad_norm": 7.070987428939104, "learning_rate": 4.42855953236203e-06, "loss": 0.8386, "step": 6726 }, { "epoch": 0.485993461809381, "grad_norm": 8.155913247358477, "learning_rate": 4.428373393546266e-06, "loss": 0.878, "step": 6727 }, { "epoch": 0.48606570700959056, "grad_norm": 9.551675891125157, "learning_rate": 4.428187228332566e-06, "loss": 0.8298, "step": 6728 }, { "epoch": 0.48613795220980005, "grad_norm": 6.378358741525574, "learning_rate": 4.428001036723482e-06, "loss": 0.922, "step": 6729 }, { "epoch": 0.4862101974100096, "grad_norm": 7.445149823199838, "learning_rate": 4.42781481872156e-06, "loss": 0.8728, "step": 6730 }, { "epoch": 0.4862824426102191, "grad_norm": 6.843059626904696, "learning_rate": 4.4276285743293496e-06, "loss": 0.8589, "step": 6731 }, { "epoch": 0.4863546878104286, "grad_norm": 7.359398066328634, "learning_rate": 4.427442303549401e-06, "loss": 0.8656, "step": 6732 }, { "epoch": 0.4864269330106381, "grad_norm": 6.505469148740531, "learning_rate": 4.427256006384264e-06, "loss": 0.9492, "step": 6733 }, { "epoch": 0.4864991782108476, "grad_norm": 6.492677819273999, "learning_rate": 4.427069682836488e-06, "loss": 0.8301, "step": 6734 }, { "epoch": 0.48657142341105714, "grad_norm": 6.692191216545489, "learning_rate": 4.426883332908625e-06, "loss": 0.9164, "step": 6735 }, { "epoch": 0.4866436686112666, "grad_norm": 7.135870955733084, "learning_rate": 4.4266969566032245e-06, "loss": 0.9245, "step": 6736 }, { "epoch": 0.48671591381147616, "grad_norm": 7.286461532697505, "learning_rate": 4.426510553922839e-06, "loss": 0.9155, "step": 6737 }, { "epoch": 0.48678815901168565, "grad_norm": 7.503349573475658, "learning_rate": 4.426324124870021e-06, "loss": 0.8339, "step": 6738 }, { "epoch": 0.4868604042118952, "grad_norm": 5.382344395806381, "learning_rate": 4.42613766944732e-06, "loss": 0.7906, "step": 6739 }, { "epoch": 0.4869326494121047, "grad_norm": 5.891157616939726, "learning_rate": 4.4259511876572905e-06, "loss": 0.9265, "step": 6740 }, { "epoch": 0.4870048946123142, "grad_norm": 6.826053401153582, "learning_rate": 4.425764679502485e-06, "loss": 0.8947, "step": 6741 }, { "epoch": 0.4870771398125237, "grad_norm": 5.772939286267412, "learning_rate": 4.425578144985455e-06, "loss": 0.7846, "step": 6742 }, { "epoch": 0.4871493850127332, "grad_norm": 5.833440652495867, "learning_rate": 4.425391584108757e-06, "loss": 0.9238, "step": 6743 }, { "epoch": 0.48722163021294274, "grad_norm": 6.35200842386451, "learning_rate": 4.425204996874942e-06, "loss": 0.9115, "step": 6744 }, { "epoch": 0.4872938754131522, "grad_norm": 6.251244993186857, "learning_rate": 4.425018383286566e-06, "loss": 0.834, "step": 6745 }, { "epoch": 0.48736612061336176, "grad_norm": 6.711461571868237, "learning_rate": 4.424831743346182e-06, "loss": 0.8804, "step": 6746 }, { "epoch": 0.48743836581357125, "grad_norm": 8.528842684194514, "learning_rate": 4.424645077056347e-06, "loss": 0.9378, "step": 6747 }, { "epoch": 0.4875106110137808, "grad_norm": 6.96021380874379, "learning_rate": 4.4244583844196154e-06, "loss": 0.7874, "step": 6748 }, { "epoch": 0.4875828562139903, "grad_norm": 7.5635294568235985, "learning_rate": 4.424271665438542e-06, "loss": 1.0115, "step": 6749 }, { "epoch": 0.4876551014141998, "grad_norm": 6.137719784781164, "learning_rate": 4.4240849201156844e-06, "loss": 0.8074, "step": 6750 }, { "epoch": 0.4877273466144093, "grad_norm": 8.687133342223222, "learning_rate": 4.423898148453597e-06, "loss": 0.8541, "step": 6751 }, { "epoch": 0.4877995918146188, "grad_norm": 6.8755268241864504, "learning_rate": 4.423711350454839e-06, "loss": 0.8498, "step": 6752 }, { "epoch": 0.48787183701482834, "grad_norm": 6.19576228121781, "learning_rate": 4.4235245261219654e-06, "loss": 0.8648, "step": 6753 }, { "epoch": 0.4879440822150378, "grad_norm": 7.425767379820029, "learning_rate": 4.423337675457535e-06, "loss": 0.9117, "step": 6754 }, { "epoch": 0.48801632741524736, "grad_norm": 6.83186318314577, "learning_rate": 4.423150798464105e-06, "loss": 0.9439, "step": 6755 }, { "epoch": 0.48808857261545685, "grad_norm": 5.91038634765651, "learning_rate": 4.422963895144234e-06, "loss": 0.8176, "step": 6756 }, { "epoch": 0.4881608178156664, "grad_norm": 8.017882388029975, "learning_rate": 4.42277696550048e-06, "loss": 0.9454, "step": 6757 }, { "epoch": 0.4882330630158759, "grad_norm": 6.67967036730259, "learning_rate": 4.422590009535404e-06, "loss": 0.8942, "step": 6758 }, { "epoch": 0.4883053082160854, "grad_norm": 7.291384475333794, "learning_rate": 4.4224030272515615e-06, "loss": 1.0277, "step": 6759 }, { "epoch": 0.4883775534162949, "grad_norm": 6.344293580467116, "learning_rate": 4.422216018651515e-06, "loss": 0.8549, "step": 6760 }, { "epoch": 0.4884497986165044, "grad_norm": 7.640345156796926, "learning_rate": 4.422028983737823e-06, "loss": 0.8922, "step": 6761 }, { "epoch": 0.48852204381671394, "grad_norm": 6.146083434452542, "learning_rate": 4.421841922513047e-06, "loss": 0.8474, "step": 6762 }, { "epoch": 0.4885942890169234, "grad_norm": 5.6756085174593425, "learning_rate": 4.421654834979748e-06, "loss": 0.9193, "step": 6763 }, { "epoch": 0.48866653421713296, "grad_norm": 6.4649709083982385, "learning_rate": 4.4214677211404855e-06, "loss": 0.8231, "step": 6764 }, { "epoch": 0.48873877941734245, "grad_norm": 5.99931935582315, "learning_rate": 4.421280580997822e-06, "loss": 0.9123, "step": 6765 }, { "epoch": 0.488811024617552, "grad_norm": 6.58490061714642, "learning_rate": 4.4210934145543195e-06, "loss": 0.8517, "step": 6766 }, { "epoch": 0.4888832698177615, "grad_norm": 5.922250217437579, "learning_rate": 4.42090622181254e-06, "loss": 0.8232, "step": 6767 }, { "epoch": 0.488955515017971, "grad_norm": 5.772458211517078, "learning_rate": 4.4207190027750444e-06, "loss": 0.8597, "step": 6768 }, { "epoch": 0.4890277602181805, "grad_norm": 8.241219211487877, "learning_rate": 4.4205317574443986e-06, "loss": 0.885, "step": 6769 }, { "epoch": 0.48910000541839, "grad_norm": 5.978619310089953, "learning_rate": 4.420344485823164e-06, "loss": 0.8542, "step": 6770 }, { "epoch": 0.48917225061859954, "grad_norm": 6.745419396213296, "learning_rate": 4.420157187913904e-06, "loss": 0.9047, "step": 6771 }, { "epoch": 0.489244495818809, "grad_norm": 6.112176979438547, "learning_rate": 4.419969863719182e-06, "loss": 0.8796, "step": 6772 }, { "epoch": 0.48931674101901856, "grad_norm": 6.326110066944897, "learning_rate": 4.419782513241565e-06, "loss": 0.8915, "step": 6773 }, { "epoch": 0.48938898621922805, "grad_norm": 5.95882689381387, "learning_rate": 4.419595136483615e-06, "loss": 0.9363, "step": 6774 }, { "epoch": 0.4894612314194376, "grad_norm": 6.600920480060491, "learning_rate": 4.419407733447899e-06, "loss": 0.8146, "step": 6775 }, { "epoch": 0.4895334766196471, "grad_norm": 7.418440817325178, "learning_rate": 4.41922030413698e-06, "loss": 0.8698, "step": 6776 }, { "epoch": 0.4896057218198566, "grad_norm": 5.355974636767092, "learning_rate": 4.419032848553426e-06, "loss": 0.9129, "step": 6777 }, { "epoch": 0.4896779670200661, "grad_norm": 7.417707248797125, "learning_rate": 4.418845366699803e-06, "loss": 0.9056, "step": 6778 }, { "epoch": 0.4897502122202756, "grad_norm": 6.123715947296529, "learning_rate": 4.418657858578677e-06, "loss": 0.9025, "step": 6779 }, { "epoch": 0.48982245742048514, "grad_norm": 7.321541698259695, "learning_rate": 4.418470324192613e-06, "loss": 0.9932, "step": 6780 }, { "epoch": 0.4898947026206946, "grad_norm": 7.381348286102747, "learning_rate": 4.418282763544181e-06, "loss": 0.9639, "step": 6781 }, { "epoch": 0.48996694782090416, "grad_norm": 6.185597136774796, "learning_rate": 4.418095176635947e-06, "loss": 0.8707, "step": 6782 }, { "epoch": 0.49003919302111365, "grad_norm": 5.362577943746719, "learning_rate": 4.4179075634704795e-06, "loss": 0.8793, "step": 6783 }, { "epoch": 0.4901114382213232, "grad_norm": 6.347764121679572, "learning_rate": 4.417719924050347e-06, "loss": 0.9553, "step": 6784 }, { "epoch": 0.4901836834215327, "grad_norm": 6.019336377949666, "learning_rate": 4.417532258378117e-06, "loss": 0.8959, "step": 6785 }, { "epoch": 0.4902559286217422, "grad_norm": 6.179035909567812, "learning_rate": 4.4173445664563595e-06, "loss": 0.9413, "step": 6786 }, { "epoch": 0.4903281738219517, "grad_norm": 7.721890324230419, "learning_rate": 4.417156848287644e-06, "loss": 0.8818, "step": 6787 }, { "epoch": 0.4904004190221612, "grad_norm": 4.8541842393461545, "learning_rate": 4.41696910387454e-06, "loss": 0.8295, "step": 6788 }, { "epoch": 0.49047266422237074, "grad_norm": 5.488696360246698, "learning_rate": 4.416781333219617e-06, "loss": 0.8642, "step": 6789 }, { "epoch": 0.4905449094225802, "grad_norm": 6.83080862282427, "learning_rate": 4.416593536325445e-06, "loss": 0.9461, "step": 6790 }, { "epoch": 0.49061715462278976, "grad_norm": 5.227440456669721, "learning_rate": 4.416405713194597e-06, "loss": 0.826, "step": 6791 }, { "epoch": 0.49068939982299925, "grad_norm": 6.772850635980738, "learning_rate": 4.416217863829642e-06, "loss": 0.8831, "step": 6792 }, { "epoch": 0.4907616450232088, "grad_norm": 5.868171457002538, "learning_rate": 4.416029988233152e-06, "loss": 0.8857, "step": 6793 }, { "epoch": 0.4908338902234183, "grad_norm": 8.130890105109476, "learning_rate": 4.4158420864077e-06, "loss": 0.847, "step": 6794 }, { "epoch": 0.49090613542362777, "grad_norm": 8.211846647009667, "learning_rate": 4.415654158355856e-06, "loss": 0.9166, "step": 6795 }, { "epoch": 0.4909783806238373, "grad_norm": 6.165446117685088, "learning_rate": 4.415466204080196e-06, "loss": 0.8587, "step": 6796 }, { "epoch": 0.4910506258240468, "grad_norm": 6.143856683311253, "learning_rate": 4.41527822358329e-06, "loss": 1.0003, "step": 6797 }, { "epoch": 0.49112287102425634, "grad_norm": 7.09817837194524, "learning_rate": 4.415090216867712e-06, "loss": 0.9167, "step": 6798 }, { "epoch": 0.4911951162244658, "grad_norm": 7.399614540577171, "learning_rate": 4.4149021839360365e-06, "loss": 0.8473, "step": 6799 }, { "epoch": 0.49126736142467536, "grad_norm": 6.5576508544603005, "learning_rate": 4.414714124790837e-06, "loss": 0.848, "step": 6800 }, { "epoch": 0.49133960662488485, "grad_norm": 5.7932258903862035, "learning_rate": 4.414526039434687e-06, "loss": 0.874, "step": 6801 }, { "epoch": 0.4914118518250944, "grad_norm": 9.079702742496025, "learning_rate": 4.4143379278701615e-06, "loss": 0.9104, "step": 6802 }, { "epoch": 0.4914840970253039, "grad_norm": 6.273077207411565, "learning_rate": 4.4141497900998374e-06, "loss": 0.8947, "step": 6803 }, { "epoch": 0.49155634222551337, "grad_norm": 5.474850287633176, "learning_rate": 4.413961626126288e-06, "loss": 0.8295, "step": 6804 }, { "epoch": 0.4916285874257229, "grad_norm": 6.787613234444797, "learning_rate": 4.413773435952092e-06, "loss": 0.8911, "step": 6805 }, { "epoch": 0.4917008326259324, "grad_norm": 5.314632391718777, "learning_rate": 4.413585219579821e-06, "loss": 0.9056, "step": 6806 }, { "epoch": 0.49177307782614194, "grad_norm": 6.76135513853001, "learning_rate": 4.413396977012055e-06, "loss": 0.8994, "step": 6807 }, { "epoch": 0.4918453230263514, "grad_norm": 8.563534862047103, "learning_rate": 4.413208708251371e-06, "loss": 0.9302, "step": 6808 }, { "epoch": 0.49191756822656096, "grad_norm": 8.21554677061937, "learning_rate": 4.4130204133003445e-06, "loss": 0.8827, "step": 6809 }, { "epoch": 0.49198981342677045, "grad_norm": 5.930019766403114, "learning_rate": 4.412832092161554e-06, "loss": 0.8729, "step": 6810 }, { "epoch": 0.49206205862698, "grad_norm": 5.825454550104398, "learning_rate": 4.412643744837578e-06, "loss": 0.8508, "step": 6811 }, { "epoch": 0.4921343038271895, "grad_norm": 6.492250077259334, "learning_rate": 4.412455371330994e-06, "loss": 0.9005, "step": 6812 }, { "epoch": 0.49220654902739897, "grad_norm": 6.864025164666986, "learning_rate": 4.41226697164438e-06, "loss": 0.9436, "step": 6813 }, { "epoch": 0.4922787942276085, "grad_norm": 5.685039616885413, "learning_rate": 4.4120785457803165e-06, "loss": 0.8036, "step": 6814 }, { "epoch": 0.492351039427818, "grad_norm": 4.696357191911987, "learning_rate": 4.411890093741382e-06, "loss": 0.8203, "step": 6815 }, { "epoch": 0.49242328462802754, "grad_norm": 5.901522566034304, "learning_rate": 4.411701615530157e-06, "loss": 0.8386, "step": 6816 }, { "epoch": 0.492495529828237, "grad_norm": 7.622059739842924, "learning_rate": 4.411513111149222e-06, "loss": 0.8149, "step": 6817 }, { "epoch": 0.49256777502844656, "grad_norm": 8.354714493773018, "learning_rate": 4.411324580601155e-06, "loss": 0.8629, "step": 6818 }, { "epoch": 0.49264002022865605, "grad_norm": 5.559505728288361, "learning_rate": 4.411136023888539e-06, "loss": 0.8543, "step": 6819 }, { "epoch": 0.4927122654288656, "grad_norm": 7.374130456385551, "learning_rate": 4.410947441013956e-06, "loss": 0.9366, "step": 6820 }, { "epoch": 0.4927845106290751, "grad_norm": 6.0957372310604665, "learning_rate": 4.410758831979985e-06, "loss": 0.9117, "step": 6821 }, { "epoch": 0.49285675582928457, "grad_norm": 6.958478945124166, "learning_rate": 4.41057019678921e-06, "loss": 0.8542, "step": 6822 }, { "epoch": 0.4929290010294941, "grad_norm": 5.8140463464108105, "learning_rate": 4.410381535444212e-06, "loss": 0.899, "step": 6823 }, { "epoch": 0.4930012462297036, "grad_norm": 7.917032601448748, "learning_rate": 4.410192847947574e-06, "loss": 0.9571, "step": 6824 }, { "epoch": 0.49307349142991314, "grad_norm": 6.438865146514563, "learning_rate": 4.410004134301879e-06, "loss": 0.9269, "step": 6825 }, { "epoch": 0.4931457366301226, "grad_norm": 7.302363224805528, "learning_rate": 4.409815394509711e-06, "loss": 0.9521, "step": 6826 }, { "epoch": 0.49321798183033216, "grad_norm": 7.0835788759046165, "learning_rate": 4.409626628573654e-06, "loss": 0.8655, "step": 6827 }, { "epoch": 0.49329022703054165, "grad_norm": 7.307293464803622, "learning_rate": 4.4094378364962886e-06, "loss": 0.8839, "step": 6828 }, { "epoch": 0.4933624722307512, "grad_norm": 6.898276857469184, "learning_rate": 4.409249018280204e-06, "loss": 0.9064, "step": 6829 }, { "epoch": 0.4934347174309607, "grad_norm": 5.331968828331962, "learning_rate": 4.4090601739279815e-06, "loss": 0.8177, "step": 6830 }, { "epoch": 0.49350696263117017, "grad_norm": 5.900273281169311, "learning_rate": 4.408871303442208e-06, "loss": 0.8224, "step": 6831 }, { "epoch": 0.4935792078313797, "grad_norm": 6.708910741499126, "learning_rate": 4.408682406825469e-06, "loss": 0.8275, "step": 6832 }, { "epoch": 0.4936514530315892, "grad_norm": 5.99746364390082, "learning_rate": 4.408493484080349e-06, "loss": 0.8353, "step": 6833 }, { "epoch": 0.49372369823179874, "grad_norm": 6.556731095313426, "learning_rate": 4.4083045352094355e-06, "loss": 0.8841, "step": 6834 }, { "epoch": 0.4937959434320082, "grad_norm": 8.569522400482029, "learning_rate": 4.408115560215314e-06, "loss": 0.9538, "step": 6835 }, { "epoch": 0.49386818863221776, "grad_norm": 6.412561201431872, "learning_rate": 4.4079265591005725e-06, "loss": 0.9109, "step": 6836 }, { "epoch": 0.49394043383242725, "grad_norm": 6.884847073986301, "learning_rate": 4.407737531867798e-06, "loss": 0.9438, "step": 6837 }, { "epoch": 0.4940126790326368, "grad_norm": 5.753066199073149, "learning_rate": 4.407548478519578e-06, "loss": 0.7883, "step": 6838 }, { "epoch": 0.4940849242328463, "grad_norm": 8.062836869173669, "learning_rate": 4.4073593990585005e-06, "loss": 0.8667, "step": 6839 }, { "epoch": 0.49415716943305577, "grad_norm": 5.05510595555858, "learning_rate": 4.407170293487153e-06, "loss": 0.8769, "step": 6840 }, { "epoch": 0.4942294146332653, "grad_norm": 5.3346841214164655, "learning_rate": 4.406981161808126e-06, "loss": 0.7886, "step": 6841 }, { "epoch": 0.4943016598334748, "grad_norm": 7.518447314326978, "learning_rate": 4.406792004024007e-06, "loss": 0.8959, "step": 6842 }, { "epoch": 0.49437390503368434, "grad_norm": 6.966425349727097, "learning_rate": 4.406602820137385e-06, "loss": 0.9125, "step": 6843 }, { "epoch": 0.4944461502338938, "grad_norm": 8.18475948215717, "learning_rate": 4.406413610150852e-06, "loss": 0.9365, "step": 6844 }, { "epoch": 0.49451839543410336, "grad_norm": 6.3942350529865415, "learning_rate": 4.406224374066998e-06, "loss": 0.8499, "step": 6845 }, { "epoch": 0.49459064063431285, "grad_norm": 7.677270798139075, "learning_rate": 4.40603511188841e-06, "loss": 0.8627, "step": 6846 }, { "epoch": 0.4946628858345224, "grad_norm": 6.213125167492107, "learning_rate": 4.405845823617683e-06, "loss": 0.9048, "step": 6847 }, { "epoch": 0.4947351310347319, "grad_norm": 7.3012365992128725, "learning_rate": 4.405656509257406e-06, "loss": 0.9067, "step": 6848 }, { "epoch": 0.49480737623494137, "grad_norm": 6.236377059978695, "learning_rate": 4.405467168810172e-06, "loss": 0.8654, "step": 6849 }, { "epoch": 0.4948796214351509, "grad_norm": 6.50516041070427, "learning_rate": 4.40527780227857e-06, "loss": 0.945, "step": 6850 }, { "epoch": 0.4949518666353604, "grad_norm": 6.71822876793581, "learning_rate": 4.4050884096651955e-06, "loss": 0.8254, "step": 6851 }, { "epoch": 0.49502411183556994, "grad_norm": 7.8162390954609435, "learning_rate": 4.404898990972639e-06, "loss": 0.9859, "step": 6852 }, { "epoch": 0.4950963570357794, "grad_norm": 10.936837661307091, "learning_rate": 4.404709546203495e-06, "loss": 0.9774, "step": 6853 }, { "epoch": 0.49516860223598896, "grad_norm": 7.734671908758174, "learning_rate": 4.404520075360357e-06, "loss": 0.8629, "step": 6854 }, { "epoch": 0.49524084743619845, "grad_norm": 7.066311778148209, "learning_rate": 4.404330578445816e-06, "loss": 0.8519, "step": 6855 }, { "epoch": 0.495313092636408, "grad_norm": 8.340345509642562, "learning_rate": 4.40414105546247e-06, "loss": 0.9287, "step": 6856 }, { "epoch": 0.4953853378366175, "grad_norm": 11.933157725258576, "learning_rate": 4.4039515064129105e-06, "loss": 0.8801, "step": 6857 }, { "epoch": 0.49545758303682697, "grad_norm": 7.978280624981615, "learning_rate": 4.403761931299733e-06, "loss": 0.9138, "step": 6858 }, { "epoch": 0.4955298282370365, "grad_norm": 5.9614484775463294, "learning_rate": 4.403572330125533e-06, "loss": 0.8201, "step": 6859 }, { "epoch": 0.495602073437246, "grad_norm": 8.154554870544361, "learning_rate": 4.403382702892905e-06, "loss": 0.9439, "step": 6860 }, { "epoch": 0.49567431863745554, "grad_norm": 6.1424213140755315, "learning_rate": 4.403193049604447e-06, "loss": 0.9249, "step": 6861 }, { "epoch": 0.495746563837665, "grad_norm": 9.655781854929225, "learning_rate": 4.403003370262754e-06, "loss": 0.8806, "step": 6862 }, { "epoch": 0.49581880903787456, "grad_norm": 6.034468192472871, "learning_rate": 4.402813664870421e-06, "loss": 0.8597, "step": 6863 }, { "epoch": 0.49589105423808405, "grad_norm": 5.777000061087982, "learning_rate": 4.402623933430048e-06, "loss": 0.8379, "step": 6864 }, { "epoch": 0.4959632994382936, "grad_norm": 7.228844920390577, "learning_rate": 4.4024341759442295e-06, "loss": 0.929, "step": 6865 }, { "epoch": 0.4960355446385031, "grad_norm": 9.20541607199771, "learning_rate": 4.402244392415565e-06, "loss": 0.9212, "step": 6866 }, { "epoch": 0.49610778983871257, "grad_norm": 7.362879734576672, "learning_rate": 4.402054582846651e-06, "loss": 0.8654, "step": 6867 }, { "epoch": 0.4961800350389221, "grad_norm": 10.369645908825145, "learning_rate": 4.401864747240087e-06, "loss": 0.9, "step": 6868 }, { "epoch": 0.4962522802391316, "grad_norm": 6.3725521399488905, "learning_rate": 4.401674885598471e-06, "loss": 0.9179, "step": 6869 }, { "epoch": 0.49632452543934114, "grad_norm": 5.802611769257578, "learning_rate": 4.401484997924403e-06, "loss": 0.9305, "step": 6870 }, { "epoch": 0.4963967706395506, "grad_norm": 7.346233840672763, "learning_rate": 4.401295084220482e-06, "loss": 0.8811, "step": 6871 }, { "epoch": 0.49646901583976016, "grad_norm": 5.909442018868214, "learning_rate": 4.401105144489307e-06, "loss": 0.9138, "step": 6872 }, { "epoch": 0.49654126103996965, "grad_norm": 6.406726949661546, "learning_rate": 4.400915178733478e-06, "loss": 0.8442, "step": 6873 }, { "epoch": 0.4966135062401792, "grad_norm": 6.955018340798937, "learning_rate": 4.400725186955597e-06, "loss": 0.9645, "step": 6874 }, { "epoch": 0.4966857514403887, "grad_norm": 7.772390691921701, "learning_rate": 4.400535169158264e-06, "loss": 0.974, "step": 6875 }, { "epoch": 0.49675799664059817, "grad_norm": 7.182934181588416, "learning_rate": 4.40034512534408e-06, "loss": 0.8561, "step": 6876 }, { "epoch": 0.4968302418408077, "grad_norm": 5.762182515763511, "learning_rate": 4.400155055515647e-06, "loss": 0.8156, "step": 6877 }, { "epoch": 0.4969024870410172, "grad_norm": 6.461004217649783, "learning_rate": 4.399964959675567e-06, "loss": 0.9253, "step": 6878 }, { "epoch": 0.49697473224122674, "grad_norm": 7.435551235797907, "learning_rate": 4.3997748378264415e-06, "loss": 0.9684, "step": 6879 }, { "epoch": 0.4970469774414362, "grad_norm": 7.345299090689871, "learning_rate": 4.3995846899708734e-06, "loss": 0.8631, "step": 6880 }, { "epoch": 0.49711922264164576, "grad_norm": 8.239498043141323, "learning_rate": 4.399394516111466e-06, "loss": 0.96, "step": 6881 }, { "epoch": 0.49719146784185525, "grad_norm": 7.107311846496509, "learning_rate": 4.399204316250823e-06, "loss": 0.8969, "step": 6882 }, { "epoch": 0.4972637130420648, "grad_norm": 6.208357365026195, "learning_rate": 4.399014090391546e-06, "loss": 0.8763, "step": 6883 }, { "epoch": 0.4973359582422743, "grad_norm": 6.6509788309074995, "learning_rate": 4.398823838536242e-06, "loss": 0.974, "step": 6884 }, { "epoch": 0.49740820344248377, "grad_norm": 7.412578727728657, "learning_rate": 4.398633560687513e-06, "loss": 0.8613, "step": 6885 }, { "epoch": 0.4974804486426933, "grad_norm": 8.015318985538967, "learning_rate": 4.398443256847965e-06, "loss": 0.8943, "step": 6886 }, { "epoch": 0.4975526938429028, "grad_norm": 8.603474611494184, "learning_rate": 4.398252927020203e-06, "loss": 0.9716, "step": 6887 }, { "epoch": 0.49762493904311234, "grad_norm": 6.9925590203324015, "learning_rate": 4.398062571206833e-06, "loss": 0.9396, "step": 6888 }, { "epoch": 0.4976971842433218, "grad_norm": 7.602065066811076, "learning_rate": 4.39787218941046e-06, "loss": 0.9078, "step": 6889 }, { "epoch": 0.49776942944353136, "grad_norm": 5.557055434044715, "learning_rate": 4.39768178163369e-06, "loss": 0.8748, "step": 6890 }, { "epoch": 0.49784167464374085, "grad_norm": 8.329338743929965, "learning_rate": 4.3974913478791294e-06, "loss": 0.9315, "step": 6891 }, { "epoch": 0.4979139198439504, "grad_norm": 7.341871695049721, "learning_rate": 4.397300888149386e-06, "loss": 0.8618, "step": 6892 }, { "epoch": 0.4979861650441599, "grad_norm": 9.319434944017864, "learning_rate": 4.397110402447067e-06, "loss": 0.8358, "step": 6893 }, { "epoch": 0.49805841024436937, "grad_norm": 7.258636592073008, "learning_rate": 4.396919890774779e-06, "loss": 0.9757, "step": 6894 }, { "epoch": 0.4981306554445789, "grad_norm": 7.678377274501434, "learning_rate": 4.39672935313513e-06, "loss": 0.924, "step": 6895 }, { "epoch": 0.4982029006447884, "grad_norm": 6.703268409742137, "learning_rate": 4.39653878953073e-06, "loss": 0.8243, "step": 6896 }, { "epoch": 0.49827514584499794, "grad_norm": 8.347183264121151, "learning_rate": 4.396348199964187e-06, "loss": 0.8418, "step": 6897 }, { "epoch": 0.4983473910452074, "grad_norm": 7.433979900391433, "learning_rate": 4.396157584438107e-06, "loss": 0.919, "step": 6898 }, { "epoch": 0.49841963624541696, "grad_norm": 6.443751586558667, "learning_rate": 4.395966942955105e-06, "loss": 0.8689, "step": 6899 }, { "epoch": 0.49849188144562645, "grad_norm": 6.682437584924831, "learning_rate": 4.395776275517786e-06, "loss": 0.9429, "step": 6900 }, { "epoch": 0.498564126645836, "grad_norm": 6.461101340784609, "learning_rate": 4.395585582128762e-06, "loss": 0.884, "step": 6901 }, { "epoch": 0.4986363718460455, "grad_norm": 6.694556960360246, "learning_rate": 4.395394862790643e-06, "loss": 0.8094, "step": 6902 }, { "epoch": 0.49870861704625497, "grad_norm": 11.271115979348357, "learning_rate": 4.395204117506041e-06, "loss": 0.9055, "step": 6903 }, { "epoch": 0.4987808622464645, "grad_norm": 10.724243535749386, "learning_rate": 4.395013346277565e-06, "loss": 0.8605, "step": 6904 }, { "epoch": 0.498853107446674, "grad_norm": 7.317174737737108, "learning_rate": 4.394822549107828e-06, "loss": 0.9193, "step": 6905 }, { "epoch": 0.49892535264688354, "grad_norm": 5.65324026720949, "learning_rate": 4.394631725999442e-06, "loss": 0.8705, "step": 6906 }, { "epoch": 0.498997597847093, "grad_norm": 8.621436668389485, "learning_rate": 4.394440876955018e-06, "loss": 0.8867, "step": 6907 }, { "epoch": 0.49906984304730256, "grad_norm": 10.436369874727006, "learning_rate": 4.39425000197717e-06, "loss": 0.9431, "step": 6908 }, { "epoch": 0.49914208824751205, "grad_norm": 9.400090935449857, "learning_rate": 4.39405910106851e-06, "loss": 0.9361, "step": 6909 }, { "epoch": 0.4992143334477216, "grad_norm": 7.700581902648936, "learning_rate": 4.393868174231651e-06, "loss": 0.8319, "step": 6910 }, { "epoch": 0.4992865786479311, "grad_norm": 5.674280586520246, "learning_rate": 4.393677221469208e-06, "loss": 0.8573, "step": 6911 }, { "epoch": 0.49935882384814057, "grad_norm": 6.79656125802468, "learning_rate": 4.3934862427837945e-06, "loss": 0.9799, "step": 6912 }, { "epoch": 0.4994310690483501, "grad_norm": 7.782044151869136, "learning_rate": 4.393295238178023e-06, "loss": 0.9293, "step": 6913 }, { "epoch": 0.4995033142485596, "grad_norm": 7.540467758847377, "learning_rate": 4.393104207654512e-06, "loss": 0.9122, "step": 6914 }, { "epoch": 0.49957555944876914, "grad_norm": 7.754810931952631, "learning_rate": 4.392913151215872e-06, "loss": 0.8475, "step": 6915 }, { "epoch": 0.4996478046489786, "grad_norm": 6.20743655015256, "learning_rate": 4.392722068864722e-06, "loss": 0.8711, "step": 6916 }, { "epoch": 0.49972004984918816, "grad_norm": 5.388085162253678, "learning_rate": 4.3925309606036765e-06, "loss": 0.8052, "step": 6917 }, { "epoch": 0.49979229504939765, "grad_norm": 5.415786710813258, "learning_rate": 4.392339826435351e-06, "loss": 0.8617, "step": 6918 }, { "epoch": 0.4998645402496072, "grad_norm": 9.222734010684464, "learning_rate": 4.392148666362363e-06, "loss": 0.984, "step": 6919 }, { "epoch": 0.4999367854498167, "grad_norm": 8.38242841909025, "learning_rate": 4.391957480387329e-06, "loss": 0.8758, "step": 6920 }, { "epoch": 0.5000090306500262, "grad_norm": 5.652109221659154, "learning_rate": 4.391766268512866e-06, "loss": 0.812, "step": 6921 }, { "epoch": 0.5000812758502357, "grad_norm": 6.648863800044559, "learning_rate": 4.391575030741592e-06, "loss": 0.8428, "step": 6922 }, { "epoch": 0.5001535210504452, "grad_norm": 6.0846310977497335, "learning_rate": 4.391383767076124e-06, "loss": 0.9815, "step": 6923 }, { "epoch": 0.5002257662506547, "grad_norm": 6.448927726625736, "learning_rate": 4.3911924775190805e-06, "loss": 0.8512, "step": 6924 }, { "epoch": 0.5002980114508643, "grad_norm": 5.724476099821856, "learning_rate": 4.391001162073081e-06, "loss": 0.8496, "step": 6925 }, { "epoch": 0.5003702566510737, "grad_norm": 6.404683437937081, "learning_rate": 4.390809820740744e-06, "loss": 0.8255, "step": 6926 }, { "epoch": 0.5004425018512833, "grad_norm": 5.974796767349733, "learning_rate": 4.390618453524688e-06, "loss": 0.944, "step": 6927 }, { "epoch": 0.5005147470514928, "grad_norm": 7.776881450897806, "learning_rate": 4.390427060427534e-06, "loss": 0.8524, "step": 6928 }, { "epoch": 0.5005869922517022, "grad_norm": 6.398706675027158, "learning_rate": 4.3902356414519005e-06, "loss": 0.8422, "step": 6929 }, { "epoch": 0.5006592374519118, "grad_norm": 7.206504575098664, "learning_rate": 4.390044196600409e-06, "loss": 0.8835, "step": 6930 }, { "epoch": 0.5007314826521213, "grad_norm": 7.383609466326121, "learning_rate": 4.389852725875681e-06, "loss": 0.9299, "step": 6931 }, { "epoch": 0.5008037278523308, "grad_norm": 6.496376788406991, "learning_rate": 4.389661229280335e-06, "loss": 0.8886, "step": 6932 }, { "epoch": 0.5008759730525403, "grad_norm": 6.872365828363433, "learning_rate": 4.389469706816995e-06, "loss": 0.8547, "step": 6933 }, { "epoch": 0.5009482182527498, "grad_norm": 5.635072463753499, "learning_rate": 4.389278158488282e-06, "loss": 0.8267, "step": 6934 }, { "epoch": 0.5010204634529594, "grad_norm": 5.993185305919047, "learning_rate": 4.389086584296817e-06, "loss": 0.9444, "step": 6935 }, { "epoch": 0.5010927086531689, "grad_norm": 7.452193163953875, "learning_rate": 4.3888949842452246e-06, "loss": 0.9232, "step": 6936 }, { "epoch": 0.5011649538533783, "grad_norm": 6.3329083484141835, "learning_rate": 4.388703358336125e-06, "loss": 0.8767, "step": 6937 }, { "epoch": 0.5012371990535879, "grad_norm": 7.826196490087943, "learning_rate": 4.388511706572145e-06, "loss": 0.9234, "step": 6938 }, { "epoch": 0.5013094442537974, "grad_norm": 6.609748414946421, "learning_rate": 4.388320028955904e-06, "loss": 0.9129, "step": 6939 }, { "epoch": 0.5013816894540069, "grad_norm": 4.976806826260497, "learning_rate": 4.388128325490029e-06, "loss": 0.8158, "step": 6940 }, { "epoch": 0.5014539346542164, "grad_norm": 7.709610319469861, "learning_rate": 4.387936596177143e-06, "loss": 0.9236, "step": 6941 }, { "epoch": 0.5015261798544259, "grad_norm": 6.972884931474068, "learning_rate": 4.387744841019871e-06, "loss": 0.9652, "step": 6942 }, { "epoch": 0.5015984250546355, "grad_norm": 8.699863336300893, "learning_rate": 4.387553060020838e-06, "loss": 0.9957, "step": 6943 }, { "epoch": 0.5016706702548449, "grad_norm": 7.482115211266662, "learning_rate": 4.387361253182669e-06, "loss": 0.8312, "step": 6944 }, { "epoch": 0.5017429154550545, "grad_norm": 6.398150129001522, "learning_rate": 4.38716942050799e-06, "loss": 0.8934, "step": 6945 }, { "epoch": 0.501815160655264, "grad_norm": 6.128740375284834, "learning_rate": 4.386977561999427e-06, "loss": 0.9033, "step": 6946 }, { "epoch": 0.5018874058554734, "grad_norm": 8.342905523305461, "learning_rate": 4.386785677659606e-06, "loss": 0.894, "step": 6947 }, { "epoch": 0.501959651055683, "grad_norm": 6.884172458933313, "learning_rate": 4.386593767491154e-06, "loss": 0.9149, "step": 6948 }, { "epoch": 0.5020318962558925, "grad_norm": 8.891601079849657, "learning_rate": 4.386401831496698e-06, "loss": 0.8203, "step": 6949 }, { "epoch": 0.502104141456102, "grad_norm": 6.809842492521337, "learning_rate": 4.386209869678867e-06, "loss": 0.8643, "step": 6950 }, { "epoch": 0.5021763866563115, "grad_norm": 5.778659879562225, "learning_rate": 4.386017882040286e-06, "loss": 0.881, "step": 6951 }, { "epoch": 0.502248631856521, "grad_norm": 7.390024689051123, "learning_rate": 4.385825868583585e-06, "loss": 0.8683, "step": 6952 }, { "epoch": 0.5023208770567306, "grad_norm": 6.5260131867790205, "learning_rate": 4.385633829311392e-06, "loss": 0.8349, "step": 6953 }, { "epoch": 0.5023931222569401, "grad_norm": 6.36786087313545, "learning_rate": 4.385441764226337e-06, "loss": 0.7764, "step": 6954 }, { "epoch": 0.5024653674571495, "grad_norm": 7.646469408510257, "learning_rate": 4.385249673331047e-06, "loss": 0.9013, "step": 6955 }, { "epoch": 0.5025376126573591, "grad_norm": 5.5040630591683595, "learning_rate": 4.385057556628153e-06, "loss": 0.8197, "step": 6956 }, { "epoch": 0.5026098578575686, "grad_norm": 6.967006585013053, "learning_rate": 4.384865414120285e-06, "loss": 0.9262, "step": 6957 }, { "epoch": 0.502682103057778, "grad_norm": 6.564813405949299, "learning_rate": 4.384673245810073e-06, "loss": 0.8728, "step": 6958 }, { "epoch": 0.5027543482579876, "grad_norm": 8.67421711403997, "learning_rate": 4.3844810517001465e-06, "loss": 0.8898, "step": 6959 }, { "epoch": 0.5028265934581971, "grad_norm": 6.509529171279462, "learning_rate": 4.384288831793138e-06, "loss": 0.8841, "step": 6960 }, { "epoch": 0.5028988386584067, "grad_norm": 5.929011330230691, "learning_rate": 4.3840965860916805e-06, "loss": 0.868, "step": 6961 }, { "epoch": 0.5029710838586161, "grad_norm": 6.0895386840283185, "learning_rate": 4.3839043145984015e-06, "loss": 0.9412, "step": 6962 }, { "epoch": 0.5030433290588257, "grad_norm": 8.507209413169027, "learning_rate": 4.383712017315936e-06, "loss": 0.8656, "step": 6963 }, { "epoch": 0.5031155742590352, "grad_norm": 6.265541589804845, "learning_rate": 4.383519694246915e-06, "loss": 0.8741, "step": 6964 }, { "epoch": 0.5031878194592446, "grad_norm": 6.781932603083259, "learning_rate": 4.383327345393973e-06, "loss": 0.948, "step": 6965 }, { "epoch": 0.5032600646594542, "grad_norm": 7.230451867214217, "learning_rate": 4.383134970759741e-06, "loss": 0.8576, "step": 6966 }, { "epoch": 0.5033323098596637, "grad_norm": 5.639496666653704, "learning_rate": 4.382942570346853e-06, "loss": 0.7842, "step": 6967 }, { "epoch": 0.5034045550598732, "grad_norm": 5.627170906764822, "learning_rate": 4.382750144157945e-06, "loss": 0.8304, "step": 6968 }, { "epoch": 0.5034768002600827, "grad_norm": 6.175456338306354, "learning_rate": 4.382557692195648e-06, "loss": 0.8348, "step": 6969 }, { "epoch": 0.5035490454602922, "grad_norm": 7.688386943483309, "learning_rate": 4.382365214462598e-06, "loss": 0.8531, "step": 6970 }, { "epoch": 0.5036212906605018, "grad_norm": 6.782372447697568, "learning_rate": 4.3821727109614295e-06, "loss": 0.898, "step": 6971 }, { "epoch": 0.5036935358607113, "grad_norm": 5.532809107816314, "learning_rate": 4.3819801816947785e-06, "loss": 0.8661, "step": 6972 }, { "epoch": 0.5037657810609207, "grad_norm": 5.403434593637166, "learning_rate": 4.38178762666528e-06, "loss": 0.9564, "step": 6973 }, { "epoch": 0.5038380262611303, "grad_norm": 5.604822207922556, "learning_rate": 4.381595045875571e-06, "loss": 0.8037, "step": 6974 }, { "epoch": 0.5039102714613398, "grad_norm": 6.886069801120826, "learning_rate": 4.381402439328285e-06, "loss": 0.8877, "step": 6975 }, { "epoch": 0.5039825166615493, "grad_norm": 7.4156305968162615, "learning_rate": 4.381209807026061e-06, "loss": 0.9497, "step": 6976 }, { "epoch": 0.5040547618617588, "grad_norm": 5.597528130076006, "learning_rate": 4.381017148971536e-06, "loss": 0.8537, "step": 6977 }, { "epoch": 0.5041270070619683, "grad_norm": 6.670613010952931, "learning_rate": 4.380824465167346e-06, "loss": 0.9299, "step": 6978 }, { "epoch": 0.5041992522621779, "grad_norm": 6.178563610169609, "learning_rate": 4.380631755616129e-06, "loss": 0.8785, "step": 6979 }, { "epoch": 0.5042714974623873, "grad_norm": 6.845471030748176, "learning_rate": 4.380439020320525e-06, "loss": 0.9619, "step": 6980 }, { "epoch": 0.5043437426625969, "grad_norm": 7.275858311310066, "learning_rate": 4.38024625928317e-06, "loss": 0.8834, "step": 6981 }, { "epoch": 0.5044159878628064, "grad_norm": 7.557948989377038, "learning_rate": 4.380053472506703e-06, "loss": 0.9666, "step": 6982 }, { "epoch": 0.5044882330630158, "grad_norm": 5.9294613680737065, "learning_rate": 4.379860659993765e-06, "loss": 0.8562, "step": 6983 }, { "epoch": 0.5045604782632254, "grad_norm": 5.479131468839932, "learning_rate": 4.379667821746994e-06, "loss": 0.9547, "step": 6984 }, { "epoch": 0.5046327234634349, "grad_norm": 6.810161504188779, "learning_rate": 4.3794749577690295e-06, "loss": 0.9053, "step": 6985 }, { "epoch": 0.5047049686636444, "grad_norm": 6.399027404793241, "learning_rate": 4.379282068062512e-06, "loss": 0.769, "step": 6986 }, { "epoch": 0.5047772138638539, "grad_norm": 7.3276838157605955, "learning_rate": 4.379089152630083e-06, "loss": 0.8447, "step": 6987 }, { "epoch": 0.5048494590640634, "grad_norm": 5.644466708649173, "learning_rate": 4.378896211474382e-06, "loss": 0.8923, "step": 6988 }, { "epoch": 0.504921704264273, "grad_norm": 6.751125383164571, "learning_rate": 4.378703244598051e-06, "loss": 0.8327, "step": 6989 }, { "epoch": 0.5049939494644825, "grad_norm": 6.685795647029446, "learning_rate": 4.37851025200373e-06, "loss": 0.9836, "step": 6990 }, { "epoch": 0.5050661946646919, "grad_norm": 6.875188928955908, "learning_rate": 4.378317233694064e-06, "loss": 0.9376, "step": 6991 }, { "epoch": 0.5051384398649015, "grad_norm": 9.008411291622037, "learning_rate": 4.378124189671693e-06, "loss": 0.9188, "step": 6992 }, { "epoch": 0.505210685065111, "grad_norm": 9.187737494273108, "learning_rate": 4.3779311199392595e-06, "loss": 0.8548, "step": 6993 }, { "epoch": 0.5052829302653205, "grad_norm": 6.292233708915096, "learning_rate": 4.3777380244994076e-06, "loss": 0.9179, "step": 6994 }, { "epoch": 0.50535517546553, "grad_norm": 6.557579012011925, "learning_rate": 4.37754490335478e-06, "loss": 0.8835, "step": 6995 }, { "epoch": 0.5054274206657395, "grad_norm": 5.844451372620345, "learning_rate": 4.37735175650802e-06, "loss": 0.8499, "step": 6996 }, { "epoch": 0.5054996658659491, "grad_norm": 6.4937567438231945, "learning_rate": 4.3771585839617725e-06, "loss": 0.7582, "step": 6997 }, { "epoch": 0.5055719110661585, "grad_norm": 7.020290395267614, "learning_rate": 4.376965385718682e-06, "loss": 0.9155, "step": 6998 }, { "epoch": 0.505644156266368, "grad_norm": 7.301902460387678, "learning_rate": 4.376772161781392e-06, "loss": 0.8953, "step": 6999 }, { "epoch": 0.5057164014665776, "grad_norm": 7.0987318921282245, "learning_rate": 4.376578912152549e-06, "loss": 0.805, "step": 7000 }, { "epoch": 0.505788646666787, "grad_norm": 5.669508763584489, "learning_rate": 4.376385636834797e-06, "loss": 0.796, "step": 7001 }, { "epoch": 0.5058608918669966, "grad_norm": 8.168434833964138, "learning_rate": 4.376192335830783e-06, "loss": 0.9204, "step": 7002 }, { "epoch": 0.5059331370672061, "grad_norm": 6.813216985367284, "learning_rate": 4.375999009143152e-06, "loss": 0.9142, "step": 7003 }, { "epoch": 0.5060053822674156, "grad_norm": 7.920470224158244, "learning_rate": 4.375805656774553e-06, "loss": 0.8685, "step": 7004 }, { "epoch": 0.5060776274676251, "grad_norm": 8.367433232706945, "learning_rate": 4.375612278727629e-06, "loss": 0.8903, "step": 7005 }, { "epoch": 0.5061498726678346, "grad_norm": 6.0863743675438275, "learning_rate": 4.375418875005031e-06, "loss": 0.83, "step": 7006 }, { "epoch": 0.5062221178680442, "grad_norm": 5.9642332872578, "learning_rate": 4.375225445609404e-06, "loss": 0.9878, "step": 7007 }, { "epoch": 0.5062943630682537, "grad_norm": 6.301351541534562, "learning_rate": 4.375031990543396e-06, "loss": 0.8208, "step": 7008 }, { "epoch": 0.5063666082684631, "grad_norm": 6.400803944638151, "learning_rate": 4.374838509809657e-06, "loss": 0.7924, "step": 7009 }, { "epoch": 0.5064388534686727, "grad_norm": 9.174021688997433, "learning_rate": 4.374645003410833e-06, "loss": 0.9051, "step": 7010 }, { "epoch": 0.5065110986688822, "grad_norm": 5.561177471542795, "learning_rate": 4.374451471349576e-06, "loss": 0.8258, "step": 7011 }, { "epoch": 0.5065833438690917, "grad_norm": 6.762535323581353, "learning_rate": 4.374257913628533e-06, "loss": 0.9028, "step": 7012 }, { "epoch": 0.5066555890693012, "grad_norm": 5.603554939246453, "learning_rate": 4.374064330250355e-06, "loss": 0.8179, "step": 7013 }, { "epoch": 0.5067278342695107, "grad_norm": 5.8095396973737365, "learning_rate": 4.373870721217691e-06, "loss": 0.8882, "step": 7014 }, { "epoch": 0.5068000794697203, "grad_norm": 5.333846166909107, "learning_rate": 4.373677086533191e-06, "loss": 0.8338, "step": 7015 }, { "epoch": 0.5068723246699297, "grad_norm": 7.363217527078378, "learning_rate": 4.373483426199508e-06, "loss": 0.914, "step": 7016 }, { "epoch": 0.5069445698701392, "grad_norm": 7.6907765918781985, "learning_rate": 4.373289740219291e-06, "loss": 0.8918, "step": 7017 }, { "epoch": 0.5070168150703488, "grad_norm": 6.172637660552947, "learning_rate": 4.3730960285951915e-06, "loss": 0.9713, "step": 7018 }, { "epoch": 0.5070890602705582, "grad_norm": 5.9885099383518, "learning_rate": 4.372902291329863e-06, "loss": 0.8277, "step": 7019 }, { "epoch": 0.5071613054707678, "grad_norm": 7.125239585729924, "learning_rate": 4.372708528425955e-06, "loss": 0.9035, "step": 7020 }, { "epoch": 0.5072335506709773, "grad_norm": 5.617767091860312, "learning_rate": 4.372514739886122e-06, "loss": 0.8004, "step": 7021 }, { "epoch": 0.5073057958711868, "grad_norm": 9.20841001821034, "learning_rate": 4.3723209257130154e-06, "loss": 0.8319, "step": 7022 }, { "epoch": 0.5073780410713963, "grad_norm": 7.497138431145694, "learning_rate": 4.37212708590929e-06, "loss": 0.8341, "step": 7023 }, { "epoch": 0.5074502862716058, "grad_norm": 6.742150157413968, "learning_rate": 4.371933220477598e-06, "loss": 0.8442, "step": 7024 }, { "epoch": 0.5075225314718154, "grad_norm": 6.016278754046119, "learning_rate": 4.371739329420593e-06, "loss": 0.8831, "step": 7025 }, { "epoch": 0.5075947766720249, "grad_norm": 5.657412920529059, "learning_rate": 4.371545412740931e-06, "loss": 0.879, "step": 7026 }, { "epoch": 0.5076670218722343, "grad_norm": 9.115973201322312, "learning_rate": 4.371351470441265e-06, "loss": 0.9002, "step": 7027 }, { "epoch": 0.5077392670724439, "grad_norm": 7.79679060223297, "learning_rate": 4.371157502524251e-06, "loss": 0.9576, "step": 7028 }, { "epoch": 0.5078115122726534, "grad_norm": 6.064100545809535, "learning_rate": 4.3709635089925425e-06, "loss": 0.8515, "step": 7029 }, { "epoch": 0.5078837574728629, "grad_norm": 5.5183396183593745, "learning_rate": 4.370769489848797e-06, "loss": 0.8282, "step": 7030 }, { "epoch": 0.5079560026730724, "grad_norm": 5.348435338894995, "learning_rate": 4.37057544509567e-06, "loss": 0.8978, "step": 7031 }, { "epoch": 0.5080282478732819, "grad_norm": 6.040553689803157, "learning_rate": 4.370381374735817e-06, "loss": 0.8923, "step": 7032 }, { "epoch": 0.5081004930734915, "grad_norm": 6.714721323509456, "learning_rate": 4.370187278771894e-06, "loss": 0.8762, "step": 7033 }, { "epoch": 0.5081727382737009, "grad_norm": 7.274276078639146, "learning_rate": 4.369993157206561e-06, "loss": 0.9382, "step": 7034 }, { "epoch": 0.5082449834739104, "grad_norm": 9.263166387681855, "learning_rate": 4.369799010042473e-06, "loss": 0.9246, "step": 7035 }, { "epoch": 0.50831722867412, "grad_norm": 5.214187912294378, "learning_rate": 4.36960483728229e-06, "loss": 0.8832, "step": 7036 }, { "epoch": 0.5083894738743294, "grad_norm": 7.29081209533763, "learning_rate": 4.3694106389286665e-06, "loss": 0.8598, "step": 7037 }, { "epoch": 0.508461719074539, "grad_norm": 5.737016365760678, "learning_rate": 4.369216414984263e-06, "loss": 0.9093, "step": 7038 }, { "epoch": 0.5085339642747485, "grad_norm": 6.1748790528348545, "learning_rate": 4.369022165451739e-06, "loss": 0.8043, "step": 7039 }, { "epoch": 0.508606209474958, "grad_norm": 5.818135800504766, "learning_rate": 4.368827890333752e-06, "loss": 0.8884, "step": 7040 }, { "epoch": 0.5086784546751675, "grad_norm": 6.069440506566, "learning_rate": 4.368633589632962e-06, "loss": 0.9014, "step": 7041 }, { "epoch": 0.508750699875377, "grad_norm": 7.766176384552847, "learning_rate": 4.3684392633520305e-06, "loss": 0.8507, "step": 7042 }, { "epoch": 0.5088229450755866, "grad_norm": 6.73108821238804, "learning_rate": 4.368244911493615e-06, "loss": 0.9461, "step": 7043 }, { "epoch": 0.5088951902757961, "grad_norm": 5.810209417721809, "learning_rate": 4.3680505340603774e-06, "loss": 0.9193, "step": 7044 }, { "epoch": 0.5089674354760055, "grad_norm": 5.4290874780875065, "learning_rate": 4.3678561310549796e-06, "loss": 0.8196, "step": 7045 }, { "epoch": 0.5090396806762151, "grad_norm": 8.079683666268913, "learning_rate": 4.3676617024800795e-06, "loss": 0.936, "step": 7046 }, { "epoch": 0.5091119258764246, "grad_norm": 7.596136526591346, "learning_rate": 4.367467248338343e-06, "loss": 0.9016, "step": 7047 }, { "epoch": 0.509184171076634, "grad_norm": 7.064976080916749, "learning_rate": 4.367272768632428e-06, "loss": 0.9115, "step": 7048 }, { "epoch": 0.5092564162768436, "grad_norm": 6.684612756096991, "learning_rate": 4.367078263365e-06, "loss": 0.8125, "step": 7049 }, { "epoch": 0.5093286614770531, "grad_norm": 5.685136240968121, "learning_rate": 4.36688373253872e-06, "loss": 0.8414, "step": 7050 }, { "epoch": 0.5094009066772627, "grad_norm": 5.964019658507217, "learning_rate": 4.366689176156251e-06, "loss": 0.8337, "step": 7051 }, { "epoch": 0.5094731518774721, "grad_norm": 5.509441594734535, "learning_rate": 4.366494594220257e-06, "loss": 0.8651, "step": 7052 }, { "epoch": 0.5095453970776816, "grad_norm": 6.369416428308565, "learning_rate": 4.3662999867334e-06, "loss": 0.9146, "step": 7053 }, { "epoch": 0.5096176422778912, "grad_norm": 6.26257402144066, "learning_rate": 4.366105353698346e-06, "loss": 0.8452, "step": 7054 }, { "epoch": 0.5096898874781006, "grad_norm": 6.374742390532679, "learning_rate": 4.365910695117759e-06, "loss": 0.866, "step": 7055 }, { "epoch": 0.5097621326783102, "grad_norm": 5.918126029263164, "learning_rate": 4.365716010994303e-06, "loss": 0.8268, "step": 7056 }, { "epoch": 0.5098343778785197, "grad_norm": 5.604470151414903, "learning_rate": 4.3655213013306444e-06, "loss": 0.8219, "step": 7057 }, { "epoch": 0.5099066230787292, "grad_norm": 5.744919232231492, "learning_rate": 4.3653265661294465e-06, "loss": 0.8199, "step": 7058 }, { "epoch": 0.5099788682789387, "grad_norm": 5.372711936636576, "learning_rate": 4.3651318053933765e-06, "loss": 0.7759, "step": 7059 }, { "epoch": 0.5100511134791482, "grad_norm": 9.15778345800825, "learning_rate": 4.364937019125101e-06, "loss": 0.8632, "step": 7060 }, { "epoch": 0.5101233586793578, "grad_norm": 6.069240637450719, "learning_rate": 4.3647422073272844e-06, "loss": 0.8944, "step": 7061 }, { "epoch": 0.5101956038795673, "grad_norm": 6.439272738317387, "learning_rate": 4.364547370002595e-06, "loss": 0.8323, "step": 7062 }, { "epoch": 0.5102678490797767, "grad_norm": 5.877921494039189, "learning_rate": 4.364352507153701e-06, "loss": 0.9182, "step": 7063 }, { "epoch": 0.5103400942799863, "grad_norm": 8.110014726693628, "learning_rate": 4.364157618783268e-06, "loss": 0.9251, "step": 7064 }, { "epoch": 0.5104123394801958, "grad_norm": 7.229895768423334, "learning_rate": 4.363962704893965e-06, "loss": 0.8967, "step": 7065 }, { "epoch": 0.5104845846804053, "grad_norm": 8.14046473720606, "learning_rate": 4.363767765488459e-06, "loss": 0.8837, "step": 7066 }, { "epoch": 0.5105568298806148, "grad_norm": 7.154548009515275, "learning_rate": 4.36357280056942e-06, "loss": 0.9012, "step": 7067 }, { "epoch": 0.5106290750808243, "grad_norm": 6.013159308498717, "learning_rate": 4.363377810139517e-06, "loss": 0.8475, "step": 7068 }, { "epoch": 0.5107013202810339, "grad_norm": 7.120655542263121, "learning_rate": 4.363182794201418e-06, "loss": 0.8111, "step": 7069 }, { "epoch": 0.5107735654812433, "grad_norm": 8.113577927742531, "learning_rate": 4.362987752757793e-06, "loss": 0.8324, "step": 7070 }, { "epoch": 0.5108458106814528, "grad_norm": 7.664046323488917, "learning_rate": 4.362792685811312e-06, "loss": 0.8789, "step": 7071 }, { "epoch": 0.5109180558816624, "grad_norm": 9.321446569599718, "learning_rate": 4.362597593364646e-06, "loss": 0.9056, "step": 7072 }, { "epoch": 0.5109903010818718, "grad_norm": 7.146618621057357, "learning_rate": 4.362402475420465e-06, "loss": 0.8414, "step": 7073 }, { "epoch": 0.5110625462820814, "grad_norm": 7.781269854784534, "learning_rate": 4.36220733198144e-06, "loss": 0.8839, "step": 7074 }, { "epoch": 0.5111347914822909, "grad_norm": 5.686478072360047, "learning_rate": 4.3620121630502435e-06, "loss": 0.7543, "step": 7075 }, { "epoch": 0.5112070366825004, "grad_norm": 7.768122741468087, "learning_rate": 4.361816968629545e-06, "loss": 0.8915, "step": 7076 }, { "epoch": 0.5112792818827099, "grad_norm": 7.497144028168431, "learning_rate": 4.361621748722018e-06, "loss": 0.8222, "step": 7077 }, { "epoch": 0.5113515270829194, "grad_norm": 6.20698730760484, "learning_rate": 4.361426503330335e-06, "loss": 0.8611, "step": 7078 }, { "epoch": 0.511423772283129, "grad_norm": 7.023714495024494, "learning_rate": 4.361231232457169e-06, "loss": 0.8935, "step": 7079 }, { "epoch": 0.5114960174833385, "grad_norm": 5.93768599369458, "learning_rate": 4.3610359361051915e-06, "loss": 0.9053, "step": 7080 }, { "epoch": 0.5115682626835479, "grad_norm": 6.833505705853591, "learning_rate": 4.360840614277078e-06, "loss": 0.8486, "step": 7081 }, { "epoch": 0.5116405078837575, "grad_norm": 5.440412245725721, "learning_rate": 4.360645266975501e-06, "loss": 0.8284, "step": 7082 }, { "epoch": 0.511712753083967, "grad_norm": 9.21314423377618, "learning_rate": 4.360449894203135e-06, "loss": 0.933, "step": 7083 }, { "epoch": 0.5117849982841765, "grad_norm": 5.941949401948194, "learning_rate": 4.360254495962654e-06, "loss": 0.8803, "step": 7084 }, { "epoch": 0.511857243484386, "grad_norm": 5.633986859908538, "learning_rate": 4.360059072256734e-06, "loss": 0.9189, "step": 7085 }, { "epoch": 0.5119294886845955, "grad_norm": 6.3291397599767345, "learning_rate": 4.3598636230880485e-06, "loss": 0.8541, "step": 7086 }, { "epoch": 0.5120017338848051, "grad_norm": 7.053458038025707, "learning_rate": 4.359668148459275e-06, "loss": 0.8837, "step": 7087 }, { "epoch": 0.5120739790850145, "grad_norm": 6.445658134671936, "learning_rate": 4.359472648373089e-06, "loss": 0.8873, "step": 7088 }, { "epoch": 0.512146224285224, "grad_norm": 5.597748930670284, "learning_rate": 4.359277122832165e-06, "loss": 0.8807, "step": 7089 }, { "epoch": 0.5122184694854336, "grad_norm": 8.910229459777055, "learning_rate": 4.359081571839182e-06, "loss": 0.9705, "step": 7090 }, { "epoch": 0.512290714685643, "grad_norm": 6.17916709743849, "learning_rate": 4.358885995396815e-06, "loss": 0.956, "step": 7091 }, { "epoch": 0.5123629598858526, "grad_norm": 7.144659392645255, "learning_rate": 4.358690393507742e-06, "loss": 0.9109, "step": 7092 }, { "epoch": 0.5124352050860621, "grad_norm": 5.668892741062224, "learning_rate": 4.358494766174641e-06, "loss": 0.9519, "step": 7093 }, { "epoch": 0.5125074502862716, "grad_norm": 6.3187940586565166, "learning_rate": 4.358299113400189e-06, "loss": 0.7959, "step": 7094 }, { "epoch": 0.5125796954864811, "grad_norm": 6.4000374792908925, "learning_rate": 4.358103435187067e-06, "loss": 0.9122, "step": 7095 }, { "epoch": 0.5126519406866906, "grad_norm": 5.793181113879739, "learning_rate": 4.357907731537949e-06, "loss": 0.947, "step": 7096 }, { "epoch": 0.5127241858869002, "grad_norm": 5.940223791870957, "learning_rate": 4.357712002455519e-06, "loss": 0.8882, "step": 7097 }, { "epoch": 0.5127964310871097, "grad_norm": 6.448944585045395, "learning_rate": 4.3575162479424524e-06, "loss": 0.9351, "step": 7098 }, { "epoch": 0.5128686762873191, "grad_norm": 5.352205780794559, "learning_rate": 4.357320468001432e-06, "loss": 0.8805, "step": 7099 }, { "epoch": 0.5129409214875287, "grad_norm": 6.867881835276457, "learning_rate": 4.357124662635136e-06, "loss": 0.8823, "step": 7100 }, { "epoch": 0.5130131666877382, "grad_norm": 7.584117667190754, "learning_rate": 4.356928831846246e-06, "loss": 0.8495, "step": 7101 }, { "epoch": 0.5130854118879477, "grad_norm": 6.93071600711917, "learning_rate": 4.356732975637441e-06, "loss": 0.85, "step": 7102 }, { "epoch": 0.5131576570881572, "grad_norm": 6.377038368017552, "learning_rate": 4.3565370940114026e-06, "loss": 0.8552, "step": 7103 }, { "epoch": 0.5132299022883667, "grad_norm": 7.955913182371316, "learning_rate": 4.356341186970814e-06, "loss": 0.9055, "step": 7104 }, { "epoch": 0.5133021474885763, "grad_norm": 6.398376091929071, "learning_rate": 4.356145254518356e-06, "loss": 0.9189, "step": 7105 }, { "epoch": 0.5133743926887857, "grad_norm": 6.865377454501204, "learning_rate": 4.355949296656711e-06, "loss": 0.8804, "step": 7106 }, { "epoch": 0.5134466378889952, "grad_norm": 7.463325605617042, "learning_rate": 4.35575331338856e-06, "loss": 0.9049, "step": 7107 }, { "epoch": 0.5135188830892048, "grad_norm": 7.778730482456708, "learning_rate": 4.355557304716588e-06, "loss": 0.8594, "step": 7108 }, { "epoch": 0.5135911282894142, "grad_norm": 5.613322024904098, "learning_rate": 4.355361270643477e-06, "loss": 0.87, "step": 7109 }, { "epoch": 0.5136633734896238, "grad_norm": 6.589316952491406, "learning_rate": 4.35516521117191e-06, "loss": 0.921, "step": 7110 }, { "epoch": 0.5137356186898333, "grad_norm": 9.235997091432479, "learning_rate": 4.3549691263045726e-06, "loss": 0.9076, "step": 7111 }, { "epoch": 0.5138078638900428, "grad_norm": 6.210066210014485, "learning_rate": 4.354773016044148e-06, "loss": 0.7867, "step": 7112 }, { "epoch": 0.5138801090902523, "grad_norm": 6.964675875623375, "learning_rate": 4.354576880393321e-06, "loss": 0.8263, "step": 7113 }, { "epoch": 0.5139523542904618, "grad_norm": 6.758675827711505, "learning_rate": 4.354380719354776e-06, "loss": 0.8358, "step": 7114 }, { "epoch": 0.5140245994906714, "grad_norm": 9.671461253645269, "learning_rate": 4.354184532931199e-06, "loss": 0.9064, "step": 7115 }, { "epoch": 0.5140968446908808, "grad_norm": 7.728043741122152, "learning_rate": 4.353988321125277e-06, "loss": 0.8942, "step": 7116 }, { "epoch": 0.5141690898910903, "grad_norm": 6.323014670737938, "learning_rate": 4.3537920839396915e-06, "loss": 0.8376, "step": 7117 }, { "epoch": 0.5142413350912999, "grad_norm": 6.182722308600288, "learning_rate": 4.353595821377134e-06, "loss": 0.9012, "step": 7118 }, { "epoch": 0.5143135802915094, "grad_norm": 7.188517357516528, "learning_rate": 4.353399533440288e-06, "loss": 0.9115, "step": 7119 }, { "epoch": 0.5143858254917189, "grad_norm": 8.392961514180536, "learning_rate": 4.353203220131842e-06, "loss": 0.9106, "step": 7120 }, { "epoch": 0.5144580706919284, "grad_norm": 8.450317142110473, "learning_rate": 4.353006881454483e-06, "loss": 0.8678, "step": 7121 }, { "epoch": 0.5145303158921379, "grad_norm": 6.071413389555598, "learning_rate": 4.352810517410897e-06, "loss": 0.8099, "step": 7122 }, { "epoch": 0.5146025610923475, "grad_norm": 6.127164497170826, "learning_rate": 4.3526141280037744e-06, "loss": 0.9145, "step": 7123 }, { "epoch": 0.5146748062925569, "grad_norm": 5.521448794433467, "learning_rate": 4.352417713235804e-06, "loss": 0.9316, "step": 7124 }, { "epoch": 0.5147470514927664, "grad_norm": 5.72047873460998, "learning_rate": 4.352221273109672e-06, "loss": 0.7782, "step": 7125 }, { "epoch": 0.514819296692976, "grad_norm": 8.46272271678861, "learning_rate": 4.35202480762807e-06, "loss": 1.0258, "step": 7126 }, { "epoch": 0.5148915418931854, "grad_norm": 7.935017355195015, "learning_rate": 4.351828316793686e-06, "loss": 0.9316, "step": 7127 }, { "epoch": 0.514963787093395, "grad_norm": 8.275148843308273, "learning_rate": 4.35163180060921e-06, "loss": 0.9451, "step": 7128 }, { "epoch": 0.5150360322936045, "grad_norm": 8.10756608783748, "learning_rate": 4.351435259077332e-06, "loss": 0.8839, "step": 7129 }, { "epoch": 0.515108277493814, "grad_norm": 6.794892962625324, "learning_rate": 4.3512386922007435e-06, "loss": 0.9094, "step": 7130 }, { "epoch": 0.5151805226940235, "grad_norm": 9.479453102786557, "learning_rate": 4.351042099982134e-06, "loss": 0.9225, "step": 7131 }, { "epoch": 0.515252767894233, "grad_norm": 5.8219298298209825, "learning_rate": 4.350845482424196e-06, "loss": 0.7787, "step": 7132 }, { "epoch": 0.5153250130944426, "grad_norm": 7.88527239554558, "learning_rate": 4.35064883952962e-06, "loss": 0.905, "step": 7133 }, { "epoch": 0.515397258294652, "grad_norm": 5.69230264635715, "learning_rate": 4.3504521713010985e-06, "loss": 0.9376, "step": 7134 }, { "epoch": 0.5154695034948615, "grad_norm": 6.405183433508718, "learning_rate": 4.350255477741324e-06, "loss": 0.8643, "step": 7135 }, { "epoch": 0.5155417486950711, "grad_norm": 5.810768115535957, "learning_rate": 4.350058758852988e-06, "loss": 0.8403, "step": 7136 }, { "epoch": 0.5156139938952806, "grad_norm": 5.386855601563377, "learning_rate": 4.349862014638785e-06, "loss": 0.9154, "step": 7137 }, { "epoch": 0.51568623909549, "grad_norm": 5.799789806371926, "learning_rate": 4.349665245101407e-06, "loss": 0.8298, "step": 7138 }, { "epoch": 0.5157584842956996, "grad_norm": 5.9895169550216805, "learning_rate": 4.349468450243547e-06, "loss": 0.928, "step": 7139 }, { "epoch": 0.5158307294959091, "grad_norm": 6.676762311799164, "learning_rate": 4.3492716300679005e-06, "loss": 0.7919, "step": 7140 }, { "epoch": 0.5159029746961187, "grad_norm": 6.0459068765336434, "learning_rate": 4.349074784577161e-06, "loss": 0.8583, "step": 7141 }, { "epoch": 0.5159752198963281, "grad_norm": 6.274948071268729, "learning_rate": 4.3488779137740244e-06, "loss": 0.8467, "step": 7142 }, { "epoch": 0.5160474650965376, "grad_norm": 7.009830112112818, "learning_rate": 4.348681017661183e-06, "loss": 0.8942, "step": 7143 }, { "epoch": 0.5161197102967472, "grad_norm": 5.095046738399903, "learning_rate": 4.348484096241336e-06, "loss": 0.7983, "step": 7144 }, { "epoch": 0.5161919554969566, "grad_norm": 7.060758933105264, "learning_rate": 4.348287149517176e-06, "loss": 0.8089, "step": 7145 }, { "epoch": 0.5162642006971662, "grad_norm": 5.661497937730418, "learning_rate": 4.3480901774913996e-06, "loss": 0.805, "step": 7146 }, { "epoch": 0.5163364458973757, "grad_norm": 5.91741308241115, "learning_rate": 4.347893180166704e-06, "loss": 0.9177, "step": 7147 }, { "epoch": 0.5164086910975852, "grad_norm": 6.133333414879398, "learning_rate": 4.347696157545785e-06, "loss": 0.9734, "step": 7148 }, { "epoch": 0.5164809362977947, "grad_norm": 7.918944288849515, "learning_rate": 4.3474991096313414e-06, "loss": 0.9476, "step": 7149 }, { "epoch": 0.5165531814980042, "grad_norm": 8.802858529951498, "learning_rate": 4.347302036426069e-06, "loss": 0.8804, "step": 7150 }, { "epoch": 0.5166254266982138, "grad_norm": 6.552274748690276, "learning_rate": 4.347104937932666e-06, "loss": 0.905, "step": 7151 }, { "epoch": 0.5166976718984232, "grad_norm": 5.629823608616503, "learning_rate": 4.346907814153831e-06, "loss": 0.813, "step": 7152 }, { "epoch": 0.5167699170986327, "grad_norm": 5.829087410588659, "learning_rate": 4.346710665092261e-06, "loss": 0.7986, "step": 7153 }, { "epoch": 0.5168421622988423, "grad_norm": 6.790952430411317, "learning_rate": 4.346513490750657e-06, "loss": 0.8546, "step": 7154 }, { "epoch": 0.5169144074990518, "grad_norm": 6.598896350484193, "learning_rate": 4.3463162911317154e-06, "loss": 0.8614, "step": 7155 }, { "epoch": 0.5169866526992613, "grad_norm": 5.927371732626383, "learning_rate": 4.346119066238139e-06, "loss": 0.832, "step": 7156 }, { "epoch": 0.5170588978994708, "grad_norm": 5.88567016415718, "learning_rate": 4.345921816072626e-06, "loss": 0.8997, "step": 7157 }, { "epoch": 0.5171311430996803, "grad_norm": 7.649680795430137, "learning_rate": 4.345724540637875e-06, "loss": 0.9303, "step": 7158 }, { "epoch": 0.5172033882998899, "grad_norm": 7.74643231381578, "learning_rate": 4.34552723993659e-06, "loss": 0.918, "step": 7159 }, { "epoch": 0.5172756335000993, "grad_norm": 7.355300196389261, "learning_rate": 4.345329913971469e-06, "loss": 0.9193, "step": 7160 }, { "epoch": 0.5173478787003088, "grad_norm": 6.459883505889867, "learning_rate": 4.345132562745215e-06, "loss": 0.9212, "step": 7161 }, { "epoch": 0.5174201239005184, "grad_norm": 6.234110049782003, "learning_rate": 4.344935186260528e-06, "loss": 0.8618, "step": 7162 }, { "epoch": 0.5174923691007278, "grad_norm": 8.115491732155698, "learning_rate": 4.344737784520111e-06, "loss": 0.95, "step": 7163 }, { "epoch": 0.5175646143009374, "grad_norm": 7.3420504287488395, "learning_rate": 4.344540357526667e-06, "loss": 0.9226, "step": 7164 }, { "epoch": 0.5176368595011469, "grad_norm": 6.898161004509871, "learning_rate": 4.3443429052828965e-06, "loss": 0.8582, "step": 7165 }, { "epoch": 0.5177091047013564, "grad_norm": 7.449761805470827, "learning_rate": 4.344145427791504e-06, "loss": 0.8841, "step": 7166 }, { "epoch": 0.5177813499015659, "grad_norm": 6.055317822532915, "learning_rate": 4.343947925055193e-06, "loss": 0.8499, "step": 7167 }, { "epoch": 0.5178535951017754, "grad_norm": 5.46976378717136, "learning_rate": 4.343750397076666e-06, "loss": 0.8896, "step": 7168 }, { "epoch": 0.517925840301985, "grad_norm": 7.047492091190357, "learning_rate": 4.3435528438586275e-06, "loss": 0.9081, "step": 7169 }, { "epoch": 0.5179980855021944, "grad_norm": 7.575506957561487, "learning_rate": 4.343355265403783e-06, "loss": 0.7665, "step": 7170 }, { "epoch": 0.5180703307024039, "grad_norm": 5.76311423778615, "learning_rate": 4.3431576617148355e-06, "loss": 0.8298, "step": 7171 }, { "epoch": 0.5181425759026135, "grad_norm": 6.666077142717166, "learning_rate": 4.34296003279449e-06, "loss": 0.8588, "step": 7172 }, { "epoch": 0.518214821102823, "grad_norm": 7.3333500370644575, "learning_rate": 4.342762378645454e-06, "loss": 0.9677, "step": 7173 }, { "epoch": 0.5182870663030325, "grad_norm": 8.060904884753024, "learning_rate": 4.342564699270431e-06, "loss": 0.8965, "step": 7174 }, { "epoch": 0.518359311503242, "grad_norm": 6.042822611052105, "learning_rate": 4.342366994672129e-06, "loss": 0.9381, "step": 7175 }, { "epoch": 0.5184315567034515, "grad_norm": 6.651136843230114, "learning_rate": 4.342169264853252e-06, "loss": 0.9348, "step": 7176 }, { "epoch": 0.5185038019036611, "grad_norm": 5.50644791384879, "learning_rate": 4.341971509816509e-06, "loss": 0.9152, "step": 7177 }, { "epoch": 0.5185760471038705, "grad_norm": 5.537182745876324, "learning_rate": 4.3417737295646055e-06, "loss": 0.9246, "step": 7178 }, { "epoch": 0.51864829230408, "grad_norm": 8.005753832175126, "learning_rate": 4.34157592410025e-06, "loss": 0.8976, "step": 7179 }, { "epoch": 0.5187205375042896, "grad_norm": 6.0237193159187505, "learning_rate": 4.3413780934261504e-06, "loss": 0.8309, "step": 7180 }, { "epoch": 0.518792782704499, "grad_norm": 6.065456338116664, "learning_rate": 4.341180237545014e-06, "loss": 0.8438, "step": 7181 }, { "epoch": 0.5188650279047086, "grad_norm": 6.902186514461915, "learning_rate": 4.34098235645955e-06, "loss": 0.8684, "step": 7182 }, { "epoch": 0.5189372731049181, "grad_norm": 6.198764911809833, "learning_rate": 4.340784450172466e-06, "loss": 0.8875, "step": 7183 }, { "epoch": 0.5190095183051276, "grad_norm": 5.561514606418581, "learning_rate": 4.340586518686474e-06, "loss": 0.9162, "step": 7184 }, { "epoch": 0.5190817635053371, "grad_norm": 7.672183028678155, "learning_rate": 4.34038856200428e-06, "loss": 1.0113, "step": 7185 }, { "epoch": 0.5191540087055466, "grad_norm": 6.313133887766304, "learning_rate": 4.340190580128596e-06, "loss": 0.9312, "step": 7186 }, { "epoch": 0.5192262539057562, "grad_norm": 5.983979772360166, "learning_rate": 4.339992573062133e-06, "loss": 0.8613, "step": 7187 }, { "epoch": 0.5192984991059656, "grad_norm": 6.188685448112838, "learning_rate": 4.339794540807599e-06, "loss": 0.8483, "step": 7188 }, { "epoch": 0.5193707443061751, "grad_norm": 7.3356626741485655, "learning_rate": 4.339596483367706e-06, "loss": 0.9645, "step": 7189 }, { "epoch": 0.5194429895063847, "grad_norm": 6.968550195951529, "learning_rate": 4.339398400745166e-06, "loss": 0.8937, "step": 7190 }, { "epoch": 0.5195152347065942, "grad_norm": 6.279122215203095, "learning_rate": 4.339200292942691e-06, "loss": 0.8996, "step": 7191 }, { "epoch": 0.5195874799068037, "grad_norm": 8.658957598458057, "learning_rate": 4.33900215996299e-06, "loss": 0.8438, "step": 7192 }, { "epoch": 0.5196597251070132, "grad_norm": 6.370274905705999, "learning_rate": 4.3388040018087795e-06, "loss": 0.8691, "step": 7193 }, { "epoch": 0.5197319703072227, "grad_norm": 6.74428174854157, "learning_rate": 4.338605818482768e-06, "loss": 0.9437, "step": 7194 }, { "epoch": 0.5198042155074323, "grad_norm": 5.970141183790759, "learning_rate": 4.338407609987672e-06, "loss": 0.9394, "step": 7195 }, { "epoch": 0.5198764607076417, "grad_norm": 6.318019153803827, "learning_rate": 4.338209376326202e-06, "loss": 0.8226, "step": 7196 }, { "epoch": 0.5199487059078512, "grad_norm": 8.706675053700044, "learning_rate": 4.338011117501074e-06, "loss": 0.9069, "step": 7197 }, { "epoch": 0.5200209511080608, "grad_norm": 8.511482112933626, "learning_rate": 4.337812833515001e-06, "loss": 0.8298, "step": 7198 }, { "epoch": 0.5200931963082702, "grad_norm": 6.621308647876885, "learning_rate": 4.337614524370697e-06, "loss": 0.9109, "step": 7199 }, { "epoch": 0.5201654415084798, "grad_norm": 6.498096187256533, "learning_rate": 4.337416190070877e-06, "loss": 0.9459, "step": 7200 }, { "epoch": 0.5202376867086893, "grad_norm": 7.600061878153218, "learning_rate": 4.337217830618255e-06, "loss": 0.8507, "step": 7201 }, { "epoch": 0.5203099319088988, "grad_norm": 6.536208611286355, "learning_rate": 4.337019446015548e-06, "loss": 0.8693, "step": 7202 }, { "epoch": 0.5203821771091083, "grad_norm": 6.027131092327556, "learning_rate": 4.336821036265471e-06, "loss": 0.8614, "step": 7203 }, { "epoch": 0.5204544223093178, "grad_norm": 6.766119183470805, "learning_rate": 4.336622601370741e-06, "loss": 0.8992, "step": 7204 }, { "epoch": 0.5205266675095274, "grad_norm": 5.70119127908654, "learning_rate": 4.336424141334073e-06, "loss": 0.9005, "step": 7205 }, { "epoch": 0.5205989127097368, "grad_norm": 10.87799872785625, "learning_rate": 4.336225656158185e-06, "loss": 1.027, "step": 7206 }, { "epoch": 0.5206711579099463, "grad_norm": 5.82967145514242, "learning_rate": 4.336027145845792e-06, "loss": 0.8692, "step": 7207 }, { "epoch": 0.5207434031101559, "grad_norm": 6.958573236346349, "learning_rate": 4.335828610399615e-06, "loss": 0.8333, "step": 7208 }, { "epoch": 0.5208156483103654, "grad_norm": 5.179115045136267, "learning_rate": 4.335630049822369e-06, "loss": 0.8444, "step": 7209 }, { "epoch": 0.5208878935105749, "grad_norm": 8.303389755582081, "learning_rate": 4.335431464116772e-06, "loss": 0.8637, "step": 7210 }, { "epoch": 0.5209601387107844, "grad_norm": 7.046801378495689, "learning_rate": 4.335232853285544e-06, "loss": 0.9408, "step": 7211 }, { "epoch": 0.5210323839109939, "grad_norm": 5.8501382273458065, "learning_rate": 4.335034217331403e-06, "loss": 0.8076, "step": 7212 }, { "epoch": 0.5211046291112035, "grad_norm": 9.023288689249128, "learning_rate": 4.334835556257069e-06, "loss": 0.8367, "step": 7213 }, { "epoch": 0.5211768743114129, "grad_norm": 8.279545907362849, "learning_rate": 4.334636870065261e-06, "loss": 0.8404, "step": 7214 }, { "epoch": 0.5212491195116224, "grad_norm": 5.946288340783541, "learning_rate": 4.3344381587586985e-06, "loss": 0.8194, "step": 7215 }, { "epoch": 0.521321364711832, "grad_norm": 5.783720900918093, "learning_rate": 4.334239422340101e-06, "loss": 0.9317, "step": 7216 }, { "epoch": 0.5213936099120414, "grad_norm": 7.755550980710803, "learning_rate": 4.334040660812191e-06, "loss": 0.8472, "step": 7217 }, { "epoch": 0.521465855112251, "grad_norm": 7.396257185991312, "learning_rate": 4.333841874177688e-06, "loss": 0.9214, "step": 7218 }, { "epoch": 0.5215381003124605, "grad_norm": 6.799426211663843, "learning_rate": 4.333643062439314e-06, "loss": 0.8066, "step": 7219 }, { "epoch": 0.52161034551267, "grad_norm": 8.576760994839349, "learning_rate": 4.33344422559979e-06, "loss": 0.8509, "step": 7220 }, { "epoch": 0.5216825907128795, "grad_norm": 7.237713366906301, "learning_rate": 4.3332453636618374e-06, "loss": 0.9054, "step": 7221 }, { "epoch": 0.521754835913089, "grad_norm": 6.148198938649349, "learning_rate": 4.33304647662818e-06, "loss": 0.8861, "step": 7222 }, { "epoch": 0.5218270811132986, "grad_norm": 8.30289632979835, "learning_rate": 4.33284756450154e-06, "loss": 0.9402, "step": 7223 }, { "epoch": 0.521899326313508, "grad_norm": 9.229951731843887, "learning_rate": 4.332648627284639e-06, "loss": 0.9007, "step": 7224 }, { "epoch": 0.5219715715137175, "grad_norm": 5.581755324979282, "learning_rate": 4.332449664980202e-06, "loss": 0.7919, "step": 7225 }, { "epoch": 0.5220438167139271, "grad_norm": 6.824201980159704, "learning_rate": 4.332250677590951e-06, "loss": 0.9296, "step": 7226 }, { "epoch": 0.5221160619141366, "grad_norm": 5.9210974351403625, "learning_rate": 4.332051665119612e-06, "loss": 0.9195, "step": 7227 }, { "epoch": 0.522188307114346, "grad_norm": 6.272087740556898, "learning_rate": 4.331852627568907e-06, "loss": 0.8652, "step": 7228 }, { "epoch": 0.5222605523145556, "grad_norm": 5.895636169947114, "learning_rate": 4.331653564941563e-06, "loss": 0.9158, "step": 7229 }, { "epoch": 0.5223327975147651, "grad_norm": 6.227340821904499, "learning_rate": 4.331454477240303e-06, "loss": 0.8809, "step": 7230 }, { "epoch": 0.5224050427149747, "grad_norm": 7.771550640582023, "learning_rate": 4.331255364467853e-06, "loss": 0.8523, "step": 7231 }, { "epoch": 0.5224772879151841, "grad_norm": 6.651906203498116, "learning_rate": 4.331056226626941e-06, "loss": 0.8336, "step": 7232 }, { "epoch": 0.5225495331153936, "grad_norm": 7.105472776507243, "learning_rate": 4.330857063720289e-06, "loss": 0.9224, "step": 7233 }, { "epoch": 0.5226217783156032, "grad_norm": 7.017277784155334, "learning_rate": 4.330657875750626e-06, "loss": 0.8876, "step": 7234 }, { "epoch": 0.5226940235158126, "grad_norm": 7.7255764940570835, "learning_rate": 4.330458662720678e-06, "loss": 0.9062, "step": 7235 }, { "epoch": 0.5227662687160222, "grad_norm": 6.050422199744908, "learning_rate": 4.330259424633172e-06, "loss": 0.8434, "step": 7236 }, { "epoch": 0.5228385139162317, "grad_norm": 6.589833330237761, "learning_rate": 4.330060161490836e-06, "loss": 0.9912, "step": 7237 }, { "epoch": 0.5229107591164412, "grad_norm": 7.943456382337333, "learning_rate": 4.329860873296397e-06, "loss": 0.9264, "step": 7238 }, { "epoch": 0.5229830043166507, "grad_norm": 6.479637823933194, "learning_rate": 4.3296615600525835e-06, "loss": 0.9117, "step": 7239 }, { "epoch": 0.5230552495168602, "grad_norm": 5.944675647046186, "learning_rate": 4.329462221762124e-06, "loss": 0.9053, "step": 7240 }, { "epoch": 0.5231274947170698, "grad_norm": 5.407012709249746, "learning_rate": 4.329262858427747e-06, "loss": 0.8304, "step": 7241 }, { "epoch": 0.5231997399172792, "grad_norm": 5.3833553223214965, "learning_rate": 4.329063470052182e-06, "loss": 0.8178, "step": 7242 }, { "epoch": 0.5232719851174887, "grad_norm": 5.317881114019632, "learning_rate": 4.328864056638158e-06, "loss": 0.8871, "step": 7243 }, { "epoch": 0.5233442303176983, "grad_norm": 7.513084442204371, "learning_rate": 4.3286646181884055e-06, "loss": 0.8636, "step": 7244 }, { "epoch": 0.5234164755179078, "grad_norm": 6.63164834453886, "learning_rate": 4.3284651547056536e-06, "loss": 0.8259, "step": 7245 }, { "epoch": 0.5234887207181173, "grad_norm": 5.197439759214067, "learning_rate": 4.328265666192634e-06, "loss": 0.897, "step": 7246 }, { "epoch": 0.5235609659183268, "grad_norm": 6.15885444194855, "learning_rate": 4.328066152652077e-06, "loss": 0.8765, "step": 7247 }, { "epoch": 0.5236332111185363, "grad_norm": 5.891588855681018, "learning_rate": 4.327866614086713e-06, "loss": 0.7942, "step": 7248 }, { "epoch": 0.5237054563187459, "grad_norm": 5.594716931529719, "learning_rate": 4.327667050499276e-06, "loss": 0.8397, "step": 7249 }, { "epoch": 0.5237777015189553, "grad_norm": 6.525787551531586, "learning_rate": 4.327467461892495e-06, "loss": 0.9515, "step": 7250 }, { "epoch": 0.5238499467191648, "grad_norm": 8.458565644580165, "learning_rate": 4.3272678482691035e-06, "loss": 0.9404, "step": 7251 }, { "epoch": 0.5239221919193744, "grad_norm": 7.571068063672117, "learning_rate": 4.327068209631833e-06, "loss": 0.8741, "step": 7252 }, { "epoch": 0.5239944371195838, "grad_norm": 5.814705707060057, "learning_rate": 4.3268685459834185e-06, "loss": 0.9054, "step": 7253 }, { "epoch": 0.5240666823197934, "grad_norm": 6.398131049972108, "learning_rate": 4.326668857326592e-06, "loss": 0.85, "step": 7254 }, { "epoch": 0.5241389275200029, "grad_norm": 4.87320363130889, "learning_rate": 4.326469143664087e-06, "loss": 0.8093, "step": 7255 }, { "epoch": 0.5242111727202124, "grad_norm": 7.774032738673838, "learning_rate": 4.326269404998637e-06, "loss": 0.8412, "step": 7256 }, { "epoch": 0.5242834179204219, "grad_norm": 7.8285808620853485, "learning_rate": 4.326069641332977e-06, "loss": 0.9719, "step": 7257 }, { "epoch": 0.5243556631206314, "grad_norm": 7.163651948602937, "learning_rate": 4.325869852669843e-06, "loss": 0.8918, "step": 7258 }, { "epoch": 0.524427908320841, "grad_norm": 5.887356040798199, "learning_rate": 4.325670039011967e-06, "loss": 0.9373, "step": 7259 }, { "epoch": 0.5245001535210504, "grad_norm": 5.5191495618978, "learning_rate": 4.325470200362086e-06, "loss": 0.912, "step": 7260 }, { "epoch": 0.5245723987212599, "grad_norm": 5.287063739355597, "learning_rate": 4.325270336722936e-06, "loss": 0.8367, "step": 7261 }, { "epoch": 0.5246446439214695, "grad_norm": 5.734219000666736, "learning_rate": 4.325070448097251e-06, "loss": 0.8927, "step": 7262 }, { "epoch": 0.524716889121679, "grad_norm": 6.3193141304339004, "learning_rate": 4.32487053448777e-06, "loss": 0.8757, "step": 7263 }, { "epoch": 0.5247891343218885, "grad_norm": 6.056249168561282, "learning_rate": 4.324670595897227e-06, "loss": 0.8873, "step": 7264 }, { "epoch": 0.524861379522098, "grad_norm": 6.471302358213977, "learning_rate": 4.324470632328361e-06, "loss": 0.8831, "step": 7265 }, { "epoch": 0.5249336247223075, "grad_norm": 6.534670577440309, "learning_rate": 4.324270643783908e-06, "loss": 0.8404, "step": 7266 }, { "epoch": 0.5250058699225171, "grad_norm": 6.037823351302454, "learning_rate": 4.324070630266607e-06, "loss": 1.0025, "step": 7267 }, { "epoch": 0.5250781151227265, "grad_norm": 6.357357670566732, "learning_rate": 4.323870591779196e-06, "loss": 0.9235, "step": 7268 }, { "epoch": 0.525150360322936, "grad_norm": 9.756449986814612, "learning_rate": 4.3236705283244115e-06, "loss": 0.9071, "step": 7269 }, { "epoch": 0.5252226055231456, "grad_norm": 6.996006780325569, "learning_rate": 4.323470439904994e-06, "loss": 0.8561, "step": 7270 }, { "epoch": 0.525294850723355, "grad_norm": 7.01999886493728, "learning_rate": 4.323270326523682e-06, "loss": 0.9239, "step": 7271 }, { "epoch": 0.5253670959235646, "grad_norm": 5.737727461151935, "learning_rate": 4.323070188183215e-06, "loss": 0.8552, "step": 7272 }, { "epoch": 0.5254393411237741, "grad_norm": 6.515688815822077, "learning_rate": 4.322870024886332e-06, "loss": 0.8845, "step": 7273 }, { "epoch": 0.5255115863239836, "grad_norm": 5.404689497449671, "learning_rate": 4.322669836635774e-06, "loss": 0.8803, "step": 7274 }, { "epoch": 0.5255838315241931, "grad_norm": 8.167145799348296, "learning_rate": 4.322469623434282e-06, "loss": 0.9162, "step": 7275 }, { "epoch": 0.5256560767244026, "grad_norm": 6.028523041124837, "learning_rate": 4.322269385284596e-06, "loss": 0.8386, "step": 7276 }, { "epoch": 0.5257283219246122, "grad_norm": 6.49944244340969, "learning_rate": 4.322069122189456e-06, "loss": 0.9089, "step": 7277 }, { "epoch": 0.5258005671248216, "grad_norm": 6.493110233106133, "learning_rate": 4.321868834151605e-06, "loss": 0.8154, "step": 7278 }, { "epoch": 0.5258728123250311, "grad_norm": 5.592015343760995, "learning_rate": 4.321668521173783e-06, "loss": 0.9618, "step": 7279 }, { "epoch": 0.5259450575252407, "grad_norm": 6.916018658658611, "learning_rate": 4.321468183258735e-06, "loss": 0.8653, "step": 7280 }, { "epoch": 0.5260173027254502, "grad_norm": 8.62721467263986, "learning_rate": 4.321267820409201e-06, "loss": 0.8319, "step": 7281 }, { "epoch": 0.5260895479256597, "grad_norm": 6.378438583208717, "learning_rate": 4.3210674326279255e-06, "loss": 0.861, "step": 7282 }, { "epoch": 0.5261617931258692, "grad_norm": 6.289979855940301, "learning_rate": 4.32086701991765e-06, "loss": 0.7861, "step": 7283 }, { "epoch": 0.5262340383260787, "grad_norm": 6.50051496373181, "learning_rate": 4.320666582281119e-06, "loss": 0.9049, "step": 7284 }, { "epoch": 0.5263062835262883, "grad_norm": 9.066520272737332, "learning_rate": 4.320466119721077e-06, "loss": 0.8473, "step": 7285 }, { "epoch": 0.5263785287264977, "grad_norm": 4.959600890314758, "learning_rate": 4.320265632240266e-06, "loss": 0.8467, "step": 7286 }, { "epoch": 0.5264507739267072, "grad_norm": 9.079367468896383, "learning_rate": 4.320065119841432e-06, "loss": 1.0124, "step": 7287 }, { "epoch": 0.5265230191269168, "grad_norm": 5.352698257102841, "learning_rate": 4.31986458252732e-06, "loss": 0.8212, "step": 7288 }, { "epoch": 0.5265952643271262, "grad_norm": 5.129928080331633, "learning_rate": 4.319664020300675e-06, "loss": 0.8862, "step": 7289 }, { "epoch": 0.5266675095273358, "grad_norm": 5.727655532836453, "learning_rate": 4.319463433164243e-06, "loss": 0.7974, "step": 7290 }, { "epoch": 0.5267397547275453, "grad_norm": 7.185561739757661, "learning_rate": 4.319262821120769e-06, "loss": 0.8703, "step": 7291 }, { "epoch": 0.5268119999277548, "grad_norm": 8.691608143722508, "learning_rate": 4.319062184172999e-06, "loss": 0.8637, "step": 7292 }, { "epoch": 0.5268842451279643, "grad_norm": 5.511211585509922, "learning_rate": 4.3188615223236795e-06, "loss": 0.8589, "step": 7293 }, { "epoch": 0.5269564903281738, "grad_norm": 7.377356976965136, "learning_rate": 4.318660835575559e-06, "loss": 0.8898, "step": 7294 }, { "epoch": 0.5270287355283834, "grad_norm": 4.973566563807508, "learning_rate": 4.3184601239313836e-06, "loss": 0.8106, "step": 7295 }, { "epoch": 0.5271009807285928, "grad_norm": 6.244770150761343, "learning_rate": 4.318259387393902e-06, "loss": 0.8974, "step": 7296 }, { "epoch": 0.5271732259288023, "grad_norm": 4.973346239811272, "learning_rate": 4.318058625965859e-06, "loss": 0.846, "step": 7297 }, { "epoch": 0.5272454711290119, "grad_norm": 6.731920967911974, "learning_rate": 4.317857839650007e-06, "loss": 0.9422, "step": 7298 }, { "epoch": 0.5273177163292214, "grad_norm": 6.318076210810949, "learning_rate": 4.317657028449092e-06, "loss": 0.8392, "step": 7299 }, { "epoch": 0.5273899615294309, "grad_norm": 5.937136990342218, "learning_rate": 4.317456192365863e-06, "loss": 0.8187, "step": 7300 }, { "epoch": 0.5274622067296404, "grad_norm": 6.39734071080836, "learning_rate": 4.317255331403071e-06, "loss": 0.8672, "step": 7301 }, { "epoch": 0.5275344519298499, "grad_norm": 5.415345935708406, "learning_rate": 4.3170544455634645e-06, "loss": 0.9089, "step": 7302 }, { "epoch": 0.5276066971300595, "grad_norm": 5.367952206258162, "learning_rate": 4.316853534849793e-06, "loss": 0.7719, "step": 7303 }, { "epoch": 0.5276789423302689, "grad_norm": 6.747591224667146, "learning_rate": 4.3166525992648064e-06, "loss": 0.8664, "step": 7304 }, { "epoch": 0.5277511875304784, "grad_norm": 5.693827033751833, "learning_rate": 4.316451638811258e-06, "loss": 0.8466, "step": 7305 }, { "epoch": 0.527823432730688, "grad_norm": 6.128253306320308, "learning_rate": 4.316250653491896e-06, "loss": 0.9082, "step": 7306 }, { "epoch": 0.5278956779308974, "grad_norm": 5.970920667344581, "learning_rate": 4.316049643309473e-06, "loss": 0.8895, "step": 7307 }, { "epoch": 0.527967923131107, "grad_norm": 6.382374705285025, "learning_rate": 4.31584860826674e-06, "loss": 0.9399, "step": 7308 }, { "epoch": 0.5280401683313165, "grad_norm": 5.508900117016788, "learning_rate": 4.31564754836645e-06, "loss": 0.9212, "step": 7309 }, { "epoch": 0.528112413531526, "grad_norm": 6.711229950790882, "learning_rate": 4.315446463611354e-06, "loss": 0.8065, "step": 7310 }, { "epoch": 0.5281846587317355, "grad_norm": 6.084125763304941, "learning_rate": 4.315245354004207e-06, "loss": 0.8895, "step": 7311 }, { "epoch": 0.528256903931945, "grad_norm": 7.124210246972391, "learning_rate": 4.315044219547759e-06, "loss": 0.909, "step": 7312 }, { "epoch": 0.5283291491321546, "grad_norm": 7.66747896065327, "learning_rate": 4.314843060244767e-06, "loss": 0.8281, "step": 7313 }, { "epoch": 0.528401394332364, "grad_norm": 5.756851095320376, "learning_rate": 4.3146418760979806e-06, "loss": 0.8646, "step": 7314 }, { "epoch": 0.5284736395325735, "grad_norm": 6.516850857030061, "learning_rate": 4.314440667110157e-06, "loss": 0.7838, "step": 7315 }, { "epoch": 0.5285458847327831, "grad_norm": 8.402629313632978, "learning_rate": 4.314239433284049e-06, "loss": 0.9798, "step": 7316 }, { "epoch": 0.5286181299329926, "grad_norm": 6.802650821335862, "learning_rate": 4.314038174622412e-06, "loss": 0.8482, "step": 7317 }, { "epoch": 0.528690375133202, "grad_norm": 6.008162985550367, "learning_rate": 4.3138368911280004e-06, "loss": 0.8883, "step": 7318 }, { "epoch": 0.5287626203334116, "grad_norm": 7.335749517110246, "learning_rate": 4.313635582803571e-06, "loss": 0.8831, "step": 7319 }, { "epoch": 0.5288348655336211, "grad_norm": 6.7637347604822775, "learning_rate": 4.3134342496518775e-06, "loss": 0.8523, "step": 7320 }, { "epoch": 0.5289071107338307, "grad_norm": 6.2716939172989905, "learning_rate": 4.313232891675679e-06, "loss": 0.8968, "step": 7321 }, { "epoch": 0.5289793559340401, "grad_norm": 6.247841424119013, "learning_rate": 4.313031508877729e-06, "loss": 0.9178, "step": 7322 }, { "epoch": 0.5290516011342496, "grad_norm": 7.134713227805135, "learning_rate": 4.312830101260785e-06, "loss": 0.8748, "step": 7323 }, { "epoch": 0.5291238463344592, "grad_norm": 7.221483910146304, "learning_rate": 4.312628668827605e-06, "loss": 0.9294, "step": 7324 }, { "epoch": 0.5291960915346686, "grad_norm": 5.016887185117704, "learning_rate": 4.312427211580945e-06, "loss": 0.8299, "step": 7325 }, { "epoch": 0.5292683367348782, "grad_norm": 6.755903699894344, "learning_rate": 4.312225729523565e-06, "loss": 0.9, "step": 7326 }, { "epoch": 0.5293405819350877, "grad_norm": 4.805573668595848, "learning_rate": 4.312024222658221e-06, "loss": 0.8125, "step": 7327 }, { "epoch": 0.5294128271352972, "grad_norm": 6.271085951607677, "learning_rate": 4.311822690987673e-06, "loss": 0.8904, "step": 7328 }, { "epoch": 0.5294850723355067, "grad_norm": 5.653994790343673, "learning_rate": 4.3116211345146785e-06, "loss": 0.8052, "step": 7329 }, { "epoch": 0.5295573175357162, "grad_norm": 6.4022987648091645, "learning_rate": 4.311419553241998e-06, "loss": 0.8153, "step": 7330 }, { "epoch": 0.5296295627359258, "grad_norm": 7.205815340351034, "learning_rate": 4.31121794717239e-06, "loss": 0.9124, "step": 7331 }, { "epoch": 0.5297018079361352, "grad_norm": 6.626095951047663, "learning_rate": 4.311016316308615e-06, "loss": 0.9019, "step": 7332 }, { "epoch": 0.5297740531363447, "grad_norm": 6.445012126050008, "learning_rate": 4.310814660653431e-06, "loss": 0.955, "step": 7333 }, { "epoch": 0.5298462983365543, "grad_norm": 5.267390195827309, "learning_rate": 4.310612980209603e-06, "loss": 0.8324, "step": 7334 }, { "epoch": 0.5299185435367638, "grad_norm": 5.958341940849151, "learning_rate": 4.310411274979888e-06, "loss": 0.9343, "step": 7335 }, { "epoch": 0.5299907887369733, "grad_norm": 6.030113627057505, "learning_rate": 4.310209544967048e-06, "loss": 0.8028, "step": 7336 }, { "epoch": 0.5300630339371828, "grad_norm": 6.816550634215822, "learning_rate": 4.310007790173845e-06, "loss": 0.8969, "step": 7337 }, { "epoch": 0.5301352791373923, "grad_norm": 7.399016247905364, "learning_rate": 4.30980601060304e-06, "loss": 0.8956, "step": 7338 }, { "epoch": 0.5302075243376018, "grad_norm": 6.472120208736693, "learning_rate": 4.309604206257398e-06, "loss": 0.9429, "step": 7339 }, { "epoch": 0.5302797695378113, "grad_norm": 5.698822823486121, "learning_rate": 4.309402377139678e-06, "loss": 0.8649, "step": 7340 }, { "epoch": 0.5303520147380208, "grad_norm": 5.6641261445777715, "learning_rate": 4.309200523252644e-06, "loss": 0.7914, "step": 7341 }, { "epoch": 0.5304242599382304, "grad_norm": 7.6040130460367354, "learning_rate": 4.30899864459906e-06, "loss": 0.9263, "step": 7342 }, { "epoch": 0.5304965051384398, "grad_norm": 8.175800846548421, "learning_rate": 4.3087967411816895e-06, "loss": 0.906, "step": 7343 }, { "epoch": 0.5305687503386494, "grad_norm": 6.90051390351243, "learning_rate": 4.308594813003295e-06, "loss": 0.9095, "step": 7344 }, { "epoch": 0.5306409955388589, "grad_norm": 6.475738730689673, "learning_rate": 4.308392860066644e-06, "loss": 0.8503, "step": 7345 }, { "epoch": 0.5307132407390684, "grad_norm": 7.517239656903734, "learning_rate": 4.308190882374496e-06, "loss": 0.8527, "step": 7346 }, { "epoch": 0.5307854859392779, "grad_norm": 6.0162182007644125, "learning_rate": 4.307988879929621e-06, "loss": 0.813, "step": 7347 }, { "epoch": 0.5308577311394874, "grad_norm": 5.965413224391429, "learning_rate": 4.307786852734782e-06, "loss": 0.8071, "step": 7348 }, { "epoch": 0.530929976339697, "grad_norm": 5.475166959263573, "learning_rate": 4.307584800792743e-06, "loss": 0.9125, "step": 7349 }, { "epoch": 0.5310022215399064, "grad_norm": 7.9210664535911475, "learning_rate": 4.307382724106273e-06, "loss": 0.89, "step": 7350 }, { "epoch": 0.5310744667401159, "grad_norm": 7.433673804088119, "learning_rate": 4.3071806226781365e-06, "loss": 0.8787, "step": 7351 }, { "epoch": 0.5311467119403255, "grad_norm": 5.336739922393406, "learning_rate": 4.306978496511101e-06, "loss": 0.8537, "step": 7352 }, { "epoch": 0.531218957140535, "grad_norm": 5.35006376567167, "learning_rate": 4.306776345607932e-06, "loss": 0.8805, "step": 7353 }, { "epoch": 0.5312912023407445, "grad_norm": 6.705070160022942, "learning_rate": 4.306574169971398e-06, "loss": 0.889, "step": 7354 }, { "epoch": 0.531363447540954, "grad_norm": 5.6492306480867995, "learning_rate": 4.306371969604266e-06, "loss": 0.8694, "step": 7355 }, { "epoch": 0.5314356927411635, "grad_norm": 6.878267378765459, "learning_rate": 4.306169744509304e-06, "loss": 0.7894, "step": 7356 }, { "epoch": 0.531507937941373, "grad_norm": 5.878215153523498, "learning_rate": 4.305967494689282e-06, "loss": 0.8265, "step": 7357 }, { "epoch": 0.5315801831415825, "grad_norm": 6.37797746127893, "learning_rate": 4.305765220146966e-06, "loss": 0.8968, "step": 7358 }, { "epoch": 0.531652428341792, "grad_norm": 6.538949908491555, "learning_rate": 4.305562920885127e-06, "loss": 0.8591, "step": 7359 }, { "epoch": 0.5317246735420016, "grad_norm": 6.788332848358824, "learning_rate": 4.305360596906534e-06, "loss": 0.893, "step": 7360 }, { "epoch": 0.531796918742211, "grad_norm": 5.87859121093176, "learning_rate": 4.305158248213955e-06, "loss": 0.8256, "step": 7361 }, { "epoch": 0.5318691639424206, "grad_norm": 6.7143335775507165, "learning_rate": 4.304955874810162e-06, "loss": 0.9322, "step": 7362 }, { "epoch": 0.5319414091426301, "grad_norm": 6.659102280974258, "learning_rate": 4.304753476697924e-06, "loss": 0.8537, "step": 7363 }, { "epoch": 0.5320136543428396, "grad_norm": 6.485284433590428, "learning_rate": 4.304551053880012e-06, "loss": 0.9135, "step": 7364 }, { "epoch": 0.5320858995430491, "grad_norm": 5.853659321622098, "learning_rate": 4.304348606359198e-06, "loss": 0.8721, "step": 7365 }, { "epoch": 0.5321581447432586, "grad_norm": 6.911060987797082, "learning_rate": 4.304146134138252e-06, "loss": 0.8583, "step": 7366 }, { "epoch": 0.5322303899434682, "grad_norm": 7.6219849175429335, "learning_rate": 4.303943637219946e-06, "loss": 0.9286, "step": 7367 }, { "epoch": 0.5323026351436776, "grad_norm": 5.750058049447978, "learning_rate": 4.3037411156070526e-06, "loss": 0.898, "step": 7368 }, { "epoch": 0.5323748803438871, "grad_norm": 6.937449687053621, "learning_rate": 4.303538569302344e-06, "loss": 0.823, "step": 7369 }, { "epoch": 0.5324471255440967, "grad_norm": 6.016410637618514, "learning_rate": 4.303335998308592e-06, "loss": 0.9108, "step": 7370 }, { "epoch": 0.5325193707443062, "grad_norm": 6.200267527561216, "learning_rate": 4.3031334026285715e-06, "loss": 0.8492, "step": 7371 }, { "epoch": 0.5325916159445157, "grad_norm": 6.506411398169237, "learning_rate": 4.302930782265054e-06, "loss": 0.8922, "step": 7372 }, { "epoch": 0.5326638611447252, "grad_norm": 6.548631819306532, "learning_rate": 4.302728137220815e-06, "loss": 0.8562, "step": 7373 }, { "epoch": 0.5327361063449347, "grad_norm": 5.538855028234961, "learning_rate": 4.302525467498626e-06, "loss": 0.887, "step": 7374 }, { "epoch": 0.5328083515451442, "grad_norm": 6.8191515446796345, "learning_rate": 4.302322773101264e-06, "loss": 0.9184, "step": 7375 }, { "epoch": 0.5328805967453537, "grad_norm": 7.283569752269943, "learning_rate": 4.302120054031502e-06, "loss": 0.9642, "step": 7376 }, { "epoch": 0.5329528419455632, "grad_norm": 7.726073215833887, "learning_rate": 4.301917310292116e-06, "loss": 0.9119, "step": 7377 }, { "epoch": 0.5330250871457728, "grad_norm": 7.3666502674777545, "learning_rate": 4.301714541885882e-06, "loss": 0.9082, "step": 7378 }, { "epoch": 0.5330973323459822, "grad_norm": 6.761188135863764, "learning_rate": 4.301511748815574e-06, "loss": 0.8246, "step": 7379 }, { "epoch": 0.5331695775461918, "grad_norm": 8.724319702559102, "learning_rate": 4.301308931083969e-06, "loss": 0.8662, "step": 7380 }, { "epoch": 0.5332418227464013, "grad_norm": 6.7134850033267055, "learning_rate": 4.301106088693844e-06, "loss": 0.8763, "step": 7381 }, { "epoch": 0.5333140679466108, "grad_norm": 7.843240546593513, "learning_rate": 4.300903221647974e-06, "loss": 0.7906, "step": 7382 }, { "epoch": 0.5333863131468203, "grad_norm": 7.18937618812406, "learning_rate": 4.300700329949138e-06, "loss": 1.0158, "step": 7383 }, { "epoch": 0.5334585583470298, "grad_norm": 5.793043160555477, "learning_rate": 4.300497413600112e-06, "loss": 0.8557, "step": 7384 }, { "epoch": 0.5335308035472394, "grad_norm": 5.90870090878218, "learning_rate": 4.300294472603674e-06, "loss": 0.8857, "step": 7385 }, { "epoch": 0.5336030487474488, "grad_norm": 5.990207628526855, "learning_rate": 4.300091506962604e-06, "loss": 0.8912, "step": 7386 }, { "epoch": 0.5336752939476583, "grad_norm": 6.654426718996618, "learning_rate": 4.299888516679677e-06, "loss": 0.826, "step": 7387 }, { "epoch": 0.5337475391478679, "grad_norm": 5.516754985613288, "learning_rate": 4.2996855017576755e-06, "loss": 0.7991, "step": 7388 }, { "epoch": 0.5338197843480774, "grad_norm": 6.321030093881369, "learning_rate": 4.2994824621993765e-06, "loss": 0.9272, "step": 7389 }, { "epoch": 0.5338920295482869, "grad_norm": 6.8891267769597215, "learning_rate": 4.2992793980075594e-06, "loss": 0.8925, "step": 7390 }, { "epoch": 0.5339642747484964, "grad_norm": 6.412913360062794, "learning_rate": 4.299076309185005e-06, "loss": 1.0065, "step": 7391 }, { "epoch": 0.5340365199487059, "grad_norm": 6.831234989690737, "learning_rate": 4.298873195734492e-06, "loss": 0.9505, "step": 7392 }, { "epoch": 0.5341087651489154, "grad_norm": 6.916198193721315, "learning_rate": 4.298670057658803e-06, "loss": 0.9057, "step": 7393 }, { "epoch": 0.5341810103491249, "grad_norm": 5.464832406785494, "learning_rate": 4.298466894960716e-06, "loss": 0.8703, "step": 7394 }, { "epoch": 0.5342532555493344, "grad_norm": 5.536818637649305, "learning_rate": 4.298263707643014e-06, "loss": 0.8716, "step": 7395 }, { "epoch": 0.534325500749544, "grad_norm": 6.113344273836475, "learning_rate": 4.298060495708478e-06, "loss": 0.875, "step": 7396 }, { "epoch": 0.5343977459497534, "grad_norm": 6.864634519935141, "learning_rate": 4.29785725915989e-06, "loss": 0.8497, "step": 7397 }, { "epoch": 0.534469991149963, "grad_norm": 6.132973599595863, "learning_rate": 4.297653998000033e-06, "loss": 0.9562, "step": 7398 }, { "epoch": 0.5345422363501725, "grad_norm": 5.551718389124538, "learning_rate": 4.297450712231688e-06, "loss": 0.873, "step": 7399 }, { "epoch": 0.534614481550382, "grad_norm": 6.516326061560775, "learning_rate": 4.297247401857638e-06, "loss": 0.8954, "step": 7400 }, { "epoch": 0.5346867267505915, "grad_norm": 6.137867393296739, "learning_rate": 4.297044066880667e-06, "loss": 0.9442, "step": 7401 }, { "epoch": 0.534758971950801, "grad_norm": 6.791979761064297, "learning_rate": 4.296840707303558e-06, "loss": 0.8094, "step": 7402 }, { "epoch": 0.5348312171510106, "grad_norm": 5.944208792098617, "learning_rate": 4.296637323129093e-06, "loss": 0.8161, "step": 7403 }, { "epoch": 0.53490346235122, "grad_norm": 7.3267748143987745, "learning_rate": 4.296433914360061e-06, "loss": 0.9195, "step": 7404 }, { "epoch": 0.5349757075514295, "grad_norm": 5.9682709666450995, "learning_rate": 4.2962304809992415e-06, "loss": 0.8572, "step": 7405 }, { "epoch": 0.5350479527516391, "grad_norm": 5.465346667017, "learning_rate": 4.296027023049423e-06, "loss": 0.9439, "step": 7406 }, { "epoch": 0.5351201979518486, "grad_norm": 6.803831972984162, "learning_rate": 4.295823540513388e-06, "loss": 0.961, "step": 7407 }, { "epoch": 0.535192443152058, "grad_norm": 7.102762689509366, "learning_rate": 4.295620033393924e-06, "loss": 0.8821, "step": 7408 }, { "epoch": 0.5352646883522676, "grad_norm": 7.631418247227138, "learning_rate": 4.295416501693814e-06, "loss": 0.9037, "step": 7409 }, { "epoch": 0.5353369335524771, "grad_norm": 7.201768859171178, "learning_rate": 4.295212945415847e-06, "loss": 0.8977, "step": 7410 }, { "epoch": 0.5354091787526866, "grad_norm": 5.445121072170993, "learning_rate": 4.29500936456281e-06, "loss": 0.8877, "step": 7411 }, { "epoch": 0.5354814239528961, "grad_norm": 7.012690620755463, "learning_rate": 4.294805759137487e-06, "loss": 0.939, "step": 7412 }, { "epoch": 0.5355536691531056, "grad_norm": 6.926977752416324, "learning_rate": 4.294602129142667e-06, "loss": 0.9274, "step": 7413 }, { "epoch": 0.5356259143533152, "grad_norm": 7.976034029230894, "learning_rate": 4.294398474581138e-06, "loss": 0.9042, "step": 7414 }, { "epoch": 0.5356981595535246, "grad_norm": 6.055527600487316, "learning_rate": 4.294194795455687e-06, "loss": 0.8251, "step": 7415 }, { "epoch": 0.5357704047537342, "grad_norm": 9.657542120643933, "learning_rate": 4.293991091769102e-06, "loss": 0.8588, "step": 7416 }, { "epoch": 0.5358426499539437, "grad_norm": 7.242806384525991, "learning_rate": 4.293787363524172e-06, "loss": 1.0189, "step": 7417 }, { "epoch": 0.5359148951541532, "grad_norm": 6.121602575872092, "learning_rate": 4.293583610723686e-06, "loss": 0.834, "step": 7418 }, { "epoch": 0.5359871403543627, "grad_norm": 6.4235810846766785, "learning_rate": 4.293379833370433e-06, "loss": 0.8445, "step": 7419 }, { "epoch": 0.5360593855545722, "grad_norm": 8.278130861584845, "learning_rate": 4.293176031467202e-06, "loss": 0.9012, "step": 7420 }, { "epoch": 0.5361316307547818, "grad_norm": 5.710331625051771, "learning_rate": 4.292972205016784e-06, "loss": 0.8148, "step": 7421 }, { "epoch": 0.5362038759549912, "grad_norm": 5.085149888611784, "learning_rate": 4.292768354021969e-06, "loss": 0.7677, "step": 7422 }, { "epoch": 0.5362761211552007, "grad_norm": 6.114921842061555, "learning_rate": 4.292564478485547e-06, "loss": 0.9007, "step": 7423 }, { "epoch": 0.5363483663554103, "grad_norm": 7.489900273676489, "learning_rate": 4.292360578410308e-06, "loss": 0.928, "step": 7424 }, { "epoch": 0.5364206115556198, "grad_norm": 6.356631575302352, "learning_rate": 4.2921566537990455e-06, "loss": 0.7903, "step": 7425 }, { "epoch": 0.5364928567558293, "grad_norm": 10.050860953580202, "learning_rate": 4.29195270465455e-06, "loss": 0.887, "step": 7426 }, { "epoch": 0.5365651019560388, "grad_norm": 6.994995235416952, "learning_rate": 4.2917487309796125e-06, "loss": 0.8224, "step": 7427 }, { "epoch": 0.5366373471562483, "grad_norm": 7.275220740466214, "learning_rate": 4.291544732777027e-06, "loss": 0.9156, "step": 7428 }, { "epoch": 0.5367095923564578, "grad_norm": 6.846531133056229, "learning_rate": 4.291340710049584e-06, "loss": 0.8661, "step": 7429 }, { "epoch": 0.5367818375566673, "grad_norm": 5.31964008490561, "learning_rate": 4.291136662800078e-06, "loss": 0.7623, "step": 7430 }, { "epoch": 0.5368540827568768, "grad_norm": 6.263507437621718, "learning_rate": 4.290932591031303e-06, "loss": 0.9634, "step": 7431 }, { "epoch": 0.5369263279570864, "grad_norm": 6.556894287802421, "learning_rate": 4.29072849474605e-06, "loss": 0.9518, "step": 7432 }, { "epoch": 0.5369985731572958, "grad_norm": 6.8023311770406085, "learning_rate": 4.290524373947115e-06, "loss": 0.8819, "step": 7433 }, { "epoch": 0.5370708183575054, "grad_norm": 7.18550732397718, "learning_rate": 4.290320228637291e-06, "loss": 0.9039, "step": 7434 }, { "epoch": 0.5371430635577149, "grad_norm": 6.2412044433789164, "learning_rate": 4.290116058819373e-06, "loss": 0.7918, "step": 7435 }, { "epoch": 0.5372153087579244, "grad_norm": 5.3045611134727615, "learning_rate": 4.289911864496157e-06, "loss": 0.8477, "step": 7436 }, { "epoch": 0.5372875539581339, "grad_norm": 6.672557644014423, "learning_rate": 4.289707645670437e-06, "loss": 0.8387, "step": 7437 }, { "epoch": 0.5373597991583434, "grad_norm": 6.700114679066806, "learning_rate": 4.289503402345009e-06, "loss": 0.8864, "step": 7438 }, { "epoch": 0.537432044358553, "grad_norm": 5.816378704182591, "learning_rate": 4.289299134522669e-06, "loss": 0.9255, "step": 7439 }, { "epoch": 0.5375042895587624, "grad_norm": 6.85869621098643, "learning_rate": 4.2890948422062126e-06, "loss": 0.8155, "step": 7440 }, { "epoch": 0.5375765347589719, "grad_norm": 5.8420074639199235, "learning_rate": 4.288890525398437e-06, "loss": 0.896, "step": 7441 }, { "epoch": 0.5376487799591815, "grad_norm": 6.319716153108411, "learning_rate": 4.28868618410214e-06, "loss": 0.938, "step": 7442 }, { "epoch": 0.537721025159391, "grad_norm": 7.28729337788665, "learning_rate": 4.288481818320117e-06, "loss": 0.9202, "step": 7443 }, { "epoch": 0.5377932703596005, "grad_norm": 5.939494229757469, "learning_rate": 4.288277428055166e-06, "loss": 0.7723, "step": 7444 }, { "epoch": 0.53786551555981, "grad_norm": 5.455670219109001, "learning_rate": 4.288073013310088e-06, "loss": 0.9015, "step": 7445 }, { "epoch": 0.5379377607600195, "grad_norm": 5.562861227243525, "learning_rate": 4.287868574087676e-06, "loss": 0.8773, "step": 7446 }, { "epoch": 0.538010005960229, "grad_norm": 8.462757425604783, "learning_rate": 4.287664110390734e-06, "loss": 0.8373, "step": 7447 }, { "epoch": 0.5380822511604385, "grad_norm": 6.5639914134897515, "learning_rate": 4.287459622222056e-06, "loss": 0.9372, "step": 7448 }, { "epoch": 0.538154496360648, "grad_norm": 5.944483454844658, "learning_rate": 4.287255109584445e-06, "loss": 0.9103, "step": 7449 }, { "epoch": 0.5382267415608576, "grad_norm": 7.091552662932327, "learning_rate": 4.287050572480699e-06, "loss": 0.8758, "step": 7450 }, { "epoch": 0.538298986761067, "grad_norm": 8.899747489175956, "learning_rate": 4.286846010913618e-06, "loss": 0.9324, "step": 7451 }, { "epoch": 0.5383712319612766, "grad_norm": 6.8597893035829305, "learning_rate": 4.2866414248860025e-06, "loss": 0.9201, "step": 7452 }, { "epoch": 0.5384434771614861, "grad_norm": 6.923705812565629, "learning_rate": 4.286436814400653e-06, "loss": 0.8599, "step": 7453 }, { "epoch": 0.5385157223616956, "grad_norm": 7.345896825085071, "learning_rate": 4.286232179460371e-06, "loss": 0.8944, "step": 7454 }, { "epoch": 0.5385879675619051, "grad_norm": 7.365889013104199, "learning_rate": 4.286027520067957e-06, "loss": 0.9389, "step": 7455 }, { "epoch": 0.5386602127621146, "grad_norm": 5.447388875667326, "learning_rate": 4.285822836226214e-06, "loss": 0.82, "step": 7456 }, { "epoch": 0.5387324579623242, "grad_norm": 7.177813031031615, "learning_rate": 4.285618127937942e-06, "loss": 0.8796, "step": 7457 }, { "epoch": 0.5388047031625336, "grad_norm": 5.3059310691431, "learning_rate": 4.285413395205944e-06, "loss": 0.8233, "step": 7458 }, { "epoch": 0.5388769483627431, "grad_norm": 6.769167776414905, "learning_rate": 4.2852086380330235e-06, "loss": 0.8449, "step": 7459 }, { "epoch": 0.5389491935629527, "grad_norm": 5.849288845037128, "learning_rate": 4.2850038564219826e-06, "loss": 0.8178, "step": 7460 }, { "epoch": 0.5390214387631622, "grad_norm": 7.106402033351276, "learning_rate": 4.284799050375625e-06, "loss": 0.8545, "step": 7461 }, { "epoch": 0.5390936839633717, "grad_norm": 6.002575321638485, "learning_rate": 4.2845942198967545e-06, "loss": 0.8896, "step": 7462 }, { "epoch": 0.5391659291635812, "grad_norm": 6.622047486158307, "learning_rate": 4.284389364988174e-06, "loss": 0.9084, "step": 7463 }, { "epoch": 0.5392381743637907, "grad_norm": 6.184819112736879, "learning_rate": 4.28418448565269e-06, "loss": 0.9085, "step": 7464 }, { "epoch": 0.5393104195640002, "grad_norm": 6.850870130086151, "learning_rate": 4.283979581893104e-06, "loss": 0.9149, "step": 7465 }, { "epoch": 0.5393826647642097, "grad_norm": 7.6942139126992695, "learning_rate": 4.283774653712224e-06, "loss": 0.9392, "step": 7466 }, { "epoch": 0.5394549099644192, "grad_norm": 6.684489490586088, "learning_rate": 4.283569701112853e-06, "loss": 0.8491, "step": 7467 }, { "epoch": 0.5395271551646288, "grad_norm": 7.512068638659633, "learning_rate": 4.283364724097798e-06, "loss": 0.9279, "step": 7468 }, { "epoch": 0.5395994003648382, "grad_norm": 7.171924998123651, "learning_rate": 4.283159722669865e-06, "loss": 0.9556, "step": 7469 }, { "epoch": 0.5396716455650478, "grad_norm": 6.18479845043948, "learning_rate": 4.28295469683186e-06, "loss": 0.9608, "step": 7470 }, { "epoch": 0.5397438907652573, "grad_norm": 6.094989670078952, "learning_rate": 4.282749646586589e-06, "loss": 0.9047, "step": 7471 }, { "epoch": 0.5398161359654668, "grad_norm": 6.2610263453539945, "learning_rate": 4.28254457193686e-06, "loss": 0.8573, "step": 7472 }, { "epoch": 0.5398883811656763, "grad_norm": 5.33971962439174, "learning_rate": 4.28233947288548e-06, "loss": 0.7763, "step": 7473 }, { "epoch": 0.5399606263658858, "grad_norm": 7.743667476482195, "learning_rate": 4.282134349435256e-06, "loss": 0.8608, "step": 7474 }, { "epoch": 0.5400328715660954, "grad_norm": 6.89107672839031, "learning_rate": 4.281929201588997e-06, "loss": 0.8791, "step": 7475 }, { "epoch": 0.5401051167663048, "grad_norm": 6.288366736552418, "learning_rate": 4.28172402934951e-06, "loss": 0.8631, "step": 7476 }, { "epoch": 0.5401773619665143, "grad_norm": 5.53175394812113, "learning_rate": 4.2815188327196054e-06, "loss": 0.8042, "step": 7477 }, { "epoch": 0.5402496071667239, "grad_norm": 5.926813728444626, "learning_rate": 4.281313611702091e-06, "loss": 0.8642, "step": 7478 }, { "epoch": 0.5403218523669334, "grad_norm": 6.439799074154295, "learning_rate": 4.281108366299776e-06, "loss": 0.8058, "step": 7479 }, { "epoch": 0.5403940975671429, "grad_norm": 5.462785880073756, "learning_rate": 4.2809030965154705e-06, "loss": 0.8331, "step": 7480 }, { "epoch": 0.5404663427673524, "grad_norm": 5.139504725124794, "learning_rate": 4.280697802351984e-06, "loss": 0.8293, "step": 7481 }, { "epoch": 0.5405385879675619, "grad_norm": 5.682003509273338, "learning_rate": 4.280492483812128e-06, "loss": 0.838, "step": 7482 }, { "epoch": 0.5406108331677714, "grad_norm": 6.241946715866048, "learning_rate": 4.280287140898712e-06, "loss": 0.8657, "step": 7483 }, { "epoch": 0.5406830783679809, "grad_norm": 8.754431338139515, "learning_rate": 4.2800817736145476e-06, "loss": 0.9277, "step": 7484 }, { "epoch": 0.5407553235681904, "grad_norm": 6.943545345171242, "learning_rate": 4.2798763819624455e-06, "loss": 0.8852, "step": 7485 }, { "epoch": 0.5408275687684, "grad_norm": 6.056472771073755, "learning_rate": 4.279670965945218e-06, "loss": 0.835, "step": 7486 }, { "epoch": 0.5408998139686094, "grad_norm": 7.236123057148307, "learning_rate": 4.279465525565677e-06, "loss": 0.9056, "step": 7487 }, { "epoch": 0.540972059168819, "grad_norm": 7.478867832118239, "learning_rate": 4.279260060826634e-06, "loss": 0.8649, "step": 7488 }, { "epoch": 0.5410443043690285, "grad_norm": 5.732515371362745, "learning_rate": 4.279054571730903e-06, "loss": 0.8958, "step": 7489 }, { "epoch": 0.541116549569238, "grad_norm": 6.26202304264923, "learning_rate": 4.278849058281295e-06, "loss": 0.9092, "step": 7490 }, { "epoch": 0.5411887947694475, "grad_norm": 5.453865241495035, "learning_rate": 4.2786435204806254e-06, "loss": 0.8678, "step": 7491 }, { "epoch": 0.541261039969657, "grad_norm": 5.6919454451390745, "learning_rate": 4.278437958331707e-06, "loss": 0.869, "step": 7492 }, { "epoch": 0.5413332851698666, "grad_norm": 5.399051526449243, "learning_rate": 4.2782323718373545e-06, "loss": 0.8058, "step": 7493 }, { "epoch": 0.541405530370076, "grad_norm": 6.258194700992239, "learning_rate": 4.278026761000381e-06, "loss": 0.9067, "step": 7494 }, { "epoch": 0.5414777755702855, "grad_norm": 6.2982806205848565, "learning_rate": 4.277821125823602e-06, "loss": 0.9243, "step": 7495 }, { "epoch": 0.5415500207704951, "grad_norm": 6.299363773251542, "learning_rate": 4.277615466309831e-06, "loss": 0.8982, "step": 7496 }, { "epoch": 0.5416222659707046, "grad_norm": 7.0148536580945455, "learning_rate": 4.277409782461885e-06, "loss": 0.8888, "step": 7497 }, { "epoch": 0.541694511170914, "grad_norm": 6.828768841659693, "learning_rate": 4.277204074282579e-06, "loss": 0.8852, "step": 7498 }, { "epoch": 0.5417667563711236, "grad_norm": 7.307904747720636, "learning_rate": 4.27699834177473e-06, "loss": 0.8791, "step": 7499 }, { "epoch": 0.5418390015713331, "grad_norm": 7.290662452938385, "learning_rate": 4.276792584941153e-06, "loss": 0.9229, "step": 7500 }, { "epoch": 0.5419112467715426, "grad_norm": 5.873289792739836, "learning_rate": 4.276586803784665e-06, "loss": 0.9536, "step": 7501 }, { "epoch": 0.5419834919717521, "grad_norm": 6.782398038801489, "learning_rate": 4.276380998308084e-06, "loss": 0.9011, "step": 7502 }, { "epoch": 0.5420557371719616, "grad_norm": 8.056844457648793, "learning_rate": 4.276175168514225e-06, "loss": 0.8784, "step": 7503 }, { "epoch": 0.5421279823721712, "grad_norm": 8.659985338863798, "learning_rate": 4.275969314405908e-06, "loss": 0.9286, "step": 7504 }, { "epoch": 0.5422002275723806, "grad_norm": 9.093141725522026, "learning_rate": 4.275763435985949e-06, "loss": 0.8981, "step": 7505 }, { "epoch": 0.5422724727725902, "grad_norm": 6.046380705431123, "learning_rate": 4.275557533257169e-06, "loss": 0.858, "step": 7506 }, { "epoch": 0.5423447179727997, "grad_norm": 6.533429088020575, "learning_rate": 4.275351606222383e-06, "loss": 0.8852, "step": 7507 }, { "epoch": 0.5424169631730092, "grad_norm": 5.924906978601278, "learning_rate": 4.275145654884413e-06, "loss": 0.8176, "step": 7508 }, { "epoch": 0.5424892083732187, "grad_norm": 8.196746031676991, "learning_rate": 4.2749396792460774e-06, "loss": 0.8808, "step": 7509 }, { "epoch": 0.5425614535734282, "grad_norm": 7.807712888817007, "learning_rate": 4.274733679310196e-06, "loss": 0.8771, "step": 7510 }, { "epoch": 0.5426336987736378, "grad_norm": 10.743161887626473, "learning_rate": 4.274527655079588e-06, "loss": 0.9069, "step": 7511 }, { "epoch": 0.5427059439738472, "grad_norm": 7.838517468859729, "learning_rate": 4.274321606557074e-06, "loss": 0.8744, "step": 7512 }, { "epoch": 0.5427781891740567, "grad_norm": 6.441994292632414, "learning_rate": 4.274115533745475e-06, "loss": 0.8701, "step": 7513 }, { "epoch": 0.5428504343742663, "grad_norm": 7.5818108787526395, "learning_rate": 4.273909436647613e-06, "loss": 0.8377, "step": 7514 }, { "epoch": 0.5429226795744758, "grad_norm": 7.79247359994939, "learning_rate": 4.273703315266307e-06, "loss": 0.8387, "step": 7515 }, { "epoch": 0.5429949247746853, "grad_norm": 11.051561367833605, "learning_rate": 4.27349716960438e-06, "loss": 0.8516, "step": 7516 }, { "epoch": 0.5430671699748948, "grad_norm": 6.142535895125106, "learning_rate": 4.2732909996646535e-06, "loss": 0.8307, "step": 7517 }, { "epoch": 0.5431394151751043, "grad_norm": 7.74298861426672, "learning_rate": 4.273084805449951e-06, "loss": 0.8679, "step": 7518 }, { "epoch": 0.5432116603753138, "grad_norm": 5.819456470186751, "learning_rate": 4.272878586963094e-06, "loss": 0.9223, "step": 7519 }, { "epoch": 0.5432839055755233, "grad_norm": 6.153357121403356, "learning_rate": 4.272672344206905e-06, "loss": 0.8705, "step": 7520 }, { "epoch": 0.5433561507757328, "grad_norm": 5.6662138963715085, "learning_rate": 4.272466077184208e-06, "loss": 0.8532, "step": 7521 }, { "epoch": 0.5434283959759424, "grad_norm": 5.608783318811916, "learning_rate": 4.272259785897828e-06, "loss": 0.8638, "step": 7522 }, { "epoch": 0.5435006411761518, "grad_norm": 7.38237897619355, "learning_rate": 4.272053470350586e-06, "loss": 0.871, "step": 7523 }, { "epoch": 0.5435728863763614, "grad_norm": 6.75919986661987, "learning_rate": 4.271847130545309e-06, "loss": 0.8909, "step": 7524 }, { "epoch": 0.5436451315765709, "grad_norm": 7.311257802978985, "learning_rate": 4.271640766484819e-06, "loss": 0.8016, "step": 7525 }, { "epoch": 0.5437173767767804, "grad_norm": 8.731448316130928, "learning_rate": 4.271434378171945e-06, "loss": 0.8887, "step": 7526 }, { "epoch": 0.5437896219769899, "grad_norm": 6.186625409207949, "learning_rate": 4.2712279656095075e-06, "loss": 0.8708, "step": 7527 }, { "epoch": 0.5438618671771994, "grad_norm": 6.100867863188157, "learning_rate": 4.271021528800336e-06, "loss": 0.8648, "step": 7528 }, { "epoch": 0.543934112377409, "grad_norm": 5.708966667886498, "learning_rate": 4.270815067747253e-06, "loss": 0.7941, "step": 7529 }, { "epoch": 0.5440063575776184, "grad_norm": 6.022538450758877, "learning_rate": 4.270608582453088e-06, "loss": 0.864, "step": 7530 }, { "epoch": 0.5440786027778279, "grad_norm": 6.612863039604435, "learning_rate": 4.270402072920666e-06, "loss": 0.8889, "step": 7531 }, { "epoch": 0.5441508479780375, "grad_norm": 5.556795259546509, "learning_rate": 4.270195539152815e-06, "loss": 0.9238, "step": 7532 }, { "epoch": 0.544223093178247, "grad_norm": 5.968407426360172, "learning_rate": 4.269988981152361e-06, "loss": 0.8267, "step": 7533 }, { "epoch": 0.5442953383784565, "grad_norm": 7.147336514428874, "learning_rate": 4.269782398922132e-06, "loss": 0.8989, "step": 7534 }, { "epoch": 0.544367583578666, "grad_norm": 6.278621294334132, "learning_rate": 4.269575792464956e-06, "loss": 0.9284, "step": 7535 }, { "epoch": 0.5444398287788755, "grad_norm": 5.44696413931292, "learning_rate": 4.269369161783661e-06, "loss": 0.8365, "step": 7536 }, { "epoch": 0.544512073979085, "grad_norm": 5.684770537000558, "learning_rate": 4.269162506881077e-06, "loss": 0.9147, "step": 7537 }, { "epoch": 0.5445843191792945, "grad_norm": 5.714890809038132, "learning_rate": 4.268955827760031e-06, "loss": 0.7987, "step": 7538 }, { "epoch": 0.544656564379504, "grad_norm": 8.462194854779339, "learning_rate": 4.268749124423354e-06, "loss": 0.7508, "step": 7539 }, { "epoch": 0.5447288095797136, "grad_norm": 5.855616307451155, "learning_rate": 4.2685423968738745e-06, "loss": 0.7776, "step": 7540 }, { "epoch": 0.544801054779923, "grad_norm": 6.300127046303158, "learning_rate": 4.268335645114423e-06, "loss": 0.8063, "step": 7541 }, { "epoch": 0.5448732999801326, "grad_norm": 6.088350535042215, "learning_rate": 4.2681288691478295e-06, "loss": 0.8714, "step": 7542 }, { "epoch": 0.5449455451803421, "grad_norm": 7.4375410319246305, "learning_rate": 4.267922068976924e-06, "loss": 0.9276, "step": 7543 }, { "epoch": 0.5450177903805515, "grad_norm": 6.122386141084771, "learning_rate": 4.267715244604539e-06, "loss": 0.9237, "step": 7544 }, { "epoch": 0.5450900355807611, "grad_norm": 7.756553586171906, "learning_rate": 4.267508396033504e-06, "loss": 0.8731, "step": 7545 }, { "epoch": 0.5451622807809706, "grad_norm": 5.212232894823286, "learning_rate": 4.267301523266652e-06, "loss": 0.864, "step": 7546 }, { "epoch": 0.5452345259811802, "grad_norm": 7.157103908183747, "learning_rate": 4.267094626306815e-06, "loss": 0.9827, "step": 7547 }, { "epoch": 0.5453067711813896, "grad_norm": 5.683301444910406, "learning_rate": 4.2668877051568234e-06, "loss": 0.8197, "step": 7548 }, { "epoch": 0.5453790163815991, "grad_norm": 5.958724144698862, "learning_rate": 4.2666807598195115e-06, "loss": 0.8916, "step": 7549 }, { "epoch": 0.5454512615818087, "grad_norm": 5.493113454627989, "learning_rate": 4.2664737902977115e-06, "loss": 0.8807, "step": 7550 }, { "epoch": 0.5455235067820182, "grad_norm": 7.029062973671654, "learning_rate": 4.2662667965942575e-06, "loss": 0.8374, "step": 7551 }, { "epoch": 0.5455957519822277, "grad_norm": 7.972967011872814, "learning_rate": 4.266059778711982e-06, "loss": 0.8928, "step": 7552 }, { "epoch": 0.5456679971824372, "grad_norm": 7.359334050609269, "learning_rate": 4.2658527366537195e-06, "loss": 0.9325, "step": 7553 }, { "epoch": 0.5457402423826467, "grad_norm": 6.0075569247288385, "learning_rate": 4.265645670422304e-06, "loss": 0.8833, "step": 7554 }, { "epoch": 0.5458124875828562, "grad_norm": 6.519385846472469, "learning_rate": 4.26543858002057e-06, "loss": 0.8767, "step": 7555 }, { "epoch": 0.5458847327830657, "grad_norm": 7.093269499919366, "learning_rate": 4.265231465451353e-06, "loss": 0.8637, "step": 7556 }, { "epoch": 0.5459569779832752, "grad_norm": 6.9684543974581, "learning_rate": 4.265024326717488e-06, "loss": 0.9094, "step": 7557 }, { "epoch": 0.5460292231834848, "grad_norm": 5.551566877076184, "learning_rate": 4.264817163821809e-06, "loss": 0.8862, "step": 7558 }, { "epoch": 0.5461014683836942, "grad_norm": 7.597785285697313, "learning_rate": 4.264609976767156e-06, "loss": 0.8488, "step": 7559 }, { "epoch": 0.5461737135839038, "grad_norm": 5.931789181544394, "learning_rate": 4.264402765556361e-06, "loss": 0.7909, "step": 7560 }, { "epoch": 0.5462459587841133, "grad_norm": 6.442074529937353, "learning_rate": 4.2641955301922615e-06, "loss": 0.9222, "step": 7561 }, { "epoch": 0.5463182039843227, "grad_norm": 6.12182877636566, "learning_rate": 4.263988270677696e-06, "loss": 0.9164, "step": 7562 }, { "epoch": 0.5463904491845323, "grad_norm": 7.699886856238665, "learning_rate": 4.2637809870155e-06, "loss": 0.9671, "step": 7563 }, { "epoch": 0.5464626943847418, "grad_norm": 6.273913905770582, "learning_rate": 4.2635736792085125e-06, "loss": 0.8698, "step": 7564 }, { "epoch": 0.5465349395849514, "grad_norm": 7.22825518605064, "learning_rate": 4.2633663472595696e-06, "loss": 0.89, "step": 7565 }, { "epoch": 0.5466071847851608, "grad_norm": 5.855855713948058, "learning_rate": 4.2631589911715124e-06, "loss": 0.8372, "step": 7566 }, { "epoch": 0.5466794299853703, "grad_norm": 6.230285680108234, "learning_rate": 4.262951610947176e-06, "loss": 0.8801, "step": 7567 }, { "epoch": 0.5467516751855799, "grad_norm": 5.704843758559065, "learning_rate": 4.2627442065894014e-06, "loss": 0.9643, "step": 7568 }, { "epoch": 0.5468239203857894, "grad_norm": 5.690615293357317, "learning_rate": 4.262536778101028e-06, "loss": 0.9189, "step": 7569 }, { "epoch": 0.5468961655859989, "grad_norm": 5.2562130086461245, "learning_rate": 4.262329325484893e-06, "loss": 0.8232, "step": 7570 }, { "epoch": 0.5469684107862084, "grad_norm": 6.323809473225937, "learning_rate": 4.26212184874384e-06, "loss": 0.8793, "step": 7571 }, { "epoch": 0.5470406559864179, "grad_norm": 6.1985399803650205, "learning_rate": 4.2619143478807045e-06, "loss": 0.7791, "step": 7572 }, { "epoch": 0.5471129011866274, "grad_norm": 5.94818022011003, "learning_rate": 4.2617068228983316e-06, "loss": 0.8625, "step": 7573 }, { "epoch": 0.5471851463868369, "grad_norm": 5.040056943723124, "learning_rate": 4.26149927379956e-06, "loss": 0.8362, "step": 7574 }, { "epoch": 0.5472573915870464, "grad_norm": 7.208745455858688, "learning_rate": 4.26129170058723e-06, "loss": 0.9337, "step": 7575 }, { "epoch": 0.547329636787256, "grad_norm": 5.457734266802147, "learning_rate": 4.2610841032641855e-06, "loss": 0.8269, "step": 7576 }, { "epoch": 0.5474018819874654, "grad_norm": 5.110490123139468, "learning_rate": 4.260876481833266e-06, "loss": 0.8594, "step": 7577 }, { "epoch": 0.547474127187675, "grad_norm": 7.068261691090691, "learning_rate": 4.260668836297315e-06, "loss": 0.8814, "step": 7578 }, { "epoch": 0.5475463723878845, "grad_norm": 5.6795501482489446, "learning_rate": 4.260461166659175e-06, "loss": 0.8603, "step": 7579 }, { "epoch": 0.5476186175880939, "grad_norm": 5.309129790357303, "learning_rate": 4.260253472921688e-06, "loss": 0.8637, "step": 7580 }, { "epoch": 0.5476908627883035, "grad_norm": 7.364935262671346, "learning_rate": 4.2600457550876975e-06, "loss": 0.9053, "step": 7581 }, { "epoch": 0.547763107988513, "grad_norm": 8.52926042434105, "learning_rate": 4.259838013160048e-06, "loss": 0.9353, "step": 7582 }, { "epoch": 0.5478353531887226, "grad_norm": 5.455239834080521, "learning_rate": 4.259630247141583e-06, "loss": 0.8967, "step": 7583 }, { "epoch": 0.547907598388932, "grad_norm": 7.9024796372863, "learning_rate": 4.259422457035145e-06, "loss": 0.944, "step": 7584 }, { "epoch": 0.5479798435891415, "grad_norm": 5.878110346352515, "learning_rate": 4.25921464284358e-06, "loss": 0.8829, "step": 7585 }, { "epoch": 0.5480520887893511, "grad_norm": 6.436199760603957, "learning_rate": 4.259006804569732e-06, "loss": 0.9253, "step": 7586 }, { "epoch": 0.5481243339895606, "grad_norm": 6.3518465736366405, "learning_rate": 4.258798942216448e-06, "loss": 0.9079, "step": 7587 }, { "epoch": 0.54819657918977, "grad_norm": 6.098174538050974, "learning_rate": 4.2585910557865705e-06, "loss": 0.9337, "step": 7588 }, { "epoch": 0.5482688243899796, "grad_norm": 6.7831058335953935, "learning_rate": 4.258383145282948e-06, "loss": 0.8178, "step": 7589 }, { "epoch": 0.5483410695901891, "grad_norm": 6.536100347880865, "learning_rate": 4.258175210708425e-06, "loss": 0.8603, "step": 7590 }, { "epoch": 0.5484133147903986, "grad_norm": 5.806204062486469, "learning_rate": 4.257967252065849e-06, "loss": 0.8171, "step": 7591 }, { "epoch": 0.5484855599906081, "grad_norm": 6.758941379699838, "learning_rate": 4.257759269358066e-06, "loss": 0.9304, "step": 7592 }, { "epoch": 0.5485578051908176, "grad_norm": 5.919245234136334, "learning_rate": 4.257551262587923e-06, "loss": 1.0344, "step": 7593 }, { "epoch": 0.5486300503910272, "grad_norm": 6.694791437480141, "learning_rate": 4.257343231758269e-06, "loss": 0.981, "step": 7594 }, { "epoch": 0.5487022955912366, "grad_norm": 6.350517984725058, "learning_rate": 4.257135176871949e-06, "loss": 0.8644, "step": 7595 }, { "epoch": 0.5487745407914462, "grad_norm": 6.402861801703125, "learning_rate": 4.256927097931814e-06, "loss": 0.7756, "step": 7596 }, { "epoch": 0.5488467859916557, "grad_norm": 6.003576484315607, "learning_rate": 4.256718994940711e-06, "loss": 0.8071, "step": 7597 }, { "epoch": 0.5489190311918651, "grad_norm": 5.936813796696055, "learning_rate": 4.256510867901489e-06, "loss": 0.9321, "step": 7598 }, { "epoch": 0.5489912763920747, "grad_norm": 7.289282563415047, "learning_rate": 4.256302716816997e-06, "loss": 0.8692, "step": 7599 }, { "epoch": 0.5490635215922842, "grad_norm": 9.247197319413065, "learning_rate": 4.256094541690085e-06, "loss": 0.8167, "step": 7600 }, { "epoch": 0.5491357667924938, "grad_norm": 5.693067906347095, "learning_rate": 4.255886342523601e-06, "loss": 0.8801, "step": 7601 }, { "epoch": 0.5492080119927032, "grad_norm": 5.795480401020547, "learning_rate": 4.255678119320397e-06, "loss": 0.8819, "step": 7602 }, { "epoch": 0.5492802571929127, "grad_norm": 5.728810950131253, "learning_rate": 4.255469872083323e-06, "loss": 0.8356, "step": 7603 }, { "epoch": 0.5493525023931223, "grad_norm": 6.490234962759528, "learning_rate": 4.255261600815229e-06, "loss": 0.7917, "step": 7604 }, { "epoch": 0.5494247475933318, "grad_norm": 8.158785488027519, "learning_rate": 4.2550533055189666e-06, "loss": 0.8632, "step": 7605 }, { "epoch": 0.5494969927935413, "grad_norm": 7.7102517979691125, "learning_rate": 4.254844986197387e-06, "loss": 0.962, "step": 7606 }, { "epoch": 0.5495692379937508, "grad_norm": 7.104351171817857, "learning_rate": 4.254636642853343e-06, "loss": 0.9548, "step": 7607 }, { "epoch": 0.5496414831939603, "grad_norm": 6.15018222197728, "learning_rate": 4.254428275489685e-06, "loss": 0.9217, "step": 7608 }, { "epoch": 0.5497137283941698, "grad_norm": 5.4270652531513655, "learning_rate": 4.254219884109266e-06, "loss": 0.8413, "step": 7609 }, { "epoch": 0.5497859735943793, "grad_norm": 5.757385155231808, "learning_rate": 4.254011468714939e-06, "loss": 0.9042, "step": 7610 }, { "epoch": 0.5498582187945888, "grad_norm": 7.091629316304067, "learning_rate": 4.253803029309557e-06, "loss": 0.8549, "step": 7611 }, { "epoch": 0.5499304639947984, "grad_norm": 6.57138767585113, "learning_rate": 4.2535945658959736e-06, "loss": 0.8707, "step": 7612 }, { "epoch": 0.5500027091950078, "grad_norm": 6.229890745132619, "learning_rate": 4.253386078477042e-06, "loss": 0.8077, "step": 7613 }, { "epoch": 0.5500749543952174, "grad_norm": 8.14643403657128, "learning_rate": 4.253177567055616e-06, "loss": 0.8591, "step": 7614 }, { "epoch": 0.5501471995954269, "grad_norm": 7.120026575960431, "learning_rate": 4.25296903163455e-06, "loss": 0.9313, "step": 7615 }, { "epoch": 0.5502194447956363, "grad_norm": 7.766887354955672, "learning_rate": 4.252760472216701e-06, "loss": 0.879, "step": 7616 }, { "epoch": 0.5502916899958459, "grad_norm": 5.425345505642089, "learning_rate": 4.25255188880492e-06, "loss": 0.8402, "step": 7617 }, { "epoch": 0.5503639351960554, "grad_norm": 4.989057010082518, "learning_rate": 4.252343281402065e-06, "loss": 0.8615, "step": 7618 }, { "epoch": 0.550436180396265, "grad_norm": 5.13422419674258, "learning_rate": 4.2521346500109916e-06, "loss": 0.9121, "step": 7619 }, { "epoch": 0.5505084255964744, "grad_norm": 5.685838729197599, "learning_rate": 4.251925994634555e-06, "loss": 0.9861, "step": 7620 }, { "epoch": 0.5505806707966839, "grad_norm": 6.292743245494046, "learning_rate": 4.251717315275612e-06, "loss": 0.8, "step": 7621 }, { "epoch": 0.5506529159968935, "grad_norm": 7.460331341950534, "learning_rate": 4.251508611937018e-06, "loss": 0.8896, "step": 7622 }, { "epoch": 0.550725161197103, "grad_norm": 7.149321154399906, "learning_rate": 4.251299884621633e-06, "loss": 0.8608, "step": 7623 }, { "epoch": 0.5507974063973125, "grad_norm": 6.579180924937876, "learning_rate": 4.25109113333231e-06, "loss": 0.9859, "step": 7624 }, { "epoch": 0.550869651597522, "grad_norm": 7.746362139986007, "learning_rate": 4.2508823580719105e-06, "loss": 0.8105, "step": 7625 }, { "epoch": 0.5509418967977315, "grad_norm": 9.024710323513983, "learning_rate": 4.250673558843291e-06, "loss": 0.9694, "step": 7626 }, { "epoch": 0.551014141997941, "grad_norm": 6.222758012742923, "learning_rate": 4.25046473564931e-06, "loss": 0.8866, "step": 7627 }, { "epoch": 0.5510863871981505, "grad_norm": 7.515818381250401, "learning_rate": 4.250255888492825e-06, "loss": 0.8801, "step": 7628 }, { "epoch": 0.55115863239836, "grad_norm": 5.753761388090749, "learning_rate": 4.2500470173766965e-06, "loss": 0.809, "step": 7629 }, { "epoch": 0.5512308775985696, "grad_norm": 7.635108376004238, "learning_rate": 4.249838122303783e-06, "loss": 0.8651, "step": 7630 }, { "epoch": 0.551303122798779, "grad_norm": 8.375741043328784, "learning_rate": 4.2496292032769434e-06, "loss": 0.9271, "step": 7631 }, { "epoch": 0.5513753679989886, "grad_norm": 8.613438915790526, "learning_rate": 4.2494202602990405e-06, "loss": 0.9039, "step": 7632 }, { "epoch": 0.5514476131991981, "grad_norm": 10.938633800051132, "learning_rate": 4.249211293372931e-06, "loss": 0.9204, "step": 7633 }, { "epoch": 0.5515198583994075, "grad_norm": 8.99430624677856, "learning_rate": 4.249002302501478e-06, "loss": 0.818, "step": 7634 }, { "epoch": 0.5515921035996171, "grad_norm": 7.303409762496936, "learning_rate": 4.248793287687541e-06, "loss": 0.9245, "step": 7635 }, { "epoch": 0.5516643487998266, "grad_norm": 5.610546276674409, "learning_rate": 4.248584248933982e-06, "loss": 0.8415, "step": 7636 }, { "epoch": 0.5517365940000362, "grad_norm": 8.205526062293897, "learning_rate": 4.248375186243662e-06, "loss": 0.9366, "step": 7637 }, { "epoch": 0.5518088392002456, "grad_norm": 14.06212184821421, "learning_rate": 4.248166099619442e-06, "loss": 0.954, "step": 7638 }, { "epoch": 0.5518810844004551, "grad_norm": 9.807430439900967, "learning_rate": 4.247956989064188e-06, "loss": 0.9069, "step": 7639 }, { "epoch": 0.5519533296006647, "grad_norm": 5.472334727740667, "learning_rate": 4.247747854580758e-06, "loss": 0.8661, "step": 7640 }, { "epoch": 0.5520255748008742, "grad_norm": 8.49136407504592, "learning_rate": 4.247538696172018e-06, "loss": 0.8732, "step": 7641 }, { "epoch": 0.5520978200010837, "grad_norm": 6.891146477929013, "learning_rate": 4.24732951384083e-06, "loss": 0.8245, "step": 7642 }, { "epoch": 0.5521700652012932, "grad_norm": 10.858428957779866, "learning_rate": 4.247120307590057e-06, "loss": 0.9333, "step": 7643 }, { "epoch": 0.5522423104015027, "grad_norm": 8.321861559878135, "learning_rate": 4.246911077422564e-06, "loss": 0.8743, "step": 7644 }, { "epoch": 0.5523145556017122, "grad_norm": 6.443997261638758, "learning_rate": 4.246701823341215e-06, "loss": 0.8565, "step": 7645 }, { "epoch": 0.5523868008019217, "grad_norm": 5.879899275040274, "learning_rate": 4.246492545348874e-06, "loss": 0.8463, "step": 7646 }, { "epoch": 0.5524590460021312, "grad_norm": 6.588549547354065, "learning_rate": 4.2462832434484055e-06, "loss": 0.849, "step": 7647 }, { "epoch": 0.5525312912023408, "grad_norm": 5.8093249765656605, "learning_rate": 4.246073917642677e-06, "loss": 0.7671, "step": 7648 }, { "epoch": 0.5526035364025502, "grad_norm": 8.980397387167525, "learning_rate": 4.245864567934551e-06, "loss": 0.8301, "step": 7649 }, { "epoch": 0.5526757816027598, "grad_norm": 7.708195315663327, "learning_rate": 4.245655194326894e-06, "loss": 0.7983, "step": 7650 }, { "epoch": 0.5527480268029693, "grad_norm": 8.223601675161483, "learning_rate": 4.245445796822575e-06, "loss": 0.9341, "step": 7651 }, { "epoch": 0.5528202720031787, "grad_norm": 7.436849357290836, "learning_rate": 4.245236375424457e-06, "loss": 0.9418, "step": 7652 }, { "epoch": 0.5528925172033883, "grad_norm": 6.0449335495646555, "learning_rate": 4.245026930135408e-06, "loss": 0.935, "step": 7653 }, { "epoch": 0.5529647624035978, "grad_norm": 6.436210429087912, "learning_rate": 4.244817460958295e-06, "loss": 0.878, "step": 7654 }, { "epoch": 0.5530370076038074, "grad_norm": 5.931675352837095, "learning_rate": 4.244607967895986e-06, "loss": 0.8099, "step": 7655 }, { "epoch": 0.5531092528040168, "grad_norm": 9.19910471333246, "learning_rate": 4.244398450951348e-06, "loss": 0.981, "step": 7656 }, { "epoch": 0.5531814980042263, "grad_norm": 8.166573141172323, "learning_rate": 4.24418891012725e-06, "loss": 0.9486, "step": 7657 }, { "epoch": 0.5532537432044359, "grad_norm": 6.9815291168779385, "learning_rate": 4.243979345426561e-06, "loss": 0.93, "step": 7658 }, { "epoch": 0.5533259884046454, "grad_norm": 7.187636399011467, "learning_rate": 4.243769756852148e-06, "loss": 0.8516, "step": 7659 }, { "epoch": 0.5533982336048549, "grad_norm": 6.692302084936462, "learning_rate": 4.243560144406881e-06, "loss": 0.8925, "step": 7660 }, { "epoch": 0.5534704788050644, "grad_norm": 6.597206981262092, "learning_rate": 4.24335050809363e-06, "loss": 0.9559, "step": 7661 }, { "epoch": 0.5535427240052739, "grad_norm": 5.8244450382557575, "learning_rate": 4.243140847915264e-06, "loss": 0.8219, "step": 7662 }, { "epoch": 0.5536149692054834, "grad_norm": 7.714696893380729, "learning_rate": 4.242931163874653e-06, "loss": 0.8663, "step": 7663 }, { "epoch": 0.5536872144056929, "grad_norm": 5.662828866195964, "learning_rate": 4.242721455974668e-06, "loss": 0.831, "step": 7664 }, { "epoch": 0.5537594596059024, "grad_norm": 7.0686267845417206, "learning_rate": 4.242511724218178e-06, "loss": 0.8624, "step": 7665 }, { "epoch": 0.553831704806112, "grad_norm": 5.90627454318644, "learning_rate": 4.2423019686080565e-06, "loss": 0.8487, "step": 7666 }, { "epoch": 0.5539039500063214, "grad_norm": 5.514756261258856, "learning_rate": 4.2420921891471745e-06, "loss": 0.9375, "step": 7667 }, { "epoch": 0.553976195206531, "grad_norm": 6.623518580223683, "learning_rate": 4.2418823858384025e-06, "loss": 0.9167, "step": 7668 }, { "epoch": 0.5540484404067405, "grad_norm": 5.341306071773847, "learning_rate": 4.241672558684613e-06, "loss": 0.8534, "step": 7669 }, { "epoch": 0.5541206856069499, "grad_norm": 5.507815789837227, "learning_rate": 4.241462707688678e-06, "loss": 0.8008, "step": 7670 }, { "epoch": 0.5541929308071595, "grad_norm": 8.073671157199174, "learning_rate": 4.241252832853472e-06, "loss": 0.9813, "step": 7671 }, { "epoch": 0.554265176007369, "grad_norm": 6.391633119789361, "learning_rate": 4.241042934181866e-06, "loss": 0.8986, "step": 7672 }, { "epoch": 0.5543374212075786, "grad_norm": 6.839760649659381, "learning_rate": 4.240833011676734e-06, "loss": 0.8271, "step": 7673 }, { "epoch": 0.554409666407788, "grad_norm": 6.156097255302912, "learning_rate": 4.24062306534095e-06, "loss": 0.93, "step": 7674 }, { "epoch": 0.5544819116079975, "grad_norm": 6.688625062799745, "learning_rate": 4.240413095177387e-06, "loss": 0.8707, "step": 7675 }, { "epoch": 0.5545541568082071, "grad_norm": 5.983986147206638, "learning_rate": 4.240203101188921e-06, "loss": 0.8369, "step": 7676 }, { "epoch": 0.5546264020084166, "grad_norm": 7.10780722969482, "learning_rate": 4.239993083378425e-06, "loss": 0.8717, "step": 7677 }, { "epoch": 0.554698647208626, "grad_norm": 6.148759496611326, "learning_rate": 4.239783041748775e-06, "loss": 0.8723, "step": 7678 }, { "epoch": 0.5547708924088356, "grad_norm": 6.656898144610209, "learning_rate": 4.239572976302846e-06, "loss": 0.8901, "step": 7679 }, { "epoch": 0.5548431376090451, "grad_norm": 6.527303717243336, "learning_rate": 4.239362887043514e-06, "loss": 0.9029, "step": 7680 }, { "epoch": 0.5549153828092546, "grad_norm": 7.323054855439189, "learning_rate": 4.239152773973654e-06, "loss": 0.8577, "step": 7681 }, { "epoch": 0.5549876280094641, "grad_norm": 6.629234256395822, "learning_rate": 4.238942637096144e-06, "loss": 0.9163, "step": 7682 }, { "epoch": 0.5550598732096736, "grad_norm": 6.204000646634194, "learning_rate": 4.238732476413858e-06, "loss": 0.9265, "step": 7683 }, { "epoch": 0.5551321184098832, "grad_norm": 5.598677697060834, "learning_rate": 4.238522291929675e-06, "loss": 0.9123, "step": 7684 }, { "epoch": 0.5552043636100926, "grad_norm": 7.034500388032438, "learning_rate": 4.238312083646472e-06, "loss": 0.7851, "step": 7685 }, { "epoch": 0.5552766088103022, "grad_norm": 5.342941669206392, "learning_rate": 4.238101851567126e-06, "loss": 0.9319, "step": 7686 }, { "epoch": 0.5553488540105117, "grad_norm": 6.378306410292172, "learning_rate": 4.237891595694515e-06, "loss": 0.8431, "step": 7687 }, { "epoch": 0.5554210992107211, "grad_norm": 7.552098206253315, "learning_rate": 4.237681316031519e-06, "loss": 0.9447, "step": 7688 }, { "epoch": 0.5554933444109307, "grad_norm": 6.060474175260869, "learning_rate": 4.237471012581014e-06, "loss": 0.846, "step": 7689 }, { "epoch": 0.5555655896111402, "grad_norm": 7.789738571226677, "learning_rate": 4.237260685345879e-06, "loss": 0.8252, "step": 7690 }, { "epoch": 0.5556378348113498, "grad_norm": 7.226148900748656, "learning_rate": 4.2370503343289945e-06, "loss": 0.8922, "step": 7691 }, { "epoch": 0.5557100800115592, "grad_norm": 5.304796445286825, "learning_rate": 4.23683995953324e-06, "loss": 0.7991, "step": 7692 }, { "epoch": 0.5557823252117687, "grad_norm": 6.990006533989424, "learning_rate": 4.236629560961495e-06, "loss": 0.9017, "step": 7693 }, { "epoch": 0.5558545704119783, "grad_norm": 6.502801438164128, "learning_rate": 4.2364191386166395e-06, "loss": 0.8696, "step": 7694 }, { "epoch": 0.5559268156121878, "grad_norm": 9.608493671474506, "learning_rate": 4.2362086925015544e-06, "loss": 0.9384, "step": 7695 }, { "epoch": 0.5559990608123972, "grad_norm": 8.845427337208307, "learning_rate": 4.235998222619121e-06, "loss": 1.1045, "step": 7696 }, { "epoch": 0.5560713060126068, "grad_norm": 7.715441037005455, "learning_rate": 4.235787728972218e-06, "loss": 0.9469, "step": 7697 }, { "epoch": 0.5561435512128163, "grad_norm": 5.746760907267064, "learning_rate": 4.23557721156373e-06, "loss": 0.8926, "step": 7698 }, { "epoch": 0.5562157964130258, "grad_norm": 5.7872566979433895, "learning_rate": 4.235366670396538e-06, "loss": 0.7925, "step": 7699 }, { "epoch": 0.5562880416132353, "grad_norm": 6.280193183017193, "learning_rate": 4.235156105473524e-06, "loss": 0.8686, "step": 7700 }, { "epoch": 0.5563602868134448, "grad_norm": 5.879912574792845, "learning_rate": 4.234945516797569e-06, "loss": 0.9131, "step": 7701 }, { "epoch": 0.5564325320136544, "grad_norm": 6.845277102025573, "learning_rate": 4.234734904371558e-06, "loss": 0.8391, "step": 7702 }, { "epoch": 0.5565047772138638, "grad_norm": 5.304532347981878, "learning_rate": 4.234524268198372e-06, "loss": 0.8461, "step": 7703 }, { "epoch": 0.5565770224140734, "grad_norm": 7.797544034030618, "learning_rate": 4.234313608280895e-06, "loss": 0.9392, "step": 7704 }, { "epoch": 0.5566492676142829, "grad_norm": 7.533942410088186, "learning_rate": 4.234102924622013e-06, "loss": 0.8173, "step": 7705 }, { "epoch": 0.5567215128144923, "grad_norm": 6.452634039945765, "learning_rate": 4.233892217224608e-06, "loss": 0.9187, "step": 7706 }, { "epoch": 0.5567937580147019, "grad_norm": 5.835892724372242, "learning_rate": 4.233681486091564e-06, "loss": 0.8768, "step": 7707 }, { "epoch": 0.5568660032149114, "grad_norm": 6.273670082958973, "learning_rate": 4.233470731225768e-06, "loss": 0.8166, "step": 7708 }, { "epoch": 0.556938248415121, "grad_norm": 6.696016966749274, "learning_rate": 4.233259952630102e-06, "loss": 0.9266, "step": 7709 }, { "epoch": 0.5570104936153304, "grad_norm": 6.901749607457733, "learning_rate": 4.233049150307454e-06, "loss": 0.898, "step": 7710 }, { "epoch": 0.5570827388155399, "grad_norm": 6.341622131966587, "learning_rate": 4.232838324260709e-06, "loss": 0.795, "step": 7711 }, { "epoch": 0.5571549840157495, "grad_norm": 5.891577524736517, "learning_rate": 4.232627474492752e-06, "loss": 0.9564, "step": 7712 }, { "epoch": 0.557227229215959, "grad_norm": 6.444663201196434, "learning_rate": 4.232416601006471e-06, "loss": 0.8338, "step": 7713 }, { "epoch": 0.5572994744161684, "grad_norm": 4.461684182870933, "learning_rate": 4.2322057038047515e-06, "loss": 0.8103, "step": 7714 }, { "epoch": 0.557371719616378, "grad_norm": 6.642924693711959, "learning_rate": 4.231994782890481e-06, "loss": 0.8804, "step": 7715 }, { "epoch": 0.5574439648165875, "grad_norm": 5.913461309436557, "learning_rate": 4.231783838266546e-06, "loss": 0.8521, "step": 7716 }, { "epoch": 0.557516210016797, "grad_norm": 6.499015146618491, "learning_rate": 4.231572869935836e-06, "loss": 0.886, "step": 7717 }, { "epoch": 0.5575884552170065, "grad_norm": 5.403794806085295, "learning_rate": 4.231361877901237e-06, "loss": 0.8468, "step": 7718 }, { "epoch": 0.557660700417216, "grad_norm": 6.324475703164799, "learning_rate": 4.231150862165638e-06, "loss": 0.871, "step": 7719 }, { "epoch": 0.5577329456174256, "grad_norm": 5.629105489620997, "learning_rate": 4.230939822731929e-06, "loss": 0.9641, "step": 7720 }, { "epoch": 0.557805190817635, "grad_norm": 6.551852061966489, "learning_rate": 4.2307287596029975e-06, "loss": 0.8266, "step": 7721 }, { "epoch": 0.5578774360178446, "grad_norm": 9.003909215699078, "learning_rate": 4.230517672781732e-06, "loss": 0.8832, "step": 7722 }, { "epoch": 0.5579496812180541, "grad_norm": 9.49611945954973, "learning_rate": 4.230306562271024e-06, "loss": 0.8982, "step": 7723 }, { "epoch": 0.5580219264182635, "grad_norm": 9.10117102833004, "learning_rate": 4.230095428073763e-06, "loss": 0.93, "step": 7724 }, { "epoch": 0.5580941716184731, "grad_norm": 7.612688725579705, "learning_rate": 4.229884270192839e-06, "loss": 0.8474, "step": 7725 }, { "epoch": 0.5581664168186826, "grad_norm": 10.01899517328093, "learning_rate": 4.229673088631142e-06, "loss": 0.9356, "step": 7726 }, { "epoch": 0.5582386620188922, "grad_norm": 7.727558993580546, "learning_rate": 4.229461883391564e-06, "loss": 0.9601, "step": 7727 }, { "epoch": 0.5583109072191016, "grad_norm": 8.311166756772884, "learning_rate": 4.229250654476994e-06, "loss": 0.9353, "step": 7728 }, { "epoch": 0.5583831524193111, "grad_norm": 6.982257701304205, "learning_rate": 4.229039401890327e-06, "loss": 0.8773, "step": 7729 }, { "epoch": 0.5584553976195207, "grad_norm": 6.680008444409544, "learning_rate": 4.228828125634452e-06, "loss": 0.8816, "step": 7730 }, { "epoch": 0.5585276428197302, "grad_norm": 7.062830824403936, "learning_rate": 4.228616825712263e-06, "loss": 0.8634, "step": 7731 }, { "epoch": 0.5585998880199396, "grad_norm": 9.164921218691639, "learning_rate": 4.2284055021266525e-06, "loss": 0.8981, "step": 7732 }, { "epoch": 0.5586721332201492, "grad_norm": 6.8397012518173135, "learning_rate": 4.228194154880511e-06, "loss": 0.8551, "step": 7733 }, { "epoch": 0.5587443784203587, "grad_norm": 9.444677995773475, "learning_rate": 4.227982783976735e-06, "loss": 0.8575, "step": 7734 }, { "epoch": 0.5588166236205682, "grad_norm": 6.53540257493035, "learning_rate": 4.227771389418215e-06, "loss": 0.7458, "step": 7735 }, { "epoch": 0.5588888688207777, "grad_norm": 6.663745367234251, "learning_rate": 4.227559971207847e-06, "loss": 0.9285, "step": 7736 }, { "epoch": 0.5589611140209872, "grad_norm": 6.865046283429764, "learning_rate": 4.227348529348524e-06, "loss": 0.8457, "step": 7737 }, { "epoch": 0.5590333592211968, "grad_norm": 6.722255058663757, "learning_rate": 4.227137063843141e-06, "loss": 0.8621, "step": 7738 }, { "epoch": 0.5591056044214062, "grad_norm": 7.25813231980022, "learning_rate": 4.226925574694592e-06, "loss": 0.8164, "step": 7739 }, { "epoch": 0.5591778496216158, "grad_norm": 7.498055269515358, "learning_rate": 4.2267140619057745e-06, "loss": 0.8928, "step": 7740 }, { "epoch": 0.5592500948218253, "grad_norm": 5.685750168127794, "learning_rate": 4.2265025254795814e-06, "loss": 0.9025, "step": 7741 }, { "epoch": 0.5593223400220347, "grad_norm": 7.9797591212733305, "learning_rate": 4.2262909654189084e-06, "loss": 0.93, "step": 7742 }, { "epoch": 0.5593945852222443, "grad_norm": 6.862147854801173, "learning_rate": 4.226079381726653e-06, "loss": 0.9045, "step": 7743 }, { "epoch": 0.5594668304224538, "grad_norm": 8.186748441024745, "learning_rate": 4.225867774405711e-06, "loss": 0.8793, "step": 7744 }, { "epoch": 0.5595390756226634, "grad_norm": 6.394525880991942, "learning_rate": 4.225656143458979e-06, "loss": 0.895, "step": 7745 }, { "epoch": 0.5596113208228728, "grad_norm": 7.136777813880523, "learning_rate": 4.225444488889356e-06, "loss": 0.8487, "step": 7746 }, { "epoch": 0.5596835660230823, "grad_norm": 6.497800308103823, "learning_rate": 4.225232810699736e-06, "loss": 0.7627, "step": 7747 }, { "epoch": 0.5597558112232919, "grad_norm": 6.24407372843036, "learning_rate": 4.2250211088930185e-06, "loss": 0.8916, "step": 7748 }, { "epoch": 0.5598280564235014, "grad_norm": 6.164530031679188, "learning_rate": 4.224809383472101e-06, "loss": 0.7581, "step": 7749 }, { "epoch": 0.5599003016237108, "grad_norm": 5.157634942142681, "learning_rate": 4.2245976344398835e-06, "loss": 0.9485, "step": 7750 }, { "epoch": 0.5599725468239204, "grad_norm": 6.320609445280227, "learning_rate": 4.224385861799262e-06, "loss": 0.7883, "step": 7751 }, { "epoch": 0.5600447920241299, "grad_norm": 7.064091055917177, "learning_rate": 4.2241740655531375e-06, "loss": 0.9419, "step": 7752 }, { "epoch": 0.5601170372243394, "grad_norm": 7.740590936541925, "learning_rate": 4.223962245704409e-06, "loss": 0.9586, "step": 7753 }, { "epoch": 0.5601892824245489, "grad_norm": 6.598487634488679, "learning_rate": 4.223750402255976e-06, "loss": 0.8664, "step": 7754 }, { "epoch": 0.5602615276247584, "grad_norm": 5.28122002406896, "learning_rate": 4.223538535210738e-06, "loss": 0.956, "step": 7755 }, { "epoch": 0.560333772824968, "grad_norm": 7.572625309841492, "learning_rate": 4.223326644571597e-06, "loss": 1.0034, "step": 7756 }, { "epoch": 0.5604060180251774, "grad_norm": 6.289813377146888, "learning_rate": 4.223114730341451e-06, "loss": 0.9104, "step": 7757 }, { "epoch": 0.560478263225387, "grad_norm": 9.09517655866563, "learning_rate": 4.222902792523203e-06, "loss": 0.9693, "step": 7758 }, { "epoch": 0.5605505084255965, "grad_norm": 6.216393105604584, "learning_rate": 4.222690831119752e-06, "loss": 0.825, "step": 7759 }, { "epoch": 0.5606227536258059, "grad_norm": 6.999304328137251, "learning_rate": 4.222478846134003e-06, "loss": 0.933, "step": 7760 }, { "epoch": 0.5606949988260155, "grad_norm": 5.87219593415541, "learning_rate": 4.222266837568855e-06, "loss": 0.9201, "step": 7761 }, { "epoch": 0.560767244026225, "grad_norm": 6.461460890321801, "learning_rate": 4.222054805427211e-06, "loss": 0.8829, "step": 7762 }, { "epoch": 0.5608394892264346, "grad_norm": 6.992483190326037, "learning_rate": 4.221842749711975e-06, "loss": 0.7985, "step": 7763 }, { "epoch": 0.560911734426644, "grad_norm": 5.210513988482319, "learning_rate": 4.221630670426048e-06, "loss": 0.8123, "step": 7764 }, { "epoch": 0.5609839796268535, "grad_norm": 5.118062790420275, "learning_rate": 4.221418567572334e-06, "loss": 0.9357, "step": 7765 }, { "epoch": 0.5610562248270631, "grad_norm": 6.259827478731069, "learning_rate": 4.221206441153736e-06, "loss": 0.8687, "step": 7766 }, { "epoch": 0.5611284700272725, "grad_norm": 5.404340285446359, "learning_rate": 4.2209942911731586e-06, "loss": 0.8609, "step": 7767 }, { "epoch": 0.561200715227482, "grad_norm": 6.989721108304322, "learning_rate": 4.220782117633505e-06, "loss": 0.8605, "step": 7768 }, { "epoch": 0.5612729604276916, "grad_norm": 7.012567682330601, "learning_rate": 4.220569920537681e-06, "loss": 0.9187, "step": 7769 }, { "epoch": 0.5613452056279011, "grad_norm": 6.392322715834139, "learning_rate": 4.220357699888591e-06, "loss": 0.8477, "step": 7770 }, { "epoch": 0.5614174508281106, "grad_norm": 5.2648829268347805, "learning_rate": 4.220145455689139e-06, "loss": 0.822, "step": 7771 }, { "epoch": 0.5614896960283201, "grad_norm": 6.269502957003917, "learning_rate": 4.219933187942232e-06, "loss": 0.8922, "step": 7772 }, { "epoch": 0.5615619412285296, "grad_norm": 6.576122858166743, "learning_rate": 4.2197208966507746e-06, "loss": 0.8836, "step": 7773 }, { "epoch": 0.5616341864287392, "grad_norm": 7.486074871792559, "learning_rate": 4.219508581817674e-06, "loss": 0.9047, "step": 7774 }, { "epoch": 0.5617064316289486, "grad_norm": 5.672063314711301, "learning_rate": 4.219296243445835e-06, "loss": 0.8566, "step": 7775 }, { "epoch": 0.5617786768291582, "grad_norm": 6.032115182439635, "learning_rate": 4.219083881538166e-06, "loss": 0.8723, "step": 7776 }, { "epoch": 0.5618509220293677, "grad_norm": 7.242526707940701, "learning_rate": 4.218871496097574e-06, "loss": 0.9077, "step": 7777 }, { "epoch": 0.5619231672295771, "grad_norm": 5.392749489365672, "learning_rate": 4.2186590871269654e-06, "loss": 0.8139, "step": 7778 }, { "epoch": 0.5619954124297867, "grad_norm": 6.574436050423148, "learning_rate": 4.218446654629248e-06, "loss": 0.8866, "step": 7779 }, { "epoch": 0.5620676576299962, "grad_norm": 5.761877315085058, "learning_rate": 4.2182341986073305e-06, "loss": 0.9432, "step": 7780 }, { "epoch": 0.5621399028302058, "grad_norm": 6.674149065391806, "learning_rate": 4.218021719064122e-06, "loss": 0.8559, "step": 7781 }, { "epoch": 0.5622121480304152, "grad_norm": 6.774276311869818, "learning_rate": 4.217809216002528e-06, "loss": 0.8856, "step": 7782 }, { "epoch": 0.5622843932306247, "grad_norm": 6.353582269862578, "learning_rate": 4.217596689425461e-06, "loss": 0.8833, "step": 7783 }, { "epoch": 0.5623566384308343, "grad_norm": 6.15988036646414, "learning_rate": 4.217384139335828e-06, "loss": 0.913, "step": 7784 }, { "epoch": 0.5624288836310437, "grad_norm": 7.7266269291581935, "learning_rate": 4.21717156573654e-06, "loss": 0.8923, "step": 7785 }, { "epoch": 0.5625011288312532, "grad_norm": 5.552062969158489, "learning_rate": 4.216958968630507e-06, "loss": 0.9269, "step": 7786 }, { "epoch": 0.5625733740314628, "grad_norm": 6.34452176214889, "learning_rate": 4.216746348020639e-06, "loss": 0.8827, "step": 7787 }, { "epoch": 0.5626456192316723, "grad_norm": 6.020294517333803, "learning_rate": 4.216533703909846e-06, "loss": 0.8652, "step": 7788 }, { "epoch": 0.5627178644318818, "grad_norm": 6.186116690086693, "learning_rate": 4.216321036301038e-06, "loss": 0.8339, "step": 7789 }, { "epoch": 0.5627901096320913, "grad_norm": 7.070877642727792, "learning_rate": 4.216108345197131e-06, "loss": 0.8313, "step": 7790 }, { "epoch": 0.5628623548323008, "grad_norm": 6.435011299319006, "learning_rate": 4.215895630601031e-06, "loss": 0.871, "step": 7791 }, { "epoch": 0.5629346000325104, "grad_norm": 6.007067650307519, "learning_rate": 4.215682892515652e-06, "loss": 0.8293, "step": 7792 }, { "epoch": 0.5630068452327198, "grad_norm": 6.379630109055629, "learning_rate": 4.215470130943907e-06, "loss": 0.9483, "step": 7793 }, { "epoch": 0.5630790904329294, "grad_norm": 6.4609256914946505, "learning_rate": 4.215257345888708e-06, "loss": 0.9438, "step": 7794 }, { "epoch": 0.5631513356331389, "grad_norm": 8.06114481100932, "learning_rate": 4.215044537352967e-06, "loss": 0.8742, "step": 7795 }, { "epoch": 0.5632235808333483, "grad_norm": 5.244697981612951, "learning_rate": 4.214831705339598e-06, "loss": 0.8815, "step": 7796 }, { "epoch": 0.5632958260335579, "grad_norm": 7.336806890709506, "learning_rate": 4.214618849851515e-06, "loss": 0.8676, "step": 7797 }, { "epoch": 0.5633680712337674, "grad_norm": 5.76505497955018, "learning_rate": 4.214405970891631e-06, "loss": 0.9096, "step": 7798 }, { "epoch": 0.563440316433977, "grad_norm": 6.762881667671536, "learning_rate": 4.21419306846286e-06, "loss": 0.851, "step": 7799 }, { "epoch": 0.5635125616341864, "grad_norm": 8.514325355956856, "learning_rate": 4.2139801425681185e-06, "loss": 0.9518, "step": 7800 }, { "epoch": 0.5635848068343959, "grad_norm": 5.53445307344308, "learning_rate": 4.213767193210318e-06, "loss": 0.8705, "step": 7801 }, { "epoch": 0.5636570520346055, "grad_norm": 6.993101535491484, "learning_rate": 4.2135542203923755e-06, "loss": 0.8268, "step": 7802 }, { "epoch": 0.5637292972348149, "grad_norm": 6.3396539101467875, "learning_rate": 4.213341224117207e-06, "loss": 0.8814, "step": 7803 }, { "epoch": 0.5638015424350244, "grad_norm": 6.702737721457291, "learning_rate": 4.2131282043877266e-06, "loss": 0.8265, "step": 7804 }, { "epoch": 0.563873787635234, "grad_norm": 4.797631020876406, "learning_rate": 4.2129151612068516e-06, "loss": 0.7843, "step": 7805 }, { "epoch": 0.5639460328354435, "grad_norm": 6.137076791751229, "learning_rate": 4.212702094577499e-06, "loss": 0.8857, "step": 7806 }, { "epoch": 0.564018278035653, "grad_norm": 5.669677645015515, "learning_rate": 4.212489004502583e-06, "loss": 0.8769, "step": 7807 }, { "epoch": 0.5640905232358625, "grad_norm": 8.015960031984628, "learning_rate": 4.212275890985022e-06, "loss": 0.8414, "step": 7808 }, { "epoch": 0.564162768436072, "grad_norm": 5.0621041979124115, "learning_rate": 4.2120627540277345e-06, "loss": 0.842, "step": 7809 }, { "epoch": 0.5642350136362816, "grad_norm": 7.391270937929475, "learning_rate": 4.211849593633638e-06, "loss": 0.8845, "step": 7810 }, { "epoch": 0.564307258836491, "grad_norm": 6.071352129543155, "learning_rate": 4.2116364098056485e-06, "loss": 0.8141, "step": 7811 }, { "epoch": 0.5643795040367006, "grad_norm": 5.821712289728427, "learning_rate": 4.211423202546686e-06, "loss": 0.9096, "step": 7812 }, { "epoch": 0.5644517492369101, "grad_norm": 5.198865825502907, "learning_rate": 4.2112099718596684e-06, "loss": 0.7861, "step": 7813 }, { "epoch": 0.5645239944371195, "grad_norm": 7.5116806624424415, "learning_rate": 4.210996717747515e-06, "loss": 0.919, "step": 7814 }, { "epoch": 0.5645962396373291, "grad_norm": 5.976895351740257, "learning_rate": 4.210783440213145e-06, "loss": 0.8478, "step": 7815 }, { "epoch": 0.5646684848375386, "grad_norm": 5.93671837731526, "learning_rate": 4.210570139259478e-06, "loss": 0.9793, "step": 7816 }, { "epoch": 0.5647407300377482, "grad_norm": 6.007112737612206, "learning_rate": 4.210356814889434e-06, "loss": 0.838, "step": 7817 }, { "epoch": 0.5648129752379576, "grad_norm": 5.241283264855198, "learning_rate": 4.210143467105934e-06, "loss": 0.8965, "step": 7818 }, { "epoch": 0.5648852204381671, "grad_norm": 6.8931835726930775, "learning_rate": 4.209930095911897e-06, "loss": 0.8474, "step": 7819 }, { "epoch": 0.5649574656383767, "grad_norm": 5.818769785584145, "learning_rate": 4.2097167013102445e-06, "loss": 0.8418, "step": 7820 }, { "epoch": 0.5650297108385861, "grad_norm": 5.827310050051704, "learning_rate": 4.209503283303899e-06, "loss": 0.8823, "step": 7821 }, { "epoch": 0.5651019560387956, "grad_norm": 6.830842688476788, "learning_rate": 4.209289841895779e-06, "loss": 0.8926, "step": 7822 }, { "epoch": 0.5651742012390052, "grad_norm": 6.528023100587699, "learning_rate": 4.20907637708881e-06, "loss": 0.7664, "step": 7823 }, { "epoch": 0.5652464464392147, "grad_norm": 5.79325058316451, "learning_rate": 4.2088628888859114e-06, "loss": 0.8134, "step": 7824 }, { "epoch": 0.5653186916394242, "grad_norm": 6.3635579067199295, "learning_rate": 4.208649377290007e-06, "loss": 0.8704, "step": 7825 }, { "epoch": 0.5653909368396337, "grad_norm": 7.371294027982648, "learning_rate": 4.208435842304019e-06, "loss": 0.8659, "step": 7826 }, { "epoch": 0.5654631820398432, "grad_norm": 7.588769606023152, "learning_rate": 4.208222283930871e-06, "loss": 0.9155, "step": 7827 }, { "epoch": 0.5655354272400528, "grad_norm": 5.512248013158455, "learning_rate": 4.2080087021734865e-06, "loss": 0.9383, "step": 7828 }, { "epoch": 0.5656076724402622, "grad_norm": 6.027347547390249, "learning_rate": 4.207795097034789e-06, "loss": 0.7954, "step": 7829 }, { "epoch": 0.5656799176404718, "grad_norm": 7.419363010343142, "learning_rate": 4.207581468517703e-06, "loss": 0.8459, "step": 7830 }, { "epoch": 0.5657521628406813, "grad_norm": 6.067219892715485, "learning_rate": 4.207367816625152e-06, "loss": 0.8651, "step": 7831 }, { "epoch": 0.5658244080408907, "grad_norm": 4.960300771146279, "learning_rate": 4.2071541413600615e-06, "loss": 0.8451, "step": 7832 }, { "epoch": 0.5658966532411003, "grad_norm": 7.79606694561794, "learning_rate": 4.206940442725356e-06, "loss": 0.9602, "step": 7833 }, { "epoch": 0.5659688984413098, "grad_norm": 6.489711541817255, "learning_rate": 4.206726720723962e-06, "loss": 0.8322, "step": 7834 }, { "epoch": 0.5660411436415194, "grad_norm": 5.922113016221854, "learning_rate": 4.206512975358804e-06, "loss": 0.8456, "step": 7835 }, { "epoch": 0.5661133888417288, "grad_norm": 5.491375316403039, "learning_rate": 4.206299206632808e-06, "loss": 0.867, "step": 7836 }, { "epoch": 0.5661856340419383, "grad_norm": 5.682040434218481, "learning_rate": 4.2060854145489e-06, "loss": 0.8325, "step": 7837 }, { "epoch": 0.5662578792421479, "grad_norm": 6.77849498282456, "learning_rate": 4.205871599110008e-06, "loss": 0.8557, "step": 7838 }, { "epoch": 0.5663301244423573, "grad_norm": 5.753914205697147, "learning_rate": 4.20565776031906e-06, "loss": 0.8444, "step": 7839 }, { "epoch": 0.5664023696425668, "grad_norm": 6.256505708299043, "learning_rate": 4.20544389817898e-06, "loss": 0.9019, "step": 7840 }, { "epoch": 0.5664746148427764, "grad_norm": 5.233025858081312, "learning_rate": 4.205230012692697e-06, "loss": 0.7972, "step": 7841 }, { "epoch": 0.5665468600429859, "grad_norm": 6.043416613877988, "learning_rate": 4.20501610386314e-06, "loss": 0.8277, "step": 7842 }, { "epoch": 0.5666191052431954, "grad_norm": 5.465004472093768, "learning_rate": 4.204802171693236e-06, "loss": 0.8852, "step": 7843 }, { "epoch": 0.5666913504434049, "grad_norm": 6.4608787524724836, "learning_rate": 4.204588216185914e-06, "loss": 0.8886, "step": 7844 }, { "epoch": 0.5667635956436144, "grad_norm": 7.52437724327867, "learning_rate": 4.2043742373441024e-06, "loss": 0.9307, "step": 7845 }, { "epoch": 0.566835840843824, "grad_norm": 6.363981710698996, "learning_rate": 4.2041602351707315e-06, "loss": 0.8015, "step": 7846 }, { "epoch": 0.5669080860440334, "grad_norm": 6.561300113520757, "learning_rate": 4.20394620966873e-06, "loss": 0.9179, "step": 7847 }, { "epoch": 0.566980331244243, "grad_norm": 5.6793120416220555, "learning_rate": 4.203732160841027e-06, "loss": 0.8681, "step": 7848 }, { "epoch": 0.5670525764444525, "grad_norm": 6.300845425510126, "learning_rate": 4.2035180886905555e-06, "loss": 0.9167, "step": 7849 }, { "epoch": 0.5671248216446619, "grad_norm": 6.150736707276319, "learning_rate": 4.203303993220243e-06, "loss": 0.887, "step": 7850 }, { "epoch": 0.5671970668448715, "grad_norm": 6.450405480702629, "learning_rate": 4.203089874433021e-06, "loss": 0.8894, "step": 7851 }, { "epoch": 0.567269312045081, "grad_norm": 7.247515548684112, "learning_rate": 4.2028757323318214e-06, "loss": 0.9686, "step": 7852 }, { "epoch": 0.5673415572452906, "grad_norm": 6.64063706340816, "learning_rate": 4.2026615669195755e-06, "loss": 0.8299, "step": 7853 }, { "epoch": 0.5674138024455, "grad_norm": 9.859677946390692, "learning_rate": 4.202447378199214e-06, "loss": 0.8956, "step": 7854 }, { "epoch": 0.5674860476457095, "grad_norm": 6.058252478702347, "learning_rate": 4.20223316617367e-06, "loss": 0.8211, "step": 7855 }, { "epoch": 0.5675582928459191, "grad_norm": 6.205690713289207, "learning_rate": 4.202018930845876e-06, "loss": 0.889, "step": 7856 }, { "epoch": 0.5676305380461285, "grad_norm": 6.139389884532247, "learning_rate": 4.201804672218764e-06, "loss": 0.8239, "step": 7857 }, { "epoch": 0.567702783246338, "grad_norm": 7.715603453496773, "learning_rate": 4.201590390295268e-06, "loss": 0.8185, "step": 7858 }, { "epoch": 0.5677750284465476, "grad_norm": 6.559946780570405, "learning_rate": 4.2013760850783206e-06, "loss": 0.8622, "step": 7859 }, { "epoch": 0.5678472736467571, "grad_norm": 7.703049727066303, "learning_rate": 4.201161756570855e-06, "loss": 0.832, "step": 7860 }, { "epoch": 0.5679195188469666, "grad_norm": 7.939942232013094, "learning_rate": 4.200947404775807e-06, "loss": 0.8749, "step": 7861 }, { "epoch": 0.5679917640471761, "grad_norm": 5.50471537201555, "learning_rate": 4.200733029696109e-06, "loss": 0.885, "step": 7862 }, { "epoch": 0.5680640092473856, "grad_norm": 7.52338045758165, "learning_rate": 4.200518631334696e-06, "loss": 0.8477, "step": 7863 }, { "epoch": 0.5681362544475952, "grad_norm": 8.150600537600598, "learning_rate": 4.200304209694503e-06, "loss": 0.9013, "step": 7864 }, { "epoch": 0.5682084996478046, "grad_norm": 5.633951143474759, "learning_rate": 4.200089764778467e-06, "loss": 0.8865, "step": 7865 }, { "epoch": 0.5682807448480142, "grad_norm": 7.160455658974548, "learning_rate": 4.199875296589521e-06, "loss": 0.9447, "step": 7866 }, { "epoch": 0.5683529900482237, "grad_norm": 5.853247772900683, "learning_rate": 4.199660805130603e-06, "loss": 0.8154, "step": 7867 }, { "epoch": 0.5684252352484331, "grad_norm": 5.3996441123389785, "learning_rate": 4.199446290404647e-06, "loss": 0.8792, "step": 7868 }, { "epoch": 0.5684974804486427, "grad_norm": 6.504361669959827, "learning_rate": 4.199231752414592e-06, "loss": 0.8191, "step": 7869 }, { "epoch": 0.5685697256488522, "grad_norm": 7.636094074115394, "learning_rate": 4.1990171911633725e-06, "loss": 0.9341, "step": 7870 }, { "epoch": 0.5686419708490618, "grad_norm": 6.36920950208081, "learning_rate": 4.198802606653929e-06, "loss": 0.8834, "step": 7871 }, { "epoch": 0.5687142160492712, "grad_norm": 5.456031352727754, "learning_rate": 4.198587998889195e-06, "loss": 0.8685, "step": 7872 }, { "epoch": 0.5687864612494807, "grad_norm": 6.8133135668523215, "learning_rate": 4.19837336787211e-06, "loss": 0.8961, "step": 7873 }, { "epoch": 0.5688587064496903, "grad_norm": 5.643424145320319, "learning_rate": 4.198158713605614e-06, "loss": 0.7781, "step": 7874 }, { "epoch": 0.5689309516498997, "grad_norm": 6.35773748736545, "learning_rate": 4.197944036092642e-06, "loss": 0.8958, "step": 7875 }, { "epoch": 0.5690031968501092, "grad_norm": 5.911932255461328, "learning_rate": 4.197729335336135e-06, "loss": 0.9362, "step": 7876 }, { "epoch": 0.5690754420503188, "grad_norm": 5.87780435063766, "learning_rate": 4.1975146113390315e-06, "loss": 0.9232, "step": 7877 }, { "epoch": 0.5691476872505283, "grad_norm": 6.2985973794944705, "learning_rate": 4.197299864104271e-06, "loss": 0.8295, "step": 7878 }, { "epoch": 0.5692199324507378, "grad_norm": 6.566334793818521, "learning_rate": 4.197085093634794e-06, "loss": 0.8853, "step": 7879 }, { "epoch": 0.5692921776509473, "grad_norm": 6.102715260023351, "learning_rate": 4.196870299933539e-06, "loss": 0.8601, "step": 7880 }, { "epoch": 0.5693644228511568, "grad_norm": 7.0961560441571, "learning_rate": 4.196655483003448e-06, "loss": 0.8824, "step": 7881 }, { "epoch": 0.5694366680513664, "grad_norm": 8.680223669870747, "learning_rate": 4.196440642847459e-06, "loss": 0.8696, "step": 7882 }, { "epoch": 0.5695089132515758, "grad_norm": 8.07277621895638, "learning_rate": 4.196225779468517e-06, "loss": 0.8876, "step": 7883 }, { "epoch": 0.5695811584517854, "grad_norm": 6.018490591826679, "learning_rate": 4.19601089286956e-06, "loss": 0.833, "step": 7884 }, { "epoch": 0.5696534036519949, "grad_norm": 6.477404715670153, "learning_rate": 4.195795983053531e-06, "loss": 0.9678, "step": 7885 }, { "epoch": 0.5697256488522043, "grad_norm": 7.310184470796227, "learning_rate": 4.195581050023371e-06, "loss": 0.8375, "step": 7886 }, { "epoch": 0.5697978940524139, "grad_norm": 6.1074662214571624, "learning_rate": 4.195366093782023e-06, "loss": 0.8461, "step": 7887 }, { "epoch": 0.5698701392526234, "grad_norm": 5.507907904481785, "learning_rate": 4.195151114332431e-06, "loss": 0.8676, "step": 7888 }, { "epoch": 0.569942384452833, "grad_norm": 7.935874231803201, "learning_rate": 4.1949361116775345e-06, "loss": 0.8463, "step": 7889 }, { "epoch": 0.5700146296530424, "grad_norm": 6.180253534640905, "learning_rate": 4.19472108582028e-06, "loss": 0.8808, "step": 7890 }, { "epoch": 0.5700868748532519, "grad_norm": 5.021842549093971, "learning_rate": 4.194506036763609e-06, "loss": 0.9005, "step": 7891 }, { "epoch": 0.5701591200534615, "grad_norm": 6.757408032964608, "learning_rate": 4.194290964510467e-06, "loss": 0.9426, "step": 7892 }, { "epoch": 0.5702313652536709, "grad_norm": 6.7148340923069005, "learning_rate": 4.194075869063796e-06, "loss": 0.8888, "step": 7893 }, { "epoch": 0.5703036104538804, "grad_norm": 5.104076379509442, "learning_rate": 4.193860750426543e-06, "loss": 0.8645, "step": 7894 }, { "epoch": 0.57037585565409, "grad_norm": 6.82724335519503, "learning_rate": 4.193645608601651e-06, "loss": 1.0236, "step": 7895 }, { "epoch": 0.5704481008542995, "grad_norm": 7.74504250738252, "learning_rate": 4.193430443592065e-06, "loss": 0.9778, "step": 7896 }, { "epoch": 0.570520346054509, "grad_norm": 6.306419238720333, "learning_rate": 4.193215255400732e-06, "loss": 0.9082, "step": 7897 }, { "epoch": 0.5705925912547185, "grad_norm": 5.843979122137627, "learning_rate": 4.193000044030597e-06, "loss": 0.9411, "step": 7898 }, { "epoch": 0.570664836454928, "grad_norm": 6.011505856874493, "learning_rate": 4.192784809484605e-06, "loss": 0.8591, "step": 7899 }, { "epoch": 0.5707370816551376, "grad_norm": 6.315018255429374, "learning_rate": 4.192569551765703e-06, "loss": 0.9386, "step": 7900 }, { "epoch": 0.570809326855347, "grad_norm": 5.769408269254634, "learning_rate": 4.1923542708768386e-06, "loss": 0.8415, "step": 7901 }, { "epoch": 0.5708815720555566, "grad_norm": 6.547297425400816, "learning_rate": 4.192138966820959e-06, "loss": 0.7953, "step": 7902 }, { "epoch": 0.5709538172557661, "grad_norm": 5.787176939678996, "learning_rate": 4.19192363960101e-06, "loss": 0.8381, "step": 7903 }, { "epoch": 0.5710260624559755, "grad_norm": 5.285489699846884, "learning_rate": 4.1917082892199405e-06, "loss": 0.8902, "step": 7904 }, { "epoch": 0.5710983076561851, "grad_norm": 6.054185967131285, "learning_rate": 4.191492915680697e-06, "loss": 0.9268, "step": 7905 }, { "epoch": 0.5711705528563946, "grad_norm": 5.783926349471722, "learning_rate": 4.19127751898623e-06, "loss": 0.8308, "step": 7906 }, { "epoch": 0.5712427980566042, "grad_norm": 5.819989701094806, "learning_rate": 4.191062099139487e-06, "loss": 0.8796, "step": 7907 }, { "epoch": 0.5713150432568136, "grad_norm": 6.448074396409432, "learning_rate": 4.190846656143417e-06, "loss": 0.8134, "step": 7908 }, { "epoch": 0.5713872884570231, "grad_norm": 5.798330119235266, "learning_rate": 4.190631190000967e-06, "loss": 0.839, "step": 7909 }, { "epoch": 0.5714595336572327, "grad_norm": 5.972027423057269, "learning_rate": 4.190415700715092e-06, "loss": 0.8975, "step": 7910 }, { "epoch": 0.5715317788574421, "grad_norm": 7.940598491337713, "learning_rate": 4.190200188288735e-06, "loss": 0.8754, "step": 7911 }, { "epoch": 0.5716040240576516, "grad_norm": 7.013931851732804, "learning_rate": 4.189984652724853e-06, "loss": 0.9844, "step": 7912 }, { "epoch": 0.5716762692578612, "grad_norm": 5.965789859785797, "learning_rate": 4.189769094026392e-06, "loss": 0.8712, "step": 7913 }, { "epoch": 0.5717485144580707, "grad_norm": 6.308034998475531, "learning_rate": 4.189553512196304e-06, "loss": 0.8743, "step": 7914 }, { "epoch": 0.5718207596582802, "grad_norm": 7.429874133023275, "learning_rate": 4.1893379072375405e-06, "loss": 0.8646, "step": 7915 }, { "epoch": 0.5718930048584897, "grad_norm": 6.3944850167273355, "learning_rate": 4.189122279153052e-06, "loss": 0.8481, "step": 7916 }, { "epoch": 0.5719652500586992, "grad_norm": 7.6018713705074115, "learning_rate": 4.188906627945792e-06, "loss": 0.9147, "step": 7917 }, { "epoch": 0.5720374952589088, "grad_norm": 8.373331843252515, "learning_rate": 4.188690953618711e-06, "loss": 0.8879, "step": 7918 }, { "epoch": 0.5721097404591182, "grad_norm": 6.474983492207933, "learning_rate": 4.188475256174762e-06, "loss": 0.8064, "step": 7919 }, { "epoch": 0.5721819856593278, "grad_norm": 6.191067678538504, "learning_rate": 4.188259535616898e-06, "loss": 0.8706, "step": 7920 }, { "epoch": 0.5722542308595373, "grad_norm": 6.317288838983578, "learning_rate": 4.188043791948072e-06, "loss": 0.94, "step": 7921 }, { "epoch": 0.5723264760597467, "grad_norm": 7.100593123040822, "learning_rate": 4.187828025171238e-06, "loss": 0.8102, "step": 7922 }, { "epoch": 0.5723987212599563, "grad_norm": 6.10193510786102, "learning_rate": 4.187612235289347e-06, "loss": 0.8835, "step": 7923 }, { "epoch": 0.5724709664601658, "grad_norm": 5.44546188959578, "learning_rate": 4.187396422305356e-06, "loss": 0.7964, "step": 7924 }, { "epoch": 0.5725432116603754, "grad_norm": 6.466159467699413, "learning_rate": 4.187180586222217e-06, "loss": 0.8785, "step": 7925 }, { "epoch": 0.5726154568605848, "grad_norm": 6.203559764164329, "learning_rate": 4.186964727042887e-06, "loss": 0.854, "step": 7926 }, { "epoch": 0.5726877020607943, "grad_norm": 5.8719263355717315, "learning_rate": 4.1867488447703195e-06, "loss": 0.8647, "step": 7927 }, { "epoch": 0.5727599472610039, "grad_norm": 7.225985777240422, "learning_rate": 4.186532939407471e-06, "loss": 0.9191, "step": 7928 }, { "epoch": 0.5728321924612133, "grad_norm": 7.131501026064964, "learning_rate": 4.1863170109572935e-06, "loss": 0.8351, "step": 7929 }, { "epoch": 0.5729044376614228, "grad_norm": 5.334479963703519, "learning_rate": 4.1861010594227475e-06, "loss": 0.9375, "step": 7930 }, { "epoch": 0.5729766828616324, "grad_norm": 7.501926174775856, "learning_rate": 4.185885084806787e-06, "loss": 0.821, "step": 7931 }, { "epoch": 0.5730489280618419, "grad_norm": 5.963850796834567, "learning_rate": 4.185669087112367e-06, "loss": 0.9867, "step": 7932 }, { "epoch": 0.5731211732620514, "grad_norm": 7.613300039660626, "learning_rate": 4.1854530663424484e-06, "loss": 0.8858, "step": 7933 }, { "epoch": 0.5731934184622609, "grad_norm": 5.260339093029766, "learning_rate": 4.185237022499984e-06, "loss": 0.8744, "step": 7934 }, { "epoch": 0.5732656636624704, "grad_norm": 5.602660174005793, "learning_rate": 4.185020955587935e-06, "loss": 0.899, "step": 7935 }, { "epoch": 0.57333790886268, "grad_norm": 6.396457538111641, "learning_rate": 4.184804865609257e-06, "loss": 0.8173, "step": 7936 }, { "epoch": 0.5734101540628894, "grad_norm": 5.613904054371561, "learning_rate": 4.184588752566907e-06, "loss": 0.7891, "step": 7937 }, { "epoch": 0.573482399263099, "grad_norm": 7.667509060342198, "learning_rate": 4.184372616463846e-06, "loss": 0.8981, "step": 7938 }, { "epoch": 0.5735546444633085, "grad_norm": 7.0230551202917555, "learning_rate": 4.184156457303032e-06, "loss": 0.791, "step": 7939 }, { "epoch": 0.5736268896635179, "grad_norm": 6.963251656138028, "learning_rate": 4.183940275087424e-06, "loss": 0.9212, "step": 7940 }, { "epoch": 0.5736991348637275, "grad_norm": 6.111899242249671, "learning_rate": 4.183724069819981e-06, "loss": 0.8941, "step": 7941 }, { "epoch": 0.573771380063937, "grad_norm": 4.643285945331947, "learning_rate": 4.1835078415036625e-06, "loss": 0.8422, "step": 7942 }, { "epoch": 0.5738436252641466, "grad_norm": 6.509460313872484, "learning_rate": 4.183291590141429e-06, "loss": 0.8435, "step": 7943 }, { "epoch": 0.573915870464356, "grad_norm": 9.072553438316762, "learning_rate": 4.18307531573624e-06, "loss": 0.9389, "step": 7944 }, { "epoch": 0.5739881156645655, "grad_norm": 7.829414794411471, "learning_rate": 4.182859018291057e-06, "loss": 0.9148, "step": 7945 }, { "epoch": 0.5740603608647751, "grad_norm": 6.648695979937914, "learning_rate": 4.182642697808841e-06, "loss": 0.8387, "step": 7946 }, { "epoch": 0.5741326060649845, "grad_norm": 6.623943892457852, "learning_rate": 4.182426354292554e-06, "loss": 0.879, "step": 7947 }, { "epoch": 0.574204851265194, "grad_norm": 5.132069247937424, "learning_rate": 4.182209987745155e-06, "loss": 0.8272, "step": 7948 }, { "epoch": 0.5742770964654036, "grad_norm": 6.41351769534595, "learning_rate": 4.181993598169607e-06, "loss": 0.9244, "step": 7949 }, { "epoch": 0.5743493416656131, "grad_norm": 5.9345323072719625, "learning_rate": 4.181777185568874e-06, "loss": 0.8172, "step": 7950 }, { "epoch": 0.5744215868658226, "grad_norm": 6.987304346283185, "learning_rate": 4.181560749945916e-06, "loss": 0.9383, "step": 7951 }, { "epoch": 0.5744938320660321, "grad_norm": 6.723007485558744, "learning_rate": 4.181344291303698e-06, "loss": 0.9046, "step": 7952 }, { "epoch": 0.5745660772662416, "grad_norm": 7.084922372935188, "learning_rate": 4.181127809645182e-06, "loss": 0.8599, "step": 7953 }, { "epoch": 0.5746383224664512, "grad_norm": 10.134385268505822, "learning_rate": 4.180911304973331e-06, "loss": 0.9025, "step": 7954 }, { "epoch": 0.5747105676666606, "grad_norm": 6.0389715387716265, "learning_rate": 4.18069477729111e-06, "loss": 0.8507, "step": 7955 }, { "epoch": 0.5747828128668702, "grad_norm": 6.908421239841966, "learning_rate": 4.180478226601482e-06, "loss": 0.8643, "step": 7956 }, { "epoch": 0.5748550580670797, "grad_norm": 5.547368245290062, "learning_rate": 4.180261652907412e-06, "loss": 0.8589, "step": 7957 }, { "epoch": 0.5749273032672891, "grad_norm": 6.1614410666715305, "learning_rate": 4.180045056211865e-06, "loss": 0.8828, "step": 7958 }, { "epoch": 0.5749995484674987, "grad_norm": 8.558446662645258, "learning_rate": 4.179828436517805e-06, "loss": 0.8823, "step": 7959 }, { "epoch": 0.5750717936677082, "grad_norm": 8.405980442646083, "learning_rate": 4.179611793828198e-06, "loss": 1.0353, "step": 7960 }, { "epoch": 0.5751440388679178, "grad_norm": 9.592430595602208, "learning_rate": 4.17939512814601e-06, "loss": 0.887, "step": 7961 }, { "epoch": 0.5752162840681272, "grad_norm": 7.006149043530691, "learning_rate": 4.179178439474206e-06, "loss": 0.8716, "step": 7962 }, { "epoch": 0.5752885292683367, "grad_norm": 7.1040979943261116, "learning_rate": 4.1789617278157545e-06, "loss": 0.8607, "step": 7963 }, { "epoch": 0.5753607744685463, "grad_norm": 6.737682831217423, "learning_rate": 4.178744993173619e-06, "loss": 0.7935, "step": 7964 }, { "epoch": 0.5754330196687557, "grad_norm": 7.0686027693656674, "learning_rate": 4.1785282355507695e-06, "loss": 0.8542, "step": 7965 }, { "epoch": 0.5755052648689652, "grad_norm": 8.448880129310222, "learning_rate": 4.178311454950171e-06, "loss": 0.8432, "step": 7966 }, { "epoch": 0.5755775100691748, "grad_norm": 5.509932998554481, "learning_rate": 4.178094651374791e-06, "loss": 0.8842, "step": 7967 }, { "epoch": 0.5756497552693843, "grad_norm": 6.324461528790446, "learning_rate": 4.177877824827599e-06, "loss": 0.9471, "step": 7968 }, { "epoch": 0.5757220004695938, "grad_norm": 5.3452112531175136, "learning_rate": 4.177660975311563e-06, "loss": 0.8691, "step": 7969 }, { "epoch": 0.5757942456698033, "grad_norm": 8.669598694767844, "learning_rate": 4.177444102829649e-06, "loss": 0.8754, "step": 7970 }, { "epoch": 0.5758664908700128, "grad_norm": 8.098757575553062, "learning_rate": 4.17722720738483e-06, "loss": 1.0133, "step": 7971 }, { "epoch": 0.5759387360702223, "grad_norm": 6.186286884412957, "learning_rate": 4.177010288980071e-06, "loss": 0.8515, "step": 7972 }, { "epoch": 0.5760109812704318, "grad_norm": 7.551675916183533, "learning_rate": 4.176793347618344e-06, "loss": 0.8276, "step": 7973 }, { "epoch": 0.5760832264706414, "grad_norm": 6.750573875615797, "learning_rate": 4.176576383302618e-06, "loss": 0.8637, "step": 7974 }, { "epoch": 0.5761554716708509, "grad_norm": 8.439352100688449, "learning_rate": 4.1763593960358626e-06, "loss": 0.9399, "step": 7975 }, { "epoch": 0.5762277168710603, "grad_norm": 6.179739969949496, "learning_rate": 4.17614238582105e-06, "loss": 0.8545, "step": 7976 }, { "epoch": 0.5762999620712699, "grad_norm": 10.664919551182892, "learning_rate": 4.175925352661148e-06, "loss": 0.8955, "step": 7977 }, { "epoch": 0.5763722072714794, "grad_norm": 7.813952013503534, "learning_rate": 4.17570829655913e-06, "loss": 0.88, "step": 7978 }, { "epoch": 0.576444452471689, "grad_norm": 5.873409299384448, "learning_rate": 4.1754912175179665e-06, "loss": 0.8996, "step": 7979 }, { "epoch": 0.5765166976718984, "grad_norm": 5.341345351975229, "learning_rate": 4.17527411554063e-06, "loss": 0.8296, "step": 7980 }, { "epoch": 0.5765889428721079, "grad_norm": 6.531902955922258, "learning_rate": 4.17505699063009e-06, "loss": 0.8531, "step": 7981 }, { "epoch": 0.5766611880723175, "grad_norm": 6.5097676759860255, "learning_rate": 4.174839842789321e-06, "loss": 0.8958, "step": 7982 }, { "epoch": 0.5767334332725269, "grad_norm": 10.568210841164213, "learning_rate": 4.174622672021295e-06, "loss": 0.8642, "step": 7983 }, { "epoch": 0.5768056784727364, "grad_norm": 6.899393259544894, "learning_rate": 4.174405478328984e-06, "loss": 0.7904, "step": 7984 }, { "epoch": 0.576877923672946, "grad_norm": 8.996137849978751, "learning_rate": 4.174188261715364e-06, "loss": 0.8622, "step": 7985 }, { "epoch": 0.5769501688731555, "grad_norm": 5.830253805456085, "learning_rate": 4.173971022183405e-06, "loss": 0.8583, "step": 7986 }, { "epoch": 0.577022414073365, "grad_norm": 6.165673184484488, "learning_rate": 4.173753759736084e-06, "loss": 0.9007, "step": 7987 }, { "epoch": 0.5770946592735745, "grad_norm": 7.557801103060846, "learning_rate": 4.173536474376373e-06, "loss": 0.8397, "step": 7988 }, { "epoch": 0.577166904473784, "grad_norm": 7.990317684346308, "learning_rate": 4.173319166107246e-06, "loss": 0.8551, "step": 7989 }, { "epoch": 0.5772391496739935, "grad_norm": 5.867142312679685, "learning_rate": 4.17310183493168e-06, "loss": 0.8105, "step": 7990 }, { "epoch": 0.577311394874203, "grad_norm": 6.376223876756028, "learning_rate": 4.172884480852649e-06, "loss": 0.8724, "step": 7991 }, { "epoch": 0.5773836400744126, "grad_norm": 8.167366724436082, "learning_rate": 4.172667103873128e-06, "loss": 0.9658, "step": 7992 }, { "epoch": 0.5774558852746221, "grad_norm": 7.083981682519477, "learning_rate": 4.172449703996093e-06, "loss": 0.9647, "step": 7993 }, { "epoch": 0.5775281304748315, "grad_norm": 5.748050525079541, "learning_rate": 4.17223228122452e-06, "loss": 0.8846, "step": 7994 }, { "epoch": 0.5776003756750411, "grad_norm": 6.692190931534405, "learning_rate": 4.172014835561385e-06, "loss": 0.834, "step": 7995 }, { "epoch": 0.5776726208752506, "grad_norm": 4.940371680234918, "learning_rate": 4.171797367009667e-06, "loss": 0.9046, "step": 7996 }, { "epoch": 0.5777448660754602, "grad_norm": 6.597075432823049, "learning_rate": 4.171579875572339e-06, "loss": 0.8303, "step": 7997 }, { "epoch": 0.5778171112756696, "grad_norm": 5.44443954848158, "learning_rate": 4.171362361252382e-06, "loss": 0.7713, "step": 7998 }, { "epoch": 0.5778893564758791, "grad_norm": 5.741736403412258, "learning_rate": 4.1711448240527716e-06, "loss": 0.8744, "step": 7999 }, { "epoch": 0.5779616016760887, "grad_norm": 5.603756271518769, "learning_rate": 4.1709272639764855e-06, "loss": 0.8894, "step": 8000 }, { "epoch": 0.5780338468762981, "grad_norm": 8.324855255609881, "learning_rate": 4.170709681026503e-06, "loss": 0.9301, "step": 8001 }, { "epoch": 0.5781060920765076, "grad_norm": 8.195962274646089, "learning_rate": 4.170492075205802e-06, "loss": 0.9179, "step": 8002 }, { "epoch": 0.5781783372767172, "grad_norm": 5.5120006032315, "learning_rate": 4.170274446517361e-06, "loss": 0.8161, "step": 8003 }, { "epoch": 0.5782505824769267, "grad_norm": 5.26722996190195, "learning_rate": 4.170056794964161e-06, "loss": 0.9021, "step": 8004 }, { "epoch": 0.5783228276771362, "grad_norm": 5.759269540352121, "learning_rate": 4.16983912054918e-06, "loss": 0.919, "step": 8005 }, { "epoch": 0.5783950728773457, "grad_norm": 7.05734767164317, "learning_rate": 4.169621423275398e-06, "loss": 0.8879, "step": 8006 }, { "epoch": 0.5784673180775552, "grad_norm": 5.81826299643238, "learning_rate": 4.169403703145794e-06, "loss": 0.8676, "step": 8007 }, { "epoch": 0.5785395632777647, "grad_norm": 5.756947508063253, "learning_rate": 4.16918596016335e-06, "loss": 0.8968, "step": 8008 }, { "epoch": 0.5786118084779742, "grad_norm": 6.781259563105296, "learning_rate": 4.1689681943310466e-06, "loss": 0.9194, "step": 8009 }, { "epoch": 0.5786840536781838, "grad_norm": 5.591005813652923, "learning_rate": 4.168750405651864e-06, "loss": 0.8788, "step": 8010 }, { "epoch": 0.5787562988783933, "grad_norm": 5.849766535868754, "learning_rate": 4.168532594128785e-06, "loss": 0.834, "step": 8011 }, { "epoch": 0.5788285440786027, "grad_norm": 6.310751493413529, "learning_rate": 4.168314759764789e-06, "loss": 0.8586, "step": 8012 }, { "epoch": 0.5789007892788123, "grad_norm": 5.896439412196077, "learning_rate": 4.16809690256286e-06, "loss": 0.9003, "step": 8013 }, { "epoch": 0.5789730344790218, "grad_norm": 5.655343336076189, "learning_rate": 4.167879022525979e-06, "loss": 0.8915, "step": 8014 }, { "epoch": 0.5790452796792314, "grad_norm": 7.60312905395751, "learning_rate": 4.167661119657131e-06, "loss": 0.864, "step": 8015 }, { "epoch": 0.5791175248794408, "grad_norm": 5.008878073791123, "learning_rate": 4.167443193959295e-06, "loss": 0.794, "step": 8016 }, { "epoch": 0.5791897700796503, "grad_norm": 5.883066686075597, "learning_rate": 4.167225245435458e-06, "loss": 0.8934, "step": 8017 }, { "epoch": 0.5792620152798599, "grad_norm": 5.253330627447164, "learning_rate": 4.1670072740886005e-06, "loss": 0.7999, "step": 8018 }, { "epoch": 0.5793342604800693, "grad_norm": 6.97344702930043, "learning_rate": 4.166789279921708e-06, "loss": 0.9073, "step": 8019 }, { "epoch": 0.5794065056802788, "grad_norm": 7.706804306006737, "learning_rate": 4.166571262937766e-06, "loss": 0.8737, "step": 8020 }, { "epoch": 0.5794787508804884, "grad_norm": 6.893514775459081, "learning_rate": 4.166353223139756e-06, "loss": 0.8415, "step": 8021 }, { "epoch": 0.5795509960806979, "grad_norm": 5.43453442612253, "learning_rate": 4.166135160530665e-06, "loss": 0.8389, "step": 8022 }, { "epoch": 0.5796232412809074, "grad_norm": 7.521424021115943, "learning_rate": 4.165917075113476e-06, "loss": 0.8832, "step": 8023 }, { "epoch": 0.5796954864811169, "grad_norm": 8.754356389726194, "learning_rate": 4.1656989668911765e-06, "loss": 0.7972, "step": 8024 }, { "epoch": 0.5797677316813264, "grad_norm": 5.5840328784182365, "learning_rate": 4.165480835866751e-06, "loss": 0.8827, "step": 8025 }, { "epoch": 0.5798399768815359, "grad_norm": 7.399215512319796, "learning_rate": 4.1652626820431865e-06, "loss": 0.8854, "step": 8026 }, { "epoch": 0.5799122220817454, "grad_norm": 6.4820405365766005, "learning_rate": 4.165044505423469e-06, "loss": 0.8846, "step": 8027 }, { "epoch": 0.579984467281955, "grad_norm": 6.419412628114058, "learning_rate": 4.164826306010585e-06, "loss": 0.8737, "step": 8028 }, { "epoch": 0.5800567124821645, "grad_norm": 5.379031776176926, "learning_rate": 4.164608083807522e-06, "loss": 0.884, "step": 8029 }, { "epoch": 0.5801289576823739, "grad_norm": 6.010476185345831, "learning_rate": 4.164389838817267e-06, "loss": 0.9398, "step": 8030 }, { "epoch": 0.5802012028825835, "grad_norm": 6.682859432830726, "learning_rate": 4.164171571042806e-06, "loss": 0.8298, "step": 8031 }, { "epoch": 0.580273448082793, "grad_norm": 5.419597928100433, "learning_rate": 4.163953280487129e-06, "loss": 0.8863, "step": 8032 }, { "epoch": 0.5803456932830026, "grad_norm": 5.569339105376751, "learning_rate": 4.163734967153223e-06, "loss": 0.8076, "step": 8033 }, { "epoch": 0.580417938483212, "grad_norm": 5.864789490495141, "learning_rate": 4.163516631044078e-06, "loss": 0.8616, "step": 8034 }, { "epoch": 0.5804901836834215, "grad_norm": 6.999663753608042, "learning_rate": 4.163298272162682e-06, "loss": 0.8416, "step": 8035 }, { "epoch": 0.5805624288836311, "grad_norm": 6.0281642471546695, "learning_rate": 4.163079890512024e-06, "loss": 0.9669, "step": 8036 }, { "epoch": 0.5806346740838405, "grad_norm": 6.507599423001092, "learning_rate": 4.162861486095094e-06, "loss": 0.9122, "step": 8037 }, { "epoch": 0.58070691928405, "grad_norm": 5.947037274967554, "learning_rate": 4.16264305891488e-06, "loss": 0.873, "step": 8038 }, { "epoch": 0.5807791644842596, "grad_norm": 7.233698704130458, "learning_rate": 4.162424608974374e-06, "loss": 0.8113, "step": 8039 }, { "epoch": 0.5808514096844691, "grad_norm": 5.588611821578339, "learning_rate": 4.162206136276566e-06, "loss": 0.9272, "step": 8040 }, { "epoch": 0.5809236548846786, "grad_norm": 5.652965287398052, "learning_rate": 4.161987640824448e-06, "loss": 0.7941, "step": 8041 }, { "epoch": 0.5809959000848881, "grad_norm": 5.528966917422295, "learning_rate": 4.161769122621007e-06, "loss": 0.8222, "step": 8042 }, { "epoch": 0.5810681452850976, "grad_norm": 6.081650836589795, "learning_rate": 4.161550581669239e-06, "loss": 0.9112, "step": 8043 }, { "epoch": 0.5811403904853071, "grad_norm": 6.19179255005448, "learning_rate": 4.161332017972134e-06, "loss": 0.9066, "step": 8044 }, { "epoch": 0.5812126356855166, "grad_norm": 7.124422551898368, "learning_rate": 4.161113431532682e-06, "loss": 0.8726, "step": 8045 }, { "epoch": 0.5812848808857262, "grad_norm": 6.317865187880045, "learning_rate": 4.160894822353877e-06, "loss": 0.8721, "step": 8046 }, { "epoch": 0.5813571260859357, "grad_norm": 5.179584761071021, "learning_rate": 4.160676190438713e-06, "loss": 0.8526, "step": 8047 }, { "epoch": 0.5814293712861451, "grad_norm": 7.203303998928309, "learning_rate": 4.16045753579018e-06, "loss": 0.8631, "step": 8048 }, { "epoch": 0.5815016164863547, "grad_norm": 6.426849443413977, "learning_rate": 4.160238858411273e-06, "loss": 0.8505, "step": 8049 }, { "epoch": 0.5815738616865642, "grad_norm": 5.9601931925958915, "learning_rate": 4.160020158304985e-06, "loss": 0.8739, "step": 8050 }, { "epoch": 0.5816461068867738, "grad_norm": 6.356109455546583, "learning_rate": 4.15980143547431e-06, "loss": 0.9052, "step": 8051 }, { "epoch": 0.5817183520869832, "grad_norm": 6.063590040234035, "learning_rate": 4.159582689922243e-06, "loss": 0.8578, "step": 8052 }, { "epoch": 0.5817905972871927, "grad_norm": 6.88148109817154, "learning_rate": 4.159363921651777e-06, "loss": 0.9234, "step": 8053 }, { "epoch": 0.5818628424874023, "grad_norm": 5.470471443628214, "learning_rate": 4.159145130665907e-06, "loss": 0.9769, "step": 8054 }, { "epoch": 0.5819350876876117, "grad_norm": 5.686061132166138, "learning_rate": 4.158926316967628e-06, "loss": 0.8289, "step": 8055 }, { "epoch": 0.5820073328878212, "grad_norm": 6.2976491750363435, "learning_rate": 4.158707480559937e-06, "loss": 0.8533, "step": 8056 }, { "epoch": 0.5820795780880308, "grad_norm": 6.475125768905538, "learning_rate": 4.158488621445827e-06, "loss": 0.8095, "step": 8057 }, { "epoch": 0.5821518232882403, "grad_norm": 6.679114958558407, "learning_rate": 4.158269739628297e-06, "loss": 0.8855, "step": 8058 }, { "epoch": 0.5822240684884498, "grad_norm": 5.662382900214589, "learning_rate": 4.15805083511034e-06, "loss": 0.7995, "step": 8059 }, { "epoch": 0.5822963136886593, "grad_norm": 7.865518099240661, "learning_rate": 4.1578319078949556e-06, "loss": 0.8956, "step": 8060 }, { "epoch": 0.5823685588888688, "grad_norm": 7.63175415073339, "learning_rate": 4.15761295798514e-06, "loss": 0.8741, "step": 8061 }, { "epoch": 0.5824408040890783, "grad_norm": 6.059245701040367, "learning_rate": 4.15739398538389e-06, "loss": 0.8333, "step": 8062 }, { "epoch": 0.5825130492892878, "grad_norm": 6.898006991716062, "learning_rate": 4.157174990094203e-06, "loss": 0.9129, "step": 8063 }, { "epoch": 0.5825852944894974, "grad_norm": 7.82489032120057, "learning_rate": 4.156955972119077e-06, "loss": 0.9145, "step": 8064 }, { "epoch": 0.5826575396897069, "grad_norm": 6.927847806197108, "learning_rate": 4.15673693146151e-06, "loss": 0.8514, "step": 8065 }, { "epoch": 0.5827297848899163, "grad_norm": 6.372644026495418, "learning_rate": 4.156517868124501e-06, "loss": 0.9049, "step": 8066 }, { "epoch": 0.5828020300901259, "grad_norm": 7.395834250293371, "learning_rate": 4.156298782111049e-06, "loss": 0.8407, "step": 8067 }, { "epoch": 0.5828742752903354, "grad_norm": 6.45272153465942, "learning_rate": 4.156079673424152e-06, "loss": 0.8458, "step": 8068 }, { "epoch": 0.582946520490545, "grad_norm": 6.1766570674837435, "learning_rate": 4.155860542066811e-06, "loss": 0.898, "step": 8069 }, { "epoch": 0.5830187656907544, "grad_norm": 6.398287555954456, "learning_rate": 4.155641388042024e-06, "loss": 0.8448, "step": 8070 }, { "epoch": 0.5830910108909639, "grad_norm": 6.007219739227235, "learning_rate": 4.155422211352792e-06, "loss": 0.9381, "step": 8071 }, { "epoch": 0.5831632560911735, "grad_norm": 8.78804160737583, "learning_rate": 4.155203012002116e-06, "loss": 0.9333, "step": 8072 }, { "epoch": 0.5832355012913829, "grad_norm": 6.920911321443666, "learning_rate": 4.154983789992995e-06, "loss": 0.7973, "step": 8073 }, { "epoch": 0.5833077464915924, "grad_norm": 7.178323211391172, "learning_rate": 4.154764545328431e-06, "loss": 0.9236, "step": 8074 }, { "epoch": 0.583379991691802, "grad_norm": 6.480327473595687, "learning_rate": 4.1545452780114255e-06, "loss": 0.8508, "step": 8075 }, { "epoch": 0.5834522368920115, "grad_norm": 6.390028648252548, "learning_rate": 4.154325988044981e-06, "loss": 0.8908, "step": 8076 }, { "epoch": 0.583524482092221, "grad_norm": 5.604135090149289, "learning_rate": 4.154106675432097e-06, "loss": 0.8683, "step": 8077 }, { "epoch": 0.5835967272924305, "grad_norm": 6.032189804935968, "learning_rate": 4.153887340175777e-06, "loss": 0.7757, "step": 8078 }, { "epoch": 0.58366897249264, "grad_norm": 6.582038784305777, "learning_rate": 4.1536679822790235e-06, "loss": 0.947, "step": 8079 }, { "epoch": 0.5837412176928495, "grad_norm": 8.109649241960883, "learning_rate": 4.15344860174484e-06, "loss": 0.8912, "step": 8080 }, { "epoch": 0.583813462893059, "grad_norm": 7.319484928541121, "learning_rate": 4.153229198576228e-06, "loss": 0.9255, "step": 8081 }, { "epoch": 0.5838857080932686, "grad_norm": 6.184427750927753, "learning_rate": 4.153009772776192e-06, "loss": 0.9105, "step": 8082 }, { "epoch": 0.5839579532934781, "grad_norm": 7.142285978096923, "learning_rate": 4.152790324347736e-06, "loss": 0.9111, "step": 8083 }, { "epoch": 0.5840301984936875, "grad_norm": 8.926371014432723, "learning_rate": 4.1525708532938636e-06, "loss": 0.8721, "step": 8084 }, { "epoch": 0.5841024436938971, "grad_norm": 6.336991943219411, "learning_rate": 4.15235135961758e-06, "loss": 0.8676, "step": 8085 }, { "epoch": 0.5841746888941066, "grad_norm": 7.606950001411512, "learning_rate": 4.152131843321889e-06, "loss": 0.9557, "step": 8086 }, { "epoch": 0.5842469340943162, "grad_norm": 5.544886272836128, "learning_rate": 4.151912304409795e-06, "loss": 0.9165, "step": 8087 }, { "epoch": 0.5843191792945256, "grad_norm": 7.211084829294633, "learning_rate": 4.151692742884305e-06, "loss": 0.804, "step": 8088 }, { "epoch": 0.5843914244947351, "grad_norm": 4.965905676964161, "learning_rate": 4.151473158748423e-06, "loss": 0.8058, "step": 8089 }, { "epoch": 0.5844636696949447, "grad_norm": 7.6952455779133215, "learning_rate": 4.151253552005156e-06, "loss": 0.8911, "step": 8090 }, { "epoch": 0.5845359148951541, "grad_norm": 6.217509850531407, "learning_rate": 4.151033922657512e-06, "loss": 0.8514, "step": 8091 }, { "epoch": 0.5846081600953636, "grad_norm": 6.4846659032074045, "learning_rate": 4.150814270708493e-06, "loss": 0.9009, "step": 8092 }, { "epoch": 0.5846804052955732, "grad_norm": 5.987346658355788, "learning_rate": 4.150594596161109e-06, "loss": 0.8782, "step": 8093 }, { "epoch": 0.5847526504957827, "grad_norm": 7.092719318278488, "learning_rate": 4.150374899018368e-06, "loss": 0.835, "step": 8094 }, { "epoch": 0.5848248956959922, "grad_norm": 5.95777531213807, "learning_rate": 4.150155179283274e-06, "loss": 0.845, "step": 8095 }, { "epoch": 0.5848971408962017, "grad_norm": 7.813991556835275, "learning_rate": 4.149935436958838e-06, "loss": 0.9091, "step": 8096 }, { "epoch": 0.5849693860964112, "grad_norm": 5.695458178756718, "learning_rate": 4.149715672048067e-06, "loss": 0.8928, "step": 8097 }, { "epoch": 0.5850416312966207, "grad_norm": 7.181825736582901, "learning_rate": 4.1494958845539694e-06, "loss": 0.8236, "step": 8098 }, { "epoch": 0.5851138764968302, "grad_norm": 5.998934969155788, "learning_rate": 4.149276074479553e-06, "loss": 0.8274, "step": 8099 }, { "epoch": 0.5851861216970398, "grad_norm": 6.004229326558756, "learning_rate": 4.149056241827828e-06, "loss": 0.9455, "step": 8100 }, { "epoch": 0.5852583668972493, "grad_norm": 6.337976393391158, "learning_rate": 4.148836386601805e-06, "loss": 0.7404, "step": 8101 }, { "epoch": 0.5853306120974587, "grad_norm": 6.310170263057828, "learning_rate": 4.14861650880449e-06, "loss": 0.8815, "step": 8102 }, { "epoch": 0.5854028572976683, "grad_norm": 7.017896934293855, "learning_rate": 4.148396608438896e-06, "loss": 0.7975, "step": 8103 }, { "epoch": 0.5854751024978778, "grad_norm": 6.131193494001692, "learning_rate": 4.148176685508032e-06, "loss": 0.7869, "step": 8104 }, { "epoch": 0.5855473476980874, "grad_norm": 7.302446545878985, "learning_rate": 4.14795674001491e-06, "loss": 0.9093, "step": 8105 }, { "epoch": 0.5856195928982968, "grad_norm": 6.975036796007894, "learning_rate": 4.147736771962539e-06, "loss": 0.9632, "step": 8106 }, { "epoch": 0.5856918380985063, "grad_norm": 5.962120968589265, "learning_rate": 4.1475167813539305e-06, "loss": 0.8406, "step": 8107 }, { "epoch": 0.5857640832987159, "grad_norm": 6.247058328721701, "learning_rate": 4.147296768192097e-06, "loss": 0.8243, "step": 8108 }, { "epoch": 0.5858363284989253, "grad_norm": 6.314203013721235, "learning_rate": 4.1470767324800495e-06, "loss": 0.8519, "step": 8109 }, { "epoch": 0.5859085736991348, "grad_norm": 8.256655233922237, "learning_rate": 4.1468566742208005e-06, "loss": 0.8781, "step": 8110 }, { "epoch": 0.5859808188993444, "grad_norm": 8.47096004012915, "learning_rate": 4.146636593417363e-06, "loss": 0.9122, "step": 8111 }, { "epoch": 0.5860530640995539, "grad_norm": 6.13591152542169, "learning_rate": 4.146416490072748e-06, "loss": 0.9001, "step": 8112 }, { "epoch": 0.5861253092997634, "grad_norm": 6.792969311565532, "learning_rate": 4.14619636418997e-06, "loss": 0.8891, "step": 8113 }, { "epoch": 0.5861975544999729, "grad_norm": 5.890682311247231, "learning_rate": 4.145976215772042e-06, "loss": 0.9171, "step": 8114 }, { "epoch": 0.5862697997001824, "grad_norm": 5.4161638857952745, "learning_rate": 4.145756044821978e-06, "loss": 0.8837, "step": 8115 }, { "epoch": 0.5863420449003919, "grad_norm": 6.265602777684252, "learning_rate": 4.145535851342791e-06, "loss": 0.9133, "step": 8116 }, { "epoch": 0.5864142901006014, "grad_norm": 5.9714077928954685, "learning_rate": 4.145315635337497e-06, "loss": 0.8748, "step": 8117 }, { "epoch": 0.586486535300811, "grad_norm": 6.354885306655203, "learning_rate": 4.145095396809108e-06, "loss": 0.8751, "step": 8118 }, { "epoch": 0.5865587805010205, "grad_norm": 6.4007139522963445, "learning_rate": 4.1448751357606415e-06, "loss": 0.8508, "step": 8119 }, { "epoch": 0.5866310257012299, "grad_norm": 7.770290516021209, "learning_rate": 4.144654852195111e-06, "loss": 0.8909, "step": 8120 }, { "epoch": 0.5867032709014395, "grad_norm": 5.560156682228714, "learning_rate": 4.144434546115532e-06, "loss": 0.8157, "step": 8121 }, { "epoch": 0.586775516101649, "grad_norm": 5.862027069081358, "learning_rate": 4.144214217524922e-06, "loss": 0.8936, "step": 8122 }, { "epoch": 0.5868477613018586, "grad_norm": 7.828877852551781, "learning_rate": 4.143993866426296e-06, "loss": 0.8625, "step": 8123 }, { "epoch": 0.586920006502068, "grad_norm": 6.251133320098407, "learning_rate": 4.14377349282267e-06, "loss": 0.8474, "step": 8124 }, { "epoch": 0.5869922517022775, "grad_norm": 6.405749492166899, "learning_rate": 4.1435530967170605e-06, "loss": 0.9522, "step": 8125 }, { "epoch": 0.5870644969024871, "grad_norm": 6.149068758791188, "learning_rate": 4.143332678112486e-06, "loss": 0.8542, "step": 8126 }, { "epoch": 0.5871367421026965, "grad_norm": 8.736939628651845, "learning_rate": 4.143112237011963e-06, "loss": 0.8371, "step": 8127 }, { "epoch": 0.587208987302906, "grad_norm": 7.2981295763024105, "learning_rate": 4.142891773418509e-06, "loss": 0.8674, "step": 8128 }, { "epoch": 0.5872812325031156, "grad_norm": 7.041920343227501, "learning_rate": 4.142671287335143e-06, "loss": 0.9432, "step": 8129 }, { "epoch": 0.5873534777033251, "grad_norm": 5.517387130267302, "learning_rate": 4.142450778764882e-06, "loss": 0.8557, "step": 8130 }, { "epoch": 0.5874257229035346, "grad_norm": 6.877113156242926, "learning_rate": 4.142230247710745e-06, "loss": 0.8749, "step": 8131 }, { "epoch": 0.5874979681037441, "grad_norm": 6.059542534308812, "learning_rate": 4.142009694175752e-06, "loss": 0.9066, "step": 8132 }, { "epoch": 0.5875702133039536, "grad_norm": 5.893512857789625, "learning_rate": 4.14178911816292e-06, "loss": 0.7769, "step": 8133 }, { "epoch": 0.5876424585041631, "grad_norm": 5.676748992687325, "learning_rate": 4.14156851967527e-06, "loss": 0.8257, "step": 8134 }, { "epoch": 0.5877147037043726, "grad_norm": 7.021944797578117, "learning_rate": 4.141347898715822e-06, "loss": 0.9058, "step": 8135 }, { "epoch": 0.5877869489045822, "grad_norm": 6.009201940781146, "learning_rate": 4.141127255287595e-06, "loss": 0.8555, "step": 8136 }, { "epoch": 0.5878591941047917, "grad_norm": 6.53906483348226, "learning_rate": 4.140906589393611e-06, "loss": 0.8676, "step": 8137 }, { "epoch": 0.5879314393050011, "grad_norm": 6.701307072466698, "learning_rate": 4.1406859010368896e-06, "loss": 0.9122, "step": 8138 }, { "epoch": 0.5880036845052107, "grad_norm": 7.615859008750235, "learning_rate": 4.140465190220451e-06, "loss": 0.8486, "step": 8139 }, { "epoch": 0.5880759297054202, "grad_norm": 6.6669663679786035, "learning_rate": 4.140244456947319e-06, "loss": 0.9773, "step": 8140 }, { "epoch": 0.5881481749056298, "grad_norm": 6.021709268424351, "learning_rate": 4.140023701220514e-06, "loss": 0.7933, "step": 8141 }, { "epoch": 0.5882204201058392, "grad_norm": 6.993025711367971, "learning_rate": 4.139802923043057e-06, "loss": 0.8766, "step": 8142 }, { "epoch": 0.5882926653060487, "grad_norm": 6.634944433857819, "learning_rate": 4.139582122417971e-06, "loss": 0.7969, "step": 8143 }, { "epoch": 0.5883649105062583, "grad_norm": 7.0056658022289025, "learning_rate": 4.1393612993482805e-06, "loss": 0.8927, "step": 8144 }, { "epoch": 0.5884371557064677, "grad_norm": 7.612637864040522, "learning_rate": 4.139140453837005e-06, "loss": 0.8271, "step": 8145 }, { "epoch": 0.5885094009066772, "grad_norm": 6.171873145767126, "learning_rate": 4.13891958588717e-06, "loss": 0.8632, "step": 8146 }, { "epoch": 0.5885816461068868, "grad_norm": 8.619290438520691, "learning_rate": 4.138698695501799e-06, "loss": 0.8209, "step": 8147 }, { "epoch": 0.5886538913070963, "grad_norm": 9.512609044576621, "learning_rate": 4.138477782683914e-06, "loss": 0.8653, "step": 8148 }, { "epoch": 0.5887261365073058, "grad_norm": 5.895252140725991, "learning_rate": 4.138256847436542e-06, "loss": 0.9094, "step": 8149 }, { "epoch": 0.5887983817075153, "grad_norm": 7.366257998262935, "learning_rate": 4.138035889762704e-06, "loss": 0.8401, "step": 8150 }, { "epoch": 0.5888706269077248, "grad_norm": 8.048534037107899, "learning_rate": 4.137814909665428e-06, "loss": 0.8602, "step": 8151 }, { "epoch": 0.5889428721079343, "grad_norm": 4.914699397940747, "learning_rate": 4.137593907147737e-06, "loss": 0.7778, "step": 8152 }, { "epoch": 0.5890151173081438, "grad_norm": 5.843967372502939, "learning_rate": 4.137372882212657e-06, "loss": 0.8156, "step": 8153 }, { "epoch": 0.5890873625083534, "grad_norm": 8.164496100925717, "learning_rate": 4.137151834863213e-06, "loss": 0.9126, "step": 8154 }, { "epoch": 0.5891596077085629, "grad_norm": 6.786658314597956, "learning_rate": 4.136930765102432e-06, "loss": 0.8425, "step": 8155 }, { "epoch": 0.5892318529087723, "grad_norm": 7.532485543992699, "learning_rate": 4.13670967293334e-06, "loss": 0.7785, "step": 8156 }, { "epoch": 0.5893040981089819, "grad_norm": 5.981383051334124, "learning_rate": 4.136488558358963e-06, "loss": 0.8598, "step": 8157 }, { "epoch": 0.5893763433091914, "grad_norm": 6.341508441142536, "learning_rate": 4.136267421382329e-06, "loss": 0.9071, "step": 8158 }, { "epoch": 0.589448588509401, "grad_norm": 6.316883944328349, "learning_rate": 4.136046262006463e-06, "loss": 0.8441, "step": 8159 }, { "epoch": 0.5895208337096104, "grad_norm": 6.555662535029801, "learning_rate": 4.135825080234396e-06, "loss": 0.8701, "step": 8160 }, { "epoch": 0.5895930789098199, "grad_norm": 6.189684549653459, "learning_rate": 4.1356038760691525e-06, "loss": 0.9348, "step": 8161 }, { "epoch": 0.5896653241100295, "grad_norm": 6.04891799204715, "learning_rate": 4.135382649513761e-06, "loss": 0.7873, "step": 8162 }, { "epoch": 0.5897375693102389, "grad_norm": 7.250247556471914, "learning_rate": 4.135161400571253e-06, "loss": 0.8373, "step": 8163 }, { "epoch": 0.5898098145104484, "grad_norm": 5.859149409719803, "learning_rate": 4.134940129244653e-06, "loss": 0.9016, "step": 8164 }, { "epoch": 0.589882059710658, "grad_norm": 5.0978710143936095, "learning_rate": 4.134718835536994e-06, "loss": 0.8197, "step": 8165 }, { "epoch": 0.5899543049108675, "grad_norm": 6.658057885167874, "learning_rate": 4.1344975194513025e-06, "loss": 0.8267, "step": 8166 }, { "epoch": 0.590026550111077, "grad_norm": 5.668062300011919, "learning_rate": 4.134276180990609e-06, "loss": 0.8357, "step": 8167 }, { "epoch": 0.5900987953112865, "grad_norm": 6.749657798852571, "learning_rate": 4.134054820157944e-06, "loss": 0.7833, "step": 8168 }, { "epoch": 0.590171040511496, "grad_norm": 5.812899339965579, "learning_rate": 4.1338334369563365e-06, "loss": 0.8523, "step": 8169 }, { "epoch": 0.5902432857117055, "grad_norm": 6.284138636836589, "learning_rate": 4.1336120313888184e-06, "loss": 0.8084, "step": 8170 }, { "epoch": 0.590315530911915, "grad_norm": 5.795388578695666, "learning_rate": 4.133390603458419e-06, "loss": 0.8624, "step": 8171 }, { "epoch": 0.5903877761121246, "grad_norm": 6.291394293070408, "learning_rate": 4.1331691531681715e-06, "loss": 0.792, "step": 8172 }, { "epoch": 0.5904600213123341, "grad_norm": 6.459017743998321, "learning_rate": 4.1329476805211065e-06, "loss": 0.8671, "step": 8173 }, { "epoch": 0.5905322665125435, "grad_norm": 6.333991300971391, "learning_rate": 4.132726185520255e-06, "loss": 0.8895, "step": 8174 }, { "epoch": 0.5906045117127531, "grad_norm": 5.564359889750664, "learning_rate": 4.1325046681686504e-06, "loss": 0.8695, "step": 8175 }, { "epoch": 0.5906767569129626, "grad_norm": 7.36310898961301, "learning_rate": 4.132283128469324e-06, "loss": 0.8867, "step": 8176 }, { "epoch": 0.5907490021131722, "grad_norm": 6.406241365752566, "learning_rate": 4.132061566425309e-06, "loss": 0.8302, "step": 8177 }, { "epoch": 0.5908212473133816, "grad_norm": 5.752020439440579, "learning_rate": 4.131839982039639e-06, "loss": 0.7977, "step": 8178 }, { "epoch": 0.5908934925135911, "grad_norm": 5.658468075001444, "learning_rate": 4.131618375315346e-06, "loss": 0.8721, "step": 8179 }, { "epoch": 0.5909657377138007, "grad_norm": 7.162110706554962, "learning_rate": 4.131396746255464e-06, "loss": 0.8599, "step": 8180 }, { "epoch": 0.5910379829140101, "grad_norm": 6.554979938444373, "learning_rate": 4.131175094863028e-06, "loss": 0.8331, "step": 8181 }, { "epoch": 0.5911102281142196, "grad_norm": 6.475210014089312, "learning_rate": 4.130953421141071e-06, "loss": 0.883, "step": 8182 }, { "epoch": 0.5911824733144292, "grad_norm": 6.118053311672177, "learning_rate": 4.130731725092628e-06, "loss": 0.8475, "step": 8183 }, { "epoch": 0.5912547185146387, "grad_norm": 5.937566495824193, "learning_rate": 4.130510006720734e-06, "loss": 0.8165, "step": 8184 }, { "epoch": 0.5913269637148482, "grad_norm": 6.90779227732917, "learning_rate": 4.130288266028424e-06, "loss": 0.927, "step": 8185 }, { "epoch": 0.5913992089150577, "grad_norm": 9.553111135805281, "learning_rate": 4.1300665030187345e-06, "loss": 0.8714, "step": 8186 }, { "epoch": 0.5914714541152672, "grad_norm": 5.663307465839791, "learning_rate": 4.1298447176946985e-06, "loss": 0.9194, "step": 8187 }, { "epoch": 0.5915436993154767, "grad_norm": 7.40036356264846, "learning_rate": 4.129622910059355e-06, "loss": 0.9394, "step": 8188 }, { "epoch": 0.5916159445156862, "grad_norm": 6.574991018775057, "learning_rate": 4.129401080115739e-06, "loss": 0.9134, "step": 8189 }, { "epoch": 0.5916881897158958, "grad_norm": 6.032454138216676, "learning_rate": 4.129179227866887e-06, "loss": 0.8215, "step": 8190 }, { "epoch": 0.5917604349161053, "grad_norm": 6.280043149180299, "learning_rate": 4.128957353315836e-06, "loss": 0.8263, "step": 8191 }, { "epoch": 0.5918326801163147, "grad_norm": 6.251498233509645, "learning_rate": 4.128735456465625e-06, "loss": 0.8303, "step": 8192 }, { "epoch": 0.5919049253165243, "grad_norm": 5.809149648185159, "learning_rate": 4.128513537319289e-06, "loss": 0.9223, "step": 8193 }, { "epoch": 0.5919771705167338, "grad_norm": 5.653121841961182, "learning_rate": 4.128291595879867e-06, "loss": 0.8553, "step": 8194 }, { "epoch": 0.5920494157169433, "grad_norm": 6.445319898196511, "learning_rate": 4.128069632150399e-06, "loss": 0.9302, "step": 8195 }, { "epoch": 0.5921216609171528, "grad_norm": 5.588958222245875, "learning_rate": 4.127847646133919e-06, "loss": 0.9073, "step": 8196 }, { "epoch": 0.5921939061173623, "grad_norm": 5.822851336481774, "learning_rate": 4.127625637833471e-06, "loss": 0.8238, "step": 8197 }, { "epoch": 0.5922661513175719, "grad_norm": 6.877457058272259, "learning_rate": 4.12740360725209e-06, "loss": 0.8819, "step": 8198 }, { "epoch": 0.5923383965177813, "grad_norm": 5.609543651712566, "learning_rate": 4.12718155439282e-06, "loss": 0.8671, "step": 8199 }, { "epoch": 0.5924106417179908, "grad_norm": 8.88705270663716, "learning_rate": 4.126959479258695e-06, "loss": 0.8624, "step": 8200 }, { "epoch": 0.5924828869182004, "grad_norm": 6.85835248064572, "learning_rate": 4.12673738185276e-06, "loss": 0.8051, "step": 8201 }, { "epoch": 0.5925551321184099, "grad_norm": 7.47826873857954, "learning_rate": 4.126515262178052e-06, "loss": 0.9025, "step": 8202 }, { "epoch": 0.5926273773186194, "grad_norm": 6.31011494817485, "learning_rate": 4.126293120237614e-06, "loss": 0.87, "step": 8203 }, { "epoch": 0.5926996225188289, "grad_norm": 6.021864471663895, "learning_rate": 4.1260709560344855e-06, "loss": 0.8698, "step": 8204 }, { "epoch": 0.5927718677190384, "grad_norm": 6.686388155054909, "learning_rate": 4.125848769571708e-06, "loss": 0.9255, "step": 8205 }, { "epoch": 0.5928441129192479, "grad_norm": 6.702289235757873, "learning_rate": 4.125626560852324e-06, "loss": 0.8174, "step": 8206 }, { "epoch": 0.5929163581194574, "grad_norm": 6.787478913051383, "learning_rate": 4.125404329879373e-06, "loss": 0.9034, "step": 8207 }, { "epoch": 0.592988603319667, "grad_norm": 6.915490230414119, "learning_rate": 4.1251820766559005e-06, "loss": 0.8236, "step": 8208 }, { "epoch": 0.5930608485198765, "grad_norm": 5.544253823352938, "learning_rate": 4.124959801184946e-06, "loss": 0.8816, "step": 8209 }, { "epoch": 0.5931330937200859, "grad_norm": 5.996465277577726, "learning_rate": 4.124737503469555e-06, "loss": 0.8828, "step": 8210 }, { "epoch": 0.5932053389202955, "grad_norm": 5.745094196818906, "learning_rate": 4.124515183512767e-06, "loss": 0.845, "step": 8211 }, { "epoch": 0.593277584120505, "grad_norm": 8.654711424778496, "learning_rate": 4.124292841317629e-06, "loss": 0.8736, "step": 8212 }, { "epoch": 0.5933498293207145, "grad_norm": 6.579387625168227, "learning_rate": 4.124070476887183e-06, "loss": 0.8873, "step": 8213 }, { "epoch": 0.593422074520924, "grad_norm": 6.6357628109732865, "learning_rate": 4.123848090224473e-06, "loss": 0.8797, "step": 8214 }, { "epoch": 0.5934943197211335, "grad_norm": 7.418523863099252, "learning_rate": 4.1236256813325435e-06, "loss": 0.8478, "step": 8215 }, { "epoch": 0.5935665649213431, "grad_norm": 5.632749517461685, "learning_rate": 4.123403250214438e-06, "loss": 0.8613, "step": 8216 }, { "epoch": 0.5936388101215525, "grad_norm": 5.0976064862274075, "learning_rate": 4.1231807968732045e-06, "loss": 0.7983, "step": 8217 }, { "epoch": 0.593711055321762, "grad_norm": 6.325630355605041, "learning_rate": 4.122958321311885e-06, "loss": 0.8407, "step": 8218 }, { "epoch": 0.5937833005219716, "grad_norm": 6.015260363340022, "learning_rate": 4.122735823533527e-06, "loss": 0.8393, "step": 8219 }, { "epoch": 0.5938555457221811, "grad_norm": 7.285773840695556, "learning_rate": 4.122513303541175e-06, "loss": 0.9197, "step": 8220 }, { "epoch": 0.5939277909223906, "grad_norm": 7.333439768394496, "learning_rate": 4.122290761337875e-06, "loss": 0.893, "step": 8221 }, { "epoch": 0.5940000361226001, "grad_norm": 7.503097911628528, "learning_rate": 4.122068196926675e-06, "loss": 0.8878, "step": 8222 }, { "epoch": 0.5940722813228096, "grad_norm": 4.9189297053119985, "learning_rate": 4.121845610310621e-06, "loss": 0.7922, "step": 8223 }, { "epoch": 0.5941445265230191, "grad_norm": 6.735256057776747, "learning_rate": 4.121623001492759e-06, "loss": 0.8134, "step": 8224 }, { "epoch": 0.5942167717232286, "grad_norm": 8.404999707350763, "learning_rate": 4.121400370476138e-06, "loss": 0.9016, "step": 8225 }, { "epoch": 0.5942890169234382, "grad_norm": 6.523390133622947, "learning_rate": 4.121177717263804e-06, "loss": 0.7894, "step": 8226 }, { "epoch": 0.5943612621236477, "grad_norm": 5.533395297304843, "learning_rate": 4.1209550418588074e-06, "loss": 0.7814, "step": 8227 }, { "epoch": 0.5944335073238571, "grad_norm": 5.560500053313147, "learning_rate": 4.120732344264194e-06, "loss": 0.9132, "step": 8228 }, { "epoch": 0.5945057525240667, "grad_norm": 7.160545425951793, "learning_rate": 4.120509624483013e-06, "loss": 0.8619, "step": 8229 }, { "epoch": 0.5945779977242762, "grad_norm": 6.093248239057982, "learning_rate": 4.120286882518313e-06, "loss": 0.8077, "step": 8230 }, { "epoch": 0.5946502429244857, "grad_norm": 5.807659471664842, "learning_rate": 4.1200641183731445e-06, "loss": 0.8692, "step": 8231 }, { "epoch": 0.5947224881246952, "grad_norm": 7.811029402605596, "learning_rate": 4.1198413320505555e-06, "loss": 0.8824, "step": 8232 }, { "epoch": 0.5947947333249047, "grad_norm": 5.518527296677069, "learning_rate": 4.119618523553597e-06, "loss": 0.8183, "step": 8233 }, { "epoch": 0.5948669785251143, "grad_norm": 7.86497683182764, "learning_rate": 4.119395692885319e-06, "loss": 0.7615, "step": 8234 }, { "epoch": 0.5949392237253237, "grad_norm": 6.341307822997229, "learning_rate": 4.11917284004877e-06, "loss": 0.8815, "step": 8235 }, { "epoch": 0.5950114689255332, "grad_norm": 6.957880823950834, "learning_rate": 4.118949965047003e-06, "loss": 0.858, "step": 8236 }, { "epoch": 0.5950837141257428, "grad_norm": 6.430125336322793, "learning_rate": 4.118727067883067e-06, "loss": 0.8428, "step": 8237 }, { "epoch": 0.5951559593259523, "grad_norm": 6.79855262835967, "learning_rate": 4.118504148560015e-06, "loss": 0.8192, "step": 8238 }, { "epoch": 0.5952282045261618, "grad_norm": 6.905747891638609, "learning_rate": 4.118281207080898e-06, "loss": 0.7784, "step": 8239 }, { "epoch": 0.5953004497263713, "grad_norm": 7.248630262748622, "learning_rate": 4.118058243448767e-06, "loss": 0.8562, "step": 8240 }, { "epoch": 0.5953726949265808, "grad_norm": 7.40444112972772, "learning_rate": 4.117835257666676e-06, "loss": 0.8664, "step": 8241 }, { "epoch": 0.5954449401267903, "grad_norm": 8.161948832995794, "learning_rate": 4.117612249737676e-06, "loss": 0.9087, "step": 8242 }, { "epoch": 0.5955171853269998, "grad_norm": 6.139443009551379, "learning_rate": 4.11738921966482e-06, "loss": 0.8096, "step": 8243 }, { "epoch": 0.5955894305272094, "grad_norm": 8.014248556891696, "learning_rate": 4.117166167451162e-06, "loss": 0.9069, "step": 8244 }, { "epoch": 0.5956616757274189, "grad_norm": 5.754926146987065, "learning_rate": 4.116943093099754e-06, "loss": 0.8554, "step": 8245 }, { "epoch": 0.5957339209276283, "grad_norm": 6.146513234262564, "learning_rate": 4.116719996613652e-06, "loss": 0.9094, "step": 8246 }, { "epoch": 0.5958061661278379, "grad_norm": 6.593229300382467, "learning_rate": 4.116496877995907e-06, "loss": 0.8704, "step": 8247 }, { "epoch": 0.5958784113280474, "grad_norm": 6.523605618623338, "learning_rate": 4.1162737372495755e-06, "loss": 0.8792, "step": 8248 }, { "epoch": 0.5959506565282569, "grad_norm": 7.5317185125601585, "learning_rate": 4.116050574377712e-06, "loss": 0.858, "step": 8249 }, { "epoch": 0.5960229017284664, "grad_norm": 6.205294520345389, "learning_rate": 4.11582738938337e-06, "loss": 0.8928, "step": 8250 }, { "epoch": 0.5960951469286759, "grad_norm": 10.229097219776564, "learning_rate": 4.1156041822696065e-06, "loss": 0.9891, "step": 8251 }, { "epoch": 0.5961673921288855, "grad_norm": 5.64365159938529, "learning_rate": 4.115380953039476e-06, "loss": 0.802, "step": 8252 }, { "epoch": 0.5962396373290949, "grad_norm": 6.965896364302524, "learning_rate": 4.115157701696034e-06, "loss": 0.8915, "step": 8253 }, { "epoch": 0.5963118825293044, "grad_norm": 7.650918653592945, "learning_rate": 4.114934428242338e-06, "loss": 0.7705, "step": 8254 }, { "epoch": 0.596384127729514, "grad_norm": 5.656966190414451, "learning_rate": 4.114711132681443e-06, "loss": 0.8324, "step": 8255 }, { "epoch": 0.5964563729297235, "grad_norm": 5.403321989229136, "learning_rate": 4.114487815016406e-06, "loss": 0.8871, "step": 8256 }, { "epoch": 0.596528618129933, "grad_norm": 6.2265395256202565, "learning_rate": 4.114264475250284e-06, "loss": 0.8089, "step": 8257 }, { "epoch": 0.5966008633301425, "grad_norm": 6.322507875436846, "learning_rate": 4.1140411133861355e-06, "loss": 0.8378, "step": 8258 }, { "epoch": 0.596673108530352, "grad_norm": 6.720871133652691, "learning_rate": 4.113817729427018e-06, "loss": 0.9816, "step": 8259 }, { "epoch": 0.5967453537305615, "grad_norm": 7.711804682272466, "learning_rate": 4.1135943233759875e-06, "loss": 0.8951, "step": 8260 }, { "epoch": 0.596817598930771, "grad_norm": 6.151781038254446, "learning_rate": 4.113370895236105e-06, "loss": 0.8555, "step": 8261 }, { "epoch": 0.5968898441309806, "grad_norm": 6.449982919596449, "learning_rate": 4.113147445010427e-06, "loss": 0.8335, "step": 8262 }, { "epoch": 0.5969620893311901, "grad_norm": 6.8012972772347124, "learning_rate": 4.1129239727020135e-06, "loss": 0.9054, "step": 8263 }, { "epoch": 0.5970343345313995, "grad_norm": 7.279924132821624, "learning_rate": 4.112700478313922e-06, "loss": 0.8106, "step": 8264 }, { "epoch": 0.5971065797316091, "grad_norm": 9.005853656675153, "learning_rate": 4.112476961849213e-06, "loss": 0.9252, "step": 8265 }, { "epoch": 0.5971788249318186, "grad_norm": 5.830636227474551, "learning_rate": 4.112253423310947e-06, "loss": 0.7945, "step": 8266 }, { "epoch": 0.597251070132028, "grad_norm": 8.436277852025858, "learning_rate": 4.112029862702184e-06, "loss": 0.9021, "step": 8267 }, { "epoch": 0.5973233153322376, "grad_norm": 7.64151788342762, "learning_rate": 4.111806280025984e-06, "loss": 0.8537, "step": 8268 }, { "epoch": 0.5973955605324471, "grad_norm": 8.555480753033539, "learning_rate": 4.111582675285407e-06, "loss": 0.8317, "step": 8269 }, { "epoch": 0.5974678057326567, "grad_norm": 6.5341381641232195, "learning_rate": 4.111359048483514e-06, "loss": 0.9092, "step": 8270 }, { "epoch": 0.5975400509328661, "grad_norm": 5.949827868111501, "learning_rate": 4.111135399623367e-06, "loss": 0.8285, "step": 8271 }, { "epoch": 0.5976122961330756, "grad_norm": 9.153751175528866, "learning_rate": 4.110911728708028e-06, "loss": 0.9022, "step": 8272 }, { "epoch": 0.5976845413332852, "grad_norm": 6.30928063211065, "learning_rate": 4.110688035740558e-06, "loss": 0.8777, "step": 8273 }, { "epoch": 0.5977567865334947, "grad_norm": 6.462606120022234, "learning_rate": 4.110464320724019e-06, "loss": 0.8494, "step": 8274 }, { "epoch": 0.5978290317337042, "grad_norm": 6.162745423295023, "learning_rate": 4.110240583661473e-06, "loss": 0.8436, "step": 8275 }, { "epoch": 0.5979012769339137, "grad_norm": 5.717139846006815, "learning_rate": 4.110016824555985e-06, "loss": 0.7976, "step": 8276 }, { "epoch": 0.5979735221341232, "grad_norm": 6.0816552273172375, "learning_rate": 4.109793043410617e-06, "loss": 0.8146, "step": 8277 }, { "epoch": 0.5980457673343327, "grad_norm": 6.338081721406451, "learning_rate": 4.109569240228432e-06, "loss": 0.8594, "step": 8278 }, { "epoch": 0.5981180125345422, "grad_norm": 6.604792455954272, "learning_rate": 4.109345415012494e-06, "loss": 0.9103, "step": 8279 }, { "epoch": 0.5981902577347518, "grad_norm": 8.375472781373206, "learning_rate": 4.109121567765866e-06, "loss": 1.015, "step": 8280 }, { "epoch": 0.5982625029349613, "grad_norm": 8.11948289624662, "learning_rate": 4.108897698491613e-06, "loss": 0.8741, "step": 8281 }, { "epoch": 0.5983347481351707, "grad_norm": 5.972478051829897, "learning_rate": 4.1086738071928005e-06, "loss": 0.8755, "step": 8282 }, { "epoch": 0.5984069933353803, "grad_norm": 6.821286476041922, "learning_rate": 4.108449893872493e-06, "loss": 0.93, "step": 8283 }, { "epoch": 0.5984792385355898, "grad_norm": 7.44486237613299, "learning_rate": 4.108225958533754e-06, "loss": 0.85, "step": 8284 }, { "epoch": 0.5985514837357993, "grad_norm": 5.540018147187515, "learning_rate": 4.108002001179651e-06, "loss": 0.8335, "step": 8285 }, { "epoch": 0.5986237289360088, "grad_norm": 6.618840070909505, "learning_rate": 4.107778021813249e-06, "loss": 0.9085, "step": 8286 }, { "epoch": 0.5986959741362183, "grad_norm": 6.426910876202076, "learning_rate": 4.107554020437614e-06, "loss": 0.7824, "step": 8287 }, { "epoch": 0.5987682193364279, "grad_norm": 6.578657912462026, "learning_rate": 4.107329997055814e-06, "loss": 0.8845, "step": 8288 }, { "epoch": 0.5988404645366373, "grad_norm": 6.379000138049102, "learning_rate": 4.107105951670913e-06, "loss": 0.8567, "step": 8289 }, { "epoch": 0.5989127097368468, "grad_norm": 6.9146577912869605, "learning_rate": 4.106881884285981e-06, "loss": 0.9061, "step": 8290 }, { "epoch": 0.5989849549370564, "grad_norm": 7.453115275314673, "learning_rate": 4.1066577949040815e-06, "loss": 0.8459, "step": 8291 }, { "epoch": 0.5990572001372659, "grad_norm": 8.657056419735994, "learning_rate": 4.106433683528286e-06, "loss": 0.9416, "step": 8292 }, { "epoch": 0.5991294453374754, "grad_norm": 7.854662595382738, "learning_rate": 4.1062095501616595e-06, "loss": 0.9607, "step": 8293 }, { "epoch": 0.5992016905376849, "grad_norm": 6.7449859080581, "learning_rate": 4.105985394807271e-06, "loss": 0.7959, "step": 8294 }, { "epoch": 0.5992739357378944, "grad_norm": 8.972011704102636, "learning_rate": 4.105761217468191e-06, "loss": 0.9424, "step": 8295 }, { "epoch": 0.5993461809381039, "grad_norm": 8.332565068752839, "learning_rate": 4.1055370181474855e-06, "loss": 0.8889, "step": 8296 }, { "epoch": 0.5994184261383134, "grad_norm": 8.036820079468377, "learning_rate": 4.105312796848225e-06, "loss": 0.8753, "step": 8297 }, { "epoch": 0.599490671338523, "grad_norm": 5.7025591007767344, "learning_rate": 4.10508855357348e-06, "loss": 0.8432, "step": 8298 }, { "epoch": 0.5995629165387325, "grad_norm": 8.752373073848494, "learning_rate": 4.104864288326318e-06, "loss": 0.9059, "step": 8299 }, { "epoch": 0.5996351617389419, "grad_norm": 7.744924790846528, "learning_rate": 4.1046400011098096e-06, "loss": 0.7978, "step": 8300 }, { "epoch": 0.5997074069391515, "grad_norm": 9.80945048963964, "learning_rate": 4.104415691927026e-06, "loss": 1.0002, "step": 8301 }, { "epoch": 0.599779652139361, "grad_norm": 8.032152889574744, "learning_rate": 4.104191360781038e-06, "loss": 0.8657, "step": 8302 }, { "epoch": 0.5998518973395705, "grad_norm": 4.931014038164977, "learning_rate": 4.1039670076749144e-06, "loss": 0.8417, "step": 8303 }, { "epoch": 0.59992414253978, "grad_norm": 6.996909140349798, "learning_rate": 4.103742632611729e-06, "loss": 0.8031, "step": 8304 }, { "epoch": 0.5999963877399895, "grad_norm": 7.298380988361017, "learning_rate": 4.103518235594551e-06, "loss": 0.9203, "step": 8305 }, { "epoch": 0.6000686329401991, "grad_norm": 5.597388762254284, "learning_rate": 4.103293816626454e-06, "loss": 0.8526, "step": 8306 }, { "epoch": 0.6001408781404085, "grad_norm": 8.212164847910556, "learning_rate": 4.10306937571051e-06, "loss": 0.8193, "step": 8307 }, { "epoch": 0.600213123340618, "grad_norm": 6.933369824729159, "learning_rate": 4.10284491284979e-06, "loss": 0.8584, "step": 8308 }, { "epoch": 0.6002853685408276, "grad_norm": 7.1424189678398315, "learning_rate": 4.102620428047369e-06, "loss": 0.9425, "step": 8309 }, { "epoch": 0.6003576137410371, "grad_norm": 6.847644551294913, "learning_rate": 4.102395921306318e-06, "loss": 0.8003, "step": 8310 }, { "epoch": 0.6004298589412466, "grad_norm": 5.747752206386893, "learning_rate": 4.102171392629711e-06, "loss": 0.8474, "step": 8311 }, { "epoch": 0.6005021041414561, "grad_norm": 6.671746925000228, "learning_rate": 4.101946842020622e-06, "loss": 0.8547, "step": 8312 }, { "epoch": 0.6005743493416656, "grad_norm": 6.6853628576230575, "learning_rate": 4.101722269482124e-06, "loss": 0.8323, "step": 8313 }, { "epoch": 0.6006465945418751, "grad_norm": 5.900470469192478, "learning_rate": 4.101497675017292e-06, "loss": 0.865, "step": 8314 }, { "epoch": 0.6007188397420846, "grad_norm": 5.936518216526944, "learning_rate": 4.1012730586292e-06, "loss": 0.7994, "step": 8315 }, { "epoch": 0.6007910849422942, "grad_norm": 7.756178577226428, "learning_rate": 4.101048420320923e-06, "loss": 0.856, "step": 8316 }, { "epoch": 0.6008633301425037, "grad_norm": 6.688019919461606, "learning_rate": 4.100823760095537e-06, "loss": 0.8797, "step": 8317 }, { "epoch": 0.6009355753427131, "grad_norm": 6.880090943999583, "learning_rate": 4.1005990779561165e-06, "loss": 0.8126, "step": 8318 }, { "epoch": 0.6010078205429227, "grad_norm": 7.092474600121536, "learning_rate": 4.100374373905738e-06, "loss": 0.8704, "step": 8319 }, { "epoch": 0.6010800657431322, "grad_norm": 5.3273189527877385, "learning_rate": 4.100149647947476e-06, "loss": 0.8267, "step": 8320 }, { "epoch": 0.6011523109433417, "grad_norm": 6.145968298857911, "learning_rate": 4.099924900084407e-06, "loss": 0.8349, "step": 8321 }, { "epoch": 0.6012245561435512, "grad_norm": 4.9797889395114, "learning_rate": 4.09970013031961e-06, "loss": 0.751, "step": 8322 }, { "epoch": 0.6012968013437607, "grad_norm": 8.857525452467709, "learning_rate": 4.0994753386561596e-06, "loss": 0.9349, "step": 8323 }, { "epoch": 0.6013690465439703, "grad_norm": 5.953096805215385, "learning_rate": 4.099250525097134e-06, "loss": 0.8727, "step": 8324 }, { "epoch": 0.6014412917441797, "grad_norm": 8.076649632558626, "learning_rate": 4.099025689645611e-06, "loss": 0.8026, "step": 8325 }, { "epoch": 0.6015135369443892, "grad_norm": 7.219254381137991, "learning_rate": 4.098800832304667e-06, "loss": 0.8689, "step": 8326 }, { "epoch": 0.6015857821445988, "grad_norm": 6.30755663398419, "learning_rate": 4.09857595307738e-06, "loss": 0.8327, "step": 8327 }, { "epoch": 0.6016580273448083, "grad_norm": 7.493246471041857, "learning_rate": 4.098351051966831e-06, "loss": 0.9223, "step": 8328 }, { "epoch": 0.6017302725450178, "grad_norm": 7.342190710871957, "learning_rate": 4.098126128976097e-06, "loss": 0.8088, "step": 8329 }, { "epoch": 0.6018025177452273, "grad_norm": 6.528337769353063, "learning_rate": 4.097901184108256e-06, "loss": 0.8286, "step": 8330 }, { "epoch": 0.6018747629454368, "grad_norm": 7.649075131825707, "learning_rate": 4.097676217366389e-06, "loss": 0.7741, "step": 8331 }, { "epoch": 0.6019470081456463, "grad_norm": 5.82947187226856, "learning_rate": 4.097451228753576e-06, "loss": 0.8526, "step": 8332 }, { "epoch": 0.6020192533458558, "grad_norm": 8.128406456133531, "learning_rate": 4.097226218272896e-06, "loss": 0.8086, "step": 8333 }, { "epoch": 0.6020914985460654, "grad_norm": 6.754395925532323, "learning_rate": 4.0970011859274285e-06, "loss": 0.9052, "step": 8334 }, { "epoch": 0.6021637437462749, "grad_norm": 5.289230186249674, "learning_rate": 4.096776131720254e-06, "loss": 0.793, "step": 8335 }, { "epoch": 0.6022359889464843, "grad_norm": 5.22795034095161, "learning_rate": 4.096551055654456e-06, "loss": 0.829, "step": 8336 }, { "epoch": 0.6023082341466939, "grad_norm": 6.0666822971240455, "learning_rate": 4.096325957733113e-06, "loss": 0.8136, "step": 8337 }, { "epoch": 0.6023804793469034, "grad_norm": 6.5928158934577645, "learning_rate": 4.096100837959306e-06, "loss": 0.8552, "step": 8338 }, { "epoch": 0.6024527245471129, "grad_norm": 6.337577635761683, "learning_rate": 4.095875696336119e-06, "loss": 0.8001, "step": 8339 }, { "epoch": 0.6025249697473224, "grad_norm": 6.305803429450457, "learning_rate": 4.095650532866633e-06, "loss": 0.8034, "step": 8340 }, { "epoch": 0.6025972149475319, "grad_norm": 6.840648488317658, "learning_rate": 4.0954253475539286e-06, "loss": 0.8577, "step": 8341 }, { "epoch": 0.6026694601477415, "grad_norm": 5.81280385525505, "learning_rate": 4.095200140401091e-06, "loss": 0.8628, "step": 8342 }, { "epoch": 0.6027417053479509, "grad_norm": 7.003645628778334, "learning_rate": 4.094974911411202e-06, "loss": 0.8955, "step": 8343 }, { "epoch": 0.6028139505481604, "grad_norm": 7.4689568526859995, "learning_rate": 4.094749660587345e-06, "loss": 0.8251, "step": 8344 }, { "epoch": 0.60288619574837, "grad_norm": 6.989197979540771, "learning_rate": 4.094524387932604e-06, "loss": 0.8787, "step": 8345 }, { "epoch": 0.6029584409485795, "grad_norm": 6.125568597053422, "learning_rate": 4.094299093450061e-06, "loss": 0.849, "step": 8346 }, { "epoch": 0.603030686148789, "grad_norm": 7.546335141793374, "learning_rate": 4.094073777142802e-06, "loss": 0.9285, "step": 8347 }, { "epoch": 0.6031029313489985, "grad_norm": 5.966362760394564, "learning_rate": 4.09384843901391e-06, "loss": 0.8038, "step": 8348 }, { "epoch": 0.603175176549208, "grad_norm": 7.7481970381843865, "learning_rate": 4.093623079066471e-06, "loss": 0.8501, "step": 8349 }, { "epoch": 0.6032474217494175, "grad_norm": 7.41711530150044, "learning_rate": 4.093397697303569e-06, "loss": 0.9788, "step": 8350 }, { "epoch": 0.603319666949627, "grad_norm": 5.933234037504237, "learning_rate": 4.09317229372829e-06, "loss": 0.8806, "step": 8351 }, { "epoch": 0.6033919121498366, "grad_norm": 5.730789269218801, "learning_rate": 4.0929468683437205e-06, "loss": 0.8434, "step": 8352 }, { "epoch": 0.6034641573500461, "grad_norm": 6.341674164388722, "learning_rate": 4.092721421152943e-06, "loss": 0.862, "step": 8353 }, { "epoch": 0.6035364025502555, "grad_norm": 6.116396720119668, "learning_rate": 4.092495952159048e-06, "loss": 0.7748, "step": 8354 }, { "epoch": 0.6036086477504651, "grad_norm": 6.389585377788517, "learning_rate": 4.092270461365119e-06, "loss": 0.8801, "step": 8355 }, { "epoch": 0.6036808929506746, "grad_norm": 7.161990332986017, "learning_rate": 4.092044948774243e-06, "loss": 0.9525, "step": 8356 }, { "epoch": 0.603753138150884, "grad_norm": 7.383555735158329, "learning_rate": 4.091819414389509e-06, "loss": 0.9092, "step": 8357 }, { "epoch": 0.6038253833510936, "grad_norm": 6.211380008312647, "learning_rate": 4.0915938582140015e-06, "loss": 0.8149, "step": 8358 }, { "epoch": 0.6038976285513031, "grad_norm": 7.4265284034464765, "learning_rate": 4.091368280250811e-06, "loss": 0.8152, "step": 8359 }, { "epoch": 0.6039698737515127, "grad_norm": 7.716072390298332, "learning_rate": 4.091142680503024e-06, "loss": 0.9021, "step": 8360 }, { "epoch": 0.6040421189517221, "grad_norm": 6.18087382931777, "learning_rate": 4.09091705897373e-06, "loss": 0.9304, "step": 8361 }, { "epoch": 0.6041143641519316, "grad_norm": 6.520556589252376, "learning_rate": 4.0906914156660164e-06, "loss": 0.8895, "step": 8362 }, { "epoch": 0.6041866093521412, "grad_norm": 7.431364461853057, "learning_rate": 4.090465750582973e-06, "loss": 0.8523, "step": 8363 }, { "epoch": 0.6042588545523507, "grad_norm": 6.3381936681210185, "learning_rate": 4.090240063727686e-06, "loss": 0.8727, "step": 8364 }, { "epoch": 0.6043310997525602, "grad_norm": 6.90206381851356, "learning_rate": 4.09001435510325e-06, "loss": 0.9507, "step": 8365 }, { "epoch": 0.6044033449527697, "grad_norm": 7.221929997457992, "learning_rate": 4.089788624712751e-06, "loss": 0.8699, "step": 8366 }, { "epoch": 0.6044755901529792, "grad_norm": 7.126352549887159, "learning_rate": 4.0895628725592795e-06, "loss": 0.8588, "step": 8367 }, { "epoch": 0.6045478353531887, "grad_norm": 5.7412045422495845, "learning_rate": 4.0893370986459275e-06, "loss": 0.8784, "step": 8368 }, { "epoch": 0.6046200805533982, "grad_norm": 5.726347665543013, "learning_rate": 4.089111302975784e-06, "loss": 0.9466, "step": 8369 }, { "epoch": 0.6046923257536078, "grad_norm": 7.532262203844918, "learning_rate": 4.088885485551941e-06, "loss": 0.8618, "step": 8370 }, { "epoch": 0.6047645709538173, "grad_norm": 6.427838825685826, "learning_rate": 4.0886596463774886e-06, "loss": 0.9751, "step": 8371 }, { "epoch": 0.6048368161540267, "grad_norm": 6.962508755306855, "learning_rate": 4.08843378545552e-06, "loss": 0.8406, "step": 8372 }, { "epoch": 0.6049090613542363, "grad_norm": 7.712919311006417, "learning_rate": 4.088207902789126e-06, "loss": 0.8174, "step": 8373 }, { "epoch": 0.6049813065544458, "grad_norm": 6.7720327713652795, "learning_rate": 4.087981998381399e-06, "loss": 0.8235, "step": 8374 }, { "epoch": 0.6050535517546552, "grad_norm": 8.01724006785777, "learning_rate": 4.087756072235431e-06, "loss": 0.8309, "step": 8375 }, { "epoch": 0.6051257969548648, "grad_norm": 7.181851763366922, "learning_rate": 4.087530124354316e-06, "loss": 0.9192, "step": 8376 }, { "epoch": 0.6051980421550743, "grad_norm": 7.428026079763748, "learning_rate": 4.0873041547411465e-06, "loss": 0.9048, "step": 8377 }, { "epoch": 0.6052702873552839, "grad_norm": 6.313295219867603, "learning_rate": 4.087078163399014e-06, "loss": 0.7764, "step": 8378 }, { "epoch": 0.6053425325554933, "grad_norm": 7.451425802521187, "learning_rate": 4.086852150331015e-06, "loss": 0.8623, "step": 8379 }, { "epoch": 0.6054147777557028, "grad_norm": 7.925330738181866, "learning_rate": 4.086626115540242e-06, "loss": 0.8773, "step": 8380 }, { "epoch": 0.6054870229559124, "grad_norm": 7.36648481786105, "learning_rate": 4.086400059029788e-06, "loss": 0.8567, "step": 8381 }, { "epoch": 0.6055592681561219, "grad_norm": 6.734468747081682, "learning_rate": 4.086173980802751e-06, "loss": 0.9247, "step": 8382 }, { "epoch": 0.6056315133563314, "grad_norm": 6.595339615112468, "learning_rate": 4.085947880862223e-06, "loss": 0.8994, "step": 8383 }, { "epoch": 0.6057037585565409, "grad_norm": 5.648639089698848, "learning_rate": 4.0857217592112995e-06, "loss": 0.7817, "step": 8384 }, { "epoch": 0.6057760037567504, "grad_norm": 7.12959944189744, "learning_rate": 4.0854956158530765e-06, "loss": 0.8449, "step": 8385 }, { "epoch": 0.6058482489569599, "grad_norm": 5.119525683903677, "learning_rate": 4.08526945079065e-06, "loss": 0.8254, "step": 8386 }, { "epoch": 0.6059204941571694, "grad_norm": 6.575386691597915, "learning_rate": 4.085043264027115e-06, "loss": 0.7787, "step": 8387 }, { "epoch": 0.605992739357379, "grad_norm": 6.029857731749292, "learning_rate": 4.08481705556557e-06, "loss": 0.7934, "step": 8388 }, { "epoch": 0.6060649845575885, "grad_norm": 6.205602194484546, "learning_rate": 4.084590825409108e-06, "loss": 0.8451, "step": 8389 }, { "epoch": 0.6061372297577979, "grad_norm": 6.369608076126586, "learning_rate": 4.084364573560829e-06, "loss": 0.8831, "step": 8390 }, { "epoch": 0.6062094749580075, "grad_norm": 5.862652403715157, "learning_rate": 4.084138300023829e-06, "loss": 0.8748, "step": 8391 }, { "epoch": 0.606281720158217, "grad_norm": 7.200497493510621, "learning_rate": 4.083912004801206e-06, "loss": 0.9134, "step": 8392 }, { "epoch": 0.6063539653584264, "grad_norm": 5.267910696131531, "learning_rate": 4.0836856878960565e-06, "loss": 0.8075, "step": 8393 }, { "epoch": 0.606426210558636, "grad_norm": 6.162423538445528, "learning_rate": 4.083459349311481e-06, "loss": 0.8949, "step": 8394 }, { "epoch": 0.6064984557588455, "grad_norm": 5.846206230438536, "learning_rate": 4.083232989050575e-06, "loss": 0.8448, "step": 8395 }, { "epoch": 0.6065707009590551, "grad_norm": 5.248915969103336, "learning_rate": 4.0830066071164396e-06, "loss": 0.8523, "step": 8396 }, { "epoch": 0.6066429461592645, "grad_norm": 6.24142417012229, "learning_rate": 4.0827802035121724e-06, "loss": 0.8709, "step": 8397 }, { "epoch": 0.606715191359474, "grad_norm": 5.126703862988314, "learning_rate": 4.082553778240874e-06, "loss": 0.8064, "step": 8398 }, { "epoch": 0.6067874365596836, "grad_norm": 5.370132548935794, "learning_rate": 4.0823273313056425e-06, "loss": 0.8613, "step": 8399 }, { "epoch": 0.6068596817598931, "grad_norm": 6.538995120340578, "learning_rate": 4.082100862709578e-06, "loss": 0.8739, "step": 8400 }, { "epoch": 0.6069319269601026, "grad_norm": 6.381819424830397, "learning_rate": 4.081874372455782e-06, "loss": 0.7978, "step": 8401 }, { "epoch": 0.6070041721603121, "grad_norm": 5.954636749796035, "learning_rate": 4.081647860547354e-06, "loss": 0.8925, "step": 8402 }, { "epoch": 0.6070764173605216, "grad_norm": 7.464737201211728, "learning_rate": 4.081421326987394e-06, "loss": 0.961, "step": 8403 }, { "epoch": 0.6071486625607311, "grad_norm": 6.314760615386673, "learning_rate": 4.081194771779005e-06, "loss": 0.7963, "step": 8404 }, { "epoch": 0.6072209077609406, "grad_norm": 5.647614858404138, "learning_rate": 4.080968194925287e-06, "loss": 0.8761, "step": 8405 }, { "epoch": 0.6072931529611502, "grad_norm": 8.734405572117783, "learning_rate": 4.080741596429341e-06, "loss": 0.9114, "step": 8406 }, { "epoch": 0.6073653981613597, "grad_norm": 6.019885489049566, "learning_rate": 4.0805149762942705e-06, "loss": 0.8566, "step": 8407 }, { "epoch": 0.6074376433615691, "grad_norm": 7.601431521064542, "learning_rate": 4.0802883345231775e-06, "loss": 0.9127, "step": 8408 }, { "epoch": 0.6075098885617787, "grad_norm": 5.143267755162466, "learning_rate": 4.0800616711191645e-06, "loss": 0.9248, "step": 8409 }, { "epoch": 0.6075821337619882, "grad_norm": 6.115417458234275, "learning_rate": 4.0798349860853334e-06, "loss": 0.8284, "step": 8410 }, { "epoch": 0.6076543789621976, "grad_norm": 6.462556832098691, "learning_rate": 4.079608279424788e-06, "loss": 0.856, "step": 8411 }, { "epoch": 0.6077266241624072, "grad_norm": 6.247267774377483, "learning_rate": 4.079381551140632e-06, "loss": 0.7891, "step": 8412 }, { "epoch": 0.6077988693626167, "grad_norm": 5.435968643805403, "learning_rate": 4.079154801235969e-06, "loss": 0.8024, "step": 8413 }, { "epoch": 0.6078711145628263, "grad_norm": 6.612659404830773, "learning_rate": 4.078928029713902e-06, "loss": 0.8124, "step": 8414 }, { "epoch": 0.6079433597630357, "grad_norm": 6.5530824935473175, "learning_rate": 4.078701236577538e-06, "loss": 0.8295, "step": 8415 }, { "epoch": 0.6080156049632452, "grad_norm": 7.359450678021191, "learning_rate": 4.078474421829979e-06, "loss": 0.8331, "step": 8416 }, { "epoch": 0.6080878501634548, "grad_norm": 7.141008295836182, "learning_rate": 4.0782475854743295e-06, "loss": 0.912, "step": 8417 }, { "epoch": 0.6081600953636642, "grad_norm": 5.530933090872904, "learning_rate": 4.078020727513698e-06, "loss": 0.8558, "step": 8418 }, { "epoch": 0.6082323405638738, "grad_norm": 6.257688752092613, "learning_rate": 4.077793847951186e-06, "loss": 0.8509, "step": 8419 }, { "epoch": 0.6083045857640833, "grad_norm": 11.37504158431616, "learning_rate": 4.077566946789903e-06, "loss": 0.9231, "step": 8420 }, { "epoch": 0.6083768309642928, "grad_norm": 7.435686932290069, "learning_rate": 4.077340024032952e-06, "loss": 0.9159, "step": 8421 }, { "epoch": 0.6084490761645023, "grad_norm": 7.245707326286587, "learning_rate": 4.077113079683442e-06, "loss": 0.8132, "step": 8422 }, { "epoch": 0.6085213213647118, "grad_norm": 6.2878789889895135, "learning_rate": 4.076886113744478e-06, "loss": 0.9048, "step": 8423 }, { "epoch": 0.6085935665649214, "grad_norm": 6.4049356740539976, "learning_rate": 4.076659126219168e-06, "loss": 0.8813, "step": 8424 }, { "epoch": 0.6086658117651309, "grad_norm": 6.3357406190511405, "learning_rate": 4.076432117110618e-06, "loss": 0.871, "step": 8425 }, { "epoch": 0.6087380569653403, "grad_norm": 5.946698582368289, "learning_rate": 4.076205086421937e-06, "loss": 0.8695, "step": 8426 }, { "epoch": 0.6088103021655499, "grad_norm": 6.14937893592576, "learning_rate": 4.075978034156232e-06, "loss": 0.8559, "step": 8427 }, { "epoch": 0.6088825473657594, "grad_norm": 6.6376673675528055, "learning_rate": 4.07575096031661e-06, "loss": 0.8905, "step": 8428 }, { "epoch": 0.6089547925659688, "grad_norm": 6.649548519554994, "learning_rate": 4.0755238649061825e-06, "loss": 0.7934, "step": 8429 }, { "epoch": 0.6090270377661784, "grad_norm": 6.090705619736441, "learning_rate": 4.075296747928056e-06, "loss": 0.8146, "step": 8430 }, { "epoch": 0.6090992829663879, "grad_norm": 5.952788576246942, "learning_rate": 4.07506960938534e-06, "loss": 0.9015, "step": 8431 }, { "epoch": 0.6091715281665975, "grad_norm": 7.983696298438046, "learning_rate": 4.074842449281144e-06, "loss": 0.9471, "step": 8432 }, { "epoch": 0.6092437733668069, "grad_norm": 6.807993667079612, "learning_rate": 4.074615267618579e-06, "loss": 0.8687, "step": 8433 }, { "epoch": 0.6093160185670164, "grad_norm": 5.454212331714253, "learning_rate": 4.074388064400753e-06, "loss": 0.9185, "step": 8434 }, { "epoch": 0.609388263767226, "grad_norm": 6.3325607765555265, "learning_rate": 4.074160839630776e-06, "loss": 0.9122, "step": 8435 }, { "epoch": 0.6094605089674354, "grad_norm": 6.2479473558032375, "learning_rate": 4.073933593311759e-06, "loss": 0.9355, "step": 8436 }, { "epoch": 0.609532754167645, "grad_norm": 5.703708232164076, "learning_rate": 4.0737063254468146e-06, "loss": 0.8317, "step": 8437 }, { "epoch": 0.6096049993678545, "grad_norm": 5.330114625195654, "learning_rate": 4.073479036039052e-06, "loss": 0.7311, "step": 8438 }, { "epoch": 0.609677244568064, "grad_norm": 5.453457272962478, "learning_rate": 4.073251725091583e-06, "loss": 0.8745, "step": 8439 }, { "epoch": 0.6097494897682735, "grad_norm": 4.972594111699348, "learning_rate": 4.073024392607519e-06, "loss": 0.92, "step": 8440 }, { "epoch": 0.609821734968483, "grad_norm": 6.738244734955409, "learning_rate": 4.072797038589973e-06, "loss": 0.8116, "step": 8441 }, { "epoch": 0.6098939801686926, "grad_norm": 6.273458478424606, "learning_rate": 4.072569663042056e-06, "loss": 0.9044, "step": 8442 }, { "epoch": 0.6099662253689021, "grad_norm": 4.523480930679891, "learning_rate": 4.0723422659668825e-06, "loss": 0.7786, "step": 8443 }, { "epoch": 0.6100384705691115, "grad_norm": 6.538402382768603, "learning_rate": 4.072114847367563e-06, "loss": 0.8788, "step": 8444 }, { "epoch": 0.6101107157693211, "grad_norm": 6.924590600647251, "learning_rate": 4.071887407247213e-06, "loss": 0.9087, "step": 8445 }, { "epoch": 0.6101829609695306, "grad_norm": 5.9019210532792306, "learning_rate": 4.071659945608945e-06, "loss": 0.8202, "step": 8446 }, { "epoch": 0.61025520616974, "grad_norm": 7.180866663489746, "learning_rate": 4.071432462455872e-06, "loss": 0.8614, "step": 8447 }, { "epoch": 0.6103274513699496, "grad_norm": 6.31354929446215, "learning_rate": 4.071204957791109e-06, "loss": 0.9225, "step": 8448 }, { "epoch": 0.6103996965701591, "grad_norm": 5.746500691954776, "learning_rate": 4.0709774316177696e-06, "loss": 0.7699, "step": 8449 }, { "epoch": 0.6104719417703687, "grad_norm": 6.070630471482271, "learning_rate": 4.0707498839389695e-06, "loss": 0.8626, "step": 8450 }, { "epoch": 0.6105441869705781, "grad_norm": 8.316623210092684, "learning_rate": 4.070522314757822e-06, "loss": 0.8793, "step": 8451 }, { "epoch": 0.6106164321707876, "grad_norm": 6.496734972862691, "learning_rate": 4.070294724077445e-06, "loss": 0.8733, "step": 8452 }, { "epoch": 0.6106886773709972, "grad_norm": 6.198950143341909, "learning_rate": 4.070067111900952e-06, "loss": 0.8406, "step": 8453 }, { "epoch": 0.6107609225712066, "grad_norm": 6.674720604010445, "learning_rate": 4.069839478231458e-06, "loss": 0.8495, "step": 8454 }, { "epoch": 0.6108331677714162, "grad_norm": 6.738756210615819, "learning_rate": 4.069611823072082e-06, "loss": 0.8845, "step": 8455 }, { "epoch": 0.6109054129716257, "grad_norm": 6.904296046234038, "learning_rate": 4.069384146425938e-06, "loss": 0.9763, "step": 8456 }, { "epoch": 0.6109776581718352, "grad_norm": 11.006068203154134, "learning_rate": 4.069156448296145e-06, "loss": 0.8966, "step": 8457 }, { "epoch": 0.6110499033720447, "grad_norm": 6.614597142322774, "learning_rate": 4.068928728685817e-06, "loss": 0.8571, "step": 8458 }, { "epoch": 0.6111221485722542, "grad_norm": 6.233584093296035, "learning_rate": 4.068700987598074e-06, "loss": 0.8621, "step": 8459 }, { "epoch": 0.6111943937724638, "grad_norm": 6.048905694530074, "learning_rate": 4.068473225036032e-06, "loss": 0.9194, "step": 8460 }, { "epoch": 0.6112666389726733, "grad_norm": 7.852438442276775, "learning_rate": 4.068245441002809e-06, "loss": 0.8382, "step": 8461 }, { "epoch": 0.6113388841728827, "grad_norm": 9.925683631702183, "learning_rate": 4.0680176355015235e-06, "loss": 0.8685, "step": 8462 }, { "epoch": 0.6114111293730923, "grad_norm": 8.13993706606219, "learning_rate": 4.067789808535295e-06, "loss": 0.9347, "step": 8463 }, { "epoch": 0.6114833745733018, "grad_norm": 5.788873049046638, "learning_rate": 4.067561960107241e-06, "loss": 0.8844, "step": 8464 }, { "epoch": 0.6115556197735112, "grad_norm": 4.494861423957479, "learning_rate": 4.067334090220481e-06, "loss": 0.7907, "step": 8465 }, { "epoch": 0.6116278649737208, "grad_norm": 8.501331449459798, "learning_rate": 4.067106198878134e-06, "loss": 0.8889, "step": 8466 }, { "epoch": 0.6117001101739303, "grad_norm": 9.583682465758775, "learning_rate": 4.06687828608332e-06, "loss": 0.8997, "step": 8467 }, { "epoch": 0.6117723553741399, "grad_norm": 7.344410448609642, "learning_rate": 4.0666503518391585e-06, "loss": 0.8164, "step": 8468 }, { "epoch": 0.6118446005743493, "grad_norm": 7.681088635058764, "learning_rate": 4.06642239614877e-06, "loss": 0.8006, "step": 8469 }, { "epoch": 0.6119168457745588, "grad_norm": 6.13742766483357, "learning_rate": 4.066194419015276e-06, "loss": 0.9038, "step": 8470 }, { "epoch": 0.6119890909747684, "grad_norm": 5.841289797973239, "learning_rate": 4.065966420441796e-06, "loss": 0.9177, "step": 8471 }, { "epoch": 0.6120613361749778, "grad_norm": 6.556065774807595, "learning_rate": 4.065738400431452e-06, "loss": 0.8785, "step": 8472 }, { "epoch": 0.6121335813751874, "grad_norm": 7.233630411871129, "learning_rate": 4.065510358987364e-06, "loss": 0.869, "step": 8473 }, { "epoch": 0.6122058265753969, "grad_norm": 7.150542135959676, "learning_rate": 4.065282296112656e-06, "loss": 0.8293, "step": 8474 }, { "epoch": 0.6122780717756064, "grad_norm": 6.5022749221020835, "learning_rate": 4.065054211810448e-06, "loss": 0.8507, "step": 8475 }, { "epoch": 0.6123503169758159, "grad_norm": 5.845265237942954, "learning_rate": 4.064826106083862e-06, "loss": 0.8312, "step": 8476 }, { "epoch": 0.6124225621760254, "grad_norm": 5.090085823819443, "learning_rate": 4.064597978936023e-06, "loss": 0.8388, "step": 8477 }, { "epoch": 0.612494807376235, "grad_norm": 5.911429581313155, "learning_rate": 4.064369830370053e-06, "loss": 0.7671, "step": 8478 }, { "epoch": 0.6125670525764445, "grad_norm": 7.424873726024713, "learning_rate": 4.064141660389074e-06, "loss": 0.9081, "step": 8479 }, { "epoch": 0.6126392977766539, "grad_norm": 6.347914658201739, "learning_rate": 4.063913468996209e-06, "loss": 0.8699, "step": 8480 }, { "epoch": 0.6127115429768635, "grad_norm": 6.747566349569725, "learning_rate": 4.063685256194585e-06, "loss": 0.8607, "step": 8481 }, { "epoch": 0.612783788177073, "grad_norm": 5.894037447217248, "learning_rate": 4.063457021987323e-06, "loss": 0.8837, "step": 8482 }, { "epoch": 0.6128560333772824, "grad_norm": 6.089533359332538, "learning_rate": 4.063228766377548e-06, "loss": 0.8834, "step": 8483 }, { "epoch": 0.612928278577492, "grad_norm": 5.543140626464454, "learning_rate": 4.063000489368385e-06, "loss": 0.8008, "step": 8484 }, { "epoch": 0.6130005237777015, "grad_norm": 7.0575379350962235, "learning_rate": 4.062772190962959e-06, "loss": 0.8666, "step": 8485 }, { "epoch": 0.6130727689779111, "grad_norm": 8.41639658680291, "learning_rate": 4.062543871164396e-06, "loss": 0.8566, "step": 8486 }, { "epoch": 0.6131450141781205, "grad_norm": 10.616400615845881, "learning_rate": 4.062315529975819e-06, "loss": 0.8441, "step": 8487 }, { "epoch": 0.61321725937833, "grad_norm": 6.86271513165682, "learning_rate": 4.062087167400356e-06, "loss": 0.8801, "step": 8488 }, { "epoch": 0.6132895045785396, "grad_norm": 7.409488018433696, "learning_rate": 4.061858783441133e-06, "loss": 0.875, "step": 8489 }, { "epoch": 0.613361749778749, "grad_norm": 8.126833664782309, "learning_rate": 4.061630378101276e-06, "loss": 0.8633, "step": 8490 }, { "epoch": 0.6134339949789586, "grad_norm": 7.6826255665431935, "learning_rate": 4.061401951383912e-06, "loss": 0.8147, "step": 8491 }, { "epoch": 0.6135062401791681, "grad_norm": 10.589125082239997, "learning_rate": 4.061173503292167e-06, "loss": 0.8744, "step": 8492 }, { "epoch": 0.6135784853793776, "grad_norm": 8.18296164026538, "learning_rate": 4.060945033829169e-06, "loss": 0.8451, "step": 8493 }, { "epoch": 0.6136507305795871, "grad_norm": 7.489822857828507, "learning_rate": 4.060716542998045e-06, "loss": 0.8627, "step": 8494 }, { "epoch": 0.6137229757797966, "grad_norm": 7.311897970343332, "learning_rate": 4.0604880308019245e-06, "loss": 0.9487, "step": 8495 }, { "epoch": 0.6137952209800062, "grad_norm": 7.7355820455696955, "learning_rate": 4.0602594972439335e-06, "loss": 0.8141, "step": 8496 }, { "epoch": 0.6138674661802157, "grad_norm": 5.660865544433837, "learning_rate": 4.060030942327202e-06, "loss": 0.8288, "step": 8497 }, { "epoch": 0.6139397113804251, "grad_norm": 6.080989368652124, "learning_rate": 4.059802366054858e-06, "loss": 0.8264, "step": 8498 }, { "epoch": 0.6140119565806347, "grad_norm": 6.29314817792, "learning_rate": 4.059573768430031e-06, "loss": 0.904, "step": 8499 }, { "epoch": 0.6140842017808442, "grad_norm": 5.136744370777359, "learning_rate": 4.05934514945585e-06, "loss": 0.7632, "step": 8500 }, { "epoch": 0.6141564469810536, "grad_norm": 7.215114454179785, "learning_rate": 4.059116509135444e-06, "loss": 0.8196, "step": 8501 }, { "epoch": 0.6142286921812632, "grad_norm": 6.305986121774895, "learning_rate": 4.058887847471944e-06, "loss": 0.8258, "step": 8502 }, { "epoch": 0.6143009373814727, "grad_norm": 5.197149471116072, "learning_rate": 4.058659164468479e-06, "loss": 0.8049, "step": 8503 }, { "epoch": 0.6143731825816823, "grad_norm": 5.29763653437472, "learning_rate": 4.058430460128182e-06, "loss": 0.8508, "step": 8504 }, { "epoch": 0.6144454277818917, "grad_norm": 7.419668668767957, "learning_rate": 4.05820173445418e-06, "loss": 0.8794, "step": 8505 }, { "epoch": 0.6145176729821012, "grad_norm": 5.747076867163849, "learning_rate": 4.057972987449608e-06, "loss": 0.8025, "step": 8506 }, { "epoch": 0.6145899181823108, "grad_norm": 5.585258474627074, "learning_rate": 4.057744219117594e-06, "loss": 0.9402, "step": 8507 }, { "epoch": 0.6146621633825202, "grad_norm": 6.661184281912463, "learning_rate": 4.057515429461273e-06, "loss": 0.9672, "step": 8508 }, { "epoch": 0.6147344085827298, "grad_norm": 7.636091326527618, "learning_rate": 4.057286618483773e-06, "loss": 0.8522, "step": 8509 }, { "epoch": 0.6148066537829393, "grad_norm": 6.002780270148133, "learning_rate": 4.057057786188229e-06, "loss": 0.8843, "step": 8510 }, { "epoch": 0.6148788989831488, "grad_norm": 5.695820852441329, "learning_rate": 4.056828932577774e-06, "loss": 0.861, "step": 8511 }, { "epoch": 0.6149511441833583, "grad_norm": 5.0215459083161, "learning_rate": 4.056600057655538e-06, "loss": 0.8836, "step": 8512 }, { "epoch": 0.6150233893835678, "grad_norm": 5.495749825437729, "learning_rate": 4.056371161424657e-06, "loss": 0.7904, "step": 8513 }, { "epoch": 0.6150956345837774, "grad_norm": 5.2805933798079066, "learning_rate": 4.056142243888263e-06, "loss": 0.8533, "step": 8514 }, { "epoch": 0.6151678797839869, "grad_norm": 8.9918790842255, "learning_rate": 4.0559133050494895e-06, "loss": 0.918, "step": 8515 }, { "epoch": 0.6152401249841963, "grad_norm": 8.40241321231748, "learning_rate": 4.055684344911472e-06, "loss": 0.8263, "step": 8516 }, { "epoch": 0.6153123701844059, "grad_norm": 6.441582727646162, "learning_rate": 4.055455363477343e-06, "loss": 0.8342, "step": 8517 }, { "epoch": 0.6153846153846154, "grad_norm": 5.501959711679586, "learning_rate": 4.055226360750238e-06, "loss": 0.8491, "step": 8518 }, { "epoch": 0.6154568605848248, "grad_norm": 7.439318009980033, "learning_rate": 4.054997336733291e-06, "loss": 0.8544, "step": 8519 }, { "epoch": 0.6155291057850344, "grad_norm": 5.571021079088416, "learning_rate": 4.054768291429638e-06, "loss": 0.8902, "step": 8520 }, { "epoch": 0.6156013509852439, "grad_norm": 5.490616857341358, "learning_rate": 4.054539224842416e-06, "loss": 0.8076, "step": 8521 }, { "epoch": 0.6156735961854535, "grad_norm": 7.822386566636833, "learning_rate": 4.054310136974758e-06, "loss": 0.9139, "step": 8522 }, { "epoch": 0.6157458413856629, "grad_norm": 8.001050403300328, "learning_rate": 4.0540810278298e-06, "loss": 0.7977, "step": 8523 }, { "epoch": 0.6158180865858724, "grad_norm": 7.053609197422333, "learning_rate": 4.05385189741068e-06, "loss": 0.8152, "step": 8524 }, { "epoch": 0.615890331786082, "grad_norm": 6.615586700833134, "learning_rate": 4.053622745720534e-06, "loss": 0.8121, "step": 8525 }, { "epoch": 0.6159625769862914, "grad_norm": 5.772740054816128, "learning_rate": 4.053393572762499e-06, "loss": 0.819, "step": 8526 }, { "epoch": 0.616034822186501, "grad_norm": 5.850211910746034, "learning_rate": 4.053164378539712e-06, "loss": 0.9109, "step": 8527 }, { "epoch": 0.6161070673867105, "grad_norm": 8.83427908770782, "learning_rate": 4.052935163055311e-06, "loss": 0.9018, "step": 8528 }, { "epoch": 0.61617931258692, "grad_norm": 8.027876921162164, "learning_rate": 4.052705926312434e-06, "loss": 0.8824, "step": 8529 }, { "epoch": 0.6162515577871295, "grad_norm": 6.049293527260442, "learning_rate": 4.052476668314217e-06, "loss": 0.8597, "step": 8530 }, { "epoch": 0.616323802987339, "grad_norm": 7.793346393966934, "learning_rate": 4.0522473890638e-06, "loss": 0.8868, "step": 8531 }, { "epoch": 0.6163960481875486, "grad_norm": 6.226430779072552, "learning_rate": 4.052018088564322e-06, "loss": 0.8883, "step": 8532 }, { "epoch": 0.6164682933877581, "grad_norm": 5.961380968308971, "learning_rate": 4.051788766818922e-06, "loss": 0.8891, "step": 8533 }, { "epoch": 0.6165405385879675, "grad_norm": 6.783635858011658, "learning_rate": 4.051559423830738e-06, "loss": 1.0432, "step": 8534 }, { "epoch": 0.6166127837881771, "grad_norm": 7.4767446465861, "learning_rate": 4.05133005960291e-06, "loss": 0.8496, "step": 8535 }, { "epoch": 0.6166850289883866, "grad_norm": 5.369328189930435, "learning_rate": 4.0511006741385786e-06, "loss": 0.8143, "step": 8536 }, { "epoch": 0.616757274188596, "grad_norm": 5.424328163842296, "learning_rate": 4.050871267440883e-06, "loss": 0.793, "step": 8537 }, { "epoch": 0.6168295193888056, "grad_norm": 5.770074383604945, "learning_rate": 4.0506418395129645e-06, "loss": 0.8562, "step": 8538 }, { "epoch": 0.6169017645890151, "grad_norm": 6.11629069296255, "learning_rate": 4.050412390357962e-06, "loss": 0.9453, "step": 8539 }, { "epoch": 0.6169740097892247, "grad_norm": 8.195143997016523, "learning_rate": 4.050182919979019e-06, "loss": 0.9172, "step": 8540 }, { "epoch": 0.6170462549894341, "grad_norm": 6.42597715316876, "learning_rate": 4.0499534283792745e-06, "loss": 0.8733, "step": 8541 }, { "epoch": 0.6171185001896436, "grad_norm": 5.5449002041448665, "learning_rate": 4.049723915561871e-06, "loss": 0.7533, "step": 8542 }, { "epoch": 0.6171907453898532, "grad_norm": 5.8440254676885335, "learning_rate": 4.049494381529951e-06, "loss": 0.871, "step": 8543 }, { "epoch": 0.6172629905900626, "grad_norm": 5.378195056979884, "learning_rate": 4.049264826286656e-06, "loss": 0.8888, "step": 8544 }, { "epoch": 0.6173352357902722, "grad_norm": 7.444433490624918, "learning_rate": 4.049035249835128e-06, "loss": 0.7692, "step": 8545 }, { "epoch": 0.6174074809904817, "grad_norm": 7.657088751489914, "learning_rate": 4.04880565217851e-06, "loss": 0.8205, "step": 8546 }, { "epoch": 0.6174797261906912, "grad_norm": 5.368998348367273, "learning_rate": 4.048576033319946e-06, "loss": 0.759, "step": 8547 }, { "epoch": 0.6175519713909007, "grad_norm": 5.668911246263923, "learning_rate": 4.048346393262578e-06, "loss": 0.8594, "step": 8548 }, { "epoch": 0.6176242165911102, "grad_norm": 7.012407750357604, "learning_rate": 4.048116732009551e-06, "loss": 0.904, "step": 8549 }, { "epoch": 0.6176964617913198, "grad_norm": 6.465272420172333, "learning_rate": 4.047887049564007e-06, "loss": 0.8207, "step": 8550 }, { "epoch": 0.6177687069915293, "grad_norm": 6.549895761839786, "learning_rate": 4.04765734592909e-06, "loss": 0.8861, "step": 8551 }, { "epoch": 0.6178409521917387, "grad_norm": 7.719138934881021, "learning_rate": 4.047427621107948e-06, "loss": 0.949, "step": 8552 }, { "epoch": 0.6179131973919483, "grad_norm": 7.6924179597434, "learning_rate": 4.047197875103722e-06, "loss": 0.8856, "step": 8553 }, { "epoch": 0.6179854425921578, "grad_norm": 6.166801897920296, "learning_rate": 4.0469681079195584e-06, "loss": 0.8699, "step": 8554 }, { "epoch": 0.6180576877923672, "grad_norm": 7.762186129454819, "learning_rate": 4.046738319558603e-06, "loss": 0.8676, "step": 8555 }, { "epoch": 0.6181299329925768, "grad_norm": 6.451683640861338, "learning_rate": 4.0465085100240006e-06, "loss": 0.8604, "step": 8556 }, { "epoch": 0.6182021781927863, "grad_norm": 5.83321373226535, "learning_rate": 4.046278679318898e-06, "loss": 0.8955, "step": 8557 }, { "epoch": 0.6182744233929959, "grad_norm": 6.964068152973362, "learning_rate": 4.046048827446441e-06, "loss": 0.8779, "step": 8558 }, { "epoch": 0.6183466685932053, "grad_norm": 6.073175842442932, "learning_rate": 4.0458189544097744e-06, "loss": 0.8341, "step": 8559 }, { "epoch": 0.6184189137934148, "grad_norm": 6.199735315887328, "learning_rate": 4.0455890602120475e-06, "loss": 0.9331, "step": 8560 }, { "epoch": 0.6184911589936244, "grad_norm": 7.817837044208884, "learning_rate": 4.045359144856407e-06, "loss": 0.9444, "step": 8561 }, { "epoch": 0.6185634041938338, "grad_norm": 7.06432757741979, "learning_rate": 4.045129208345999e-06, "loss": 0.9046, "step": 8562 }, { "epoch": 0.6186356493940434, "grad_norm": 6.5262963888919465, "learning_rate": 4.044899250683973e-06, "loss": 0.8503, "step": 8563 }, { "epoch": 0.6187078945942529, "grad_norm": 5.628207839110476, "learning_rate": 4.044669271873475e-06, "loss": 0.8759, "step": 8564 }, { "epoch": 0.6187801397944624, "grad_norm": 8.134232191018544, "learning_rate": 4.044439271917654e-06, "loss": 0.8644, "step": 8565 }, { "epoch": 0.6188523849946719, "grad_norm": 7.558559683789512, "learning_rate": 4.044209250819658e-06, "loss": 0.931, "step": 8566 }, { "epoch": 0.6189246301948814, "grad_norm": 6.745835891858237, "learning_rate": 4.043979208582637e-06, "loss": 0.9127, "step": 8567 }, { "epoch": 0.618996875395091, "grad_norm": 5.7235884079758925, "learning_rate": 4.0437491452097395e-06, "loss": 0.8835, "step": 8568 }, { "epoch": 0.6190691205953005, "grad_norm": 7.833999226721773, "learning_rate": 4.043519060704114e-06, "loss": 0.9022, "step": 8569 }, { "epoch": 0.6191413657955099, "grad_norm": 6.609677138761175, "learning_rate": 4.043288955068913e-06, "loss": 0.8651, "step": 8570 }, { "epoch": 0.6192136109957195, "grad_norm": 6.256508452024492, "learning_rate": 4.043058828307283e-06, "loss": 0.9041, "step": 8571 }, { "epoch": 0.619285856195929, "grad_norm": 5.467528811755184, "learning_rate": 4.042828680422376e-06, "loss": 0.8627, "step": 8572 }, { "epoch": 0.6193581013961384, "grad_norm": 6.278543828776166, "learning_rate": 4.042598511417343e-06, "loss": 0.8546, "step": 8573 }, { "epoch": 0.619430346596348, "grad_norm": 5.506936294444872, "learning_rate": 4.042368321295333e-06, "loss": 0.7523, "step": 8574 }, { "epoch": 0.6195025917965575, "grad_norm": 6.279927432375967, "learning_rate": 4.042138110059498e-06, "loss": 0.792, "step": 8575 }, { "epoch": 0.6195748369967671, "grad_norm": 7.915729614482157, "learning_rate": 4.041907877712991e-06, "loss": 0.9044, "step": 8576 }, { "epoch": 0.6196470821969765, "grad_norm": 6.533001094596721, "learning_rate": 4.041677624258962e-06, "loss": 0.8166, "step": 8577 }, { "epoch": 0.619719327397186, "grad_norm": 7.546137992933765, "learning_rate": 4.041447349700562e-06, "loss": 0.9074, "step": 8578 }, { "epoch": 0.6197915725973956, "grad_norm": 6.210840457182129, "learning_rate": 4.041217054040946e-06, "loss": 0.8078, "step": 8579 }, { "epoch": 0.619863817797605, "grad_norm": 5.564381313429654, "learning_rate": 4.040986737283265e-06, "loss": 0.9725, "step": 8580 }, { "epoch": 0.6199360629978146, "grad_norm": 5.157645851547048, "learning_rate": 4.040756399430672e-06, "loss": 0.9078, "step": 8581 }, { "epoch": 0.6200083081980241, "grad_norm": 6.149983116064699, "learning_rate": 4.04052604048632e-06, "loss": 0.8702, "step": 8582 }, { "epoch": 0.6200805533982336, "grad_norm": 5.842140669735749, "learning_rate": 4.0402956604533625e-06, "loss": 0.8525, "step": 8583 }, { "epoch": 0.6201527985984431, "grad_norm": 7.266537713589435, "learning_rate": 4.040065259334954e-06, "loss": 0.8761, "step": 8584 }, { "epoch": 0.6202250437986526, "grad_norm": 6.599337122602821, "learning_rate": 4.039834837134248e-06, "loss": 0.8982, "step": 8585 }, { "epoch": 0.6202972889988622, "grad_norm": 7.094537498018529, "learning_rate": 4.039604393854398e-06, "loss": 0.8933, "step": 8586 }, { "epoch": 0.6203695341990717, "grad_norm": 5.991331195945666, "learning_rate": 4.03937392949856e-06, "loss": 0.8513, "step": 8587 }, { "epoch": 0.6204417793992811, "grad_norm": 7.291875462040355, "learning_rate": 4.0391434440698875e-06, "loss": 0.8908, "step": 8588 }, { "epoch": 0.6205140245994907, "grad_norm": 6.696658699077742, "learning_rate": 4.0389129375715366e-06, "loss": 0.8472, "step": 8589 }, { "epoch": 0.6205862697997002, "grad_norm": 6.952151661156894, "learning_rate": 4.038682410006662e-06, "loss": 0.8738, "step": 8590 }, { "epoch": 0.6206585149999096, "grad_norm": 5.5665450229412174, "learning_rate": 4.038451861378421e-06, "loss": 0.7677, "step": 8591 }, { "epoch": 0.6207307602001192, "grad_norm": 6.914504145940511, "learning_rate": 4.038221291689968e-06, "loss": 0.8007, "step": 8592 }, { "epoch": 0.6208030054003287, "grad_norm": 6.040644311454678, "learning_rate": 4.037990700944459e-06, "loss": 0.9086, "step": 8593 }, { "epoch": 0.6208752506005383, "grad_norm": 6.981428032417592, "learning_rate": 4.037760089145052e-06, "loss": 0.8873, "step": 8594 }, { "epoch": 0.6209474958007477, "grad_norm": 4.992125413774236, "learning_rate": 4.037529456294904e-06, "loss": 0.8494, "step": 8595 }, { "epoch": 0.6210197410009572, "grad_norm": 5.103170284670668, "learning_rate": 4.03729880239717e-06, "loss": 0.8111, "step": 8596 }, { "epoch": 0.6210919862011668, "grad_norm": 6.630788613392682, "learning_rate": 4.037068127455011e-06, "loss": 0.8626, "step": 8597 }, { "epoch": 0.6211642314013762, "grad_norm": 5.066085111135766, "learning_rate": 4.036837431471582e-06, "loss": 0.8458, "step": 8598 }, { "epoch": 0.6212364766015858, "grad_norm": 5.966819890503893, "learning_rate": 4.036606714450042e-06, "loss": 0.8539, "step": 8599 }, { "epoch": 0.6213087218017953, "grad_norm": 5.528932592424352, "learning_rate": 4.0363759763935486e-06, "loss": 0.9367, "step": 8600 }, { "epoch": 0.6213809670020048, "grad_norm": 8.016190834225608, "learning_rate": 4.0361452173052616e-06, "loss": 1.0066, "step": 8601 }, { "epoch": 0.6214532122022143, "grad_norm": 5.661418766092856, "learning_rate": 4.035914437188339e-06, "loss": 0.8229, "step": 8602 }, { "epoch": 0.6215254574024238, "grad_norm": 6.3437911910800935, "learning_rate": 4.03568363604594e-06, "loss": 0.8129, "step": 8603 }, { "epoch": 0.6215977026026334, "grad_norm": 5.581194036017363, "learning_rate": 4.0354528138812255e-06, "loss": 0.8575, "step": 8604 }, { "epoch": 0.6216699478028429, "grad_norm": 5.443187326286224, "learning_rate": 4.035221970697353e-06, "loss": 0.8738, "step": 8605 }, { "epoch": 0.6217421930030523, "grad_norm": 6.071810150978165, "learning_rate": 4.0349911064974845e-06, "loss": 0.8846, "step": 8606 }, { "epoch": 0.6218144382032619, "grad_norm": 7.05213424085746, "learning_rate": 4.03476022128478e-06, "loss": 0.9483, "step": 8607 }, { "epoch": 0.6218866834034714, "grad_norm": 6.0723419528211835, "learning_rate": 4.034529315062399e-06, "loss": 0.8076, "step": 8608 }, { "epoch": 0.6219589286036808, "grad_norm": 5.061651841687166, "learning_rate": 4.034298387833503e-06, "loss": 0.8547, "step": 8609 }, { "epoch": 0.6220311738038904, "grad_norm": 6.164975871018019, "learning_rate": 4.034067439601254e-06, "loss": 0.8349, "step": 8610 }, { "epoch": 0.6221034190040999, "grad_norm": 6.098552044225975, "learning_rate": 4.0338364703688115e-06, "loss": 0.8819, "step": 8611 }, { "epoch": 0.6221756642043095, "grad_norm": 5.359447245208215, "learning_rate": 4.03360548013934e-06, "loss": 0.7751, "step": 8612 }, { "epoch": 0.6222479094045189, "grad_norm": 7.419079191750604, "learning_rate": 4.0333744689159996e-06, "loss": 0.8948, "step": 8613 }, { "epoch": 0.6223201546047284, "grad_norm": 7.842023317480895, "learning_rate": 4.033143436701954e-06, "loss": 1.0058, "step": 8614 }, { "epoch": 0.622392399804938, "grad_norm": 7.231652557061539, "learning_rate": 4.032912383500365e-06, "loss": 0.8516, "step": 8615 }, { "epoch": 0.6224646450051474, "grad_norm": 5.660025837977444, "learning_rate": 4.032681309314395e-06, "loss": 0.8835, "step": 8616 }, { "epoch": 0.622536890205357, "grad_norm": 6.419366871165047, "learning_rate": 4.032450214147208e-06, "loss": 0.7578, "step": 8617 }, { "epoch": 0.6226091354055665, "grad_norm": 6.666124353603455, "learning_rate": 4.032219098001968e-06, "loss": 0.8731, "step": 8618 }, { "epoch": 0.622681380605776, "grad_norm": 6.073902850507435, "learning_rate": 4.031987960881838e-06, "loss": 0.9018, "step": 8619 }, { "epoch": 0.6227536258059855, "grad_norm": 6.866496150050774, "learning_rate": 4.031756802789983e-06, "loss": 0.874, "step": 8620 }, { "epoch": 0.622825871006195, "grad_norm": 8.0088980780937, "learning_rate": 4.031525623729565e-06, "loss": 0.8587, "step": 8621 }, { "epoch": 0.6228981162064046, "grad_norm": 4.998681848342347, "learning_rate": 4.031294423703752e-06, "loss": 0.9626, "step": 8622 }, { "epoch": 0.622970361406614, "grad_norm": 7.478993561706343, "learning_rate": 4.031063202715707e-06, "loss": 0.9321, "step": 8623 }, { "epoch": 0.6230426066068235, "grad_norm": 7.5477678548736815, "learning_rate": 4.0308319607685955e-06, "loss": 0.8691, "step": 8624 }, { "epoch": 0.6231148518070331, "grad_norm": 7.011652648104156, "learning_rate": 4.0306006978655824e-06, "loss": 0.8974, "step": 8625 }, { "epoch": 0.6231870970072426, "grad_norm": 7.591483574622917, "learning_rate": 4.030369414009835e-06, "loss": 0.9032, "step": 8626 }, { "epoch": 0.623259342207452, "grad_norm": 7.059366797129614, "learning_rate": 4.0301381092045185e-06, "loss": 0.9249, "step": 8627 }, { "epoch": 0.6233315874076616, "grad_norm": 6.321539421433539, "learning_rate": 4.029906783452798e-06, "loss": 0.8655, "step": 8628 }, { "epoch": 0.6234038326078711, "grad_norm": 5.361149886039929, "learning_rate": 4.029675436757844e-06, "loss": 0.8288, "step": 8629 }, { "epoch": 0.6234760778080807, "grad_norm": 6.908206438188349, "learning_rate": 4.029444069122819e-06, "loss": 0.9234, "step": 8630 }, { "epoch": 0.6235483230082901, "grad_norm": 7.208591199316093, "learning_rate": 4.029212680550893e-06, "loss": 0.8237, "step": 8631 }, { "epoch": 0.6236205682084996, "grad_norm": 5.42602872455782, "learning_rate": 4.028981271045233e-06, "loss": 0.9514, "step": 8632 }, { "epoch": 0.6236928134087092, "grad_norm": 9.379855106755672, "learning_rate": 4.028749840609005e-06, "loss": 0.9254, "step": 8633 }, { "epoch": 0.6237650586089186, "grad_norm": 5.883173674379345, "learning_rate": 4.0285183892453805e-06, "loss": 0.79, "step": 8634 }, { "epoch": 0.6238373038091282, "grad_norm": 7.253313556999473, "learning_rate": 4.0282869169575254e-06, "loss": 0.8397, "step": 8635 }, { "epoch": 0.6239095490093377, "grad_norm": 6.251244993186857, "learning_rate": 4.028055423748609e-06, "loss": 0.8414, "step": 8636 }, { "epoch": 0.6239817942095472, "grad_norm": 6.048988938772866, "learning_rate": 4.027823909621801e-06, "loss": 0.8863, "step": 8637 }, { "epoch": 0.6240540394097567, "grad_norm": 4.965379447459275, "learning_rate": 4.027592374580269e-06, "loss": 0.7733, "step": 8638 }, { "epoch": 0.6241262846099662, "grad_norm": 7.396320108504021, "learning_rate": 4.0273608186271836e-06, "loss": 0.8983, "step": 8639 }, { "epoch": 0.6241985298101758, "grad_norm": 5.978561246661116, "learning_rate": 4.027129241765715e-06, "loss": 0.8035, "step": 8640 }, { "epoch": 0.6242707750103852, "grad_norm": 6.307626788316156, "learning_rate": 4.0268976439990325e-06, "loss": 0.8869, "step": 8641 }, { "epoch": 0.6243430202105947, "grad_norm": 6.228383639143278, "learning_rate": 4.026666025330307e-06, "loss": 0.8435, "step": 8642 }, { "epoch": 0.6244152654108043, "grad_norm": 6.151119669894204, "learning_rate": 4.026434385762709e-06, "loss": 0.7973, "step": 8643 }, { "epoch": 0.6244875106110138, "grad_norm": 6.264931809281819, "learning_rate": 4.026202725299409e-06, "loss": 0.8913, "step": 8644 }, { "epoch": 0.6245597558112232, "grad_norm": 6.569310605184731, "learning_rate": 4.02597104394358e-06, "loss": 0.9117, "step": 8645 }, { "epoch": 0.6246320010114328, "grad_norm": 7.48270354590886, "learning_rate": 4.02573934169839e-06, "loss": 0.9066, "step": 8646 }, { "epoch": 0.6247042462116423, "grad_norm": 6.676724603293343, "learning_rate": 4.025507618567015e-06, "loss": 0.9038, "step": 8647 }, { "epoch": 0.6247764914118519, "grad_norm": 6.7451518980077285, "learning_rate": 4.025275874552624e-06, "loss": 0.8843, "step": 8648 }, { "epoch": 0.6248487366120613, "grad_norm": 7.9113461199987904, "learning_rate": 4.025044109658391e-06, "loss": 0.8725, "step": 8649 }, { "epoch": 0.6249209818122708, "grad_norm": 5.712228527338096, "learning_rate": 4.024812323887489e-06, "loss": 0.9144, "step": 8650 }, { "epoch": 0.6249932270124804, "grad_norm": 6.269088586884466, "learning_rate": 4.02458051724309e-06, "loss": 0.8536, "step": 8651 }, { "epoch": 0.6250654722126898, "grad_norm": 9.03788539966947, "learning_rate": 4.024348689728367e-06, "loss": 0.9003, "step": 8652 }, { "epoch": 0.6251377174128994, "grad_norm": 6.313328452559897, "learning_rate": 4.024116841346494e-06, "loss": 0.855, "step": 8653 }, { "epoch": 0.6252099626131089, "grad_norm": 8.12857962762839, "learning_rate": 4.023884972100644e-06, "loss": 0.9329, "step": 8654 }, { "epoch": 0.6252822078133184, "grad_norm": 6.302557943668648, "learning_rate": 4.023653081993994e-06, "loss": 0.8442, "step": 8655 }, { "epoch": 0.6253544530135279, "grad_norm": 9.851413032916506, "learning_rate": 4.023421171029714e-06, "loss": 1.0157, "step": 8656 }, { "epoch": 0.6254266982137374, "grad_norm": 7.012001648192748, "learning_rate": 4.0231892392109835e-06, "loss": 0.8833, "step": 8657 }, { "epoch": 0.625498943413947, "grad_norm": 5.514517264936543, "learning_rate": 4.022957286540974e-06, "loss": 0.9733, "step": 8658 }, { "epoch": 0.6255711886141564, "grad_norm": 7.81889191642788, "learning_rate": 4.022725313022861e-06, "loss": 0.862, "step": 8659 }, { "epoch": 0.6256434338143659, "grad_norm": 6.331980042012246, "learning_rate": 4.0224933186598214e-06, "loss": 0.8539, "step": 8660 }, { "epoch": 0.6257156790145755, "grad_norm": 5.557186203170099, "learning_rate": 4.022261303455031e-06, "loss": 0.8457, "step": 8661 }, { "epoch": 0.625787924214785, "grad_norm": 7.138510493216885, "learning_rate": 4.022029267411663e-06, "loss": 0.9836, "step": 8662 }, { "epoch": 0.6258601694149944, "grad_norm": 6.4195258305928204, "learning_rate": 4.021797210532898e-06, "loss": 0.8183, "step": 8663 }, { "epoch": 0.625932414615204, "grad_norm": 8.166749240172338, "learning_rate": 4.0215651328219115e-06, "loss": 0.9248, "step": 8664 }, { "epoch": 0.6260046598154135, "grad_norm": 6.548093841644634, "learning_rate": 4.021333034281878e-06, "loss": 0.8515, "step": 8665 }, { "epoch": 0.6260769050156231, "grad_norm": 8.877241684322614, "learning_rate": 4.021100914915977e-06, "loss": 0.9273, "step": 8666 }, { "epoch": 0.6261491502158325, "grad_norm": 4.9543251484941875, "learning_rate": 4.0208687747273865e-06, "loss": 0.8673, "step": 8667 }, { "epoch": 0.626221395416042, "grad_norm": 5.333194412965308, "learning_rate": 4.020636613719282e-06, "loss": 0.8442, "step": 8668 }, { "epoch": 0.6262936406162516, "grad_norm": 5.915531031551936, "learning_rate": 4.020404431894844e-06, "loss": 0.9245, "step": 8669 }, { "epoch": 0.626365885816461, "grad_norm": 5.322727008384531, "learning_rate": 4.02017222925725e-06, "loss": 0.8416, "step": 8670 }, { "epoch": 0.6264381310166706, "grad_norm": 5.911286643695732, "learning_rate": 4.0199400058096775e-06, "loss": 0.8927, "step": 8671 }, { "epoch": 0.6265103762168801, "grad_norm": 5.654160255797689, "learning_rate": 4.019707761555307e-06, "loss": 0.9003, "step": 8672 }, { "epoch": 0.6265826214170896, "grad_norm": 6.602492769144209, "learning_rate": 4.019475496497318e-06, "loss": 0.8777, "step": 8673 }, { "epoch": 0.6266548666172991, "grad_norm": 7.07231093347127, "learning_rate": 4.0192432106388876e-06, "loss": 0.8661, "step": 8674 }, { "epoch": 0.6267271118175086, "grad_norm": 6.1363510517029844, "learning_rate": 4.019010903983198e-06, "loss": 0.8113, "step": 8675 }, { "epoch": 0.6267993570177182, "grad_norm": 5.923624308746452, "learning_rate": 4.018778576533429e-06, "loss": 0.8343, "step": 8676 }, { "epoch": 0.6268716022179276, "grad_norm": 6.171160151936555, "learning_rate": 4.01854622829276e-06, "loss": 0.8589, "step": 8677 }, { "epoch": 0.6269438474181371, "grad_norm": 5.799771718761532, "learning_rate": 4.018313859264372e-06, "loss": 0.8396, "step": 8678 }, { "epoch": 0.6270160926183467, "grad_norm": 4.979838157076865, "learning_rate": 4.018081469451447e-06, "loss": 0.8034, "step": 8679 }, { "epoch": 0.6270883378185562, "grad_norm": 6.90482035895599, "learning_rate": 4.017849058857165e-06, "loss": 0.7689, "step": 8680 }, { "epoch": 0.6271605830187656, "grad_norm": 6.87410688668004, "learning_rate": 4.017616627484708e-06, "loss": 0.9576, "step": 8681 }, { "epoch": 0.6272328282189752, "grad_norm": 5.376642730211842, "learning_rate": 4.017384175337258e-06, "loss": 0.8116, "step": 8682 }, { "epoch": 0.6273050734191847, "grad_norm": 5.770232719078514, "learning_rate": 4.017151702417996e-06, "loss": 0.8755, "step": 8683 }, { "epoch": 0.6273773186193943, "grad_norm": 6.319816051172241, "learning_rate": 4.016919208730106e-06, "loss": 0.8454, "step": 8684 }, { "epoch": 0.6274495638196037, "grad_norm": 6.855988450742807, "learning_rate": 4.016686694276769e-06, "loss": 0.8998, "step": 8685 }, { "epoch": 0.6275218090198132, "grad_norm": 6.482172359803252, "learning_rate": 4.016454159061169e-06, "loss": 0.9166, "step": 8686 }, { "epoch": 0.6275940542200228, "grad_norm": 6.371929551246992, "learning_rate": 4.01622160308649e-06, "loss": 0.8594, "step": 8687 }, { "epoch": 0.6276662994202322, "grad_norm": 5.654083680134464, "learning_rate": 4.015989026355913e-06, "loss": 0.8354, "step": 8688 }, { "epoch": 0.6277385446204418, "grad_norm": 6.525007705391613, "learning_rate": 4.015756428872625e-06, "loss": 0.7484, "step": 8689 }, { "epoch": 0.6278107898206513, "grad_norm": 7.141005891954283, "learning_rate": 4.015523810639807e-06, "loss": 0.9266, "step": 8690 }, { "epoch": 0.6278830350208608, "grad_norm": 5.0800467742809055, "learning_rate": 4.015291171660646e-06, "loss": 0.8066, "step": 8691 }, { "epoch": 0.6279552802210703, "grad_norm": 5.5277183163169505, "learning_rate": 4.0150585119383235e-06, "loss": 0.8837, "step": 8692 }, { "epoch": 0.6280275254212798, "grad_norm": 5.520334626711422, "learning_rate": 4.014825831476028e-06, "loss": 0.8454, "step": 8693 }, { "epoch": 0.6280997706214894, "grad_norm": 6.102560862824119, "learning_rate": 4.0145931302769424e-06, "loss": 0.8294, "step": 8694 }, { "epoch": 0.6281720158216988, "grad_norm": 7.729828949253457, "learning_rate": 4.014360408344253e-06, "loss": 0.9084, "step": 8695 }, { "epoch": 0.6282442610219083, "grad_norm": 5.175221815285055, "learning_rate": 4.014127665681146e-06, "loss": 0.8469, "step": 8696 }, { "epoch": 0.6283165062221179, "grad_norm": 7.141068125524967, "learning_rate": 4.013894902290806e-06, "loss": 0.8416, "step": 8697 }, { "epoch": 0.6283887514223274, "grad_norm": 6.546473524944019, "learning_rate": 4.013662118176421e-06, "loss": 0.8202, "step": 8698 }, { "epoch": 0.6284609966225368, "grad_norm": 6.526188545573369, "learning_rate": 4.013429313341176e-06, "loss": 0.9016, "step": 8699 }, { "epoch": 0.6285332418227464, "grad_norm": 7.752114530475121, "learning_rate": 4.013196487788259e-06, "loss": 0.8727, "step": 8700 }, { "epoch": 0.6286054870229559, "grad_norm": 6.195558482959505, "learning_rate": 4.012963641520858e-06, "loss": 0.8982, "step": 8701 }, { "epoch": 0.6286777322231655, "grad_norm": 6.776253402025121, "learning_rate": 4.0127307745421584e-06, "loss": 0.9, "step": 8702 }, { "epoch": 0.6287499774233749, "grad_norm": 4.532645175163786, "learning_rate": 4.0124978868553486e-06, "loss": 0.7657, "step": 8703 }, { "epoch": 0.6288222226235844, "grad_norm": 6.884463645859575, "learning_rate": 4.012264978463618e-06, "loss": 0.8208, "step": 8704 }, { "epoch": 0.628894467823794, "grad_norm": 10.89561665442913, "learning_rate": 4.012032049370155e-06, "loss": 0.8693, "step": 8705 }, { "epoch": 0.6289667130240034, "grad_norm": 7.242828768719998, "learning_rate": 4.011799099578145e-06, "loss": 0.8105, "step": 8706 }, { "epoch": 0.629038958224213, "grad_norm": 6.154618874966487, "learning_rate": 4.011566129090782e-06, "loss": 0.8565, "step": 8707 }, { "epoch": 0.6291112034244225, "grad_norm": 6.200941801834325, "learning_rate": 4.011333137911249e-06, "loss": 0.9046, "step": 8708 }, { "epoch": 0.629183448624632, "grad_norm": 7.146560972959958, "learning_rate": 4.0111001260427415e-06, "loss": 0.837, "step": 8709 }, { "epoch": 0.6292556938248415, "grad_norm": 5.9819675314126375, "learning_rate": 4.010867093488445e-06, "loss": 0.9306, "step": 8710 }, { "epoch": 0.629327939025051, "grad_norm": 8.95715262742117, "learning_rate": 4.010634040251552e-06, "loss": 0.9856, "step": 8711 }, { "epoch": 0.6294001842252606, "grad_norm": 5.374868169542935, "learning_rate": 4.0104009663352515e-06, "loss": 0.8119, "step": 8712 }, { "epoch": 0.62947242942547, "grad_norm": 7.132157060606434, "learning_rate": 4.0101678717427354e-06, "loss": 0.7803, "step": 8713 }, { "epoch": 0.6295446746256795, "grad_norm": 5.514531791781453, "learning_rate": 4.009934756477193e-06, "loss": 0.8642, "step": 8714 }, { "epoch": 0.6296169198258891, "grad_norm": 7.194908768616924, "learning_rate": 4.009701620541816e-06, "loss": 0.954, "step": 8715 }, { "epoch": 0.6296891650260986, "grad_norm": 7.931883499421097, "learning_rate": 4.0094684639397966e-06, "loss": 0.7909, "step": 8716 }, { "epoch": 0.629761410226308, "grad_norm": 8.17911432074264, "learning_rate": 4.009235286674325e-06, "loss": 0.9322, "step": 8717 }, { "epoch": 0.6298336554265176, "grad_norm": 7.5439103584343945, "learning_rate": 4.009002088748595e-06, "loss": 0.8273, "step": 8718 }, { "epoch": 0.6299059006267271, "grad_norm": 5.162498433480014, "learning_rate": 4.008768870165797e-06, "loss": 0.7806, "step": 8719 }, { "epoch": 0.6299781458269367, "grad_norm": 6.526413290205449, "learning_rate": 4.008535630929125e-06, "loss": 0.8471, "step": 8720 }, { "epoch": 0.6300503910271461, "grad_norm": 6.754208136056071, "learning_rate": 4.008302371041773e-06, "loss": 0.8687, "step": 8721 }, { "epoch": 0.6301226362273556, "grad_norm": 6.3851578511178095, "learning_rate": 4.0080690905069315e-06, "loss": 0.7783, "step": 8722 }, { "epoch": 0.6301948814275652, "grad_norm": 6.06525665218183, "learning_rate": 4.007835789327795e-06, "loss": 0.7899, "step": 8723 }, { "epoch": 0.6302671266277746, "grad_norm": 5.877446740548848, "learning_rate": 4.0076024675075575e-06, "loss": 0.8259, "step": 8724 }, { "epoch": 0.6303393718279842, "grad_norm": 5.92562578333059, "learning_rate": 4.007369125049413e-06, "loss": 0.838, "step": 8725 }, { "epoch": 0.6304116170281937, "grad_norm": 6.45260359387189, "learning_rate": 4.007135761956556e-06, "loss": 0.7733, "step": 8726 }, { "epoch": 0.6304838622284032, "grad_norm": 7.419619054757772, "learning_rate": 4.006902378232179e-06, "loss": 0.9769, "step": 8727 }, { "epoch": 0.6305561074286127, "grad_norm": 6.971521758633728, "learning_rate": 4.0066689738794795e-06, "loss": 0.9175, "step": 8728 }, { "epoch": 0.6306283526288222, "grad_norm": 6.497753928999326, "learning_rate": 4.006435548901651e-06, "loss": 0.8895, "step": 8729 }, { "epoch": 0.6307005978290318, "grad_norm": 7.163201965765293, "learning_rate": 4.006202103301891e-06, "loss": 0.8643, "step": 8730 }, { "epoch": 0.6307728430292412, "grad_norm": 5.806843291850855, "learning_rate": 4.005968637083393e-06, "loss": 0.8843, "step": 8731 }, { "epoch": 0.6308450882294507, "grad_norm": 5.729549029913405, "learning_rate": 4.005735150249353e-06, "loss": 0.8333, "step": 8732 }, { "epoch": 0.6309173334296603, "grad_norm": 6.602328103719184, "learning_rate": 4.0055016428029684e-06, "loss": 0.9721, "step": 8733 }, { "epoch": 0.6309895786298698, "grad_norm": 5.975993768895589, "learning_rate": 4.005268114747435e-06, "loss": 0.8151, "step": 8734 }, { "epoch": 0.6310618238300792, "grad_norm": 6.165184392716199, "learning_rate": 4.00503456608595e-06, "loss": 0.8674, "step": 8735 }, { "epoch": 0.6311340690302888, "grad_norm": 7.197833519403382, "learning_rate": 4.004800996821711e-06, "loss": 0.89, "step": 8736 }, { "epoch": 0.6312063142304983, "grad_norm": 5.972450587122927, "learning_rate": 4.004567406957914e-06, "loss": 0.8409, "step": 8737 }, { "epoch": 0.6312785594307079, "grad_norm": 6.306304005901059, "learning_rate": 4.004333796497758e-06, "loss": 0.8714, "step": 8738 }, { "epoch": 0.6313508046309173, "grad_norm": 7.05324765336627, "learning_rate": 4.00410016544444e-06, "loss": 0.9145, "step": 8739 }, { "epoch": 0.6314230498311268, "grad_norm": 5.4246488395986505, "learning_rate": 4.0038665138011575e-06, "loss": 0.8913, "step": 8740 }, { "epoch": 0.6314952950313364, "grad_norm": 5.585785208806885, "learning_rate": 4.003632841571112e-06, "loss": 0.8412, "step": 8741 }, { "epoch": 0.6315675402315458, "grad_norm": 7.177096060052743, "learning_rate": 4.003399148757499e-06, "loss": 0.8826, "step": 8742 }, { "epoch": 0.6316397854317554, "grad_norm": 6.1103080954033695, "learning_rate": 4.0031654353635204e-06, "loss": 0.925, "step": 8743 }, { "epoch": 0.6317120306319649, "grad_norm": 6.385566481491739, "learning_rate": 4.002931701392373e-06, "loss": 0.9062, "step": 8744 }, { "epoch": 0.6317842758321744, "grad_norm": 5.903688874583489, "learning_rate": 4.002697946847258e-06, "loss": 0.9745, "step": 8745 }, { "epoch": 0.6318565210323839, "grad_norm": 7.6143018372180205, "learning_rate": 4.002464171731375e-06, "loss": 0.8942, "step": 8746 }, { "epoch": 0.6319287662325934, "grad_norm": 7.118264745930996, "learning_rate": 4.002230376047923e-06, "loss": 0.9196, "step": 8747 }, { "epoch": 0.632001011432803, "grad_norm": 6.429291463824504, "learning_rate": 4.001996559800105e-06, "loss": 0.8561, "step": 8748 }, { "epoch": 0.6320732566330124, "grad_norm": 5.262692866109795, "learning_rate": 4.00176272299112e-06, "loss": 0.8219, "step": 8749 }, { "epoch": 0.6321455018332219, "grad_norm": 6.320552109371867, "learning_rate": 4.001528865624169e-06, "loss": 0.8655, "step": 8750 }, { "epoch": 0.6322177470334315, "grad_norm": 6.231060169814103, "learning_rate": 4.001294987702454e-06, "loss": 0.8487, "step": 8751 }, { "epoch": 0.632289992233641, "grad_norm": 7.277911685685576, "learning_rate": 4.001061089229176e-06, "loss": 0.9258, "step": 8752 }, { "epoch": 0.6323622374338504, "grad_norm": 8.00586008973222, "learning_rate": 4.000827170207537e-06, "loss": 0.8404, "step": 8753 }, { "epoch": 0.63243448263406, "grad_norm": 5.9865967134893046, "learning_rate": 4.0005932306407394e-06, "loss": 0.9005, "step": 8754 }, { "epoch": 0.6325067278342695, "grad_norm": 5.854608086973097, "learning_rate": 4.0003592705319855e-06, "loss": 0.8083, "step": 8755 }, { "epoch": 0.6325789730344791, "grad_norm": 6.8334546272243575, "learning_rate": 4.000125289884478e-06, "loss": 0.8501, "step": 8756 }, { "epoch": 0.6326512182346885, "grad_norm": 7.335667094324142, "learning_rate": 3.9998912887014205e-06, "loss": 0.8875, "step": 8757 }, { "epoch": 0.632723463434898, "grad_norm": 6.66441682363289, "learning_rate": 3.9996572669860164e-06, "loss": 0.8904, "step": 8758 }, { "epoch": 0.6327957086351076, "grad_norm": 6.64608289036817, "learning_rate": 3.999423224741469e-06, "loss": 0.8423, "step": 8759 }, { "epoch": 0.632867953835317, "grad_norm": 6.051777589552363, "learning_rate": 3.99918916197098e-06, "loss": 0.8338, "step": 8760 }, { "epoch": 0.6329401990355266, "grad_norm": 6.01621534745338, "learning_rate": 3.998955078677757e-06, "loss": 0.8669, "step": 8761 }, { "epoch": 0.6330124442357361, "grad_norm": 7.032079486662411, "learning_rate": 3.9987209748650015e-06, "loss": 0.8469, "step": 8762 }, { "epoch": 0.6330846894359456, "grad_norm": 5.4877899927490486, "learning_rate": 3.9984868505359206e-06, "loss": 0.7907, "step": 8763 }, { "epoch": 0.6331569346361551, "grad_norm": 6.270146971323095, "learning_rate": 3.9982527056937185e-06, "loss": 0.9133, "step": 8764 }, { "epoch": 0.6332291798363646, "grad_norm": 6.307197685602035, "learning_rate": 3.998018540341599e-06, "loss": 0.8653, "step": 8765 }, { "epoch": 0.6333014250365742, "grad_norm": 6.060615797324018, "learning_rate": 3.99778435448277e-06, "loss": 0.7956, "step": 8766 }, { "epoch": 0.6333736702367836, "grad_norm": 6.037056297283658, "learning_rate": 3.997550148120435e-06, "loss": 0.8323, "step": 8767 }, { "epoch": 0.6334459154369931, "grad_norm": 6.75608607824676, "learning_rate": 3.9973159212578016e-06, "loss": 0.9111, "step": 8768 }, { "epoch": 0.6335181606372027, "grad_norm": 7.788224002008978, "learning_rate": 3.997081673898077e-06, "loss": 0.7565, "step": 8769 }, { "epoch": 0.6335904058374122, "grad_norm": 5.331405748281369, "learning_rate": 3.996847406044465e-06, "loss": 0.927, "step": 8770 }, { "epoch": 0.6336626510376216, "grad_norm": 5.687466464100992, "learning_rate": 3.996613117700175e-06, "loss": 0.8967, "step": 8771 }, { "epoch": 0.6337348962378312, "grad_norm": 7.631960334178924, "learning_rate": 3.996378808868414e-06, "loss": 0.8284, "step": 8772 }, { "epoch": 0.6338071414380407, "grad_norm": 7.899833397376101, "learning_rate": 3.996144479552388e-06, "loss": 0.8885, "step": 8773 }, { "epoch": 0.6338793866382503, "grad_norm": 6.783996869500355, "learning_rate": 3.995910129755306e-06, "loss": 0.8585, "step": 8774 }, { "epoch": 0.6339516318384597, "grad_norm": 5.023790409393554, "learning_rate": 3.9956757594803765e-06, "loss": 0.7675, "step": 8775 }, { "epoch": 0.6340238770386692, "grad_norm": 5.681562069756013, "learning_rate": 3.9954413687308065e-06, "loss": 0.8697, "step": 8776 }, { "epoch": 0.6340961222388788, "grad_norm": 6.730749583792046, "learning_rate": 3.9952069575098054e-06, "loss": 0.8607, "step": 8777 }, { "epoch": 0.6341683674390882, "grad_norm": 10.022879938092542, "learning_rate": 3.994972525820582e-06, "loss": 0.9504, "step": 8778 }, { "epoch": 0.6342406126392978, "grad_norm": 6.6973126160315575, "learning_rate": 3.994738073666345e-06, "loss": 0.8251, "step": 8779 }, { "epoch": 0.6343128578395073, "grad_norm": 6.444090497319733, "learning_rate": 3.994503601050306e-06, "loss": 0.8272, "step": 8780 }, { "epoch": 0.6343851030397168, "grad_norm": 8.075482938042603, "learning_rate": 3.994269107975671e-06, "loss": 0.9418, "step": 8781 }, { "epoch": 0.6344573482399263, "grad_norm": 5.785381098981226, "learning_rate": 3.994034594445654e-06, "loss": 0.824, "step": 8782 }, { "epoch": 0.6345295934401358, "grad_norm": 6.707826614987042, "learning_rate": 3.993800060463462e-06, "loss": 0.919, "step": 8783 }, { "epoch": 0.6346018386403454, "grad_norm": 6.62695082246497, "learning_rate": 3.993565506032308e-06, "loss": 0.8774, "step": 8784 }, { "epoch": 0.6346740838405548, "grad_norm": 5.706373486222034, "learning_rate": 3.993330931155402e-06, "loss": 0.799, "step": 8785 }, { "epoch": 0.6347463290407643, "grad_norm": 5.240531921470023, "learning_rate": 3.9930963358359545e-06, "loss": 0.8331, "step": 8786 }, { "epoch": 0.6348185742409739, "grad_norm": 5.685324787400665, "learning_rate": 3.992861720077178e-06, "loss": 0.8823, "step": 8787 }, { "epoch": 0.6348908194411834, "grad_norm": 7.0811672114947575, "learning_rate": 3.992627083882282e-06, "loss": 0.8922, "step": 8788 }, { "epoch": 0.6349630646413928, "grad_norm": 6.435913186653669, "learning_rate": 3.992392427254482e-06, "loss": 0.8577, "step": 8789 }, { "epoch": 0.6350353098416024, "grad_norm": 6.389715526379609, "learning_rate": 3.992157750196988e-06, "loss": 0.8426, "step": 8790 }, { "epoch": 0.6351075550418119, "grad_norm": 4.752763446246464, "learning_rate": 3.991923052713013e-06, "loss": 0.8055, "step": 8791 }, { "epoch": 0.6351798002420215, "grad_norm": 7.2848954795822225, "learning_rate": 3.9916883348057685e-06, "loss": 0.8083, "step": 8792 }, { "epoch": 0.6352520454422309, "grad_norm": 7.457837420409338, "learning_rate": 3.991453596478471e-06, "loss": 0.8054, "step": 8793 }, { "epoch": 0.6353242906424404, "grad_norm": 6.1053840890043585, "learning_rate": 3.991218837734331e-06, "loss": 0.7809, "step": 8794 }, { "epoch": 0.63539653584265, "grad_norm": 6.91721877992984, "learning_rate": 3.990984058576563e-06, "loss": 0.874, "step": 8795 }, { "epoch": 0.6354687810428594, "grad_norm": 4.857964706613073, "learning_rate": 3.99074925900838e-06, "loss": 0.8117, "step": 8796 }, { "epoch": 0.635541026243069, "grad_norm": 7.387617793814219, "learning_rate": 3.990514439032998e-06, "loss": 0.8549, "step": 8797 }, { "epoch": 0.6356132714432785, "grad_norm": 6.068866650828399, "learning_rate": 3.99027959865363e-06, "loss": 0.7671, "step": 8798 }, { "epoch": 0.635685516643488, "grad_norm": 6.650118157046951, "learning_rate": 3.990044737873493e-06, "loss": 0.8958, "step": 8799 }, { "epoch": 0.6357577618436975, "grad_norm": 5.660184219112229, "learning_rate": 3.989809856695799e-06, "loss": 0.8406, "step": 8800 }, { "epoch": 0.635830007043907, "grad_norm": 7.106007207985467, "learning_rate": 3.989574955123765e-06, "loss": 0.7636, "step": 8801 }, { "epoch": 0.6359022522441166, "grad_norm": 6.16883578255325, "learning_rate": 3.9893400331606075e-06, "loss": 0.8132, "step": 8802 }, { "epoch": 0.635974497444326, "grad_norm": 6.247298610535535, "learning_rate": 3.98910509080954e-06, "loss": 0.8229, "step": 8803 }, { "epoch": 0.6360467426445355, "grad_norm": 6.946095133948518, "learning_rate": 3.988870128073781e-06, "loss": 0.827, "step": 8804 }, { "epoch": 0.6361189878447451, "grad_norm": 6.964928370297081, "learning_rate": 3.988635144956546e-06, "loss": 0.8954, "step": 8805 }, { "epoch": 0.6361912330449546, "grad_norm": 6.063651378665514, "learning_rate": 3.98840014146105e-06, "loss": 0.7878, "step": 8806 }, { "epoch": 0.636263478245164, "grad_norm": 7.561696411818694, "learning_rate": 3.988165117590514e-06, "loss": 0.9234, "step": 8807 }, { "epoch": 0.6363357234453736, "grad_norm": 7.511381541443146, "learning_rate": 3.987930073348152e-06, "loss": 0.8393, "step": 8808 }, { "epoch": 0.6364079686455831, "grad_norm": 5.813382645727402, "learning_rate": 3.9876950087371826e-06, "loss": 0.9414, "step": 8809 }, { "epoch": 0.6364802138457927, "grad_norm": 6.083195236241112, "learning_rate": 3.9874599237608236e-06, "loss": 0.8275, "step": 8810 }, { "epoch": 0.6365524590460021, "grad_norm": 6.173964057543411, "learning_rate": 3.987224818422293e-06, "loss": 0.8838, "step": 8811 }, { "epoch": 0.6366247042462116, "grad_norm": 6.791756783416064, "learning_rate": 3.98698969272481e-06, "loss": 0.8935, "step": 8812 }, { "epoch": 0.6366969494464212, "grad_norm": 7.35516120158284, "learning_rate": 3.986754546671592e-06, "loss": 0.8739, "step": 8813 }, { "epoch": 0.6367691946466306, "grad_norm": 7.2364707201706215, "learning_rate": 3.986519380265859e-06, "loss": 0.9234, "step": 8814 }, { "epoch": 0.6368414398468402, "grad_norm": 6.4027780940602135, "learning_rate": 3.9862841935108295e-06, "loss": 0.8922, "step": 8815 }, { "epoch": 0.6369136850470497, "grad_norm": 5.39887205988834, "learning_rate": 3.986048986409724e-06, "loss": 0.8013, "step": 8816 }, { "epoch": 0.6369859302472592, "grad_norm": 5.5865391120521375, "learning_rate": 3.985813758965762e-06, "loss": 0.8422, "step": 8817 }, { "epoch": 0.6370581754474687, "grad_norm": 6.199288899650356, "learning_rate": 3.985578511182163e-06, "loss": 0.8207, "step": 8818 }, { "epoch": 0.6371304206476782, "grad_norm": 5.649436260895238, "learning_rate": 3.985343243062148e-06, "loss": 0.7677, "step": 8819 }, { "epoch": 0.6372026658478878, "grad_norm": 7.613635990965794, "learning_rate": 3.985107954608935e-06, "loss": 0.8364, "step": 8820 }, { "epoch": 0.6372749110480972, "grad_norm": 6.341270826754065, "learning_rate": 3.98487264582575e-06, "loss": 0.9381, "step": 8821 }, { "epoch": 0.6373471562483067, "grad_norm": 7.280920717054753, "learning_rate": 3.98463731671581e-06, "loss": 0.9618, "step": 8822 }, { "epoch": 0.6374194014485163, "grad_norm": 5.832407669010226, "learning_rate": 3.9844019672823385e-06, "loss": 0.8339, "step": 8823 }, { "epoch": 0.6374916466487258, "grad_norm": 6.561281799599813, "learning_rate": 3.984166597528557e-06, "loss": 0.8482, "step": 8824 }, { "epoch": 0.6375638918489352, "grad_norm": 7.483005597500031, "learning_rate": 3.983931207457686e-06, "loss": 0.8492, "step": 8825 }, { "epoch": 0.6376361370491448, "grad_norm": 6.085506869592974, "learning_rate": 3.983695797072949e-06, "loss": 0.855, "step": 8826 }, { "epoch": 0.6377083822493543, "grad_norm": 6.418958609663125, "learning_rate": 3.98346036637757e-06, "loss": 0.8431, "step": 8827 }, { "epoch": 0.6377806274495639, "grad_norm": 6.079484253140203, "learning_rate": 3.983224915374769e-06, "loss": 0.8212, "step": 8828 }, { "epoch": 0.6378528726497733, "grad_norm": 8.54874668862571, "learning_rate": 3.982989444067771e-06, "loss": 0.953, "step": 8829 }, { "epoch": 0.6379251178499828, "grad_norm": 6.362175102407305, "learning_rate": 3.982753952459799e-06, "loss": 0.8296, "step": 8830 }, { "epoch": 0.6379973630501924, "grad_norm": 5.447560791684803, "learning_rate": 3.982518440554078e-06, "loss": 0.8431, "step": 8831 }, { "epoch": 0.6380696082504018, "grad_norm": 7.7237088108893746, "learning_rate": 3.982282908353829e-06, "loss": 0.9218, "step": 8832 }, { "epoch": 0.6381418534506114, "grad_norm": 5.8162462200953495, "learning_rate": 3.982047355862277e-06, "loss": 0.8876, "step": 8833 }, { "epoch": 0.6382140986508209, "grad_norm": 6.7220152971070375, "learning_rate": 3.98181178308265e-06, "loss": 0.8545, "step": 8834 }, { "epoch": 0.6382863438510304, "grad_norm": 6.422463049034299, "learning_rate": 3.981576190018168e-06, "loss": 0.8564, "step": 8835 }, { "epoch": 0.6383585890512399, "grad_norm": 6.180291803378085, "learning_rate": 3.981340576672059e-06, "loss": 0.866, "step": 8836 }, { "epoch": 0.6384308342514494, "grad_norm": 6.830445338373466, "learning_rate": 3.981104943047549e-06, "loss": 0.8222, "step": 8837 }, { "epoch": 0.638503079451659, "grad_norm": 5.523904697024256, "learning_rate": 3.980869289147861e-06, "loss": 0.8527, "step": 8838 }, { "epoch": 0.6385753246518684, "grad_norm": 5.934992210428064, "learning_rate": 3.980633614976222e-06, "loss": 0.8474, "step": 8839 }, { "epoch": 0.6386475698520779, "grad_norm": 8.128728861892014, "learning_rate": 3.9803979205358586e-06, "loss": 0.8246, "step": 8840 }, { "epoch": 0.6387198150522875, "grad_norm": 5.766874679312403, "learning_rate": 3.980162205829998e-06, "loss": 0.9161, "step": 8841 }, { "epoch": 0.638792060252497, "grad_norm": 5.542123398088819, "learning_rate": 3.979926470861866e-06, "loss": 0.8759, "step": 8842 }, { "epoch": 0.6388643054527064, "grad_norm": 5.334220196578954, "learning_rate": 3.979690715634689e-06, "loss": 0.8351, "step": 8843 }, { "epoch": 0.638936550652916, "grad_norm": 6.418003818186299, "learning_rate": 3.979454940151695e-06, "loss": 0.8449, "step": 8844 }, { "epoch": 0.6390087958531255, "grad_norm": 6.929042850687255, "learning_rate": 3.9792191444161125e-06, "loss": 0.823, "step": 8845 }, { "epoch": 0.639081041053335, "grad_norm": 6.008649313876492, "learning_rate": 3.978983328431168e-06, "loss": 0.9188, "step": 8846 }, { "epoch": 0.6391532862535445, "grad_norm": 7.126297146691908, "learning_rate": 3.97874749220009e-06, "loss": 0.8844, "step": 8847 }, { "epoch": 0.639225531453754, "grad_norm": 6.658542005621426, "learning_rate": 3.978511635726106e-06, "loss": 0.9205, "step": 8848 }, { "epoch": 0.6392977766539636, "grad_norm": 7.3119104913955075, "learning_rate": 3.9782757590124475e-06, "loss": 0.9773, "step": 8849 }, { "epoch": 0.639370021854173, "grad_norm": 6.699996253624623, "learning_rate": 3.978039862062341e-06, "loss": 0.8971, "step": 8850 }, { "epoch": 0.6394422670543826, "grad_norm": 5.844223900821193, "learning_rate": 3.977803944879015e-06, "loss": 0.8584, "step": 8851 }, { "epoch": 0.6395145122545921, "grad_norm": 7.2032335649695485, "learning_rate": 3.9775680074657025e-06, "loss": 0.8739, "step": 8852 }, { "epoch": 0.6395867574548016, "grad_norm": 5.41196029826712, "learning_rate": 3.977332049825631e-06, "loss": 0.7687, "step": 8853 }, { "epoch": 0.6396590026550111, "grad_norm": 5.859122715922934, "learning_rate": 3.97709607196203e-06, "loss": 0.886, "step": 8854 }, { "epoch": 0.6397312478552206, "grad_norm": 5.631006551850859, "learning_rate": 3.97686007387813e-06, "loss": 0.8265, "step": 8855 }, { "epoch": 0.6398034930554302, "grad_norm": 6.1684223803064295, "learning_rate": 3.976624055577163e-06, "loss": 0.7754, "step": 8856 }, { "epoch": 0.6398757382556396, "grad_norm": 5.972896713296245, "learning_rate": 3.97638801706236e-06, "loss": 0.8501, "step": 8857 }, { "epoch": 0.6399479834558491, "grad_norm": 6.78660210559367, "learning_rate": 3.97615195833695e-06, "loss": 0.8954, "step": 8858 }, { "epoch": 0.6400202286560587, "grad_norm": 4.805575653112547, "learning_rate": 3.975915879404168e-06, "loss": 0.7679, "step": 8859 }, { "epoch": 0.6400924738562682, "grad_norm": 7.088799316148603, "learning_rate": 3.975679780267241e-06, "loss": 0.8567, "step": 8860 }, { "epoch": 0.6401647190564776, "grad_norm": 5.4877395959915125, "learning_rate": 3.975443660929406e-06, "loss": 0.8429, "step": 8861 }, { "epoch": 0.6402369642566872, "grad_norm": 5.6914022284646055, "learning_rate": 3.975207521393891e-06, "loss": 0.966, "step": 8862 }, { "epoch": 0.6403092094568967, "grad_norm": 5.858441657434548, "learning_rate": 3.9749713616639305e-06, "loss": 0.943, "step": 8863 }, { "epoch": 0.6403814546571062, "grad_norm": 6.408214137734365, "learning_rate": 3.974735181742758e-06, "loss": 0.8695, "step": 8864 }, { "epoch": 0.6404536998573157, "grad_norm": 7.655068318007955, "learning_rate": 3.974498981633606e-06, "loss": 0.8906, "step": 8865 }, { "epoch": 0.6405259450575252, "grad_norm": 6.035436886679037, "learning_rate": 3.974262761339708e-06, "loss": 0.8673, "step": 8866 }, { "epoch": 0.6405981902577348, "grad_norm": 6.646670903568327, "learning_rate": 3.974026520864297e-06, "loss": 0.9299, "step": 8867 }, { "epoch": 0.6406704354579442, "grad_norm": 5.861350903821116, "learning_rate": 3.973790260210609e-06, "loss": 0.9411, "step": 8868 }, { "epoch": 0.6407426806581538, "grad_norm": 6.610178941390848, "learning_rate": 3.973553979381874e-06, "loss": 0.8134, "step": 8869 }, { "epoch": 0.6408149258583633, "grad_norm": 5.38936823693521, "learning_rate": 3.9733176783813305e-06, "loss": 0.7965, "step": 8870 }, { "epoch": 0.6408871710585728, "grad_norm": 6.479589254288953, "learning_rate": 3.9730813572122116e-06, "loss": 0.821, "step": 8871 }, { "epoch": 0.6409594162587823, "grad_norm": 6.605745947022301, "learning_rate": 3.972845015877753e-06, "loss": 0.8963, "step": 8872 }, { "epoch": 0.6410316614589918, "grad_norm": 6.237485028633085, "learning_rate": 3.97260865438119e-06, "loss": 0.8351, "step": 8873 }, { "epoch": 0.6411039066592014, "grad_norm": 7.370253247655084, "learning_rate": 3.972372272725758e-06, "loss": 0.8388, "step": 8874 }, { "epoch": 0.6411761518594108, "grad_norm": 7.020856847827447, "learning_rate": 3.972135870914692e-06, "loss": 0.8018, "step": 8875 }, { "epoch": 0.6412483970596203, "grad_norm": 5.807658486406252, "learning_rate": 3.971899448951229e-06, "loss": 0.9365, "step": 8876 }, { "epoch": 0.6413206422598299, "grad_norm": 6.786720425006268, "learning_rate": 3.971663006838606e-06, "loss": 0.8461, "step": 8877 }, { "epoch": 0.6413928874600394, "grad_norm": 7.981827122218595, "learning_rate": 3.9714265445800595e-06, "loss": 0.8928, "step": 8878 }, { "epoch": 0.6414651326602488, "grad_norm": 6.462520529974492, "learning_rate": 3.971190062178825e-06, "loss": 0.9296, "step": 8879 }, { "epoch": 0.6415373778604584, "grad_norm": 6.4984897913185256, "learning_rate": 3.970953559638141e-06, "loss": 0.8593, "step": 8880 }, { "epoch": 0.6416096230606679, "grad_norm": 6.693546022073007, "learning_rate": 3.9707170369612455e-06, "loss": 0.8248, "step": 8881 }, { "epoch": 0.6416818682608774, "grad_norm": 7.062651776058571, "learning_rate": 3.970480494151375e-06, "loss": 0.8032, "step": 8882 }, { "epoch": 0.6417541134610869, "grad_norm": 5.631284128100489, "learning_rate": 3.970243931211769e-06, "loss": 0.8549, "step": 8883 }, { "epoch": 0.6418263586612964, "grad_norm": 5.439322506005763, "learning_rate": 3.970007348145664e-06, "loss": 0.7844, "step": 8884 }, { "epoch": 0.641898603861506, "grad_norm": 5.860190698430611, "learning_rate": 3.969770744956301e-06, "loss": 0.8047, "step": 8885 }, { "epoch": 0.6419708490617154, "grad_norm": 9.620506129284143, "learning_rate": 3.969534121646918e-06, "loss": 0.8094, "step": 8886 }, { "epoch": 0.642043094261925, "grad_norm": 7.574941694079313, "learning_rate": 3.9692974782207535e-06, "loss": 0.7891, "step": 8887 }, { "epoch": 0.6421153394621345, "grad_norm": 6.539782923090976, "learning_rate": 3.969060814681047e-06, "loss": 0.9308, "step": 8888 }, { "epoch": 0.642187584662344, "grad_norm": 9.969661085055947, "learning_rate": 3.968824131031039e-06, "loss": 0.8971, "step": 8889 }, { "epoch": 0.6422598298625535, "grad_norm": 6.590886221819099, "learning_rate": 3.968587427273968e-06, "loss": 0.8792, "step": 8890 }, { "epoch": 0.642332075062763, "grad_norm": 6.829369333396998, "learning_rate": 3.968350703413076e-06, "loss": 0.8563, "step": 8891 }, { "epoch": 0.6424043202629726, "grad_norm": 8.038153742519416, "learning_rate": 3.968113959451604e-06, "loss": 0.8369, "step": 8892 }, { "epoch": 0.642476565463182, "grad_norm": 6.514698722003694, "learning_rate": 3.9678771953927905e-06, "loss": 0.8246, "step": 8893 }, { "epoch": 0.6425488106633915, "grad_norm": 6.551341424692398, "learning_rate": 3.967640411239879e-06, "loss": 0.8128, "step": 8894 }, { "epoch": 0.6426210558636011, "grad_norm": 8.627804065387824, "learning_rate": 3.967403606996109e-06, "loss": 0.7666, "step": 8895 }, { "epoch": 0.6426933010638106, "grad_norm": 6.406065105468258, "learning_rate": 3.967166782664723e-06, "loss": 0.772, "step": 8896 }, { "epoch": 0.64276554626402, "grad_norm": 6.578983205597294, "learning_rate": 3.966929938248963e-06, "loss": 0.8139, "step": 8897 }, { "epoch": 0.6428377914642296, "grad_norm": 8.844239993051268, "learning_rate": 3.966693073752072e-06, "loss": 0.7874, "step": 8898 }, { "epoch": 0.6429100366644391, "grad_norm": 6.706510529163673, "learning_rate": 3.966456189177291e-06, "loss": 0.9465, "step": 8899 }, { "epoch": 0.6429822818646486, "grad_norm": 9.12843864134544, "learning_rate": 3.966219284527864e-06, "loss": 0.8858, "step": 8900 }, { "epoch": 0.6430545270648581, "grad_norm": 6.775447208523455, "learning_rate": 3.965982359807032e-06, "loss": 0.7972, "step": 8901 }, { "epoch": 0.6431267722650676, "grad_norm": 6.248571308875302, "learning_rate": 3.965745415018041e-06, "loss": 0.851, "step": 8902 }, { "epoch": 0.6431990174652772, "grad_norm": 6.524245599274175, "learning_rate": 3.9655084501641325e-06, "loss": 0.8243, "step": 8903 }, { "epoch": 0.6432712626654866, "grad_norm": 7.782305419920615, "learning_rate": 3.965271465248551e-06, "loss": 0.9182, "step": 8904 }, { "epoch": 0.6433435078656962, "grad_norm": 10.024218795665771, "learning_rate": 3.965034460274542e-06, "loss": 0.8823, "step": 8905 }, { "epoch": 0.6434157530659057, "grad_norm": 7.949484840832087, "learning_rate": 3.964797435245348e-06, "loss": 0.8555, "step": 8906 }, { "epoch": 0.6434879982661152, "grad_norm": 6.509827154299677, "learning_rate": 3.964560390164215e-06, "loss": 0.8113, "step": 8907 }, { "epoch": 0.6435602434663247, "grad_norm": 5.018422520120651, "learning_rate": 3.964323325034387e-06, "loss": 0.8175, "step": 8908 }, { "epoch": 0.6436324886665342, "grad_norm": 6.227361343046909, "learning_rate": 3.9640862398591096e-06, "loss": 0.8038, "step": 8909 }, { "epoch": 0.6437047338667438, "grad_norm": 6.587828666696489, "learning_rate": 3.963849134641629e-06, "loss": 0.8729, "step": 8910 }, { "epoch": 0.6437769790669532, "grad_norm": 9.147811594595444, "learning_rate": 3.963612009385189e-06, "loss": 0.8939, "step": 8911 }, { "epoch": 0.6438492242671627, "grad_norm": 8.913734690784516, "learning_rate": 3.9633748640930384e-06, "loss": 0.8978, "step": 8912 }, { "epoch": 0.6439214694673723, "grad_norm": 6.921068958341843, "learning_rate": 3.963137698768422e-06, "loss": 0.9342, "step": 8913 }, { "epoch": 0.6439937146675818, "grad_norm": 9.01671743137738, "learning_rate": 3.962900513414586e-06, "loss": 0.8922, "step": 8914 }, { "epoch": 0.6440659598677912, "grad_norm": 8.450471528570212, "learning_rate": 3.962663308034778e-06, "loss": 0.8034, "step": 8915 }, { "epoch": 0.6441382050680008, "grad_norm": 7.594273427497956, "learning_rate": 3.962426082632246e-06, "loss": 0.9164, "step": 8916 }, { "epoch": 0.6442104502682103, "grad_norm": 8.16266108261926, "learning_rate": 3.9621888372102344e-06, "loss": 0.8875, "step": 8917 }, { "epoch": 0.6442826954684198, "grad_norm": 9.487395909960949, "learning_rate": 3.961951571771994e-06, "loss": 0.7804, "step": 8918 }, { "epoch": 0.6443549406686293, "grad_norm": 9.867981166739675, "learning_rate": 3.961714286320772e-06, "loss": 0.9331, "step": 8919 }, { "epoch": 0.6444271858688388, "grad_norm": 7.355114264310545, "learning_rate": 3.9614769808598156e-06, "loss": 0.8371, "step": 8920 }, { "epoch": 0.6444994310690484, "grad_norm": 5.959207466223028, "learning_rate": 3.961239655392375e-06, "loss": 0.8136, "step": 8921 }, { "epoch": 0.6445716762692578, "grad_norm": 5.641899831321453, "learning_rate": 3.961002309921698e-06, "loss": 0.8379, "step": 8922 }, { "epoch": 0.6446439214694674, "grad_norm": 11.418260746138042, "learning_rate": 3.960764944451033e-06, "loss": 0.8194, "step": 8923 }, { "epoch": 0.6447161666696769, "grad_norm": 7.17038207628248, "learning_rate": 3.96052755898363e-06, "loss": 0.8467, "step": 8924 }, { "epoch": 0.6447884118698864, "grad_norm": 8.273749182632343, "learning_rate": 3.96029015352274e-06, "loss": 0.8381, "step": 8925 }, { "epoch": 0.6448606570700959, "grad_norm": 6.314976574634588, "learning_rate": 3.960052728071611e-06, "loss": 0.9022, "step": 8926 }, { "epoch": 0.6449329022703054, "grad_norm": 8.654270648163262, "learning_rate": 3.959815282633493e-06, "loss": 0.833, "step": 8927 }, { "epoch": 0.645005147470515, "grad_norm": 5.752945187155533, "learning_rate": 3.9595778172116384e-06, "loss": 0.8023, "step": 8928 }, { "epoch": 0.6450773926707244, "grad_norm": 6.789733361920523, "learning_rate": 3.959340331809296e-06, "loss": 0.8922, "step": 8929 }, { "epoch": 0.6451496378709339, "grad_norm": 6.1303961203006185, "learning_rate": 3.959102826429718e-06, "loss": 0.8515, "step": 8930 }, { "epoch": 0.6452218830711435, "grad_norm": 8.262230332860625, "learning_rate": 3.958865301076156e-06, "loss": 0.8134, "step": 8931 }, { "epoch": 0.645294128271353, "grad_norm": 7.730643433789365, "learning_rate": 3.958627755751859e-06, "loss": 0.8593, "step": 8932 }, { "epoch": 0.6453663734715624, "grad_norm": 7.585416263230028, "learning_rate": 3.9583901904600815e-06, "loss": 0.7967, "step": 8933 }, { "epoch": 0.645438618671772, "grad_norm": 5.416600720908915, "learning_rate": 3.958152605204075e-06, "loss": 0.8163, "step": 8934 }, { "epoch": 0.6455108638719815, "grad_norm": 6.610605400397578, "learning_rate": 3.957914999987091e-06, "loss": 0.836, "step": 8935 }, { "epoch": 0.645583109072191, "grad_norm": 5.882339438254622, "learning_rate": 3.957677374812383e-06, "loss": 0.8551, "step": 8936 }, { "epoch": 0.6456553542724005, "grad_norm": 5.691767171230227, "learning_rate": 3.957439729683203e-06, "loss": 0.9016, "step": 8937 }, { "epoch": 0.64572759947261, "grad_norm": 5.177607652400601, "learning_rate": 3.957202064602805e-06, "loss": 0.7567, "step": 8938 }, { "epoch": 0.6457998446728196, "grad_norm": 9.049909171813074, "learning_rate": 3.9569643795744425e-06, "loss": 0.855, "step": 8939 }, { "epoch": 0.645872089873029, "grad_norm": 10.24210109135938, "learning_rate": 3.9567266746013686e-06, "loss": 0.8897, "step": 8940 }, { "epoch": 0.6459443350732386, "grad_norm": 6.559321042257376, "learning_rate": 3.9564889496868385e-06, "loss": 0.8537, "step": 8941 }, { "epoch": 0.6460165802734481, "grad_norm": 7.026666800530931, "learning_rate": 3.956251204834104e-06, "loss": 0.7624, "step": 8942 }, { "epoch": 0.6460888254736576, "grad_norm": 6.629084929180725, "learning_rate": 3.956013440046422e-06, "loss": 0.8064, "step": 8943 }, { "epoch": 0.6461610706738671, "grad_norm": 8.734802126463803, "learning_rate": 3.955775655327047e-06, "loss": 0.8418, "step": 8944 }, { "epoch": 0.6462333158740766, "grad_norm": 8.256880232084766, "learning_rate": 3.955537850679233e-06, "loss": 0.9639, "step": 8945 }, { "epoch": 0.6463055610742862, "grad_norm": 6.885082827355876, "learning_rate": 3.9553000261062365e-06, "loss": 0.8467, "step": 8946 }, { "epoch": 0.6463778062744956, "grad_norm": 7.869760617174506, "learning_rate": 3.9550621816113125e-06, "loss": 0.8768, "step": 8947 }, { "epoch": 0.6464500514747051, "grad_norm": 6.031479589015853, "learning_rate": 3.954824317197716e-06, "loss": 0.838, "step": 8948 }, { "epoch": 0.6465222966749147, "grad_norm": 6.549339249458084, "learning_rate": 3.954586432868705e-06, "loss": 0.8463, "step": 8949 }, { "epoch": 0.6465945418751242, "grad_norm": 6.793789428484223, "learning_rate": 3.954348528627535e-06, "loss": 0.8532, "step": 8950 }, { "epoch": 0.6466667870753336, "grad_norm": 7.033571394135165, "learning_rate": 3.954110604477463e-06, "loss": 0.8166, "step": 8951 }, { "epoch": 0.6467390322755432, "grad_norm": 6.195341439678625, "learning_rate": 3.953872660421746e-06, "loss": 0.8556, "step": 8952 }, { "epoch": 0.6468112774757527, "grad_norm": 7.525792340611313, "learning_rate": 3.953634696463641e-06, "loss": 0.8769, "step": 8953 }, { "epoch": 0.6468835226759622, "grad_norm": 6.794226821127172, "learning_rate": 3.953396712606405e-06, "loss": 0.9084, "step": 8954 }, { "epoch": 0.6469557678761717, "grad_norm": 6.760025209007435, "learning_rate": 3.953158708853298e-06, "loss": 0.8786, "step": 8955 }, { "epoch": 0.6470280130763812, "grad_norm": 6.284607251018996, "learning_rate": 3.952920685207575e-06, "loss": 0.8292, "step": 8956 }, { "epoch": 0.6471002582765908, "grad_norm": 7.1738095136567726, "learning_rate": 3.952682641672497e-06, "loss": 0.9663, "step": 8957 }, { "epoch": 0.6471725034768002, "grad_norm": 5.610218547841164, "learning_rate": 3.952444578251321e-06, "loss": 0.8407, "step": 8958 }, { "epoch": 0.6472447486770098, "grad_norm": 5.9434225960940825, "learning_rate": 3.9522064949473065e-06, "loss": 0.8115, "step": 8959 }, { "epoch": 0.6473169938772193, "grad_norm": 7.027489500034539, "learning_rate": 3.951968391763713e-06, "loss": 0.9205, "step": 8960 }, { "epoch": 0.6473892390774288, "grad_norm": 5.228152091804899, "learning_rate": 3.9517302687037996e-06, "loss": 0.7813, "step": 8961 }, { "epoch": 0.6474614842776383, "grad_norm": 6.521524443479034, "learning_rate": 3.951492125770826e-06, "loss": 0.9276, "step": 8962 }, { "epoch": 0.6475337294778478, "grad_norm": 4.867137731110916, "learning_rate": 3.951253962968052e-06, "loss": 0.7958, "step": 8963 }, { "epoch": 0.6476059746780574, "grad_norm": 5.270407115404784, "learning_rate": 3.951015780298738e-06, "loss": 0.8618, "step": 8964 }, { "epoch": 0.6476782198782668, "grad_norm": 6.265294092434709, "learning_rate": 3.9507775777661445e-06, "loss": 0.8659, "step": 8965 }, { "epoch": 0.6477504650784763, "grad_norm": 6.572940913023161, "learning_rate": 3.9505393553735325e-06, "loss": 0.84, "step": 8966 }, { "epoch": 0.6478227102786859, "grad_norm": 6.178259837753891, "learning_rate": 3.950301113124163e-06, "loss": 0.8579, "step": 8967 }, { "epoch": 0.6478949554788954, "grad_norm": 6.030748099360431, "learning_rate": 3.950062851021298e-06, "loss": 0.8004, "step": 8968 }, { "epoch": 0.6479672006791048, "grad_norm": 5.98668910769283, "learning_rate": 3.949824569068198e-06, "loss": 0.8068, "step": 8969 }, { "epoch": 0.6480394458793144, "grad_norm": 6.493394576507784, "learning_rate": 3.949586267268125e-06, "loss": 0.8214, "step": 8970 }, { "epoch": 0.6481116910795239, "grad_norm": 6.568553348867909, "learning_rate": 3.949347945624342e-06, "loss": 0.8644, "step": 8971 }, { "epoch": 0.6481839362797334, "grad_norm": 6.2483698435595425, "learning_rate": 3.949109604140111e-06, "loss": 0.7739, "step": 8972 }, { "epoch": 0.6482561814799429, "grad_norm": 5.769707782294213, "learning_rate": 3.948871242818695e-06, "loss": 0.8548, "step": 8973 }, { "epoch": 0.6483284266801524, "grad_norm": 5.097940605095938, "learning_rate": 3.948632861663355e-06, "loss": 0.8821, "step": 8974 }, { "epoch": 0.648400671880362, "grad_norm": 7.770040381288599, "learning_rate": 3.948394460677358e-06, "loss": 0.9094, "step": 8975 }, { "epoch": 0.6484729170805714, "grad_norm": 7.297572361952274, "learning_rate": 3.948156039863964e-06, "loss": 0.8103, "step": 8976 }, { "epoch": 0.648545162280781, "grad_norm": 6.954065289300321, "learning_rate": 3.947917599226439e-06, "loss": 0.8722, "step": 8977 }, { "epoch": 0.6486174074809905, "grad_norm": 6.52957496883845, "learning_rate": 3.947679138768046e-06, "loss": 0.9429, "step": 8978 }, { "epoch": 0.6486896526812, "grad_norm": 6.383275663083803, "learning_rate": 3.94744065849205e-06, "loss": 0.8463, "step": 8979 }, { "epoch": 0.6487618978814095, "grad_norm": 6.751881090066274, "learning_rate": 3.947202158401715e-06, "loss": 0.8821, "step": 8980 }, { "epoch": 0.648834143081619, "grad_norm": 6.076507431569385, "learning_rate": 3.946963638500306e-06, "loss": 0.9204, "step": 8981 }, { "epoch": 0.6489063882818286, "grad_norm": 6.014049611086475, "learning_rate": 3.946725098791089e-06, "loss": 0.8997, "step": 8982 }, { "epoch": 0.648978633482038, "grad_norm": 8.041543383139219, "learning_rate": 3.946486539277328e-06, "loss": 0.8285, "step": 8983 }, { "epoch": 0.6490508786822475, "grad_norm": 6.995165381504544, "learning_rate": 3.946247959962289e-06, "loss": 0.8142, "step": 8984 }, { "epoch": 0.6491231238824571, "grad_norm": 6.419804222948417, "learning_rate": 3.946009360849239e-06, "loss": 0.9177, "step": 8985 }, { "epoch": 0.6491953690826666, "grad_norm": 6.778148592340223, "learning_rate": 3.945770741941443e-06, "loss": 0.8996, "step": 8986 }, { "epoch": 0.649267614282876, "grad_norm": 6.569554488174651, "learning_rate": 3.945532103242169e-06, "loss": 0.9115, "step": 8987 }, { "epoch": 0.6493398594830856, "grad_norm": 5.516998725109634, "learning_rate": 3.945293444754682e-06, "loss": 0.8628, "step": 8988 }, { "epoch": 0.6494121046832951, "grad_norm": 5.823434696345247, "learning_rate": 3.945054766482251e-06, "loss": 0.896, "step": 8989 }, { "epoch": 0.6494843498835046, "grad_norm": 7.496003675689936, "learning_rate": 3.9448160684281414e-06, "loss": 0.8396, "step": 8990 }, { "epoch": 0.6495565950837141, "grad_norm": 7.00186541088714, "learning_rate": 3.944577350595622e-06, "loss": 0.8703, "step": 8991 }, { "epoch": 0.6496288402839236, "grad_norm": 5.478532336204459, "learning_rate": 3.94433861298796e-06, "loss": 0.8916, "step": 8992 }, { "epoch": 0.6497010854841332, "grad_norm": 5.9273250733897465, "learning_rate": 3.944099855608424e-06, "loss": 0.8986, "step": 8993 }, { "epoch": 0.6497733306843426, "grad_norm": 6.243847985400904, "learning_rate": 3.943861078460283e-06, "loss": 0.8633, "step": 8994 }, { "epoch": 0.6498455758845522, "grad_norm": 6.933880110723907, "learning_rate": 3.9436222815468035e-06, "loss": 0.8276, "step": 8995 }, { "epoch": 0.6499178210847617, "grad_norm": 6.101496541036732, "learning_rate": 3.943383464871257e-06, "loss": 0.8273, "step": 8996 }, { "epoch": 0.6499900662849712, "grad_norm": 6.098006576259291, "learning_rate": 3.943144628436911e-06, "loss": 0.7822, "step": 8997 }, { "epoch": 0.6500623114851807, "grad_norm": 6.465779530083022, "learning_rate": 3.942905772247036e-06, "loss": 0.8337, "step": 8998 }, { "epoch": 0.6501345566853902, "grad_norm": 6.1842488696713405, "learning_rate": 3.942666896304901e-06, "loss": 0.872, "step": 8999 }, { "epoch": 0.6502068018855998, "grad_norm": 6.2136817099901736, "learning_rate": 3.942428000613776e-06, "loss": 0.8215, "step": 9000 }, { "epoch": 0.6502790470858092, "grad_norm": 5.993137249537748, "learning_rate": 3.942189085176933e-06, "loss": 0.9213, "step": 9001 }, { "epoch": 0.6503512922860187, "grad_norm": 6.518884077000833, "learning_rate": 3.94195014999764e-06, "loss": 0.8824, "step": 9002 }, { "epoch": 0.6504235374862283, "grad_norm": 8.63732295206798, "learning_rate": 3.941711195079169e-06, "loss": 0.8762, "step": 9003 }, { "epoch": 0.6504957826864378, "grad_norm": 5.191510040876026, "learning_rate": 3.941472220424791e-06, "loss": 0.8376, "step": 9004 }, { "epoch": 0.6505680278866472, "grad_norm": 6.164607382939405, "learning_rate": 3.941233226037778e-06, "loss": 0.9631, "step": 9005 }, { "epoch": 0.6506402730868568, "grad_norm": 6.6695892921291335, "learning_rate": 3.940994211921401e-06, "loss": 0.8197, "step": 9006 }, { "epoch": 0.6507125182870663, "grad_norm": 5.527098913802258, "learning_rate": 3.940755178078933e-06, "loss": 0.8823, "step": 9007 }, { "epoch": 0.6507847634872758, "grad_norm": 6.815978395866577, "learning_rate": 3.9405161245136444e-06, "loss": 0.8753, "step": 9008 }, { "epoch": 0.6508570086874853, "grad_norm": 6.795164677952131, "learning_rate": 3.940277051228808e-06, "loss": 0.8408, "step": 9009 }, { "epoch": 0.6509292538876948, "grad_norm": 6.232038093401502, "learning_rate": 3.940037958227698e-06, "loss": 0.8674, "step": 9010 }, { "epoch": 0.6510014990879044, "grad_norm": 5.1075279326942455, "learning_rate": 3.9397988455135865e-06, "loss": 0.8046, "step": 9011 }, { "epoch": 0.6510737442881138, "grad_norm": 4.882536710961498, "learning_rate": 3.939559713089747e-06, "loss": 0.854, "step": 9012 }, { "epoch": 0.6511459894883234, "grad_norm": 5.830345732970455, "learning_rate": 3.939320560959452e-06, "loss": 0.8762, "step": 9013 }, { "epoch": 0.6512182346885329, "grad_norm": 5.431941381067951, "learning_rate": 3.939081389125976e-06, "loss": 0.9098, "step": 9014 }, { "epoch": 0.6512904798887424, "grad_norm": 6.4444036409394405, "learning_rate": 3.9388421975925935e-06, "loss": 0.8024, "step": 9015 }, { "epoch": 0.6513627250889519, "grad_norm": 10.786593586313014, "learning_rate": 3.938602986362579e-06, "loss": 0.8968, "step": 9016 }, { "epoch": 0.6514349702891614, "grad_norm": 7.556769603852107, "learning_rate": 3.938363755439205e-06, "loss": 0.7988, "step": 9017 }, { "epoch": 0.651507215489371, "grad_norm": 6.2704773186116105, "learning_rate": 3.93812450482575e-06, "loss": 0.9045, "step": 9018 }, { "epoch": 0.6515794606895804, "grad_norm": 6.375035603741724, "learning_rate": 3.937885234525486e-06, "loss": 0.8794, "step": 9019 }, { "epoch": 0.6516517058897899, "grad_norm": 5.593565016090668, "learning_rate": 3.93764594454169e-06, "loss": 0.8634, "step": 9020 }, { "epoch": 0.6517239510899995, "grad_norm": 7.004332427862756, "learning_rate": 3.937406634877637e-06, "loss": 0.7927, "step": 9021 }, { "epoch": 0.651796196290209, "grad_norm": 8.214195933354826, "learning_rate": 3.9371673055366035e-06, "loss": 0.8046, "step": 9022 }, { "epoch": 0.6518684414904184, "grad_norm": 5.730297998798082, "learning_rate": 3.936927956521865e-06, "loss": 0.8312, "step": 9023 }, { "epoch": 0.651940686690628, "grad_norm": 6.087711726899051, "learning_rate": 3.936688587836699e-06, "loss": 0.7981, "step": 9024 }, { "epoch": 0.6520129318908375, "grad_norm": 7.483276796304372, "learning_rate": 3.936449199484382e-06, "loss": 0.8626, "step": 9025 }, { "epoch": 0.652085177091047, "grad_norm": 6.611424482311111, "learning_rate": 3.936209791468191e-06, "loss": 0.8889, "step": 9026 }, { "epoch": 0.6521574222912565, "grad_norm": 6.994666929025144, "learning_rate": 3.935970363791402e-06, "loss": 0.8728, "step": 9027 }, { "epoch": 0.652229667491466, "grad_norm": 5.7121624135240205, "learning_rate": 3.935730916457295e-06, "loss": 0.8292, "step": 9028 }, { "epoch": 0.6523019126916756, "grad_norm": 6.9897658603294115, "learning_rate": 3.935491449469144e-06, "loss": 0.7834, "step": 9029 }, { "epoch": 0.652374157891885, "grad_norm": 8.667676084373094, "learning_rate": 3.935251962830232e-06, "loss": 0.9806, "step": 9030 }, { "epoch": 0.6524464030920946, "grad_norm": 7.900003129016884, "learning_rate": 3.935012456543834e-06, "loss": 0.8307, "step": 9031 }, { "epoch": 0.6525186482923041, "grad_norm": 6.529527939094975, "learning_rate": 3.934772930613231e-06, "loss": 0.8642, "step": 9032 }, { "epoch": 0.6525908934925136, "grad_norm": 5.475874962415039, "learning_rate": 3.934533385041699e-06, "loss": 0.8472, "step": 9033 }, { "epoch": 0.6526631386927231, "grad_norm": 6.113458151893343, "learning_rate": 3.93429381983252e-06, "loss": 0.8378, "step": 9034 }, { "epoch": 0.6527353838929326, "grad_norm": 5.864279522883418, "learning_rate": 3.934054234988972e-06, "loss": 0.8448, "step": 9035 }, { "epoch": 0.6528076290931422, "grad_norm": 5.915980805237577, "learning_rate": 3.933814630514334e-06, "loss": 0.841, "step": 9036 }, { "epoch": 0.6528798742933516, "grad_norm": 7.07027473228863, "learning_rate": 3.9335750064118885e-06, "loss": 0.8585, "step": 9037 }, { "epoch": 0.6529521194935611, "grad_norm": 5.6474329898095865, "learning_rate": 3.933335362684913e-06, "loss": 0.8387, "step": 9038 }, { "epoch": 0.6530243646937707, "grad_norm": 6.047994978140709, "learning_rate": 3.93309569933669e-06, "loss": 0.8109, "step": 9039 }, { "epoch": 0.6530966098939802, "grad_norm": 6.146564435846554, "learning_rate": 3.932856016370499e-06, "loss": 0.812, "step": 9040 }, { "epoch": 0.6531688550941896, "grad_norm": 6.572787695182121, "learning_rate": 3.932616313789622e-06, "loss": 0.8378, "step": 9041 }, { "epoch": 0.6532411002943992, "grad_norm": 6.751695208138009, "learning_rate": 3.93237659159734e-06, "loss": 0.9239, "step": 9042 }, { "epoch": 0.6533133454946087, "grad_norm": 6.482299178279166, "learning_rate": 3.9321368497969345e-06, "loss": 0.8845, "step": 9043 }, { "epoch": 0.6533855906948182, "grad_norm": 6.593459281117011, "learning_rate": 3.931897088391688e-06, "loss": 0.825, "step": 9044 }, { "epoch": 0.6534578358950277, "grad_norm": 7.904740381676617, "learning_rate": 3.931657307384882e-06, "loss": 0.8724, "step": 9045 }, { "epoch": 0.6535300810952372, "grad_norm": 7.796975052980908, "learning_rate": 3.931417506779799e-06, "loss": 0.8636, "step": 9046 }, { "epoch": 0.6536023262954468, "grad_norm": 5.608992964717202, "learning_rate": 3.9311776865797215e-06, "loss": 0.911, "step": 9047 }, { "epoch": 0.6536745714956562, "grad_norm": 6.625041457712265, "learning_rate": 3.930937846787933e-06, "loss": 0.8276, "step": 9048 }, { "epoch": 0.6537468166958658, "grad_norm": 5.854451381868844, "learning_rate": 3.930697987407716e-06, "loss": 0.8744, "step": 9049 }, { "epoch": 0.6538190618960753, "grad_norm": 6.130589017712863, "learning_rate": 3.930458108442355e-06, "loss": 0.8413, "step": 9050 }, { "epoch": 0.6538913070962847, "grad_norm": 6.72360409001218, "learning_rate": 3.930218209895133e-06, "loss": 0.8186, "step": 9051 }, { "epoch": 0.6539635522964943, "grad_norm": 5.1263710607090465, "learning_rate": 3.929978291769334e-06, "loss": 0.874, "step": 9052 }, { "epoch": 0.6540357974967038, "grad_norm": 6.4913948025007375, "learning_rate": 3.929738354068244e-06, "loss": 0.8287, "step": 9053 }, { "epoch": 0.6541080426969134, "grad_norm": 5.2878553627305465, "learning_rate": 3.929498396795144e-06, "loss": 0.8211, "step": 9054 }, { "epoch": 0.6541802878971228, "grad_norm": 4.834066839574382, "learning_rate": 3.929258419953321e-06, "loss": 0.7897, "step": 9055 }, { "epoch": 0.6542525330973323, "grad_norm": 8.077142711070543, "learning_rate": 3.9290184235460606e-06, "loss": 0.8557, "step": 9056 }, { "epoch": 0.6543247782975419, "grad_norm": 5.367142012179876, "learning_rate": 3.928778407576648e-06, "loss": 0.8041, "step": 9057 }, { "epoch": 0.6543970234977514, "grad_norm": 6.415024419248967, "learning_rate": 3.928538372048367e-06, "loss": 0.8318, "step": 9058 }, { "epoch": 0.6544692686979608, "grad_norm": 7.873146686758776, "learning_rate": 3.928298316964506e-06, "loss": 0.895, "step": 9059 }, { "epoch": 0.6545415138981704, "grad_norm": 6.9949104334977905, "learning_rate": 3.928058242328349e-06, "loss": 0.8477, "step": 9060 }, { "epoch": 0.6546137590983799, "grad_norm": 5.5735957887596665, "learning_rate": 3.927818148143185e-06, "loss": 0.8008, "step": 9061 }, { "epoch": 0.6546860042985894, "grad_norm": 6.732035715190872, "learning_rate": 3.927578034412298e-06, "loss": 0.7843, "step": 9062 }, { "epoch": 0.6547582494987989, "grad_norm": 5.4471572531461705, "learning_rate": 3.927337901138977e-06, "loss": 0.8104, "step": 9063 }, { "epoch": 0.6548304946990084, "grad_norm": 5.935238698708295, "learning_rate": 3.927097748326508e-06, "loss": 0.7887, "step": 9064 }, { "epoch": 0.654902739899218, "grad_norm": 6.343091808431086, "learning_rate": 3.9268575759781795e-06, "loss": 0.968, "step": 9065 }, { "epoch": 0.6549749850994274, "grad_norm": 6.432717037164359, "learning_rate": 3.926617384097278e-06, "loss": 0.8048, "step": 9066 }, { "epoch": 0.655047230299637, "grad_norm": 5.927812563837708, "learning_rate": 3.926377172687092e-06, "loss": 0.8493, "step": 9067 }, { "epoch": 0.6551194754998465, "grad_norm": 5.283829098775656, "learning_rate": 3.92613694175091e-06, "loss": 0.8566, "step": 9068 }, { "epoch": 0.6551917207000559, "grad_norm": 5.778035357838817, "learning_rate": 3.925896691292021e-06, "loss": 0.8718, "step": 9069 }, { "epoch": 0.6552639659002655, "grad_norm": 6.056942939286731, "learning_rate": 3.925656421313713e-06, "loss": 0.8385, "step": 9070 }, { "epoch": 0.655336211100475, "grad_norm": 6.435702174719512, "learning_rate": 3.925416131819276e-06, "loss": 0.9599, "step": 9071 }, { "epoch": 0.6554084563006846, "grad_norm": 7.970446836038321, "learning_rate": 3.925175822811999e-06, "loss": 0.8988, "step": 9072 }, { "epoch": 0.655480701500894, "grad_norm": 5.57724445135463, "learning_rate": 3.924935494295171e-06, "loss": 0.818, "step": 9073 }, { "epoch": 0.6555529467011035, "grad_norm": 5.516950842384771, "learning_rate": 3.924695146272083e-06, "loss": 0.814, "step": 9074 }, { "epoch": 0.6556251919013131, "grad_norm": 6.73378586460191, "learning_rate": 3.924454778746024e-06, "loss": 0.9293, "step": 9075 }, { "epoch": 0.6556974371015226, "grad_norm": 5.844955891099306, "learning_rate": 3.924214391720285e-06, "loss": 0.885, "step": 9076 }, { "epoch": 0.655769682301732, "grad_norm": 6.126026301134162, "learning_rate": 3.923973985198158e-06, "loss": 0.8512, "step": 9077 }, { "epoch": 0.6558419275019416, "grad_norm": 6.016312042239798, "learning_rate": 3.9237335591829325e-06, "loss": 0.8249, "step": 9078 }, { "epoch": 0.6559141727021511, "grad_norm": 5.430906836727534, "learning_rate": 3.923493113677899e-06, "loss": 0.7953, "step": 9079 }, { "epoch": 0.6559864179023606, "grad_norm": 5.662229633402969, "learning_rate": 3.923252648686351e-06, "loss": 0.7528, "step": 9080 }, { "epoch": 0.6560586631025701, "grad_norm": 5.406848499487584, "learning_rate": 3.923012164211579e-06, "loss": 0.8134, "step": 9081 }, { "epoch": 0.6561309083027796, "grad_norm": 5.43883138558462, "learning_rate": 3.9227716602568755e-06, "loss": 0.8055, "step": 9082 }, { "epoch": 0.6562031535029892, "grad_norm": 6.101234261137441, "learning_rate": 3.922531136825532e-06, "loss": 0.8775, "step": 9083 }, { "epoch": 0.6562753987031986, "grad_norm": 6.158293254276225, "learning_rate": 3.922290593920843e-06, "loss": 0.7598, "step": 9084 }, { "epoch": 0.6563476439034082, "grad_norm": 5.718479838008948, "learning_rate": 3.9220500315461e-06, "loss": 0.8491, "step": 9085 }, { "epoch": 0.6564198891036177, "grad_norm": 5.491460065568482, "learning_rate": 3.921809449704595e-06, "loss": 0.8688, "step": 9086 }, { "epoch": 0.6564921343038271, "grad_norm": 8.096432274537463, "learning_rate": 3.921568848399623e-06, "loss": 0.869, "step": 9087 }, { "epoch": 0.6565643795040367, "grad_norm": 5.386702639173243, "learning_rate": 3.921328227634478e-06, "loss": 0.7914, "step": 9088 }, { "epoch": 0.6566366247042462, "grad_norm": 5.987644507589573, "learning_rate": 3.921087587412453e-06, "loss": 0.861, "step": 9089 }, { "epoch": 0.6567088699044558, "grad_norm": 6.952253445639918, "learning_rate": 3.920846927736841e-06, "loss": 0.8787, "step": 9090 }, { "epoch": 0.6567811151046652, "grad_norm": 8.826532973153835, "learning_rate": 3.9206062486109395e-06, "loss": 0.8904, "step": 9091 }, { "epoch": 0.6568533603048747, "grad_norm": 6.760662276200594, "learning_rate": 3.920365550038041e-06, "loss": 0.8143, "step": 9092 }, { "epoch": 0.6569256055050843, "grad_norm": 7.215382240411394, "learning_rate": 3.92012483202144e-06, "loss": 0.9014, "step": 9093 }, { "epoch": 0.6569978507052938, "grad_norm": 5.822558160526595, "learning_rate": 3.919884094564434e-06, "loss": 0.8239, "step": 9094 }, { "epoch": 0.6570700959055032, "grad_norm": 5.893539395822125, "learning_rate": 3.919643337670316e-06, "loss": 0.8548, "step": 9095 }, { "epoch": 0.6571423411057128, "grad_norm": 5.413959804953609, "learning_rate": 3.919402561342384e-06, "loss": 0.8549, "step": 9096 }, { "epoch": 0.6572145863059223, "grad_norm": 8.487215637926726, "learning_rate": 3.9191617655839324e-06, "loss": 0.9271, "step": 9097 }, { "epoch": 0.6572868315061318, "grad_norm": 5.713722228469071, "learning_rate": 3.918920950398259e-06, "loss": 0.8179, "step": 9098 }, { "epoch": 0.6573590767063413, "grad_norm": 6.934789729598872, "learning_rate": 3.918680115788658e-06, "loss": 0.9247, "step": 9099 }, { "epoch": 0.6574313219065508, "grad_norm": 6.52916132917188, "learning_rate": 3.918439261758429e-06, "loss": 0.8084, "step": 9100 }, { "epoch": 0.6575035671067604, "grad_norm": 6.368096297297973, "learning_rate": 3.9181983883108676e-06, "loss": 0.7446, "step": 9101 }, { "epoch": 0.6575758123069698, "grad_norm": 6.509227658351792, "learning_rate": 3.9179574954492714e-06, "loss": 0.9312, "step": 9102 }, { "epoch": 0.6576480575071794, "grad_norm": 5.999998410542595, "learning_rate": 3.917716583176938e-06, "loss": 0.8898, "step": 9103 }, { "epoch": 0.6577203027073889, "grad_norm": 6.670721378693477, "learning_rate": 3.917475651497164e-06, "loss": 0.8037, "step": 9104 }, { "epoch": 0.6577925479075983, "grad_norm": 7.771716547486946, "learning_rate": 3.917234700413252e-06, "loss": 0.9096, "step": 9105 }, { "epoch": 0.6578647931078079, "grad_norm": 6.916353731851184, "learning_rate": 3.916993729928495e-06, "loss": 0.9388, "step": 9106 }, { "epoch": 0.6579370383080174, "grad_norm": 6.175591926157988, "learning_rate": 3.916752740046195e-06, "loss": 0.7842, "step": 9107 }, { "epoch": 0.658009283508227, "grad_norm": 6.454933042526165, "learning_rate": 3.916511730769649e-06, "loss": 0.8383, "step": 9108 }, { "epoch": 0.6580815287084364, "grad_norm": 6.6759383895709865, "learning_rate": 3.916270702102158e-06, "loss": 0.8229, "step": 9109 }, { "epoch": 0.6581537739086459, "grad_norm": 8.337541763681475, "learning_rate": 3.916029654047021e-06, "loss": 0.8542, "step": 9110 }, { "epoch": 0.6582260191088555, "grad_norm": 7.482953599613899, "learning_rate": 3.9157885866075375e-06, "loss": 0.8864, "step": 9111 }, { "epoch": 0.658298264309065, "grad_norm": 5.598429677682576, "learning_rate": 3.915547499787008e-06, "loss": 0.8046, "step": 9112 }, { "epoch": 0.6583705095092744, "grad_norm": 7.017686298915779, "learning_rate": 3.9153063935887305e-06, "loss": 0.8652, "step": 9113 }, { "epoch": 0.658442754709484, "grad_norm": 5.882680539377363, "learning_rate": 3.915065268016009e-06, "loss": 0.8394, "step": 9114 }, { "epoch": 0.6585149999096935, "grad_norm": 7.6483020867896565, "learning_rate": 3.9148241230721415e-06, "loss": 0.899, "step": 9115 }, { "epoch": 0.658587245109903, "grad_norm": 6.886908188333227, "learning_rate": 3.9145829587604314e-06, "loss": 0.8446, "step": 9116 }, { "epoch": 0.6586594903101125, "grad_norm": 6.814361317990054, "learning_rate": 3.9143417750841785e-06, "loss": 0.8781, "step": 9117 }, { "epoch": 0.658731735510322, "grad_norm": 6.307984200428181, "learning_rate": 3.914100572046685e-06, "loss": 0.7993, "step": 9118 }, { "epoch": 0.6588039807105316, "grad_norm": 6.7391190602338975, "learning_rate": 3.913859349651253e-06, "loss": 0.8496, "step": 9119 }, { "epoch": 0.658876225910741, "grad_norm": 5.687649903051148, "learning_rate": 3.913618107901184e-06, "loss": 0.8927, "step": 9120 }, { "epoch": 0.6589484711109506, "grad_norm": 5.782936963543387, "learning_rate": 3.91337684679978e-06, "loss": 0.8591, "step": 9121 }, { "epoch": 0.6590207163111601, "grad_norm": 8.41576519157739, "learning_rate": 3.913135566350345e-06, "loss": 0.8868, "step": 9122 }, { "epoch": 0.6590929615113695, "grad_norm": 6.861911869382602, "learning_rate": 3.912894266556182e-06, "loss": 0.8705, "step": 9123 }, { "epoch": 0.6591652067115791, "grad_norm": 5.268914980584887, "learning_rate": 3.912652947420592e-06, "loss": 0.7518, "step": 9124 }, { "epoch": 0.6592374519117886, "grad_norm": 6.5174086802779, "learning_rate": 3.912411608946881e-06, "loss": 0.9465, "step": 9125 }, { "epoch": 0.6593096971119982, "grad_norm": 8.776819163368062, "learning_rate": 3.912170251138352e-06, "loss": 0.8435, "step": 9126 }, { "epoch": 0.6593819423122076, "grad_norm": 5.955371502899616, "learning_rate": 3.911928873998308e-06, "loss": 0.872, "step": 9127 }, { "epoch": 0.6594541875124171, "grad_norm": 5.722164614653904, "learning_rate": 3.911687477530054e-06, "loss": 0.8693, "step": 9128 }, { "epoch": 0.6595264327126267, "grad_norm": 8.066789299319115, "learning_rate": 3.911446061736895e-06, "loss": 0.8216, "step": 9129 }, { "epoch": 0.6595986779128362, "grad_norm": 5.438676728702136, "learning_rate": 3.911204626622135e-06, "loss": 0.8652, "step": 9130 }, { "epoch": 0.6596709231130456, "grad_norm": 5.348916216730593, "learning_rate": 3.91096317218908e-06, "loss": 0.7859, "step": 9131 }, { "epoch": 0.6597431683132552, "grad_norm": 6.342586617365134, "learning_rate": 3.910721698441034e-06, "loss": 0.874, "step": 9132 }, { "epoch": 0.6598154135134647, "grad_norm": 6.383105940125943, "learning_rate": 3.910480205381304e-06, "loss": 0.8443, "step": 9133 }, { "epoch": 0.6598876587136742, "grad_norm": 6.0662257750440345, "learning_rate": 3.910238693013194e-06, "loss": 0.8091, "step": 9134 }, { "epoch": 0.6599599039138837, "grad_norm": 7.450819382635459, "learning_rate": 3.909997161340013e-06, "loss": 0.8529, "step": 9135 }, { "epoch": 0.6600321491140932, "grad_norm": 7.179934027323562, "learning_rate": 3.9097556103650635e-06, "loss": 0.936, "step": 9136 }, { "epoch": 0.6601043943143028, "grad_norm": 7.526326069633531, "learning_rate": 3.909514040091656e-06, "loss": 0.9578, "step": 9137 }, { "epoch": 0.6601766395145122, "grad_norm": 5.772634654302209, "learning_rate": 3.909272450523095e-06, "loss": 0.8597, "step": 9138 }, { "epoch": 0.6602488847147218, "grad_norm": 10.612475744069195, "learning_rate": 3.909030841662689e-06, "loss": 0.8264, "step": 9139 }, { "epoch": 0.6603211299149313, "grad_norm": 6.970261208936203, "learning_rate": 3.908789213513744e-06, "loss": 0.9623, "step": 9140 }, { "epoch": 0.6603933751151407, "grad_norm": 6.242225389043333, "learning_rate": 3.908547566079569e-06, "loss": 0.9068, "step": 9141 }, { "epoch": 0.6604656203153503, "grad_norm": 6.915258271861777, "learning_rate": 3.908305899363472e-06, "loss": 0.7967, "step": 9142 }, { "epoch": 0.6605378655155598, "grad_norm": 7.355076143764006, "learning_rate": 3.90806421336876e-06, "loss": 0.9146, "step": 9143 }, { "epoch": 0.6606101107157694, "grad_norm": 7.910949516550966, "learning_rate": 3.907822508098742e-06, "loss": 0.9376, "step": 9144 }, { "epoch": 0.6606823559159788, "grad_norm": 6.111276316095511, "learning_rate": 3.907580783556727e-06, "loss": 0.9047, "step": 9145 }, { "epoch": 0.6607546011161883, "grad_norm": 7.22810266526759, "learning_rate": 3.907339039746024e-06, "loss": 0.8218, "step": 9146 }, { "epoch": 0.6608268463163979, "grad_norm": 9.372297380627547, "learning_rate": 3.907097276669942e-06, "loss": 0.8534, "step": 9147 }, { "epoch": 0.6608990915166074, "grad_norm": 5.392517111604765, "learning_rate": 3.9068554943317925e-06, "loss": 0.7965, "step": 9148 }, { "epoch": 0.6609713367168168, "grad_norm": 6.52010083781706, "learning_rate": 3.906613692734882e-06, "loss": 0.8173, "step": 9149 }, { "epoch": 0.6610435819170264, "grad_norm": 7.302747172946128, "learning_rate": 3.9063718718825225e-06, "loss": 0.8228, "step": 9150 }, { "epoch": 0.6611158271172359, "grad_norm": 6.935728379594862, "learning_rate": 3.906130031778025e-06, "loss": 0.8565, "step": 9151 }, { "epoch": 0.6611880723174454, "grad_norm": 5.686800064782183, "learning_rate": 3.905888172424698e-06, "loss": 0.7741, "step": 9152 }, { "epoch": 0.6612603175176549, "grad_norm": 7.185593592706243, "learning_rate": 3.905646293825854e-06, "loss": 0.9534, "step": 9153 }, { "epoch": 0.6613325627178644, "grad_norm": 7.633477173273188, "learning_rate": 3.905404395984803e-06, "loss": 0.8537, "step": 9154 }, { "epoch": 0.661404807918074, "grad_norm": 6.19805624241036, "learning_rate": 3.905162478904858e-06, "loss": 0.791, "step": 9155 }, { "epoch": 0.6614770531182834, "grad_norm": 5.3521902788148, "learning_rate": 3.90492054258933e-06, "loss": 0.9297, "step": 9156 }, { "epoch": 0.661549298318493, "grad_norm": 7.33133274301599, "learning_rate": 3.90467858704153e-06, "loss": 0.883, "step": 9157 }, { "epoch": 0.6616215435187025, "grad_norm": 5.771793363661982, "learning_rate": 3.90443661226477e-06, "loss": 0.854, "step": 9158 }, { "epoch": 0.6616937887189119, "grad_norm": 5.394910618109681, "learning_rate": 3.904194618262364e-06, "loss": 0.8332, "step": 9159 }, { "epoch": 0.6617660339191215, "grad_norm": 5.996630676285929, "learning_rate": 3.9039526050376245e-06, "loss": 0.934, "step": 9160 }, { "epoch": 0.661838279119331, "grad_norm": 6.948042274113702, "learning_rate": 3.903710572593863e-06, "loss": 0.8508, "step": 9161 }, { "epoch": 0.6619105243195406, "grad_norm": 5.383000651532963, "learning_rate": 3.903468520934394e-06, "loss": 0.9314, "step": 9162 }, { "epoch": 0.66198276951975, "grad_norm": 6.242306360824705, "learning_rate": 3.903226450062531e-06, "loss": 0.7335, "step": 9163 }, { "epoch": 0.6620550147199595, "grad_norm": 7.255225304562961, "learning_rate": 3.902984359981587e-06, "loss": 0.8842, "step": 9164 }, { "epoch": 0.6621272599201691, "grad_norm": 7.8176457662872165, "learning_rate": 3.902742250694877e-06, "loss": 0.852, "step": 9165 }, { "epoch": 0.6621995051203786, "grad_norm": 6.127507844668549, "learning_rate": 3.902500122205714e-06, "loss": 0.8371, "step": 9166 }, { "epoch": 0.662271750320588, "grad_norm": 6.137894739326725, "learning_rate": 3.902257974517414e-06, "loss": 0.9059, "step": 9167 }, { "epoch": 0.6623439955207976, "grad_norm": 5.4620634345355805, "learning_rate": 3.9020158076332905e-06, "loss": 0.7654, "step": 9168 }, { "epoch": 0.6624162407210071, "grad_norm": 5.749962184615802, "learning_rate": 3.9017736215566594e-06, "loss": 0.8728, "step": 9169 }, { "epoch": 0.6624884859212166, "grad_norm": 6.918229290424868, "learning_rate": 3.901531416290836e-06, "loss": 0.7879, "step": 9170 }, { "epoch": 0.6625607311214261, "grad_norm": 7.459804141257847, "learning_rate": 3.901289191839136e-06, "loss": 0.8929, "step": 9171 }, { "epoch": 0.6626329763216356, "grad_norm": 6.457713862417367, "learning_rate": 3.9010469482048745e-06, "loss": 0.9069, "step": 9172 }, { "epoch": 0.6627052215218452, "grad_norm": 5.153895539924164, "learning_rate": 3.900804685391368e-06, "loss": 0.8305, "step": 9173 }, { "epoch": 0.6627774667220546, "grad_norm": 6.453570408262723, "learning_rate": 3.900562403401933e-06, "loss": 0.865, "step": 9174 }, { "epoch": 0.6628497119222642, "grad_norm": 5.839895354078019, "learning_rate": 3.9003201022398865e-06, "loss": 0.793, "step": 9175 }, { "epoch": 0.6629219571224737, "grad_norm": 6.984392750694026, "learning_rate": 3.900077781908545e-06, "loss": 0.8926, "step": 9176 }, { "epoch": 0.6629942023226831, "grad_norm": 7.354957631687352, "learning_rate": 3.899835442411226e-06, "loss": 0.8913, "step": 9177 }, { "epoch": 0.6630664475228927, "grad_norm": 6.176757735301383, "learning_rate": 3.8995930837512466e-06, "loss": 0.8227, "step": 9178 }, { "epoch": 0.6631386927231022, "grad_norm": 5.8434712552587404, "learning_rate": 3.899350705931925e-06, "loss": 0.798, "step": 9179 }, { "epoch": 0.6632109379233118, "grad_norm": 5.885472156184303, "learning_rate": 3.899108308956578e-06, "loss": 0.8001, "step": 9180 }, { "epoch": 0.6632831831235212, "grad_norm": 6.346216182802725, "learning_rate": 3.898865892828524e-06, "loss": 0.9273, "step": 9181 }, { "epoch": 0.6633554283237307, "grad_norm": 5.863101028834179, "learning_rate": 3.898623457551083e-06, "loss": 0.8633, "step": 9182 }, { "epoch": 0.6634276735239403, "grad_norm": 6.1849847170148315, "learning_rate": 3.898381003127573e-06, "loss": 0.8665, "step": 9183 }, { "epoch": 0.6634999187241498, "grad_norm": 5.860366452577526, "learning_rate": 3.898138529561313e-06, "loss": 0.8627, "step": 9184 }, { "epoch": 0.6635721639243592, "grad_norm": 5.8168210611192155, "learning_rate": 3.897896036855622e-06, "loss": 0.8406, "step": 9185 }, { "epoch": 0.6636444091245688, "grad_norm": 6.669901285656362, "learning_rate": 3.897653525013818e-06, "loss": 0.8225, "step": 9186 }, { "epoch": 0.6637166543247783, "grad_norm": 5.446620614375795, "learning_rate": 3.897410994039224e-06, "loss": 0.8642, "step": 9187 }, { "epoch": 0.6637888995249878, "grad_norm": 6.270706057180994, "learning_rate": 3.897168443935159e-06, "loss": 0.8431, "step": 9188 }, { "epoch": 0.6638611447251973, "grad_norm": 5.427408785833258, "learning_rate": 3.896925874704942e-06, "loss": 0.7699, "step": 9189 }, { "epoch": 0.6639333899254068, "grad_norm": 6.301647866917276, "learning_rate": 3.896683286351895e-06, "loss": 0.835, "step": 9190 }, { "epoch": 0.6640056351256164, "grad_norm": 5.408745514965027, "learning_rate": 3.896440678879337e-06, "loss": 0.784, "step": 9191 }, { "epoch": 0.6640778803258258, "grad_norm": 5.73353874075158, "learning_rate": 3.896198052290592e-06, "loss": 0.8518, "step": 9192 }, { "epoch": 0.6641501255260354, "grad_norm": 5.754450195457595, "learning_rate": 3.895955406588978e-06, "loss": 0.8379, "step": 9193 }, { "epoch": 0.6642223707262449, "grad_norm": 5.936255394842187, "learning_rate": 3.8957127417778195e-06, "loss": 0.7491, "step": 9194 }, { "epoch": 0.6642946159264543, "grad_norm": 7.162735977413163, "learning_rate": 3.895470057860437e-06, "loss": 0.8869, "step": 9195 }, { "epoch": 0.6643668611266639, "grad_norm": 5.446396838772458, "learning_rate": 3.895227354840153e-06, "loss": 0.8605, "step": 9196 }, { "epoch": 0.6644391063268734, "grad_norm": 6.79407831291856, "learning_rate": 3.89498463272029e-06, "loss": 0.8745, "step": 9197 }, { "epoch": 0.664511351527083, "grad_norm": 5.676157613984047, "learning_rate": 3.89474189150417e-06, "loss": 0.8107, "step": 9198 }, { "epoch": 0.6645835967272924, "grad_norm": 5.494460003381566, "learning_rate": 3.8944991311951175e-06, "loss": 0.815, "step": 9199 }, { "epoch": 0.6646558419275019, "grad_norm": 6.706283003271686, "learning_rate": 3.894256351796453e-06, "loss": 0.7951, "step": 9200 }, { "epoch": 0.6647280871277115, "grad_norm": 5.567105047968136, "learning_rate": 3.894013553311503e-06, "loss": 0.8999, "step": 9201 }, { "epoch": 0.664800332327921, "grad_norm": 7.058514305896317, "learning_rate": 3.893770735743589e-06, "loss": 0.7517, "step": 9202 }, { "epoch": 0.6648725775281304, "grad_norm": 5.466427904047704, "learning_rate": 3.893527899096037e-06, "loss": 0.8715, "step": 9203 }, { "epoch": 0.66494482272834, "grad_norm": 6.071710884518922, "learning_rate": 3.893285043372169e-06, "loss": 0.8973, "step": 9204 }, { "epoch": 0.6650170679285495, "grad_norm": 6.017695717063472, "learning_rate": 3.89304216857531e-06, "loss": 0.7992, "step": 9205 }, { "epoch": 0.665089313128759, "grad_norm": 6.01453546342694, "learning_rate": 3.892799274708786e-06, "loss": 1.0126, "step": 9206 }, { "epoch": 0.6651615583289685, "grad_norm": 6.409819115810688, "learning_rate": 3.892556361775922e-06, "loss": 0.8502, "step": 9207 }, { "epoch": 0.665233803529178, "grad_norm": 5.533863893986549, "learning_rate": 3.892313429780042e-06, "loss": 0.9596, "step": 9208 }, { "epoch": 0.6653060487293876, "grad_norm": 6.286959506213132, "learning_rate": 3.892070478724473e-06, "loss": 0.8748, "step": 9209 }, { "epoch": 0.665378293929597, "grad_norm": 6.966230407391589, "learning_rate": 3.891827508612539e-06, "loss": 0.8652, "step": 9210 }, { "epoch": 0.6654505391298066, "grad_norm": 6.742164019458678, "learning_rate": 3.8915845194475675e-06, "loss": 0.8398, "step": 9211 }, { "epoch": 0.6655227843300161, "grad_norm": 6.404905299038265, "learning_rate": 3.891341511232885e-06, "loss": 0.9322, "step": 9212 }, { "epoch": 0.6655950295302255, "grad_norm": 7.803509964191144, "learning_rate": 3.8910984839718166e-06, "loss": 0.8376, "step": 9213 }, { "epoch": 0.6656672747304351, "grad_norm": 7.687488835024285, "learning_rate": 3.890855437667691e-06, "loss": 0.9547, "step": 9214 }, { "epoch": 0.6657395199306446, "grad_norm": 6.989138760098426, "learning_rate": 3.8906123723238335e-06, "loss": 0.854, "step": 9215 }, { "epoch": 0.6658117651308542, "grad_norm": 6.698817863369743, "learning_rate": 3.890369287943573e-06, "loss": 0.877, "step": 9216 }, { "epoch": 0.6658840103310636, "grad_norm": 7.128480027926354, "learning_rate": 3.890126184530236e-06, "loss": 0.9161, "step": 9217 }, { "epoch": 0.6659562555312731, "grad_norm": 6.4486248590875705, "learning_rate": 3.889883062087151e-06, "loss": 0.8485, "step": 9218 }, { "epoch": 0.6660285007314827, "grad_norm": 5.614050316737668, "learning_rate": 3.889639920617646e-06, "loss": 0.794, "step": 9219 }, { "epoch": 0.6661007459316922, "grad_norm": 7.4107989413076325, "learning_rate": 3.88939676012505e-06, "loss": 0.8515, "step": 9220 }, { "epoch": 0.6661729911319016, "grad_norm": 6.011815517110249, "learning_rate": 3.889153580612691e-06, "loss": 0.8533, "step": 9221 }, { "epoch": 0.6662452363321112, "grad_norm": 8.59510553939848, "learning_rate": 3.888910382083897e-06, "loss": 0.9223, "step": 9222 }, { "epoch": 0.6663174815323207, "grad_norm": 5.311251504834564, "learning_rate": 3.888667164541999e-06, "loss": 0.9095, "step": 9223 }, { "epoch": 0.6663897267325302, "grad_norm": 7.675407513165934, "learning_rate": 3.888423927990326e-06, "loss": 0.8976, "step": 9224 }, { "epoch": 0.6664619719327397, "grad_norm": 7.8415932900877, "learning_rate": 3.888180672432208e-06, "loss": 0.902, "step": 9225 }, { "epoch": 0.6665342171329492, "grad_norm": 5.0118658411408745, "learning_rate": 3.887937397870973e-06, "loss": 0.8291, "step": 9226 }, { "epoch": 0.6666064623331588, "grad_norm": 6.623907898912203, "learning_rate": 3.887694104309954e-06, "loss": 0.8667, "step": 9227 }, { "epoch": 0.6666787075333682, "grad_norm": 6.688482479761764, "learning_rate": 3.88745079175248e-06, "loss": 0.8659, "step": 9228 }, { "epoch": 0.6667509527335778, "grad_norm": 8.1585030787119, "learning_rate": 3.887207460201881e-06, "loss": 0.8621, "step": 9229 }, { "epoch": 0.6668231979337873, "grad_norm": 6.215103383758051, "learning_rate": 3.88696410966149e-06, "loss": 0.7751, "step": 9230 }, { "epoch": 0.6668954431339967, "grad_norm": 6.453899346322419, "learning_rate": 3.8867207401346366e-06, "loss": 0.8352, "step": 9231 }, { "epoch": 0.6669676883342063, "grad_norm": 6.911906553873009, "learning_rate": 3.8864773516246534e-06, "loss": 0.875, "step": 9232 }, { "epoch": 0.6670399335344158, "grad_norm": 6.228079540433222, "learning_rate": 3.886233944134872e-06, "loss": 0.8901, "step": 9233 }, { "epoch": 0.6671121787346254, "grad_norm": 6.325557385639124, "learning_rate": 3.885990517668623e-06, "loss": 0.8193, "step": 9234 }, { "epoch": 0.6671844239348348, "grad_norm": 6.842303118760096, "learning_rate": 3.885747072229241e-06, "loss": 0.775, "step": 9235 }, { "epoch": 0.6672566691350443, "grad_norm": 6.042364286288348, "learning_rate": 3.885503607820058e-06, "loss": 0.8892, "step": 9236 }, { "epoch": 0.6673289143352539, "grad_norm": 7.455228304830674, "learning_rate": 3.885260124444406e-06, "loss": 0.8815, "step": 9237 }, { "epoch": 0.6674011595354634, "grad_norm": 5.466261989842928, "learning_rate": 3.885016622105617e-06, "loss": 0.8718, "step": 9238 }, { "epoch": 0.6674734047356728, "grad_norm": 7.583923764002584, "learning_rate": 3.8847731008070275e-06, "loss": 0.942, "step": 9239 }, { "epoch": 0.6675456499358824, "grad_norm": 7.252456248959135, "learning_rate": 3.884529560551969e-06, "loss": 0.7951, "step": 9240 }, { "epoch": 0.6676178951360919, "grad_norm": 7.428453088174087, "learning_rate": 3.884286001343776e-06, "loss": 0.8736, "step": 9241 }, { "epoch": 0.6676901403363014, "grad_norm": 6.398895955225462, "learning_rate": 3.884042423185783e-06, "loss": 0.8306, "step": 9242 }, { "epoch": 0.6677623855365109, "grad_norm": 5.89780561081401, "learning_rate": 3.8837988260813225e-06, "loss": 0.8401, "step": 9243 }, { "epoch": 0.6678346307367204, "grad_norm": 6.050618592471558, "learning_rate": 3.883555210033732e-06, "loss": 0.7332, "step": 9244 }, { "epoch": 0.66790687593693, "grad_norm": 5.7128408785227265, "learning_rate": 3.883311575046344e-06, "loss": 0.8404, "step": 9245 }, { "epoch": 0.6679791211371394, "grad_norm": 8.808637489497835, "learning_rate": 3.883067921122494e-06, "loss": 0.9064, "step": 9246 }, { "epoch": 0.668051366337349, "grad_norm": 8.02163821719105, "learning_rate": 3.882824248265519e-06, "loss": 0.8699, "step": 9247 }, { "epoch": 0.6681236115375585, "grad_norm": 6.818844422019686, "learning_rate": 3.882580556478753e-06, "loss": 0.7987, "step": 9248 }, { "epoch": 0.6681958567377679, "grad_norm": 5.621669206992845, "learning_rate": 3.882336845765534e-06, "loss": 0.8582, "step": 9249 }, { "epoch": 0.6682681019379775, "grad_norm": 6.833200065739897, "learning_rate": 3.882093116129196e-06, "loss": 0.845, "step": 9250 }, { "epoch": 0.668340347138187, "grad_norm": 6.619833028940737, "learning_rate": 3.881849367573076e-06, "loss": 0.8665, "step": 9251 }, { "epoch": 0.6684125923383966, "grad_norm": 6.525553724390912, "learning_rate": 3.881605600100512e-06, "loss": 0.9643, "step": 9252 }, { "epoch": 0.668484837538606, "grad_norm": 5.193797322228665, "learning_rate": 3.881361813714839e-06, "loss": 0.7913, "step": 9253 }, { "epoch": 0.6685570827388155, "grad_norm": 5.977472931574436, "learning_rate": 3.881118008419397e-06, "loss": 0.862, "step": 9254 }, { "epoch": 0.6686293279390251, "grad_norm": 7.083984913495982, "learning_rate": 3.8808741842175205e-06, "loss": 0.8828, "step": 9255 }, { "epoch": 0.6687015731392346, "grad_norm": 5.453819252569456, "learning_rate": 3.880630341112549e-06, "loss": 0.8446, "step": 9256 }, { "epoch": 0.668773818339444, "grad_norm": 6.525583830105202, "learning_rate": 3.88038647910782e-06, "loss": 0.8472, "step": 9257 }, { "epoch": 0.6688460635396536, "grad_norm": 7.031385361110252, "learning_rate": 3.880142598206672e-06, "loss": 0.8084, "step": 9258 }, { "epoch": 0.6689183087398631, "grad_norm": 6.220001248178633, "learning_rate": 3.879898698412443e-06, "loss": 0.8626, "step": 9259 }, { "epoch": 0.6689905539400726, "grad_norm": 6.840810484133732, "learning_rate": 3.879654779728474e-06, "loss": 0.8912, "step": 9260 }, { "epoch": 0.6690627991402821, "grad_norm": 7.19940446933875, "learning_rate": 3.8794108421581e-06, "loss": 0.8596, "step": 9261 }, { "epoch": 0.6691350443404916, "grad_norm": 5.391211790130308, "learning_rate": 3.879166885704664e-06, "loss": 0.8908, "step": 9262 }, { "epoch": 0.6692072895407012, "grad_norm": 9.219614382807764, "learning_rate": 3.878922910371503e-06, "loss": 0.9053, "step": 9263 }, { "epoch": 0.6692795347409106, "grad_norm": 5.95446922364635, "learning_rate": 3.878678916161959e-06, "loss": 0.7407, "step": 9264 }, { "epoch": 0.6693517799411202, "grad_norm": 7.07416940942553, "learning_rate": 3.878434903079371e-06, "loss": 0.8717, "step": 9265 }, { "epoch": 0.6694240251413297, "grad_norm": 7.296482116447265, "learning_rate": 3.878190871127079e-06, "loss": 0.7886, "step": 9266 }, { "epoch": 0.6694962703415391, "grad_norm": 7.544244343284597, "learning_rate": 3.877946820308425e-06, "loss": 0.8728, "step": 9267 }, { "epoch": 0.6695685155417487, "grad_norm": 6.448576943193371, "learning_rate": 3.877702750626748e-06, "loss": 0.8744, "step": 9268 }, { "epoch": 0.6696407607419582, "grad_norm": 6.216770182847976, "learning_rate": 3.87745866208539e-06, "loss": 0.7908, "step": 9269 }, { "epoch": 0.6697130059421678, "grad_norm": 8.771635500604704, "learning_rate": 3.8772145546876925e-06, "loss": 0.9132, "step": 9270 }, { "epoch": 0.6697852511423772, "grad_norm": 7.178722827347826, "learning_rate": 3.876970428436998e-06, "loss": 0.932, "step": 9271 }, { "epoch": 0.6698574963425867, "grad_norm": 6.887330805367098, "learning_rate": 3.876726283336647e-06, "loss": 0.7897, "step": 9272 }, { "epoch": 0.6699297415427963, "grad_norm": 6.458246374057539, "learning_rate": 3.876482119389982e-06, "loss": 0.8339, "step": 9273 }, { "epoch": 0.6700019867430057, "grad_norm": 6.285349860092875, "learning_rate": 3.876237936600345e-06, "loss": 0.8477, "step": 9274 }, { "epoch": 0.6700742319432152, "grad_norm": 6.1435744795073415, "learning_rate": 3.87599373497108e-06, "loss": 0.8056, "step": 9275 }, { "epoch": 0.6701464771434248, "grad_norm": 5.9794968924783465, "learning_rate": 3.8757495145055294e-06, "loss": 0.8346, "step": 9276 }, { "epoch": 0.6702187223436343, "grad_norm": 7.092589161540678, "learning_rate": 3.875505275207035e-06, "loss": 0.829, "step": 9277 }, { "epoch": 0.6702909675438438, "grad_norm": 7.574405347189005, "learning_rate": 3.875261017078943e-06, "loss": 0.8429, "step": 9278 }, { "epoch": 0.6703632127440533, "grad_norm": 7.550996969726342, "learning_rate": 3.875016740124594e-06, "loss": 0.9167, "step": 9279 }, { "epoch": 0.6704354579442628, "grad_norm": 6.516785003810443, "learning_rate": 3.8747724443473345e-06, "loss": 0.767, "step": 9280 }, { "epoch": 0.6705077031444724, "grad_norm": 6.192075328595414, "learning_rate": 3.874528129750507e-06, "loss": 0.8914, "step": 9281 }, { "epoch": 0.6705799483446818, "grad_norm": 6.458717121493149, "learning_rate": 3.874283796337457e-06, "loss": 0.8379, "step": 9282 }, { "epoch": 0.6706521935448914, "grad_norm": 6.348103650098396, "learning_rate": 3.874039444111529e-06, "loss": 0.7417, "step": 9283 }, { "epoch": 0.6707244387451009, "grad_norm": 7.018779631609034, "learning_rate": 3.873795073076067e-06, "loss": 0.9506, "step": 9284 }, { "epoch": 0.6707966839453103, "grad_norm": 6.260869454646455, "learning_rate": 3.8735506832344185e-06, "loss": 0.8211, "step": 9285 }, { "epoch": 0.6708689291455199, "grad_norm": 6.169387663838974, "learning_rate": 3.8733062745899275e-06, "loss": 0.8415, "step": 9286 }, { "epoch": 0.6709411743457294, "grad_norm": 5.093923554067397, "learning_rate": 3.873061847145939e-06, "loss": 0.8092, "step": 9287 }, { "epoch": 0.671013419545939, "grad_norm": 5.653416719601409, "learning_rate": 3.8728174009058e-06, "loss": 0.825, "step": 9288 }, { "epoch": 0.6710856647461484, "grad_norm": 8.269723146687049, "learning_rate": 3.872572935872857e-06, "loss": 0.8612, "step": 9289 }, { "epoch": 0.6711579099463579, "grad_norm": 6.236624175755594, "learning_rate": 3.8723284520504565e-06, "loss": 0.8077, "step": 9290 }, { "epoch": 0.6712301551465675, "grad_norm": 6.796181268476023, "learning_rate": 3.872083949441945e-06, "loss": 0.8011, "step": 9291 }, { "epoch": 0.6713024003467769, "grad_norm": 7.365800194969114, "learning_rate": 3.87183942805067e-06, "loss": 0.8759, "step": 9292 }, { "epoch": 0.6713746455469864, "grad_norm": 7.21342291521914, "learning_rate": 3.871594887879977e-06, "loss": 0.8385, "step": 9293 }, { "epoch": 0.671446890747196, "grad_norm": 6.209220293764277, "learning_rate": 3.871350328933215e-06, "loss": 0.7953, "step": 9294 }, { "epoch": 0.6715191359474055, "grad_norm": 6.810895538375843, "learning_rate": 3.871105751213733e-06, "loss": 0.8221, "step": 9295 }, { "epoch": 0.671591381147615, "grad_norm": 6.633877258747773, "learning_rate": 3.870861154724877e-06, "loss": 0.7691, "step": 9296 }, { "epoch": 0.6716636263478245, "grad_norm": 6.93693333966354, "learning_rate": 3.870616539469997e-06, "loss": 0.8906, "step": 9297 }, { "epoch": 0.671735871548034, "grad_norm": 8.289644643348005, "learning_rate": 3.87037190545244e-06, "loss": 0.8567, "step": 9298 }, { "epoch": 0.6718081167482436, "grad_norm": 5.358052527078712, "learning_rate": 3.870127252675556e-06, "loss": 0.8296, "step": 9299 }, { "epoch": 0.671880361948453, "grad_norm": 5.985604501023751, "learning_rate": 3.869882581142694e-06, "loss": 0.7974, "step": 9300 }, { "epoch": 0.6719526071486626, "grad_norm": 5.599417942315488, "learning_rate": 3.869637890857203e-06, "loss": 0.8523, "step": 9301 }, { "epoch": 0.6720248523488721, "grad_norm": 5.817539450557174, "learning_rate": 3.869393181822433e-06, "loss": 0.8829, "step": 9302 }, { "epoch": 0.6720970975490815, "grad_norm": 6.819453619048487, "learning_rate": 3.869148454041733e-06, "loss": 0.8847, "step": 9303 }, { "epoch": 0.6721693427492911, "grad_norm": 8.390589083518334, "learning_rate": 3.868903707518453e-06, "loss": 0.84, "step": 9304 }, { "epoch": 0.6722415879495006, "grad_norm": 6.197890064171725, "learning_rate": 3.868658942255946e-06, "loss": 0.842, "step": 9305 }, { "epoch": 0.6723138331497102, "grad_norm": 6.231964333624384, "learning_rate": 3.86841415825756e-06, "loss": 0.8314, "step": 9306 }, { "epoch": 0.6723860783499196, "grad_norm": 8.41918857171252, "learning_rate": 3.8681693555266465e-06, "loss": 0.9258, "step": 9307 }, { "epoch": 0.6724583235501291, "grad_norm": 6.348929259369529, "learning_rate": 3.867924534066557e-06, "loss": 0.8644, "step": 9308 }, { "epoch": 0.6725305687503387, "grad_norm": 6.435865472481756, "learning_rate": 3.867679693880642e-06, "loss": 0.7979, "step": 9309 }, { "epoch": 0.6726028139505481, "grad_norm": 9.114649506828236, "learning_rate": 3.8674348349722544e-06, "loss": 0.8699, "step": 9310 }, { "epoch": 0.6726750591507576, "grad_norm": 9.626398554667306, "learning_rate": 3.867189957344746e-06, "loss": 0.9857, "step": 9311 }, { "epoch": 0.6727473043509672, "grad_norm": 7.562898483496898, "learning_rate": 3.866945061001468e-06, "loss": 0.8341, "step": 9312 }, { "epoch": 0.6728195495511767, "grad_norm": 6.760091796342022, "learning_rate": 3.866700145945774e-06, "loss": 0.8446, "step": 9313 }, { "epoch": 0.6728917947513862, "grad_norm": 6.748139866852024, "learning_rate": 3.866455212181016e-06, "loss": 0.9242, "step": 9314 }, { "epoch": 0.6729640399515957, "grad_norm": 9.265253635317158, "learning_rate": 3.8662102597105475e-06, "loss": 0.8509, "step": 9315 }, { "epoch": 0.6730362851518052, "grad_norm": 7.278976150869822, "learning_rate": 3.8659652885377204e-06, "loss": 0.912, "step": 9316 }, { "epoch": 0.6731085303520148, "grad_norm": 10.188642753679677, "learning_rate": 3.8657202986658905e-06, "loss": 0.863, "step": 9317 }, { "epoch": 0.6731807755522242, "grad_norm": 7.165729224823272, "learning_rate": 3.86547529009841e-06, "loss": 0.8152, "step": 9318 }, { "epoch": 0.6732530207524338, "grad_norm": 6.000473957415618, "learning_rate": 3.865230262838632e-06, "loss": 0.8989, "step": 9319 }, { "epoch": 0.6733252659526433, "grad_norm": 6.780484345276742, "learning_rate": 3.8649852168899114e-06, "loss": 0.7896, "step": 9320 }, { "epoch": 0.6733975111528527, "grad_norm": 6.313835682579278, "learning_rate": 3.864740152255604e-06, "loss": 0.826, "step": 9321 }, { "epoch": 0.6734697563530623, "grad_norm": 9.673579101314804, "learning_rate": 3.8644950689390626e-06, "loss": 0.8939, "step": 9322 }, { "epoch": 0.6735420015532718, "grad_norm": 7.073855562961034, "learning_rate": 3.864249966943644e-06, "loss": 0.7918, "step": 9323 }, { "epoch": 0.6736142467534814, "grad_norm": 8.289146717733102, "learning_rate": 3.864004846272703e-06, "loss": 0.9242, "step": 9324 }, { "epoch": 0.6736864919536908, "grad_norm": 6.854440917429177, "learning_rate": 3.8637597069295944e-06, "loss": 0.7882, "step": 9325 }, { "epoch": 0.6737587371539003, "grad_norm": 6.1763204668538165, "learning_rate": 3.863514548917674e-06, "loss": 0.8724, "step": 9326 }, { "epoch": 0.6738309823541099, "grad_norm": 5.623148804271107, "learning_rate": 3.863269372240298e-06, "loss": 0.7811, "step": 9327 }, { "epoch": 0.6739032275543193, "grad_norm": 6.012522980136535, "learning_rate": 3.8630241769008235e-06, "loss": 0.8624, "step": 9328 }, { "epoch": 0.6739754727545288, "grad_norm": 5.270445295482242, "learning_rate": 3.862778962902606e-06, "loss": 0.8327, "step": 9329 }, { "epoch": 0.6740477179547384, "grad_norm": 8.872130883891336, "learning_rate": 3.8625337302490015e-06, "loss": 0.81, "step": 9330 }, { "epoch": 0.6741199631549479, "grad_norm": 6.5630600463222075, "learning_rate": 3.8622884789433704e-06, "loss": 0.9192, "step": 9331 }, { "epoch": 0.6741922083551574, "grad_norm": 6.739361609497182, "learning_rate": 3.862043208989066e-06, "loss": 0.9527, "step": 9332 }, { "epoch": 0.6742644535553669, "grad_norm": 7.58362472589159, "learning_rate": 3.861797920389448e-06, "loss": 0.8417, "step": 9333 }, { "epoch": 0.6743366987555764, "grad_norm": 7.026947468329064, "learning_rate": 3.8615526131478745e-06, "loss": 0.8627, "step": 9334 }, { "epoch": 0.674408943955786, "grad_norm": 5.750202672952432, "learning_rate": 3.861307287267703e-06, "loss": 0.7602, "step": 9335 }, { "epoch": 0.6744811891559954, "grad_norm": 6.113140536020317, "learning_rate": 3.86106194275229e-06, "loss": 0.8023, "step": 9336 }, { "epoch": 0.674553434356205, "grad_norm": 8.213671141196405, "learning_rate": 3.860816579604997e-06, "loss": 0.7853, "step": 9337 }, { "epoch": 0.6746256795564145, "grad_norm": 6.365262841778168, "learning_rate": 3.860571197829181e-06, "loss": 0.7788, "step": 9338 }, { "epoch": 0.6746979247566239, "grad_norm": 6.219305732470893, "learning_rate": 3.8603257974282035e-06, "loss": 0.7938, "step": 9339 }, { "epoch": 0.6747701699568335, "grad_norm": 6.246132836807032, "learning_rate": 3.860080378405421e-06, "loss": 0.8951, "step": 9340 }, { "epoch": 0.674842415157043, "grad_norm": 5.163050565387392, "learning_rate": 3.859834940764193e-06, "loss": 0.7617, "step": 9341 }, { "epoch": 0.6749146603572526, "grad_norm": 6.234438941264363, "learning_rate": 3.859589484507882e-06, "loss": 0.8371, "step": 9342 }, { "epoch": 0.674986905557462, "grad_norm": 5.872024107090996, "learning_rate": 3.859344009639846e-06, "loss": 0.791, "step": 9343 }, { "epoch": 0.6750591507576715, "grad_norm": 6.443420650379298, "learning_rate": 3.859098516163446e-06, "loss": 0.8776, "step": 9344 }, { "epoch": 0.6751313959578811, "grad_norm": 5.844399808770432, "learning_rate": 3.8588530040820426e-06, "loss": 0.7773, "step": 9345 }, { "epoch": 0.6752036411580905, "grad_norm": 7.910886106375006, "learning_rate": 3.858607473398997e-06, "loss": 0.8418, "step": 9346 }, { "epoch": 0.6752758863583, "grad_norm": 5.345382708701991, "learning_rate": 3.8583619241176695e-06, "loss": 0.8363, "step": 9347 }, { "epoch": 0.6753481315585096, "grad_norm": 6.936126573034708, "learning_rate": 3.858116356241422e-06, "loss": 0.8511, "step": 9348 }, { "epoch": 0.6754203767587191, "grad_norm": 6.412654001866641, "learning_rate": 3.857870769773617e-06, "loss": 0.8418, "step": 9349 }, { "epoch": 0.6754926219589286, "grad_norm": 6.672075112265369, "learning_rate": 3.857625164717614e-06, "loss": 0.8306, "step": 9350 }, { "epoch": 0.6755648671591381, "grad_norm": 7.342307350818728, "learning_rate": 3.8573795410767775e-06, "loss": 0.8535, "step": 9351 }, { "epoch": 0.6756371123593476, "grad_norm": 6.85233914052547, "learning_rate": 3.85713389885447e-06, "loss": 0.8772, "step": 9352 }, { "epoch": 0.6757093575595572, "grad_norm": 8.856259614001045, "learning_rate": 3.856888238054052e-06, "loss": 0.8641, "step": 9353 }, { "epoch": 0.6757816027597666, "grad_norm": 5.577525216134939, "learning_rate": 3.856642558678887e-06, "loss": 0.9051, "step": 9354 }, { "epoch": 0.6758538479599762, "grad_norm": 7.278949685257332, "learning_rate": 3.85639686073234e-06, "loss": 0.8311, "step": 9355 }, { "epoch": 0.6759260931601857, "grad_norm": 6.247405467331394, "learning_rate": 3.8561511442177724e-06, "loss": 0.7671, "step": 9356 }, { "epoch": 0.6759983383603951, "grad_norm": 6.909939570072994, "learning_rate": 3.855905409138549e-06, "loss": 0.8451, "step": 9357 }, { "epoch": 0.6760705835606047, "grad_norm": 7.085755267002765, "learning_rate": 3.8556596554980326e-06, "loss": 0.8484, "step": 9358 }, { "epoch": 0.6761428287608142, "grad_norm": 6.612349614126101, "learning_rate": 3.855413883299588e-06, "loss": 0.9431, "step": 9359 }, { "epoch": 0.6762150739610238, "grad_norm": 5.673528256920915, "learning_rate": 3.855168092546581e-06, "loss": 0.8184, "step": 9360 }, { "epoch": 0.6762873191612332, "grad_norm": 5.907010145569693, "learning_rate": 3.854922283242374e-06, "loss": 0.8058, "step": 9361 }, { "epoch": 0.6763595643614427, "grad_norm": 5.798479459667806, "learning_rate": 3.8546764553903335e-06, "loss": 0.7776, "step": 9362 }, { "epoch": 0.6764318095616523, "grad_norm": 6.371040161581036, "learning_rate": 3.854430608993824e-06, "loss": 0.8609, "step": 9363 }, { "epoch": 0.6765040547618617, "grad_norm": 6.598772929014998, "learning_rate": 3.854184744056211e-06, "loss": 0.878, "step": 9364 }, { "epoch": 0.6765762999620712, "grad_norm": 7.040832500219174, "learning_rate": 3.85393886058086e-06, "loss": 0.8371, "step": 9365 }, { "epoch": 0.6766485451622808, "grad_norm": 7.65041281390387, "learning_rate": 3.853692958571138e-06, "loss": 0.8808, "step": 9366 }, { "epoch": 0.6767207903624903, "grad_norm": 7.5953859678418105, "learning_rate": 3.85344703803041e-06, "loss": 0.7673, "step": 9367 }, { "epoch": 0.6767930355626998, "grad_norm": 6.2215051729811846, "learning_rate": 3.853201098962044e-06, "loss": 0.8839, "step": 9368 }, { "epoch": 0.6768652807629093, "grad_norm": 6.446882207055624, "learning_rate": 3.852955141369405e-06, "loss": 0.8966, "step": 9369 }, { "epoch": 0.6769375259631188, "grad_norm": 5.415674363234347, "learning_rate": 3.8527091652558595e-06, "loss": 0.8636, "step": 9370 }, { "epoch": 0.6770097711633284, "grad_norm": 7.131122033645212, "learning_rate": 3.852463170624777e-06, "loss": 0.7553, "step": 9371 }, { "epoch": 0.6770820163635378, "grad_norm": 7.379278461364062, "learning_rate": 3.852217157479524e-06, "loss": 0.8612, "step": 9372 }, { "epoch": 0.6771542615637474, "grad_norm": 5.562085425540124, "learning_rate": 3.851971125823467e-06, "loss": 0.8169, "step": 9373 }, { "epoch": 0.6772265067639569, "grad_norm": 7.398776762614737, "learning_rate": 3.851725075659975e-06, "loss": 0.8719, "step": 9374 }, { "epoch": 0.6772987519641663, "grad_norm": 6.271433889161477, "learning_rate": 3.8514790069924174e-06, "loss": 0.757, "step": 9375 }, { "epoch": 0.6773709971643759, "grad_norm": 5.877550586010577, "learning_rate": 3.851232919824161e-06, "loss": 0.8494, "step": 9376 }, { "epoch": 0.6774432423645854, "grad_norm": 8.46410509943475, "learning_rate": 3.850986814158575e-06, "loss": 0.9782, "step": 9377 }, { "epoch": 0.677515487564795, "grad_norm": 6.235908185927717, "learning_rate": 3.850740689999029e-06, "loss": 0.8256, "step": 9378 }, { "epoch": 0.6775877327650044, "grad_norm": 7.99704854880495, "learning_rate": 3.850494547348891e-06, "loss": 0.8624, "step": 9379 }, { "epoch": 0.6776599779652139, "grad_norm": 6.826355170107051, "learning_rate": 3.850248386211531e-06, "loss": 0.8516, "step": 9380 }, { "epoch": 0.6777322231654235, "grad_norm": 6.4527401566919735, "learning_rate": 3.85000220659032e-06, "loss": 0.888, "step": 9381 }, { "epoch": 0.6778044683656329, "grad_norm": 6.071225523558662, "learning_rate": 3.849756008488627e-06, "loss": 0.8259, "step": 9382 }, { "epoch": 0.6778767135658424, "grad_norm": 6.719048072058192, "learning_rate": 3.849509791909822e-06, "loss": 0.8795, "step": 9383 }, { "epoch": 0.677948958766052, "grad_norm": 6.34581974635065, "learning_rate": 3.849263556857275e-06, "loss": 0.9088, "step": 9384 }, { "epoch": 0.6780212039662615, "grad_norm": 6.840020544010118, "learning_rate": 3.849017303334358e-06, "loss": 0.8694, "step": 9385 }, { "epoch": 0.678093449166471, "grad_norm": 6.394896630233905, "learning_rate": 3.848771031344442e-06, "loss": 0.8456, "step": 9386 }, { "epoch": 0.6781656943666805, "grad_norm": 5.515177333052369, "learning_rate": 3.8485247408908974e-06, "loss": 0.8063, "step": 9387 }, { "epoch": 0.67823793956689, "grad_norm": 8.606915075773783, "learning_rate": 3.848278431977096e-06, "loss": 0.843, "step": 9388 }, { "epoch": 0.6783101847670996, "grad_norm": 6.4393761132413445, "learning_rate": 3.848032104606411e-06, "loss": 0.7785, "step": 9389 }, { "epoch": 0.678382429967309, "grad_norm": 5.624106103018311, "learning_rate": 3.847785758782212e-06, "loss": 0.7467, "step": 9390 }, { "epoch": 0.6784546751675186, "grad_norm": 6.356477944516984, "learning_rate": 3.847539394507872e-06, "loss": 0.8268, "step": 9391 }, { "epoch": 0.6785269203677281, "grad_norm": 7.231263779055855, "learning_rate": 3.8472930117867654e-06, "loss": 0.8801, "step": 9392 }, { "epoch": 0.6785991655679375, "grad_norm": 8.498824992027851, "learning_rate": 3.847046610622263e-06, "loss": 0.8354, "step": 9393 }, { "epoch": 0.6786714107681471, "grad_norm": 5.994300678561261, "learning_rate": 3.846800191017737e-06, "loss": 0.8498, "step": 9394 }, { "epoch": 0.6787436559683566, "grad_norm": 6.099868285211069, "learning_rate": 3.846553752976564e-06, "loss": 0.8503, "step": 9395 }, { "epoch": 0.6788159011685662, "grad_norm": 5.500722144142136, "learning_rate": 3.846307296502115e-06, "loss": 0.8344, "step": 9396 }, { "epoch": 0.6788881463687756, "grad_norm": 6.6093184376273575, "learning_rate": 3.846060821597764e-06, "loss": 0.7913, "step": 9397 }, { "epoch": 0.6789603915689851, "grad_norm": 6.304786426094235, "learning_rate": 3.8458143282668865e-06, "loss": 0.8363, "step": 9398 }, { "epoch": 0.6790326367691947, "grad_norm": 9.248071830417755, "learning_rate": 3.845567816512855e-06, "loss": 0.9, "step": 9399 }, { "epoch": 0.6791048819694041, "grad_norm": 5.933272613614741, "learning_rate": 3.845321286339045e-06, "loss": 0.9487, "step": 9400 }, { "epoch": 0.6791771271696136, "grad_norm": 6.937710870510728, "learning_rate": 3.845074737748832e-06, "loss": 0.8416, "step": 9401 }, { "epoch": 0.6792493723698232, "grad_norm": 6.213666668956454, "learning_rate": 3.844828170745588e-06, "loss": 0.8424, "step": 9402 }, { "epoch": 0.6793216175700327, "grad_norm": 7.74514692390033, "learning_rate": 3.8445815853326925e-06, "loss": 0.9048, "step": 9403 }, { "epoch": 0.6793938627702422, "grad_norm": 5.87461559580178, "learning_rate": 3.844334981513519e-06, "loss": 0.895, "step": 9404 }, { "epoch": 0.6794661079704517, "grad_norm": 6.145747642135214, "learning_rate": 3.844088359291443e-06, "loss": 0.8781, "step": 9405 }, { "epoch": 0.6795383531706612, "grad_norm": 8.221843412912053, "learning_rate": 3.8438417186698416e-06, "loss": 0.8, "step": 9406 }, { "epoch": 0.6796105983708708, "grad_norm": 5.900078672757386, "learning_rate": 3.843595059652089e-06, "loss": 0.8812, "step": 9407 }, { "epoch": 0.6796828435710802, "grad_norm": 6.209386475950329, "learning_rate": 3.843348382241564e-06, "loss": 0.7575, "step": 9408 }, { "epoch": 0.6797550887712898, "grad_norm": 7.101504486149176, "learning_rate": 3.843101686441643e-06, "loss": 0.9085, "step": 9409 }, { "epoch": 0.6798273339714993, "grad_norm": 7.303633310879929, "learning_rate": 3.842854972255703e-06, "loss": 0.8883, "step": 9410 }, { "epoch": 0.6798995791717087, "grad_norm": 7.018940233623152, "learning_rate": 3.842608239687121e-06, "loss": 0.8744, "step": 9411 }, { "epoch": 0.6799718243719183, "grad_norm": 7.165136691464654, "learning_rate": 3.842361488739275e-06, "loss": 0.9556, "step": 9412 }, { "epoch": 0.6800440695721278, "grad_norm": 5.863843023033555, "learning_rate": 3.8421147194155406e-06, "loss": 0.9436, "step": 9413 }, { "epoch": 0.6801163147723374, "grad_norm": 5.1027727779907766, "learning_rate": 3.841867931719299e-06, "loss": 0.8194, "step": 9414 }, { "epoch": 0.6801885599725468, "grad_norm": 5.719815847993791, "learning_rate": 3.841621125653928e-06, "loss": 0.848, "step": 9415 }, { "epoch": 0.6802608051727563, "grad_norm": 7.835205530758495, "learning_rate": 3.8413743012228044e-06, "loss": 0.8006, "step": 9416 }, { "epoch": 0.6803330503729659, "grad_norm": 5.8056741648619, "learning_rate": 3.841127458429309e-06, "loss": 0.8901, "step": 9417 }, { "epoch": 0.6804052955731753, "grad_norm": 8.71651875921719, "learning_rate": 3.8408805972768194e-06, "loss": 0.769, "step": 9418 }, { "epoch": 0.6804775407733848, "grad_norm": 6.616653943309834, "learning_rate": 3.840633717768716e-06, "loss": 0.7956, "step": 9419 }, { "epoch": 0.6805497859735944, "grad_norm": 7.185884509769423, "learning_rate": 3.840386819908377e-06, "loss": 0.8632, "step": 9420 }, { "epoch": 0.6806220311738039, "grad_norm": 5.656263657021237, "learning_rate": 3.8401399036991845e-06, "loss": 0.8445, "step": 9421 }, { "epoch": 0.6806942763740134, "grad_norm": 6.240485005725874, "learning_rate": 3.839892969144516e-06, "loss": 0.8026, "step": 9422 }, { "epoch": 0.6807665215742229, "grad_norm": 6.879408931182361, "learning_rate": 3.839646016247754e-06, "loss": 0.9328, "step": 9423 }, { "epoch": 0.6808387667744324, "grad_norm": 5.100768887771721, "learning_rate": 3.8393990450122784e-06, "loss": 0.7939, "step": 9424 }, { "epoch": 0.680911011974642, "grad_norm": 5.597627286975839, "learning_rate": 3.839152055441469e-06, "loss": 0.8326, "step": 9425 }, { "epoch": 0.6809832571748514, "grad_norm": 6.951317301443073, "learning_rate": 3.838905047538709e-06, "loss": 0.8559, "step": 9426 }, { "epoch": 0.681055502375061, "grad_norm": 6.047809538471185, "learning_rate": 3.838658021307377e-06, "loss": 0.8576, "step": 9427 }, { "epoch": 0.6811277475752705, "grad_norm": 6.849456498240504, "learning_rate": 3.838410976750856e-06, "loss": 0.8683, "step": 9428 }, { "epoch": 0.6811999927754799, "grad_norm": 5.723801346611249, "learning_rate": 3.838163913872529e-06, "loss": 0.8762, "step": 9429 }, { "epoch": 0.6812722379756895, "grad_norm": 4.812663286089464, "learning_rate": 3.837916832675777e-06, "loss": 0.7725, "step": 9430 }, { "epoch": 0.681344483175899, "grad_norm": 8.170929658961654, "learning_rate": 3.837669733163982e-06, "loss": 0.8607, "step": 9431 }, { "epoch": 0.6814167283761086, "grad_norm": 8.075850441381546, "learning_rate": 3.837422615340527e-06, "loss": 0.7813, "step": 9432 }, { "epoch": 0.681488973576318, "grad_norm": 5.14211578550959, "learning_rate": 3.8371754792087944e-06, "loss": 0.8599, "step": 9433 }, { "epoch": 0.6815612187765275, "grad_norm": 5.06947655979708, "learning_rate": 3.836928324772169e-06, "loss": 0.8051, "step": 9434 }, { "epoch": 0.6816334639767371, "grad_norm": 7.382693658104341, "learning_rate": 3.8366811520340315e-06, "loss": 0.8895, "step": 9435 }, { "epoch": 0.6817057091769465, "grad_norm": 6.5876300487674495, "learning_rate": 3.836433960997768e-06, "loss": 0.8728, "step": 9436 }, { "epoch": 0.681777954377156, "grad_norm": 5.640605556634165, "learning_rate": 3.83618675166676e-06, "loss": 0.8686, "step": 9437 }, { "epoch": 0.6818501995773656, "grad_norm": 5.613685418154372, "learning_rate": 3.8359395240443945e-06, "loss": 0.7752, "step": 9438 }, { "epoch": 0.6819224447775751, "grad_norm": 7.307372814406725, "learning_rate": 3.835692278134054e-06, "loss": 0.865, "step": 9439 }, { "epoch": 0.6819946899777846, "grad_norm": 5.786766595695444, "learning_rate": 3.835445013939122e-06, "loss": 0.8347, "step": 9440 }, { "epoch": 0.6820669351779941, "grad_norm": 6.83899736205629, "learning_rate": 3.835197731462985e-06, "loss": 0.8756, "step": 9441 }, { "epoch": 0.6821391803782036, "grad_norm": 5.706147195280629, "learning_rate": 3.834950430709028e-06, "loss": 0.8326, "step": 9442 }, { "epoch": 0.6822114255784132, "grad_norm": 5.58011128085617, "learning_rate": 3.834703111680636e-06, "loss": 0.8145, "step": 9443 }, { "epoch": 0.6822836707786226, "grad_norm": 6.892914614377833, "learning_rate": 3.834455774381195e-06, "loss": 0.8488, "step": 9444 }, { "epoch": 0.6823559159788322, "grad_norm": 7.120718489409613, "learning_rate": 3.8342084188140905e-06, "loss": 0.8799, "step": 9445 }, { "epoch": 0.6824281611790417, "grad_norm": 8.466946230300325, "learning_rate": 3.833961044982709e-06, "loss": 0.8296, "step": 9446 }, { "epoch": 0.6825004063792511, "grad_norm": 6.586042627330442, "learning_rate": 3.833713652890436e-06, "loss": 0.8332, "step": 9447 }, { "epoch": 0.6825726515794607, "grad_norm": 4.666999237380896, "learning_rate": 3.8334662425406585e-06, "loss": 0.7535, "step": 9448 }, { "epoch": 0.6826448967796702, "grad_norm": 7.11071576900375, "learning_rate": 3.833218813936765e-06, "loss": 0.7809, "step": 9449 }, { "epoch": 0.6827171419798798, "grad_norm": 5.917434033701845, "learning_rate": 3.83297136708214e-06, "loss": 0.8959, "step": 9450 }, { "epoch": 0.6827893871800892, "grad_norm": 5.237508900345771, "learning_rate": 3.832723901980171e-06, "loss": 0.7947, "step": 9451 }, { "epoch": 0.6828616323802987, "grad_norm": 5.424877379829119, "learning_rate": 3.832476418634248e-06, "loss": 0.7909, "step": 9452 }, { "epoch": 0.6829338775805083, "grad_norm": 7.486627992217034, "learning_rate": 3.8322289170477575e-06, "loss": 0.928, "step": 9453 }, { "epoch": 0.6830061227807177, "grad_norm": 5.41681815693609, "learning_rate": 3.831981397224087e-06, "loss": 0.858, "step": 9454 }, { "epoch": 0.6830783679809272, "grad_norm": 6.650697209801755, "learning_rate": 3.831733859166625e-06, "loss": 0.8841, "step": 9455 }, { "epoch": 0.6831506131811368, "grad_norm": 5.212489959194841, "learning_rate": 3.831486302878761e-06, "loss": 0.8337, "step": 9456 }, { "epoch": 0.6832228583813463, "grad_norm": 7.450556730423688, "learning_rate": 3.831238728363883e-06, "loss": 0.8683, "step": 9457 }, { "epoch": 0.6832951035815558, "grad_norm": 7.351736587587418, "learning_rate": 3.830991135625381e-06, "loss": 0.8155, "step": 9458 }, { "epoch": 0.6833673487817653, "grad_norm": 6.64456770593688, "learning_rate": 3.830743524666643e-06, "loss": 0.8505, "step": 9459 }, { "epoch": 0.6834395939819748, "grad_norm": 5.838678617569867, "learning_rate": 3.830495895491061e-06, "loss": 0.7815, "step": 9460 }, { "epoch": 0.6835118391821844, "grad_norm": 6.10040577023557, "learning_rate": 3.830248248102022e-06, "loss": 0.8427, "step": 9461 }, { "epoch": 0.6835840843823938, "grad_norm": 5.923136151576898, "learning_rate": 3.830000582502918e-06, "loss": 0.8802, "step": 9462 }, { "epoch": 0.6836563295826034, "grad_norm": 7.01511739750876, "learning_rate": 3.829752898697138e-06, "loss": 0.8896, "step": 9463 }, { "epoch": 0.6837285747828129, "grad_norm": 7.2106692841842985, "learning_rate": 3.829505196688074e-06, "loss": 0.8276, "step": 9464 }, { "epoch": 0.6838008199830223, "grad_norm": 5.90127014454072, "learning_rate": 3.829257476479114e-06, "loss": 0.8595, "step": 9465 }, { "epoch": 0.6838730651832319, "grad_norm": 6.272073143688249, "learning_rate": 3.829009738073653e-06, "loss": 0.8227, "step": 9466 }, { "epoch": 0.6839453103834414, "grad_norm": 5.909937116414244, "learning_rate": 3.828761981475082e-06, "loss": 0.8096, "step": 9467 }, { "epoch": 0.684017555583651, "grad_norm": 6.990884840684539, "learning_rate": 3.828514206686789e-06, "loss": 0.8251, "step": 9468 }, { "epoch": 0.6840898007838604, "grad_norm": 5.6934779683188745, "learning_rate": 3.8282664137121695e-06, "loss": 0.8445, "step": 9469 }, { "epoch": 0.6841620459840699, "grad_norm": 7.046764296784969, "learning_rate": 3.8280186025546126e-06, "loss": 0.9287, "step": 9470 }, { "epoch": 0.6842342911842795, "grad_norm": 5.093490125895765, "learning_rate": 3.827770773217513e-06, "loss": 0.8548, "step": 9471 }, { "epoch": 0.6843065363844889, "grad_norm": 8.04944209659317, "learning_rate": 3.827522925704263e-06, "loss": 0.868, "step": 9472 }, { "epoch": 0.6843787815846984, "grad_norm": 6.659615252496145, "learning_rate": 3.827275060018254e-06, "loss": 0.8507, "step": 9473 }, { "epoch": 0.684451026784908, "grad_norm": 5.684993988275495, "learning_rate": 3.8270271761628805e-06, "loss": 0.8457, "step": 9474 }, { "epoch": 0.6845232719851175, "grad_norm": 6.757737705059354, "learning_rate": 3.8267792741415345e-06, "loss": 0.8216, "step": 9475 }, { "epoch": 0.684595517185327, "grad_norm": 6.5248825939621495, "learning_rate": 3.826531353957612e-06, "loss": 0.936, "step": 9476 }, { "epoch": 0.6846677623855365, "grad_norm": 6.031435316332063, "learning_rate": 3.8262834156145035e-06, "loss": 0.8325, "step": 9477 }, { "epoch": 0.684740007585746, "grad_norm": 5.046257241059391, "learning_rate": 3.826035459115606e-06, "loss": 0.7969, "step": 9478 }, { "epoch": 0.6848122527859556, "grad_norm": 5.773062852568559, "learning_rate": 3.825787484464312e-06, "loss": 0.8489, "step": 9479 }, { "epoch": 0.684884497986165, "grad_norm": 6.880608784998408, "learning_rate": 3.825539491664017e-06, "loss": 0.8809, "step": 9480 }, { "epoch": 0.6849567431863746, "grad_norm": 8.448195622577861, "learning_rate": 3.825291480718116e-06, "loss": 0.9149, "step": 9481 }, { "epoch": 0.6850289883865841, "grad_norm": 6.299019800862683, "learning_rate": 3.825043451630003e-06, "loss": 0.799, "step": 9482 }, { "epoch": 0.6851012335867935, "grad_norm": 6.12317272099543, "learning_rate": 3.824795404403074e-06, "loss": 0.8448, "step": 9483 }, { "epoch": 0.6851734787870031, "grad_norm": 8.946209698456101, "learning_rate": 3.824547339040725e-06, "loss": 0.8016, "step": 9484 }, { "epoch": 0.6852457239872126, "grad_norm": 5.750072312936934, "learning_rate": 3.824299255546352e-06, "loss": 0.8095, "step": 9485 }, { "epoch": 0.6853179691874222, "grad_norm": 6.485696754407865, "learning_rate": 3.824051153923349e-06, "loss": 0.8317, "step": 9486 }, { "epoch": 0.6853902143876316, "grad_norm": 8.022655356077708, "learning_rate": 3.823803034175114e-06, "loss": 0.8537, "step": 9487 }, { "epoch": 0.6854624595878411, "grad_norm": 5.730617529063015, "learning_rate": 3.823554896305044e-06, "loss": 0.9229, "step": 9488 }, { "epoch": 0.6855347047880507, "grad_norm": 6.7502192708887705, "learning_rate": 3.823306740316534e-06, "loss": 0.9249, "step": 9489 }, { "epoch": 0.6856069499882601, "grad_norm": 11.241312699416145, "learning_rate": 3.823058566212984e-06, "loss": 0.8448, "step": 9490 }, { "epoch": 0.6856791951884696, "grad_norm": 6.306361471349721, "learning_rate": 3.822810373997788e-06, "loss": 0.8783, "step": 9491 }, { "epoch": 0.6857514403886792, "grad_norm": 6.53957817933786, "learning_rate": 3.822562163674346e-06, "loss": 0.7916, "step": 9492 }, { "epoch": 0.6858236855888887, "grad_norm": 5.99106536663513, "learning_rate": 3.822313935246055e-06, "loss": 0.8492, "step": 9493 }, { "epoch": 0.6858959307890982, "grad_norm": 5.566515384072711, "learning_rate": 3.822065688716312e-06, "loss": 0.8059, "step": 9494 }, { "epoch": 0.6859681759893077, "grad_norm": 5.386062947298854, "learning_rate": 3.821817424088517e-06, "loss": 0.8078, "step": 9495 }, { "epoch": 0.6860404211895172, "grad_norm": 5.6489946399243935, "learning_rate": 3.821569141366068e-06, "loss": 0.8641, "step": 9496 }, { "epoch": 0.6861126663897267, "grad_norm": 7.9120575454208355, "learning_rate": 3.821320840552362e-06, "loss": 0.9127, "step": 9497 }, { "epoch": 0.6861849115899362, "grad_norm": 7.4587166401772516, "learning_rate": 3.821072521650802e-06, "loss": 0.9433, "step": 9498 }, { "epoch": 0.6862571567901458, "grad_norm": 6.355516767642822, "learning_rate": 3.820824184664783e-06, "loss": 0.9384, "step": 9499 }, { "epoch": 0.6863294019903553, "grad_norm": 6.899551664520436, "learning_rate": 3.820575829597707e-06, "loss": 0.8514, "step": 9500 }, { "epoch": 0.6864016471905647, "grad_norm": 6.59301753744243, "learning_rate": 3.820327456452974e-06, "loss": 0.8427, "step": 9501 }, { "epoch": 0.6864738923907743, "grad_norm": 7.415972416420082, "learning_rate": 3.8200790652339825e-06, "loss": 0.8538, "step": 9502 }, { "epoch": 0.6865461375909838, "grad_norm": 9.572995080028303, "learning_rate": 3.819830655944134e-06, "loss": 0.8576, "step": 9503 }, { "epoch": 0.6866183827911934, "grad_norm": 7.523532062700759, "learning_rate": 3.819582228586828e-06, "loss": 0.8269, "step": 9504 }, { "epoch": 0.6866906279914028, "grad_norm": 6.477379391900794, "learning_rate": 3.819333783165466e-06, "loss": 0.852, "step": 9505 }, { "epoch": 0.6867628731916123, "grad_norm": 6.179869291040523, "learning_rate": 3.819085319683449e-06, "loss": 0.8797, "step": 9506 }, { "epoch": 0.6868351183918219, "grad_norm": 5.864772904236081, "learning_rate": 3.818836838144178e-06, "loss": 0.8952, "step": 9507 }, { "epoch": 0.6869073635920313, "grad_norm": 6.394229385437603, "learning_rate": 3.818588338551055e-06, "loss": 0.8082, "step": 9508 }, { "epoch": 0.6869796087922408, "grad_norm": 5.613217538619384, "learning_rate": 3.818339820907482e-06, "loss": 0.8404, "step": 9509 }, { "epoch": 0.6870518539924504, "grad_norm": 10.138753709516104, "learning_rate": 3.81809128521686e-06, "loss": 0.8025, "step": 9510 }, { "epoch": 0.6871240991926599, "grad_norm": 5.111974772216442, "learning_rate": 3.817842731482591e-06, "loss": 0.7465, "step": 9511 }, { "epoch": 0.6871963443928694, "grad_norm": 7.861520523654928, "learning_rate": 3.81759415970808e-06, "loss": 0.9108, "step": 9512 }, { "epoch": 0.6872685895930789, "grad_norm": 7.780558482622804, "learning_rate": 3.817345569896726e-06, "loss": 0.9047, "step": 9513 }, { "epoch": 0.6873408347932884, "grad_norm": 7.664763034609027, "learning_rate": 3.817096962051935e-06, "loss": 0.8225, "step": 9514 }, { "epoch": 0.6874130799934979, "grad_norm": 6.77606255901692, "learning_rate": 3.81684833617711e-06, "loss": 0.8387, "step": 9515 }, { "epoch": 0.6874853251937074, "grad_norm": 6.63645721692605, "learning_rate": 3.816599692275652e-06, "loss": 0.8203, "step": 9516 }, { "epoch": 0.687557570393917, "grad_norm": 6.363708968801104, "learning_rate": 3.816351030350967e-06, "loss": 0.8439, "step": 9517 }, { "epoch": 0.6876298155941265, "grad_norm": 7.72016529281838, "learning_rate": 3.816102350406459e-06, "loss": 0.7994, "step": 9518 }, { "epoch": 0.6877020607943359, "grad_norm": 6.124706961959327, "learning_rate": 3.815853652445533e-06, "loss": 0.788, "step": 9519 }, { "epoch": 0.6877743059945455, "grad_norm": 8.168496478348807, "learning_rate": 3.8156049364715895e-06, "loss": 0.9449, "step": 9520 }, { "epoch": 0.687846551194755, "grad_norm": 7.079963361138838, "learning_rate": 3.815356202488038e-06, "loss": 0.8143, "step": 9521 }, { "epoch": 0.6879187963949646, "grad_norm": 4.630716657108579, "learning_rate": 3.81510745049828e-06, "loss": 0.8449, "step": 9522 }, { "epoch": 0.687991041595174, "grad_norm": 6.243017034113588, "learning_rate": 3.814858680505723e-06, "loss": 0.7923, "step": 9523 }, { "epoch": 0.6880632867953835, "grad_norm": 6.034924904546504, "learning_rate": 3.8146098925137714e-06, "loss": 0.8488, "step": 9524 }, { "epoch": 0.6881355319955931, "grad_norm": 6.456882962037427, "learning_rate": 3.8143610865258308e-06, "loss": 0.8277, "step": 9525 }, { "epoch": 0.6882077771958025, "grad_norm": 6.384258055211063, "learning_rate": 3.8141122625453074e-06, "loss": 0.8341, "step": 9526 }, { "epoch": 0.688280022396012, "grad_norm": 6.403631801106355, "learning_rate": 3.8138634205756075e-06, "loss": 0.9938, "step": 9527 }, { "epoch": 0.6883522675962216, "grad_norm": 4.5103579099614866, "learning_rate": 3.813614560620138e-06, "loss": 0.8147, "step": 9528 }, { "epoch": 0.6884245127964311, "grad_norm": 7.777664032361707, "learning_rate": 3.813365682682305e-06, "loss": 0.8398, "step": 9529 }, { "epoch": 0.6884967579966406, "grad_norm": 6.113042252716408, "learning_rate": 3.8131167867655154e-06, "loss": 0.8173, "step": 9530 }, { "epoch": 0.6885690031968501, "grad_norm": 5.732317396833248, "learning_rate": 3.8128678728731765e-06, "loss": 0.7524, "step": 9531 }, { "epoch": 0.6886412483970596, "grad_norm": 5.9699734537374685, "learning_rate": 3.8126189410086958e-06, "loss": 0.7783, "step": 9532 }, { "epoch": 0.6887134935972691, "grad_norm": 6.153479867818093, "learning_rate": 3.8123699911754813e-06, "loss": 0.8316, "step": 9533 }, { "epoch": 0.6887857387974786, "grad_norm": 7.440935383314527, "learning_rate": 3.8121210233769403e-06, "loss": 0.786, "step": 9534 }, { "epoch": 0.6888579839976882, "grad_norm": 7.110362761910044, "learning_rate": 3.811872037616482e-06, "loss": 0.8567, "step": 9535 }, { "epoch": 0.6889302291978977, "grad_norm": 9.19045537009282, "learning_rate": 3.811623033897513e-06, "loss": 0.8704, "step": 9536 }, { "epoch": 0.6890024743981071, "grad_norm": 7.025863007992689, "learning_rate": 3.8113740122234433e-06, "loss": 0.8248, "step": 9537 }, { "epoch": 0.6890747195983167, "grad_norm": 5.531197068128176, "learning_rate": 3.8111249725976823e-06, "loss": 0.8204, "step": 9538 }, { "epoch": 0.6891469647985262, "grad_norm": 6.0136151015304815, "learning_rate": 3.8108759150236375e-06, "loss": 0.8572, "step": 9539 }, { "epoch": 0.6892192099987358, "grad_norm": 6.725819545569916, "learning_rate": 3.8106268395047203e-06, "loss": 0.908, "step": 9540 }, { "epoch": 0.6892914551989452, "grad_norm": 6.371715522330683, "learning_rate": 3.810377746044338e-06, "loss": 0.8417, "step": 9541 }, { "epoch": 0.6893637003991547, "grad_norm": 8.296831321242136, "learning_rate": 3.8101286346459033e-06, "loss": 0.883, "step": 9542 }, { "epoch": 0.6894359455993643, "grad_norm": 8.053862920783873, "learning_rate": 3.8098795053128235e-06, "loss": 0.8382, "step": 9543 }, { "epoch": 0.6895081907995737, "grad_norm": 5.0392623506880945, "learning_rate": 3.809630358048512e-06, "loss": 0.8641, "step": 9544 }, { "epoch": 0.6895804359997832, "grad_norm": 6.308098797824527, "learning_rate": 3.809381192856376e-06, "loss": 0.8795, "step": 9545 }, { "epoch": 0.6896526811999928, "grad_norm": 8.04329173480313, "learning_rate": 3.8091320097398287e-06, "loss": 0.8315, "step": 9546 }, { "epoch": 0.6897249264002023, "grad_norm": 5.756629107952278, "learning_rate": 3.808882808702281e-06, "loss": 0.8307, "step": 9547 }, { "epoch": 0.6897971716004118, "grad_norm": 10.400951239291958, "learning_rate": 3.8086335897471432e-06, "loss": 0.9127, "step": 9548 }, { "epoch": 0.6898694168006213, "grad_norm": 7.738449595515498, "learning_rate": 3.8083843528778288e-06, "loss": 0.8396, "step": 9549 }, { "epoch": 0.6899416620008308, "grad_norm": 5.668959359505637, "learning_rate": 3.8081350980977472e-06, "loss": 0.8669, "step": 9550 }, { "epoch": 0.6900139072010403, "grad_norm": 9.017330861145942, "learning_rate": 3.8078858254103122e-06, "loss": 0.8416, "step": 9551 }, { "epoch": 0.6900861524012498, "grad_norm": 8.648877691767206, "learning_rate": 3.807636534818936e-06, "loss": 0.9144, "step": 9552 }, { "epoch": 0.6901583976014594, "grad_norm": 6.6330304688493325, "learning_rate": 3.8073872263270316e-06, "loss": 0.7488, "step": 9553 }, { "epoch": 0.6902306428016689, "grad_norm": 5.613467792764148, "learning_rate": 3.8071378999380105e-06, "loss": 0.8271, "step": 9554 }, { "epoch": 0.6903028880018783, "grad_norm": 5.136885282768654, "learning_rate": 3.806888555655286e-06, "loss": 0.8064, "step": 9555 }, { "epoch": 0.6903751332020879, "grad_norm": 6.863018901502662, "learning_rate": 3.8066391934822733e-06, "loss": 0.8599, "step": 9556 }, { "epoch": 0.6904473784022974, "grad_norm": 5.73634674516045, "learning_rate": 3.806389813422383e-06, "loss": 0.8398, "step": 9557 }, { "epoch": 0.690519623602507, "grad_norm": 7.281285887535621, "learning_rate": 3.8061404154790315e-06, "loss": 0.8475, "step": 9558 }, { "epoch": 0.6905918688027164, "grad_norm": 7.033855311593607, "learning_rate": 3.8058909996556314e-06, "loss": 0.7975, "step": 9559 }, { "epoch": 0.6906641140029259, "grad_norm": 6.939915416982619, "learning_rate": 3.805641565955598e-06, "loss": 0.844, "step": 9560 }, { "epoch": 0.6907363592031355, "grad_norm": 5.457921233405945, "learning_rate": 3.8053921143823447e-06, "loss": 0.8584, "step": 9561 }, { "epoch": 0.6908086044033449, "grad_norm": 6.174297697462017, "learning_rate": 3.805142644939287e-06, "loss": 0.9722, "step": 9562 }, { "epoch": 0.6908808496035544, "grad_norm": 6.069864106770283, "learning_rate": 3.804893157629841e-06, "loss": 0.9059, "step": 9563 }, { "epoch": 0.690953094803764, "grad_norm": 5.760530531610368, "learning_rate": 3.8046436524574193e-06, "loss": 0.8507, "step": 9564 }, { "epoch": 0.6910253400039735, "grad_norm": 7.102316099832054, "learning_rate": 3.8043941294254394e-06, "loss": 0.8929, "step": 9565 }, { "epoch": 0.691097585204183, "grad_norm": 5.737559917860131, "learning_rate": 3.8041445885373176e-06, "loss": 0.8624, "step": 9566 }, { "epoch": 0.6911698304043925, "grad_norm": 6.106518948585486, "learning_rate": 3.8038950297964682e-06, "loss": 0.8813, "step": 9567 }, { "epoch": 0.691242075604602, "grad_norm": 6.240287864109566, "learning_rate": 3.8036454532063083e-06, "loss": 0.8252, "step": 9568 }, { "epoch": 0.6913143208048115, "grad_norm": 5.052093359029464, "learning_rate": 3.8033958587702535e-06, "loss": 0.9679, "step": 9569 }, { "epoch": 0.691386566005021, "grad_norm": 5.2299860987287055, "learning_rate": 3.803146246491723e-06, "loss": 0.8235, "step": 9570 }, { "epoch": 0.6914588112052306, "grad_norm": 5.5902076458793415, "learning_rate": 3.802896616374131e-06, "loss": 0.8262, "step": 9571 }, { "epoch": 0.6915310564054401, "grad_norm": 8.221930639005487, "learning_rate": 3.8026469684208974e-06, "loss": 0.8901, "step": 9572 }, { "epoch": 0.6916033016056495, "grad_norm": 7.319898986536337, "learning_rate": 3.8023973026354365e-06, "loss": 0.8739, "step": 9573 }, { "epoch": 0.6916755468058591, "grad_norm": 6.3522186128101445, "learning_rate": 3.802147619021169e-06, "loss": 0.8563, "step": 9574 }, { "epoch": 0.6917477920060686, "grad_norm": 5.594224962923016, "learning_rate": 3.801897917581511e-06, "loss": 0.7802, "step": 9575 }, { "epoch": 0.6918200372062782, "grad_norm": 8.017116831659651, "learning_rate": 3.8016481983198814e-06, "loss": 0.8952, "step": 9576 }, { "epoch": 0.6918922824064876, "grad_norm": 6.584011026903933, "learning_rate": 3.8013984612396993e-06, "loss": 0.8498, "step": 9577 }, { "epoch": 0.6919645276066971, "grad_norm": 6.622726626178065, "learning_rate": 3.8011487063443826e-06, "loss": 0.888, "step": 9578 }, { "epoch": 0.6920367728069067, "grad_norm": 7.839301439810118, "learning_rate": 3.8008989336373497e-06, "loss": 0.8153, "step": 9579 }, { "epoch": 0.6921090180071161, "grad_norm": 8.690832582893696, "learning_rate": 3.8006491431220203e-06, "loss": 0.835, "step": 9580 }, { "epoch": 0.6921812632073256, "grad_norm": 5.4695407840649475, "learning_rate": 3.8003993348018153e-06, "loss": 0.8502, "step": 9581 }, { "epoch": 0.6922535084075352, "grad_norm": 7.299218007883538, "learning_rate": 3.800149508680152e-06, "loss": 0.8942, "step": 9582 }, { "epoch": 0.6923257536077447, "grad_norm": 8.491282312182053, "learning_rate": 3.7998996647604512e-06, "loss": 0.924, "step": 9583 }, { "epoch": 0.6923979988079542, "grad_norm": 8.794201188757274, "learning_rate": 3.7996498030461344e-06, "loss": 0.7621, "step": 9584 }, { "epoch": 0.6924702440081637, "grad_norm": 6.440848655463649, "learning_rate": 3.7993999235406207e-06, "loss": 0.7875, "step": 9585 }, { "epoch": 0.6925424892083732, "grad_norm": 6.4631571157093, "learning_rate": 3.7991500262473304e-06, "loss": 0.8813, "step": 9586 }, { "epoch": 0.6926147344085827, "grad_norm": 7.601273690958214, "learning_rate": 3.7989001111696855e-06, "loss": 0.8185, "step": 9587 }, { "epoch": 0.6926869796087922, "grad_norm": 5.350019380053063, "learning_rate": 3.7986501783111064e-06, "loss": 0.8671, "step": 9588 }, { "epoch": 0.6927592248090018, "grad_norm": 9.181942459075712, "learning_rate": 3.798400227675014e-06, "loss": 0.8699, "step": 9589 }, { "epoch": 0.6928314700092113, "grad_norm": 7.08302282512972, "learning_rate": 3.7981502592648316e-06, "loss": 0.8724, "step": 9590 }, { "epoch": 0.6929037152094207, "grad_norm": 5.87270618867619, "learning_rate": 3.7979002730839796e-06, "loss": 0.8005, "step": 9591 }, { "epoch": 0.6929759604096303, "grad_norm": 5.905903478074627, "learning_rate": 3.7976502691358814e-06, "loss": 0.8194, "step": 9592 }, { "epoch": 0.6930482056098398, "grad_norm": 6.030005450143454, "learning_rate": 3.7974002474239578e-06, "loss": 0.9017, "step": 9593 }, { "epoch": 0.6931204508100494, "grad_norm": 5.834655539209703, "learning_rate": 3.797150207951632e-06, "loss": 0.8845, "step": 9594 }, { "epoch": 0.6931926960102588, "grad_norm": 6.4736648538060955, "learning_rate": 3.796900150722328e-06, "loss": 0.9645, "step": 9595 }, { "epoch": 0.6932649412104683, "grad_norm": 6.378757341916275, "learning_rate": 3.7966500757394664e-06, "loss": 0.8301, "step": 9596 }, { "epoch": 0.6933371864106779, "grad_norm": 5.430529632029512, "learning_rate": 3.7963999830064737e-06, "loss": 0.8, "step": 9597 }, { "epoch": 0.6934094316108873, "grad_norm": 5.538699720632715, "learning_rate": 3.7961498725267702e-06, "loss": 0.7661, "step": 9598 }, { "epoch": 0.6934816768110968, "grad_norm": 6.73863167114188, "learning_rate": 3.7958997443037827e-06, "loss": 0.8137, "step": 9599 }, { "epoch": 0.6935539220113064, "grad_norm": 5.440849412631507, "learning_rate": 3.795649598340933e-06, "loss": 0.8197, "step": 9600 }, { "epoch": 0.6936261672115159, "grad_norm": 8.047170333627713, "learning_rate": 3.795399434641647e-06, "loss": 0.8223, "step": 9601 }, { "epoch": 0.6936984124117254, "grad_norm": 7.230574266518034, "learning_rate": 3.795149253209348e-06, "loss": 0.8273, "step": 9602 }, { "epoch": 0.6937706576119349, "grad_norm": 6.0719615606164306, "learning_rate": 3.794899054047462e-06, "loss": 0.8061, "step": 9603 }, { "epoch": 0.6938429028121444, "grad_norm": 8.05539502681811, "learning_rate": 3.7946488371594125e-06, "loss": 0.8569, "step": 9604 }, { "epoch": 0.6939151480123539, "grad_norm": 7.819526869175888, "learning_rate": 3.794398602548626e-06, "loss": 0.9143, "step": 9605 }, { "epoch": 0.6939873932125634, "grad_norm": 6.546068237415226, "learning_rate": 3.7941483502185282e-06, "loss": 0.8675, "step": 9606 }, { "epoch": 0.694059638412773, "grad_norm": 6.136310954782935, "learning_rate": 3.793898080172544e-06, "loss": 0.768, "step": 9607 }, { "epoch": 0.6941318836129825, "grad_norm": 4.684304330609571, "learning_rate": 3.7936477924140993e-06, "loss": 0.7341, "step": 9608 }, { "epoch": 0.6942041288131919, "grad_norm": 5.505414724917185, "learning_rate": 3.7933974869466207e-06, "loss": 0.7928, "step": 9609 }, { "epoch": 0.6942763740134015, "grad_norm": 6.206420945255569, "learning_rate": 3.7931471637735357e-06, "loss": 0.9526, "step": 9610 }, { "epoch": 0.694348619213611, "grad_norm": 7.019100560225686, "learning_rate": 3.79289682289827e-06, "loss": 0.6897, "step": 9611 }, { "epoch": 0.6944208644138206, "grad_norm": 4.452476909391233, "learning_rate": 3.7926464643242496e-06, "loss": 0.7655, "step": 9612 }, { "epoch": 0.69449310961403, "grad_norm": 4.73261613561258, "learning_rate": 3.792396088054904e-06, "loss": 0.8012, "step": 9613 }, { "epoch": 0.6945653548142395, "grad_norm": 6.70389920545385, "learning_rate": 3.7921456940936586e-06, "loss": 0.8471, "step": 9614 }, { "epoch": 0.6946376000144491, "grad_norm": 7.6999849492706245, "learning_rate": 3.791895282443942e-06, "loss": 0.7722, "step": 9615 }, { "epoch": 0.6947098452146585, "grad_norm": 7.435684110649092, "learning_rate": 3.791644853109182e-06, "loss": 0.9324, "step": 9616 }, { "epoch": 0.694782090414868, "grad_norm": 5.804783446801251, "learning_rate": 3.791394406092807e-06, "loss": 0.7545, "step": 9617 }, { "epoch": 0.6948543356150776, "grad_norm": 5.77237031852932, "learning_rate": 3.7911439413982453e-06, "loss": 0.9211, "step": 9618 }, { "epoch": 0.6949265808152871, "grad_norm": 8.171931950387119, "learning_rate": 3.790893459028926e-06, "loss": 0.8393, "step": 9619 }, { "epoch": 0.6949988260154966, "grad_norm": 6.280260909668153, "learning_rate": 3.7906429589882778e-06, "loss": 0.8794, "step": 9620 }, { "epoch": 0.6950710712157061, "grad_norm": 5.955642127994116, "learning_rate": 3.790392441279728e-06, "loss": 0.8694, "step": 9621 }, { "epoch": 0.6951433164159156, "grad_norm": 7.706849596345033, "learning_rate": 3.790141905906709e-06, "loss": 0.8852, "step": 9622 }, { "epoch": 0.6952155616161251, "grad_norm": 6.6158035074043715, "learning_rate": 3.7898913528726484e-06, "loss": 0.9381, "step": 9623 }, { "epoch": 0.6952878068163346, "grad_norm": 5.8503778580142125, "learning_rate": 3.789640782180977e-06, "loss": 0.7794, "step": 9624 }, { "epoch": 0.6953600520165442, "grad_norm": 6.786931764762411, "learning_rate": 3.7893901938351245e-06, "loss": 0.7637, "step": 9625 }, { "epoch": 0.6954322972167537, "grad_norm": 8.406140181380717, "learning_rate": 3.789139587838522e-06, "loss": 0.8727, "step": 9626 }, { "epoch": 0.6955045424169631, "grad_norm": 9.05613453372194, "learning_rate": 3.7888889641945982e-06, "loss": 0.8504, "step": 9627 }, { "epoch": 0.6955767876171727, "grad_norm": 6.0277260086172575, "learning_rate": 3.7886383229067864e-06, "loss": 0.8325, "step": 9628 }, { "epoch": 0.6956490328173822, "grad_norm": 7.111799359129286, "learning_rate": 3.7883876639785164e-06, "loss": 0.8133, "step": 9629 }, { "epoch": 0.6957212780175918, "grad_norm": 6.978911923746015, "learning_rate": 3.788136987413219e-06, "loss": 0.8597, "step": 9630 }, { "epoch": 0.6957935232178012, "grad_norm": 5.893350066999841, "learning_rate": 3.7878862932143268e-06, "loss": 0.7851, "step": 9631 }, { "epoch": 0.6958657684180107, "grad_norm": 6.462856390624046, "learning_rate": 3.787635581385271e-06, "loss": 0.9467, "step": 9632 }, { "epoch": 0.6959380136182203, "grad_norm": 7.170986411370513, "learning_rate": 3.787384851929484e-06, "loss": 0.8109, "step": 9633 }, { "epoch": 0.6960102588184297, "grad_norm": 8.053611409689521, "learning_rate": 3.7871341048503984e-06, "loss": 0.9164, "step": 9634 }, { "epoch": 0.6960825040186392, "grad_norm": 6.410300263083674, "learning_rate": 3.786883340151446e-06, "loss": 0.8962, "step": 9635 }, { "epoch": 0.6961547492188488, "grad_norm": 6.699459896807768, "learning_rate": 3.7866325578360598e-06, "loss": 0.7915, "step": 9636 }, { "epoch": 0.6962269944190583, "grad_norm": 7.1255388558056225, "learning_rate": 3.7863817579076734e-06, "loss": 0.8219, "step": 9637 }, { "epoch": 0.6962992396192678, "grad_norm": 6.248749874974994, "learning_rate": 3.786130940369719e-06, "loss": 0.8549, "step": 9638 }, { "epoch": 0.6963714848194773, "grad_norm": 6.565992897180903, "learning_rate": 3.7858801052256307e-06, "loss": 0.8555, "step": 9639 }, { "epoch": 0.6964437300196868, "grad_norm": 6.746415776097305, "learning_rate": 3.7856292524788417e-06, "loss": 0.8237, "step": 9640 }, { "epoch": 0.6965159752198963, "grad_norm": 7.170988539220982, "learning_rate": 3.785378382132787e-06, "loss": 0.8567, "step": 9641 }, { "epoch": 0.6965882204201058, "grad_norm": 6.8715709110718395, "learning_rate": 3.7851274941909004e-06, "loss": 0.8407, "step": 9642 }, { "epoch": 0.6966604656203154, "grad_norm": 5.895290965372808, "learning_rate": 3.7848765886566163e-06, "loss": 0.8479, "step": 9643 }, { "epoch": 0.6967327108205249, "grad_norm": 7.614348178751472, "learning_rate": 3.784625665533369e-06, "loss": 0.8806, "step": 9644 }, { "epoch": 0.6968049560207343, "grad_norm": 5.503721711941394, "learning_rate": 3.7843747248245937e-06, "loss": 0.8729, "step": 9645 }, { "epoch": 0.6968772012209439, "grad_norm": 5.428230188873574, "learning_rate": 3.784123766533726e-06, "loss": 0.8064, "step": 9646 }, { "epoch": 0.6969494464211534, "grad_norm": 7.355290083049788, "learning_rate": 3.7838727906642014e-06, "loss": 0.8437, "step": 9647 }, { "epoch": 0.697021691621363, "grad_norm": 7.073122661064687, "learning_rate": 3.7836217972194546e-06, "loss": 0.903, "step": 9648 }, { "epoch": 0.6970939368215724, "grad_norm": 6.614967667435037, "learning_rate": 3.783370786202922e-06, "loss": 0.7687, "step": 9649 }, { "epoch": 0.6971661820217819, "grad_norm": 5.1975182920191845, "learning_rate": 3.783119757618039e-06, "loss": 0.8416, "step": 9650 }, { "epoch": 0.6972384272219915, "grad_norm": 8.513848187624328, "learning_rate": 3.7828687114682444e-06, "loss": 0.8522, "step": 9651 }, { "epoch": 0.6973106724222009, "grad_norm": 6.365870502215432, "learning_rate": 3.782617647756972e-06, "loss": 0.8569, "step": 9652 }, { "epoch": 0.6973829176224104, "grad_norm": 7.311740411937674, "learning_rate": 3.7823665664876606e-06, "loss": 0.9081, "step": 9653 }, { "epoch": 0.69745516282262, "grad_norm": 7.123629789608282, "learning_rate": 3.7821154676637465e-06, "loss": 0.7606, "step": 9654 }, { "epoch": 0.6975274080228295, "grad_norm": 6.728215704491207, "learning_rate": 3.7818643512886673e-06, "loss": 0.863, "step": 9655 }, { "epoch": 0.697599653223039, "grad_norm": 9.065762899271125, "learning_rate": 3.7816132173658605e-06, "loss": 0.8504, "step": 9656 }, { "epoch": 0.6976718984232485, "grad_norm": 8.723673862355142, "learning_rate": 3.781362065898763e-06, "loss": 0.9153, "step": 9657 }, { "epoch": 0.697744143623458, "grad_norm": 6.543289112959283, "learning_rate": 3.781110896890815e-06, "loss": 0.8191, "step": 9658 }, { "epoch": 0.6978163888236675, "grad_norm": 6.326891517515854, "learning_rate": 3.7808597103454525e-06, "loss": 0.7784, "step": 9659 }, { "epoch": 0.697888634023877, "grad_norm": 6.0554932679561055, "learning_rate": 3.7806085062661153e-06, "loss": 0.7813, "step": 9660 }, { "epoch": 0.6979608792240866, "grad_norm": 6.855694663300448, "learning_rate": 3.780357284656242e-06, "loss": 0.8677, "step": 9661 }, { "epoch": 0.6980331244242961, "grad_norm": 6.941718116499369, "learning_rate": 3.7801060455192717e-06, "loss": 0.8548, "step": 9662 }, { "epoch": 0.6981053696245055, "grad_norm": 8.630574069947809, "learning_rate": 3.779854788858644e-06, "loss": 0.8474, "step": 9663 }, { "epoch": 0.6981776148247151, "grad_norm": 5.332970169736451, "learning_rate": 3.7796035146777966e-06, "loss": 0.8754, "step": 9664 }, { "epoch": 0.6982498600249246, "grad_norm": 7.5389448561902705, "learning_rate": 3.779352222980172e-06, "loss": 0.8698, "step": 9665 }, { "epoch": 0.6983221052251342, "grad_norm": 8.019292932862998, "learning_rate": 3.779100913769208e-06, "loss": 0.7918, "step": 9666 }, { "epoch": 0.6983943504253436, "grad_norm": 5.650605646941405, "learning_rate": 3.7788495870483467e-06, "loss": 0.8365, "step": 9667 }, { "epoch": 0.6984665956255531, "grad_norm": 5.295357365782564, "learning_rate": 3.778598242821027e-06, "loss": 0.8199, "step": 9668 }, { "epoch": 0.6985388408257627, "grad_norm": 5.613157054612937, "learning_rate": 3.7783468810906897e-06, "loss": 0.8293, "step": 9669 }, { "epoch": 0.6986110860259721, "grad_norm": 7.081884737177249, "learning_rate": 3.778095501860777e-06, "loss": 0.9037, "step": 9670 }, { "epoch": 0.6986833312261816, "grad_norm": 7.586261589128181, "learning_rate": 3.777844105134728e-06, "loss": 0.8255, "step": 9671 }, { "epoch": 0.6987555764263912, "grad_norm": 5.505178614463698, "learning_rate": 3.7775926909159865e-06, "loss": 0.7853, "step": 9672 }, { "epoch": 0.6988278216266007, "grad_norm": 8.871894830491442, "learning_rate": 3.777341259207993e-06, "loss": 0.8955, "step": 9673 }, { "epoch": 0.6989000668268102, "grad_norm": 5.933577677521023, "learning_rate": 3.7770898100141885e-06, "loss": 0.7451, "step": 9674 }, { "epoch": 0.6989723120270197, "grad_norm": 5.62740240716557, "learning_rate": 3.776838343338017e-06, "loss": 0.8039, "step": 9675 }, { "epoch": 0.6990445572272292, "grad_norm": 6.176606115355588, "learning_rate": 3.7765868591829196e-06, "loss": 0.8627, "step": 9676 }, { "epoch": 0.6991168024274387, "grad_norm": 5.121756480942182, "learning_rate": 3.77633535755234e-06, "loss": 0.7731, "step": 9677 }, { "epoch": 0.6991890476276482, "grad_norm": 5.351458427363954, "learning_rate": 3.7760838384497188e-06, "loss": 0.8901, "step": 9678 }, { "epoch": 0.6992612928278578, "grad_norm": 4.861529004738737, "learning_rate": 3.775832301878502e-06, "loss": 0.7641, "step": 9679 }, { "epoch": 0.6993335380280673, "grad_norm": 8.486919885510078, "learning_rate": 3.7755807478421312e-06, "loss": 0.8535, "step": 9680 }, { "epoch": 0.6994057832282767, "grad_norm": 6.027846250521149, "learning_rate": 3.775329176344051e-06, "loss": 0.8725, "step": 9681 }, { "epoch": 0.6994780284284863, "grad_norm": 8.211099639366074, "learning_rate": 3.7750775873877033e-06, "loss": 0.8637, "step": 9682 }, { "epoch": 0.6995502736286958, "grad_norm": 5.944210396473936, "learning_rate": 3.774825980976534e-06, "loss": 0.809, "step": 9683 }, { "epoch": 0.6996225188289054, "grad_norm": 6.315960832412775, "learning_rate": 3.7745743571139872e-06, "loss": 0.8766, "step": 9684 }, { "epoch": 0.6996947640291148, "grad_norm": 4.909990573415622, "learning_rate": 3.7743227158035072e-06, "loss": 0.8338, "step": 9685 }, { "epoch": 0.6997670092293243, "grad_norm": 5.56159194220869, "learning_rate": 3.7740710570485383e-06, "loss": 0.8929, "step": 9686 }, { "epoch": 0.6998392544295339, "grad_norm": 5.216572198631975, "learning_rate": 3.7738193808525257e-06, "loss": 0.8164, "step": 9687 }, { "epoch": 0.6999114996297433, "grad_norm": 6.634548575618848, "learning_rate": 3.773567687218915e-06, "loss": 0.8654, "step": 9688 }, { "epoch": 0.6999837448299528, "grad_norm": 5.930042281361932, "learning_rate": 3.7733159761511516e-06, "loss": 0.9486, "step": 9689 }, { "epoch": 0.7000559900301624, "grad_norm": 5.8256536157987915, "learning_rate": 3.773064247652682e-06, "loss": 0.8456, "step": 9690 }, { "epoch": 0.7001282352303719, "grad_norm": 5.1862598453482525, "learning_rate": 3.77281250172695e-06, "loss": 0.8684, "step": 9691 }, { "epoch": 0.7002004804305814, "grad_norm": 6.086578687942161, "learning_rate": 3.772560738377404e-06, "loss": 0.8835, "step": 9692 }, { "epoch": 0.7002727256307909, "grad_norm": 5.640497179775786, "learning_rate": 3.772308957607489e-06, "loss": 0.826, "step": 9693 }, { "epoch": 0.7003449708310004, "grad_norm": 7.512311875309362, "learning_rate": 3.7720571594206522e-06, "loss": 0.8953, "step": 9694 }, { "epoch": 0.7004172160312099, "grad_norm": 7.323029070046233, "learning_rate": 3.7718053438203417e-06, "loss": 0.7829, "step": 9695 }, { "epoch": 0.7004894612314194, "grad_norm": 6.169399412035552, "learning_rate": 3.771553510810002e-06, "loss": 0.9676, "step": 9696 }, { "epoch": 0.700561706431629, "grad_norm": 5.039813413042317, "learning_rate": 3.771301660393083e-06, "loss": 0.8196, "step": 9697 }, { "epoch": 0.7006339516318385, "grad_norm": 6.598643724207531, "learning_rate": 3.7710497925730307e-06, "loss": 0.879, "step": 9698 }, { "epoch": 0.7007061968320479, "grad_norm": 7.512607404858266, "learning_rate": 3.770797907353294e-06, "loss": 0.8299, "step": 9699 }, { "epoch": 0.7007784420322575, "grad_norm": 6.783446909760484, "learning_rate": 3.770546004737321e-06, "loss": 0.8047, "step": 9700 }, { "epoch": 0.700850687232467, "grad_norm": 6.538204888877781, "learning_rate": 3.7702940847285596e-06, "loss": 0.8632, "step": 9701 }, { "epoch": 0.7009229324326764, "grad_norm": 8.053343785681268, "learning_rate": 3.770042147330458e-06, "loss": 0.8416, "step": 9702 }, { "epoch": 0.700995177632886, "grad_norm": 6.694981178445263, "learning_rate": 3.7697901925464657e-06, "loss": 0.8833, "step": 9703 }, { "epoch": 0.7010674228330955, "grad_norm": 5.287501861355501, "learning_rate": 3.7695382203800326e-06, "loss": 0.8059, "step": 9704 }, { "epoch": 0.7011396680333051, "grad_norm": 5.315927275103513, "learning_rate": 3.7692862308346056e-06, "loss": 0.8305, "step": 9705 }, { "epoch": 0.7012119132335145, "grad_norm": 5.678435091779489, "learning_rate": 3.769034223913637e-06, "loss": 0.8489, "step": 9706 }, { "epoch": 0.701284158433724, "grad_norm": 5.4623771810800275, "learning_rate": 3.7687821996205733e-06, "loss": 0.8514, "step": 9707 }, { "epoch": 0.7013564036339336, "grad_norm": 5.430602159920752, "learning_rate": 3.768530157958867e-06, "loss": 0.838, "step": 9708 }, { "epoch": 0.7014286488341431, "grad_norm": 5.811033330477457, "learning_rate": 3.768278098931969e-06, "loss": 0.8014, "step": 9709 }, { "epoch": 0.7015008940343526, "grad_norm": 5.62368199483907, "learning_rate": 3.768026022543328e-06, "loss": 0.8033, "step": 9710 }, { "epoch": 0.7015731392345621, "grad_norm": 5.672284240250771, "learning_rate": 3.767773928796395e-06, "loss": 0.817, "step": 9711 }, { "epoch": 0.7016453844347716, "grad_norm": 7.284838402080968, "learning_rate": 3.7675218176946214e-06, "loss": 0.8927, "step": 9712 }, { "epoch": 0.7017176296349811, "grad_norm": 8.070507242931086, "learning_rate": 3.7672696892414586e-06, "loss": 0.874, "step": 9713 }, { "epoch": 0.7017898748351906, "grad_norm": 7.517852897112126, "learning_rate": 3.767017543440357e-06, "loss": 0.8328, "step": 9714 }, { "epoch": 0.7018621200354002, "grad_norm": 5.276787853540213, "learning_rate": 3.7667653802947703e-06, "loss": 0.7872, "step": 9715 }, { "epoch": 0.7019343652356097, "grad_norm": 6.646397135226753, "learning_rate": 3.7665131998081478e-06, "loss": 0.906, "step": 9716 }, { "epoch": 0.7020066104358191, "grad_norm": 6.051583440711412, "learning_rate": 3.7662610019839437e-06, "loss": 0.8083, "step": 9717 }, { "epoch": 0.7020788556360287, "grad_norm": 6.24468829462069, "learning_rate": 3.766008786825609e-06, "loss": 0.9174, "step": 9718 }, { "epoch": 0.7021511008362382, "grad_norm": 9.185150053556429, "learning_rate": 3.765756554336598e-06, "loss": 0.814, "step": 9719 }, { "epoch": 0.7022233460364476, "grad_norm": 7.553893938171914, "learning_rate": 3.765504304520362e-06, "loss": 0.9073, "step": 9720 }, { "epoch": 0.7022955912366572, "grad_norm": 6.036740664796187, "learning_rate": 3.7652520373803544e-06, "loss": 0.9424, "step": 9721 }, { "epoch": 0.7023678364368667, "grad_norm": 9.9162400952656, "learning_rate": 3.764999752920029e-06, "loss": 0.78, "step": 9722 }, { "epoch": 0.7024400816370763, "grad_norm": 7.9695991811632805, "learning_rate": 3.7647474511428394e-06, "loss": 0.9082, "step": 9723 }, { "epoch": 0.7025123268372857, "grad_norm": 7.099633016638766, "learning_rate": 3.7644951320522393e-06, "loss": 0.9241, "step": 9724 }, { "epoch": 0.7025845720374952, "grad_norm": 5.650585900369629, "learning_rate": 3.7642427956516824e-06, "loss": 0.8186, "step": 9725 }, { "epoch": 0.7026568172377048, "grad_norm": 9.367770650113345, "learning_rate": 3.763990441944623e-06, "loss": 0.7936, "step": 9726 }, { "epoch": 0.7027290624379143, "grad_norm": 6.249279438443752, "learning_rate": 3.763738070934516e-06, "loss": 0.7884, "step": 9727 }, { "epoch": 0.7028013076381238, "grad_norm": 5.699489824987559, "learning_rate": 3.763485682624817e-06, "loss": 0.8117, "step": 9728 }, { "epoch": 0.7028735528383333, "grad_norm": 6.40202676152431, "learning_rate": 3.7632332770189796e-06, "loss": 0.7741, "step": 9729 }, { "epoch": 0.7029457980385428, "grad_norm": 8.099768797789029, "learning_rate": 3.7629808541204583e-06, "loss": 0.9076, "step": 9730 }, { "epoch": 0.7030180432387523, "grad_norm": 6.557777085843738, "learning_rate": 3.7627284139327104e-06, "loss": 0.8865, "step": 9731 }, { "epoch": 0.7030902884389618, "grad_norm": 6.71206801177021, "learning_rate": 3.762475956459191e-06, "loss": 0.864, "step": 9732 }, { "epoch": 0.7031625336391714, "grad_norm": 6.935639002925848, "learning_rate": 3.7622234817033564e-06, "loss": 0.8222, "step": 9733 }, { "epoch": 0.7032347788393809, "grad_norm": 5.927527475281312, "learning_rate": 3.7619709896686616e-06, "loss": 0.8074, "step": 9734 }, { "epoch": 0.7033070240395903, "grad_norm": 5.1826193036610055, "learning_rate": 3.7617184803585648e-06, "loss": 0.8115, "step": 9735 }, { "epoch": 0.7033792692397999, "grad_norm": 6.322518735762078, "learning_rate": 3.761465953776522e-06, "loss": 0.9187, "step": 9736 }, { "epoch": 0.7034515144400094, "grad_norm": 6.945498692491587, "learning_rate": 3.761213409925988e-06, "loss": 0.8136, "step": 9737 }, { "epoch": 0.7035237596402188, "grad_norm": 5.5843430170766775, "learning_rate": 3.7609608488104233e-06, "loss": 0.8429, "step": 9738 }, { "epoch": 0.7035960048404284, "grad_norm": 7.030911450182864, "learning_rate": 3.760708270433283e-06, "loss": 0.9103, "step": 9739 }, { "epoch": 0.7036682500406379, "grad_norm": 6.8251225834272455, "learning_rate": 3.7604556747980246e-06, "loss": 0.8246, "step": 9740 }, { "epoch": 0.7037404952408475, "grad_norm": 5.270340887695005, "learning_rate": 3.7602030619081074e-06, "loss": 0.732, "step": 9741 }, { "epoch": 0.7038127404410569, "grad_norm": 5.502730298766858, "learning_rate": 3.7599504317669895e-06, "loss": 0.8521, "step": 9742 }, { "epoch": 0.7038849856412664, "grad_norm": 6.149716390877909, "learning_rate": 3.759697784378128e-06, "loss": 0.8095, "step": 9743 }, { "epoch": 0.703957230841476, "grad_norm": 7.698813696299077, "learning_rate": 3.7594451197449814e-06, "loss": 0.7384, "step": 9744 }, { "epoch": 0.7040294760416855, "grad_norm": 6.345096539183051, "learning_rate": 3.7591924378710094e-06, "loss": 0.8493, "step": 9745 }, { "epoch": 0.704101721241895, "grad_norm": 5.488069251910869, "learning_rate": 3.7589397387596705e-06, "loss": 0.8142, "step": 9746 }, { "epoch": 0.7041739664421045, "grad_norm": 6.240696811406122, "learning_rate": 3.7586870224144247e-06, "loss": 0.841, "step": 9747 }, { "epoch": 0.704246211642314, "grad_norm": 7.664623928213145, "learning_rate": 3.7584342888387297e-06, "loss": 0.9662, "step": 9748 }, { "epoch": 0.7043184568425235, "grad_norm": 6.064445891199389, "learning_rate": 3.7581815380360464e-06, "loss": 0.792, "step": 9749 }, { "epoch": 0.704390702042733, "grad_norm": 6.824437313149948, "learning_rate": 3.7579287700098353e-06, "loss": 0.8507, "step": 9750 }, { "epoch": 0.7044629472429426, "grad_norm": 6.8406102890895095, "learning_rate": 3.7576759847635567e-06, "loss": 0.8144, "step": 9751 }, { "epoch": 0.7045351924431521, "grad_norm": 8.192429318115856, "learning_rate": 3.7574231823006703e-06, "loss": 0.8036, "step": 9752 }, { "epoch": 0.7046074376433615, "grad_norm": 9.386322508412631, "learning_rate": 3.757170362624636e-06, "loss": 0.8868, "step": 9753 }, { "epoch": 0.7046796828435711, "grad_norm": 5.319897516453798, "learning_rate": 3.7569175257389155e-06, "loss": 0.8692, "step": 9754 }, { "epoch": 0.7047519280437806, "grad_norm": 5.855453114555509, "learning_rate": 3.7566646716469708e-06, "loss": 0.8183, "step": 9755 }, { "epoch": 0.70482417324399, "grad_norm": 6.265960151669851, "learning_rate": 3.756411800352262e-06, "loss": 0.8441, "step": 9756 }, { "epoch": 0.7048964184441996, "grad_norm": 5.358508337474451, "learning_rate": 3.7561589118582513e-06, "loss": 0.8148, "step": 9757 }, { "epoch": 0.7049686636444091, "grad_norm": 6.172200718264351, "learning_rate": 3.7559060061684006e-06, "loss": 0.7529, "step": 9758 }, { "epoch": 0.7050409088446187, "grad_norm": 5.639058157014641, "learning_rate": 3.7556530832861714e-06, "loss": 0.909, "step": 9759 }, { "epoch": 0.7051131540448281, "grad_norm": 8.382656867439259, "learning_rate": 3.755400143215027e-06, "loss": 0.9029, "step": 9760 }, { "epoch": 0.7051853992450376, "grad_norm": 5.2800600415486185, "learning_rate": 3.7551471859584294e-06, "loss": 0.8187, "step": 9761 }, { "epoch": 0.7052576444452472, "grad_norm": 6.108962881720135, "learning_rate": 3.7548942115198407e-06, "loss": 0.8424, "step": 9762 }, { "epoch": 0.7053298896454567, "grad_norm": 5.529776511617139, "learning_rate": 3.754641219902725e-06, "loss": 0.801, "step": 9763 }, { "epoch": 0.7054021348456662, "grad_norm": 5.5758493092998425, "learning_rate": 3.754388211110545e-06, "loss": 0.8345, "step": 9764 }, { "epoch": 0.7054743800458757, "grad_norm": 6.633315715414961, "learning_rate": 3.7541351851467652e-06, "loss": 0.8419, "step": 9765 }, { "epoch": 0.7055466252460852, "grad_norm": 7.304567909281739, "learning_rate": 3.7538821420148476e-06, "loss": 0.7779, "step": 9766 }, { "epoch": 0.7056188704462947, "grad_norm": 7.064622678190611, "learning_rate": 3.7536290817182576e-06, "loss": 0.8195, "step": 9767 }, { "epoch": 0.7056911156465042, "grad_norm": 5.585140486190861, "learning_rate": 3.7533760042604585e-06, "loss": 0.8694, "step": 9768 }, { "epoch": 0.7057633608467138, "grad_norm": 7.333383328816467, "learning_rate": 3.7531229096449145e-06, "loss": 0.7572, "step": 9769 }, { "epoch": 0.7058356060469233, "grad_norm": 7.877338122874986, "learning_rate": 3.7528697978750915e-06, "loss": 0.8527, "step": 9770 }, { "epoch": 0.7059078512471327, "grad_norm": 4.597262953122896, "learning_rate": 3.7526166689544543e-06, "loss": 0.84, "step": 9771 }, { "epoch": 0.7059800964473423, "grad_norm": 6.5151674398129495, "learning_rate": 3.752363522886467e-06, "loss": 0.8182, "step": 9772 }, { "epoch": 0.7060523416475518, "grad_norm": 6.1418440290675385, "learning_rate": 3.7521103596745944e-06, "loss": 0.7935, "step": 9773 }, { "epoch": 0.7061245868477612, "grad_norm": 7.582464426864657, "learning_rate": 3.7518571793223047e-06, "loss": 0.8963, "step": 9774 }, { "epoch": 0.7061968320479708, "grad_norm": 7.594994964899299, "learning_rate": 3.7516039818330617e-06, "loss": 0.9011, "step": 9775 }, { "epoch": 0.7062690772481803, "grad_norm": 5.962108492043602, "learning_rate": 3.7513507672103323e-06, "loss": 0.7514, "step": 9776 }, { "epoch": 0.7063413224483899, "grad_norm": 5.931823908511765, "learning_rate": 3.7510975354575816e-06, "loss": 0.7922, "step": 9777 }, { "epoch": 0.7064135676485993, "grad_norm": 6.890160508189434, "learning_rate": 3.750844286578278e-06, "loss": 0.8198, "step": 9778 }, { "epoch": 0.7064858128488088, "grad_norm": 7.338614498415056, "learning_rate": 3.7505910205758864e-06, "loss": 0.8703, "step": 9779 }, { "epoch": 0.7065580580490184, "grad_norm": 5.095993203342762, "learning_rate": 3.7503377374538757e-06, "loss": 0.7942, "step": 9780 }, { "epoch": 0.7066303032492279, "grad_norm": 5.267585909975245, "learning_rate": 3.750084437215712e-06, "loss": 0.8037, "step": 9781 }, { "epoch": 0.7067025484494374, "grad_norm": 7.097653562705875, "learning_rate": 3.749831119864863e-06, "loss": 0.8234, "step": 9782 }, { "epoch": 0.7067747936496469, "grad_norm": 6.547629811331749, "learning_rate": 3.7495777854047956e-06, "loss": 0.8825, "step": 9783 }, { "epoch": 0.7068470388498564, "grad_norm": 6.180478514382851, "learning_rate": 3.7493244338389788e-06, "loss": 0.9105, "step": 9784 }, { "epoch": 0.7069192840500659, "grad_norm": 7.899296654403944, "learning_rate": 3.749071065170882e-06, "loss": 0.9183, "step": 9785 }, { "epoch": 0.7069915292502754, "grad_norm": 6.178644181444537, "learning_rate": 3.748817679403971e-06, "loss": 0.9109, "step": 9786 }, { "epoch": 0.707063774450485, "grad_norm": 5.459296027552918, "learning_rate": 3.7485642765417153e-06, "loss": 0.859, "step": 9787 }, { "epoch": 0.7071360196506945, "grad_norm": 7.256922606740053, "learning_rate": 3.748310856587585e-06, "loss": 0.8897, "step": 9788 }, { "epoch": 0.7072082648509039, "grad_norm": 5.70058637581049, "learning_rate": 3.748057419545047e-06, "loss": 0.8319, "step": 9789 }, { "epoch": 0.7072805100511135, "grad_norm": 6.315839733793816, "learning_rate": 3.747803965417573e-06, "loss": 0.8278, "step": 9790 }, { "epoch": 0.707352755251323, "grad_norm": 7.226919068483617, "learning_rate": 3.7475504942086315e-06, "loss": 0.8467, "step": 9791 }, { "epoch": 0.7074250004515324, "grad_norm": 5.477892922220146, "learning_rate": 3.747297005921692e-06, "loss": 0.9096, "step": 9792 }, { "epoch": 0.707497245651742, "grad_norm": 5.8790282379370264, "learning_rate": 3.7470435005602256e-06, "loss": 0.8307, "step": 9793 }, { "epoch": 0.7075694908519515, "grad_norm": 6.214250786844451, "learning_rate": 3.7467899781277014e-06, "loss": 0.8411, "step": 9794 }, { "epoch": 0.7076417360521611, "grad_norm": 7.334565088111106, "learning_rate": 3.7465364386275903e-06, "loss": 0.8999, "step": 9795 }, { "epoch": 0.7077139812523705, "grad_norm": 6.708191706266121, "learning_rate": 3.746282882063364e-06, "loss": 0.8489, "step": 9796 }, { "epoch": 0.70778622645258, "grad_norm": 5.8057536690151785, "learning_rate": 3.746029308438492e-06, "loss": 0.8511, "step": 9797 }, { "epoch": 0.7078584716527896, "grad_norm": 7.567115069083462, "learning_rate": 3.7457757177564463e-06, "loss": 1.0173, "step": 9798 }, { "epoch": 0.7079307168529991, "grad_norm": 6.440595752810686, "learning_rate": 3.7455221100206984e-06, "loss": 0.8684, "step": 9799 }, { "epoch": 0.7080029620532086, "grad_norm": 5.805650181990345, "learning_rate": 3.74526848523472e-06, "loss": 0.8771, "step": 9800 }, { "epoch": 0.7080752072534181, "grad_norm": 6.5410751107701195, "learning_rate": 3.7450148434019835e-06, "loss": 0.8533, "step": 9801 }, { "epoch": 0.7081474524536276, "grad_norm": 6.042972221940664, "learning_rate": 3.7447611845259595e-06, "loss": 0.809, "step": 9802 }, { "epoch": 0.7082196976538371, "grad_norm": 6.311024106161748, "learning_rate": 3.7445075086101217e-06, "loss": 0.8835, "step": 9803 }, { "epoch": 0.7082919428540466, "grad_norm": 7.108186928544635, "learning_rate": 3.7442538156579427e-06, "loss": 0.7591, "step": 9804 }, { "epoch": 0.7083641880542562, "grad_norm": 5.195370689582491, "learning_rate": 3.7440001056728948e-06, "loss": 0.8294, "step": 9805 }, { "epoch": 0.7084364332544657, "grad_norm": 11.095199963012757, "learning_rate": 3.743746378658452e-06, "loss": 0.8644, "step": 9806 }, { "epoch": 0.7085086784546751, "grad_norm": 9.220430692927849, "learning_rate": 3.7434926346180854e-06, "loss": 0.8529, "step": 9807 }, { "epoch": 0.7085809236548847, "grad_norm": 6.2129744350485145, "learning_rate": 3.7432388735552715e-06, "loss": 0.8179, "step": 9808 }, { "epoch": 0.7086531688550942, "grad_norm": 8.603327404652262, "learning_rate": 3.7429850954734823e-06, "loss": 0.9815, "step": 9809 }, { "epoch": 0.7087254140553036, "grad_norm": 5.690942749336062, "learning_rate": 3.742731300376193e-06, "loss": 0.8186, "step": 9810 }, { "epoch": 0.7087976592555132, "grad_norm": 7.870477013291383, "learning_rate": 3.7424774882668758e-06, "loss": 0.8753, "step": 9811 }, { "epoch": 0.7088699044557227, "grad_norm": 5.2543623829065425, "learning_rate": 3.742223659149007e-06, "loss": 0.8327, "step": 9812 }, { "epoch": 0.7089421496559323, "grad_norm": 5.96011894875436, "learning_rate": 3.741969813026062e-06, "loss": 0.9072, "step": 9813 }, { "epoch": 0.7090143948561417, "grad_norm": 7.411382643062276, "learning_rate": 3.741715949901513e-06, "loss": 0.7757, "step": 9814 }, { "epoch": 0.7090866400563512, "grad_norm": 9.05891379070266, "learning_rate": 3.7414620697788375e-06, "loss": 0.8879, "step": 9815 }, { "epoch": 0.7091588852565608, "grad_norm": 6.797198548782599, "learning_rate": 3.7412081726615097e-06, "loss": 0.8066, "step": 9816 }, { "epoch": 0.7092311304567703, "grad_norm": 6.216481777574673, "learning_rate": 3.7409542585530064e-06, "loss": 0.833, "step": 9817 }, { "epoch": 0.7093033756569798, "grad_norm": 5.530092969668216, "learning_rate": 3.740700327456803e-06, "loss": 0.8217, "step": 9818 }, { "epoch": 0.7093756208571893, "grad_norm": 6.632802722865291, "learning_rate": 3.7404463793763744e-06, "loss": 0.7189, "step": 9819 }, { "epoch": 0.7094478660573988, "grad_norm": 8.39639373751673, "learning_rate": 3.7401924143151983e-06, "loss": 0.9204, "step": 9820 }, { "epoch": 0.7095201112576083, "grad_norm": 7.149433204219868, "learning_rate": 3.739938432276751e-06, "loss": 0.8951, "step": 9821 }, { "epoch": 0.7095923564578178, "grad_norm": 6.347293559349879, "learning_rate": 3.7396844332645103e-06, "loss": 0.7775, "step": 9822 }, { "epoch": 0.7096646016580274, "grad_norm": 7.0682007054241325, "learning_rate": 3.7394304172819517e-06, "loss": 0.8068, "step": 9823 }, { "epoch": 0.7097368468582369, "grad_norm": 7.102903669595328, "learning_rate": 3.7391763843325528e-06, "loss": 0.7981, "step": 9824 }, { "epoch": 0.7098090920584463, "grad_norm": 6.513042862244917, "learning_rate": 3.738922334419792e-06, "loss": 0.7864, "step": 9825 }, { "epoch": 0.7098813372586559, "grad_norm": 6.393046845881298, "learning_rate": 3.7386682675471452e-06, "loss": 0.9183, "step": 9826 }, { "epoch": 0.7099535824588654, "grad_norm": 7.145199702190253, "learning_rate": 3.7384141837180925e-06, "loss": 0.8619, "step": 9827 }, { "epoch": 0.7100258276590748, "grad_norm": 5.325826288866328, "learning_rate": 3.738160082936111e-06, "loss": 0.7852, "step": 9828 }, { "epoch": 0.7100980728592844, "grad_norm": 8.948253218183886, "learning_rate": 3.7379059652046793e-06, "loss": 0.8121, "step": 9829 }, { "epoch": 0.7101703180594939, "grad_norm": 9.527070676949645, "learning_rate": 3.7376518305272757e-06, "loss": 0.8391, "step": 9830 }, { "epoch": 0.7102425632597035, "grad_norm": 6.259850635635706, "learning_rate": 3.73739767890738e-06, "loss": 0.7758, "step": 9831 }, { "epoch": 0.7103148084599129, "grad_norm": 5.848803288575131, "learning_rate": 3.737143510348471e-06, "loss": 0.8559, "step": 9832 }, { "epoch": 0.7103870536601224, "grad_norm": 7.453069722672372, "learning_rate": 3.7368893248540277e-06, "loss": 0.8463, "step": 9833 }, { "epoch": 0.710459298860332, "grad_norm": 7.11652633326133, "learning_rate": 3.73663512242753e-06, "loss": 0.858, "step": 9834 }, { "epoch": 0.7105315440605415, "grad_norm": 5.535400040668199, "learning_rate": 3.7363809030724575e-06, "loss": 0.8246, "step": 9835 }, { "epoch": 0.710603789260751, "grad_norm": 5.236683077593236, "learning_rate": 3.7361266667922905e-06, "loss": 0.8301, "step": 9836 }, { "epoch": 0.7106760344609605, "grad_norm": 7.590347092724885, "learning_rate": 3.735872413590509e-06, "loss": 0.7653, "step": 9837 }, { "epoch": 0.71074827966117, "grad_norm": 6.773529580094274, "learning_rate": 3.7356181434705947e-06, "loss": 0.8275, "step": 9838 }, { "epoch": 0.7108205248613795, "grad_norm": 6.5020701706384445, "learning_rate": 3.7353638564360263e-06, "loss": 0.8094, "step": 9839 }, { "epoch": 0.710892770061589, "grad_norm": 6.197014785244195, "learning_rate": 3.735109552490286e-06, "loss": 0.864, "step": 9840 }, { "epoch": 0.7109650152617986, "grad_norm": 5.510733101982808, "learning_rate": 3.734855231636855e-06, "loss": 0.8499, "step": 9841 }, { "epoch": 0.7110372604620081, "grad_norm": 7.093663152087446, "learning_rate": 3.7346008938792155e-06, "loss": 0.8083, "step": 9842 }, { "epoch": 0.7111095056622175, "grad_norm": 6.187489210947822, "learning_rate": 3.7343465392208477e-06, "loss": 0.7922, "step": 9843 }, { "epoch": 0.7111817508624271, "grad_norm": 6.177668303076962, "learning_rate": 3.7340921676652334e-06, "loss": 0.7477, "step": 9844 }, { "epoch": 0.7112539960626366, "grad_norm": 5.145689906954195, "learning_rate": 3.733837779215857e-06, "loss": 0.753, "step": 9845 }, { "epoch": 0.711326241262846, "grad_norm": 7.358420923675829, "learning_rate": 3.733583373876199e-06, "loss": 0.876, "step": 9846 }, { "epoch": 0.7113984864630556, "grad_norm": 6.299078241158179, "learning_rate": 3.7333289516497424e-06, "loss": 0.7836, "step": 9847 }, { "epoch": 0.7114707316632651, "grad_norm": 8.027120872057983, "learning_rate": 3.7330745125399697e-06, "loss": 0.9263, "step": 9848 }, { "epoch": 0.7115429768634747, "grad_norm": 9.980691579581961, "learning_rate": 3.7328200565503643e-06, "loss": 0.9146, "step": 9849 }, { "epoch": 0.7116152220636841, "grad_norm": 5.834963797724194, "learning_rate": 3.73256558368441e-06, "loss": 0.8133, "step": 9850 }, { "epoch": 0.7116874672638936, "grad_norm": 5.656755287826453, "learning_rate": 3.7323110939455896e-06, "loss": 0.8168, "step": 9851 }, { "epoch": 0.7117597124641032, "grad_norm": 8.361040010146437, "learning_rate": 3.7320565873373876e-06, "loss": 0.8451, "step": 9852 }, { "epoch": 0.7118319576643127, "grad_norm": 10.939978792773235, "learning_rate": 3.7318020638632866e-06, "loss": 0.9182, "step": 9853 }, { "epoch": 0.7119042028645222, "grad_norm": 5.395784866331354, "learning_rate": 3.7315475235267726e-06, "loss": 0.767, "step": 9854 }, { "epoch": 0.7119764480647317, "grad_norm": 6.048865333272709, "learning_rate": 3.731292966331329e-06, "loss": 0.7851, "step": 9855 }, { "epoch": 0.7120486932649412, "grad_norm": 5.574293001104104, "learning_rate": 3.7310383922804406e-06, "loss": 0.7369, "step": 9856 }, { "epoch": 0.7121209384651507, "grad_norm": 6.857187674012638, "learning_rate": 3.730783801377593e-06, "loss": 0.9011, "step": 9857 }, { "epoch": 0.7121931836653602, "grad_norm": 7.529831368836572, "learning_rate": 3.73052919362627e-06, "loss": 0.8732, "step": 9858 }, { "epoch": 0.7122654288655698, "grad_norm": 7.337848515982451, "learning_rate": 3.730274569029958e-06, "loss": 0.8329, "step": 9859 }, { "epoch": 0.7123376740657793, "grad_norm": 7.78494824381868, "learning_rate": 3.7300199275921428e-06, "loss": 1.0011, "step": 9860 }, { "epoch": 0.7124099192659887, "grad_norm": 5.5348056205657565, "learning_rate": 3.72976526931631e-06, "loss": 0.8472, "step": 9861 }, { "epoch": 0.7124821644661983, "grad_norm": 6.646305302649389, "learning_rate": 3.729510594205945e-06, "loss": 0.8067, "step": 9862 }, { "epoch": 0.7125544096664078, "grad_norm": 5.816687275468374, "learning_rate": 3.7292559022645343e-06, "loss": 0.8431, "step": 9863 }, { "epoch": 0.7126266548666172, "grad_norm": 7.722084715035922, "learning_rate": 3.729001193495565e-06, "loss": 0.9323, "step": 9864 }, { "epoch": 0.7126989000668268, "grad_norm": 5.273874222367698, "learning_rate": 3.7287464679025243e-06, "loss": 0.8203, "step": 9865 }, { "epoch": 0.7127711452670363, "grad_norm": 9.454726507547404, "learning_rate": 3.7284917254888976e-06, "loss": 0.8792, "step": 9866 }, { "epoch": 0.7128433904672459, "grad_norm": 8.621976903110646, "learning_rate": 3.728236966258174e-06, "loss": 0.9025, "step": 9867 }, { "epoch": 0.7129156356674553, "grad_norm": 6.212071191737176, "learning_rate": 3.727982190213839e-06, "loss": 0.8834, "step": 9868 }, { "epoch": 0.7129878808676648, "grad_norm": 5.5292673814230175, "learning_rate": 3.7277273973593818e-06, "loss": 0.8779, "step": 9869 }, { "epoch": 0.7130601260678744, "grad_norm": 8.918025213356076, "learning_rate": 3.72747258769829e-06, "loss": 0.9161, "step": 9870 }, { "epoch": 0.7131323712680839, "grad_norm": 7.364344771503723, "learning_rate": 3.727217761234051e-06, "loss": 0.8414, "step": 9871 }, { "epoch": 0.7132046164682934, "grad_norm": 6.23987583330302, "learning_rate": 3.726962917970154e-06, "loss": 0.7934, "step": 9872 }, { "epoch": 0.7132768616685029, "grad_norm": 7.803406817473018, "learning_rate": 3.7267080579100867e-06, "loss": 0.8414, "step": 9873 }, { "epoch": 0.7133491068687124, "grad_norm": 6.250646634029623, "learning_rate": 3.72645318105734e-06, "loss": 0.8298, "step": 9874 }, { "epoch": 0.7134213520689219, "grad_norm": 6.63634254148178, "learning_rate": 3.7261982874154013e-06, "loss": 0.8082, "step": 9875 }, { "epoch": 0.7134935972691314, "grad_norm": 6.593085522187685, "learning_rate": 3.725943376987759e-06, "loss": 0.8462, "step": 9876 }, { "epoch": 0.713565842469341, "grad_norm": 7.933183352337613, "learning_rate": 3.7256884497779046e-06, "loss": 0.8567, "step": 9877 }, { "epoch": 0.7136380876695505, "grad_norm": 7.052135052249497, "learning_rate": 3.725433505789326e-06, "loss": 0.9179, "step": 9878 }, { "epoch": 0.7137103328697599, "grad_norm": 5.541611616200613, "learning_rate": 3.725178545025515e-06, "loss": 0.8187, "step": 9879 }, { "epoch": 0.7137825780699695, "grad_norm": 6.378303120886017, "learning_rate": 3.724923567489961e-06, "loss": 0.8425, "step": 9880 }, { "epoch": 0.713854823270179, "grad_norm": 5.594486806069973, "learning_rate": 3.724668573186155e-06, "loss": 0.782, "step": 9881 }, { "epoch": 0.7139270684703884, "grad_norm": 5.5006427389335, "learning_rate": 3.7244135621175857e-06, "loss": 0.7841, "step": 9882 }, { "epoch": 0.713999313670598, "grad_norm": 8.150323461186842, "learning_rate": 3.7241585342877464e-06, "loss": 0.906, "step": 9883 }, { "epoch": 0.7140715588708075, "grad_norm": 7.71623652380541, "learning_rate": 3.7239034897001277e-06, "loss": 0.9127, "step": 9884 }, { "epoch": 0.7141438040710171, "grad_norm": 5.836825424524615, "learning_rate": 3.7236484283582197e-06, "loss": 0.837, "step": 9885 }, { "epoch": 0.7142160492712265, "grad_norm": 6.507012912313577, "learning_rate": 3.723393350265515e-06, "loss": 0.9232, "step": 9886 }, { "epoch": 0.714288294471436, "grad_norm": 5.664295859955704, "learning_rate": 3.723138255425505e-06, "loss": 0.8269, "step": 9887 }, { "epoch": 0.7143605396716456, "grad_norm": 7.113317716820308, "learning_rate": 3.7228831438416826e-06, "loss": 0.8463, "step": 9888 }, { "epoch": 0.7144327848718551, "grad_norm": 6.657574302496497, "learning_rate": 3.7226280155175386e-06, "loss": 0.8545, "step": 9889 }, { "epoch": 0.7145050300720646, "grad_norm": 6.04257640785579, "learning_rate": 3.722372870456567e-06, "loss": 0.7444, "step": 9890 }, { "epoch": 0.7145772752722741, "grad_norm": 6.306578625527548, "learning_rate": 3.7221177086622597e-06, "loss": 0.8352, "step": 9891 }, { "epoch": 0.7146495204724836, "grad_norm": 5.955879435264964, "learning_rate": 3.72186253013811e-06, "loss": 0.8157, "step": 9892 }, { "epoch": 0.7147217656726931, "grad_norm": 5.971266929853211, "learning_rate": 3.7216073348876115e-06, "loss": 0.9453, "step": 9893 }, { "epoch": 0.7147940108729026, "grad_norm": 5.314222168872114, "learning_rate": 3.7213521229142563e-06, "loss": 0.8058, "step": 9894 }, { "epoch": 0.7148662560731122, "grad_norm": 8.22709527966722, "learning_rate": 3.721096894221539e-06, "loss": 0.9721, "step": 9895 }, { "epoch": 0.7149385012733217, "grad_norm": 7.737876515633434, "learning_rate": 3.7208416488129537e-06, "loss": 0.8749, "step": 9896 }, { "epoch": 0.7150107464735311, "grad_norm": 6.630548708664956, "learning_rate": 3.7205863866919933e-06, "loss": 0.8078, "step": 9897 }, { "epoch": 0.7150829916737407, "grad_norm": 9.115111963836826, "learning_rate": 3.720331107862154e-06, "loss": 0.8734, "step": 9898 }, { "epoch": 0.7151552368739502, "grad_norm": 6.043312778062715, "learning_rate": 3.7200758123269294e-06, "loss": 0.768, "step": 9899 }, { "epoch": 0.7152274820741596, "grad_norm": 6.616151190343698, "learning_rate": 3.7198205000898136e-06, "loss": 0.7809, "step": 9900 }, { "epoch": 0.7152997272743692, "grad_norm": 7.026883138305653, "learning_rate": 3.719565171154302e-06, "loss": 0.8262, "step": 9901 }, { "epoch": 0.7153719724745787, "grad_norm": 5.28751773335254, "learning_rate": 3.7193098255238912e-06, "loss": 0.8101, "step": 9902 }, { "epoch": 0.7154442176747883, "grad_norm": 7.557087624492508, "learning_rate": 3.7190544632020747e-06, "loss": 0.8666, "step": 9903 }, { "epoch": 0.7155164628749977, "grad_norm": 6.557321012601464, "learning_rate": 3.71879908419235e-06, "loss": 0.8331, "step": 9904 }, { "epoch": 0.7155887080752072, "grad_norm": 6.713847798267457, "learning_rate": 3.7185436884982114e-06, "loss": 0.8716, "step": 9905 }, { "epoch": 0.7156609532754168, "grad_norm": 5.8323903366093575, "learning_rate": 3.718288276123156e-06, "loss": 0.7436, "step": 9906 }, { "epoch": 0.7157331984756263, "grad_norm": 5.176746298592412, "learning_rate": 3.71803284707068e-06, "loss": 0.8471, "step": 9907 }, { "epoch": 0.7158054436758358, "grad_norm": 6.960274918486911, "learning_rate": 3.7177774013442802e-06, "loss": 0.8047, "step": 9908 }, { "epoch": 0.7158776888760453, "grad_norm": 6.770204892459952, "learning_rate": 3.7175219389474535e-06, "loss": 0.9332, "step": 9909 }, { "epoch": 0.7159499340762548, "grad_norm": 6.675797821256495, "learning_rate": 3.717266459883697e-06, "loss": 0.8514, "step": 9910 }, { "epoch": 0.7160221792764643, "grad_norm": 5.914045406906441, "learning_rate": 3.7170109641565072e-06, "loss": 0.9265, "step": 9911 }, { "epoch": 0.7160944244766738, "grad_norm": 8.098803264609387, "learning_rate": 3.716755451769382e-06, "loss": 0.8188, "step": 9912 }, { "epoch": 0.7161666696768834, "grad_norm": 6.8417224406277874, "learning_rate": 3.7164999227258203e-06, "loss": 0.8979, "step": 9913 }, { "epoch": 0.7162389148770929, "grad_norm": 7.050105468826834, "learning_rate": 3.716244377029319e-06, "loss": 0.8659, "step": 9914 }, { "epoch": 0.7163111600773023, "grad_norm": 5.3509587079548915, "learning_rate": 3.715988814683376e-06, "loss": 0.8942, "step": 9915 }, { "epoch": 0.7163834052775119, "grad_norm": 5.611246715875261, "learning_rate": 3.715733235691491e-06, "loss": 0.7954, "step": 9916 }, { "epoch": 0.7164556504777214, "grad_norm": 7.867632128851819, "learning_rate": 3.715477640057161e-06, "loss": 0.9646, "step": 9917 }, { "epoch": 0.7165278956779308, "grad_norm": 8.49358574692291, "learning_rate": 3.7152220277838875e-06, "loss": 0.8563, "step": 9918 }, { "epoch": 0.7166001408781404, "grad_norm": 5.557271321577342, "learning_rate": 3.7149663988751666e-06, "loss": 0.8115, "step": 9919 }, { "epoch": 0.7166723860783499, "grad_norm": 7.7529475851797764, "learning_rate": 3.7147107533344994e-06, "loss": 0.8222, "step": 9920 }, { "epoch": 0.7167446312785595, "grad_norm": 7.208806310876605, "learning_rate": 3.714455091165385e-06, "loss": 0.9258, "step": 9921 }, { "epoch": 0.7168168764787689, "grad_norm": 6.3721127423156325, "learning_rate": 3.714199412371324e-06, "loss": 0.8085, "step": 9922 }, { "epoch": 0.7168891216789784, "grad_norm": 6.679918501579502, "learning_rate": 3.7139437169558147e-06, "loss": 0.8316, "step": 9923 }, { "epoch": 0.716961366879188, "grad_norm": 5.956441761304356, "learning_rate": 3.7136880049223594e-06, "loss": 0.8187, "step": 9924 }, { "epoch": 0.7170336120793974, "grad_norm": 7.405079680741119, "learning_rate": 3.7134322762744574e-06, "loss": 0.7719, "step": 9925 }, { "epoch": 0.717105857279607, "grad_norm": 7.506196323428372, "learning_rate": 3.71317653101561e-06, "loss": 0.8655, "step": 9926 }, { "epoch": 0.7171781024798165, "grad_norm": 6.085205660928069, "learning_rate": 3.7129207691493174e-06, "loss": 0.8399, "step": 9927 }, { "epoch": 0.717250347680026, "grad_norm": 6.389454629516238, "learning_rate": 3.7126649906790815e-06, "loss": 0.8105, "step": 9928 }, { "epoch": 0.7173225928802355, "grad_norm": 7.223587595019522, "learning_rate": 3.712409195608403e-06, "loss": 0.8448, "step": 9929 }, { "epoch": 0.717394838080445, "grad_norm": 6.57836246741433, "learning_rate": 3.712153383940784e-06, "loss": 0.8272, "step": 9930 }, { "epoch": 0.7174670832806546, "grad_norm": 5.933264576945736, "learning_rate": 3.711897555679727e-06, "loss": 0.8175, "step": 9931 }, { "epoch": 0.7175393284808641, "grad_norm": 6.935910704428416, "learning_rate": 3.7116417108287333e-06, "loss": 0.91, "step": 9932 }, { "epoch": 0.7176115736810735, "grad_norm": 5.49046053262853, "learning_rate": 3.711385849391306e-06, "loss": 0.8121, "step": 9933 }, { "epoch": 0.7176838188812831, "grad_norm": 5.547865916110855, "learning_rate": 3.7111299713709453e-06, "loss": 0.7688, "step": 9934 }, { "epoch": 0.7177560640814926, "grad_norm": 6.6009201911085675, "learning_rate": 3.7108740767711565e-06, "loss": 0.7631, "step": 9935 }, { "epoch": 0.717828309281702, "grad_norm": 6.152862391333764, "learning_rate": 3.710618165595442e-06, "loss": 0.8987, "step": 9936 }, { "epoch": 0.7179005544819116, "grad_norm": 5.701722523938729, "learning_rate": 3.7103622378473046e-06, "loss": 0.8139, "step": 9937 }, { "epoch": 0.7179727996821211, "grad_norm": 5.64750121232721, "learning_rate": 3.7101062935302483e-06, "loss": 0.7432, "step": 9938 }, { "epoch": 0.7180450448823307, "grad_norm": 6.225124446743327, "learning_rate": 3.7098503326477753e-06, "loss": 0.9261, "step": 9939 }, { "epoch": 0.7181172900825401, "grad_norm": 6.324900919624465, "learning_rate": 3.709594355203392e-06, "loss": 0.8422, "step": 9940 }, { "epoch": 0.7181895352827496, "grad_norm": 5.881041970865872, "learning_rate": 3.709338361200601e-06, "loss": 0.8514, "step": 9941 }, { "epoch": 0.7182617804829592, "grad_norm": 5.273607853151233, "learning_rate": 3.7090823506429064e-06, "loss": 0.8685, "step": 9942 }, { "epoch": 0.7183340256831686, "grad_norm": 5.626708385861265, "learning_rate": 3.7088263235338127e-06, "loss": 0.8784, "step": 9943 }, { "epoch": 0.7184062708833782, "grad_norm": 6.016958113750415, "learning_rate": 3.708570279876826e-06, "loss": 0.8284, "step": 9944 }, { "epoch": 0.7184785160835877, "grad_norm": 5.804689471525574, "learning_rate": 3.7083142196754505e-06, "loss": 0.7409, "step": 9945 }, { "epoch": 0.7185507612837972, "grad_norm": 7.017689560414982, "learning_rate": 3.708058142933191e-06, "loss": 0.9745, "step": 9946 }, { "epoch": 0.7186230064840067, "grad_norm": 7.990957633484643, "learning_rate": 3.7078020496535545e-06, "loss": 0.8793, "step": 9947 }, { "epoch": 0.7186952516842162, "grad_norm": 7.928289641646911, "learning_rate": 3.707545939840045e-06, "loss": 0.8816, "step": 9948 }, { "epoch": 0.7187674968844258, "grad_norm": 5.806640296522863, "learning_rate": 3.707289813496169e-06, "loss": 0.7848, "step": 9949 }, { "epoch": 0.7188397420846353, "grad_norm": 8.001365545076508, "learning_rate": 3.707033670625434e-06, "loss": 0.907, "step": 9950 }, { "epoch": 0.7189119872848447, "grad_norm": 5.582465526646911, "learning_rate": 3.7067775112313443e-06, "loss": 0.8625, "step": 9951 }, { "epoch": 0.7189842324850543, "grad_norm": 6.599639142893061, "learning_rate": 3.7065213353174074e-06, "loss": 0.875, "step": 9952 }, { "epoch": 0.7190564776852638, "grad_norm": 5.580945923180132, "learning_rate": 3.7062651428871298e-06, "loss": 0.8875, "step": 9953 }, { "epoch": 0.7191287228854732, "grad_norm": 5.3133957051661165, "learning_rate": 3.7060089339440198e-06, "loss": 0.8227, "step": 9954 }, { "epoch": 0.7192009680856828, "grad_norm": 7.08754859113772, "learning_rate": 3.7057527084915833e-06, "loss": 0.938, "step": 9955 }, { "epoch": 0.7192732132858923, "grad_norm": 8.79970319420788, "learning_rate": 3.7054964665333292e-06, "loss": 0.9003, "step": 9956 }, { "epoch": 0.7193454584861019, "grad_norm": 6.509915931264313, "learning_rate": 3.7052402080727646e-06, "loss": 0.8932, "step": 9957 }, { "epoch": 0.7194177036863113, "grad_norm": 8.539842910215173, "learning_rate": 3.7049839331133963e-06, "loss": 0.8308, "step": 9958 }, { "epoch": 0.7194899488865208, "grad_norm": 6.683984420657763, "learning_rate": 3.7047276416587346e-06, "loss": 0.8647, "step": 9959 }, { "epoch": 0.7195621940867304, "grad_norm": 6.043937344098197, "learning_rate": 3.704471333712286e-06, "loss": 0.8957, "step": 9960 }, { "epoch": 0.7196344392869398, "grad_norm": 5.487484477027714, "learning_rate": 3.704215009277561e-06, "loss": 0.7956, "step": 9961 }, { "epoch": 0.7197066844871494, "grad_norm": 7.024504413975614, "learning_rate": 3.703958668358067e-06, "loss": 0.8474, "step": 9962 }, { "epoch": 0.7197789296873589, "grad_norm": 5.988972066011888, "learning_rate": 3.703702310957313e-06, "loss": 0.8394, "step": 9963 }, { "epoch": 0.7198511748875684, "grad_norm": 6.038508183749118, "learning_rate": 3.70344593707881e-06, "loss": 0.779, "step": 9964 }, { "epoch": 0.7199234200877779, "grad_norm": 6.684245806244038, "learning_rate": 3.7031895467260664e-06, "loss": 0.8111, "step": 9965 }, { "epoch": 0.7199956652879874, "grad_norm": 7.146599671965535, "learning_rate": 3.7029331399025926e-06, "loss": 0.8568, "step": 9966 }, { "epoch": 0.720067910488197, "grad_norm": 6.294767344131794, "learning_rate": 3.7026767166118966e-06, "loss": 0.8999, "step": 9967 }, { "epoch": 0.7201401556884065, "grad_norm": 6.483574875334016, "learning_rate": 3.7024202768574915e-06, "loss": 0.9812, "step": 9968 }, { "epoch": 0.7202124008886159, "grad_norm": 8.090942838635458, "learning_rate": 3.7021638206428857e-06, "loss": 0.8508, "step": 9969 }, { "epoch": 0.7202846460888255, "grad_norm": 4.832487733907274, "learning_rate": 3.701907347971591e-06, "loss": 0.7712, "step": 9970 }, { "epoch": 0.720356891289035, "grad_norm": 5.403874046071998, "learning_rate": 3.701650858847118e-06, "loss": 0.7601, "step": 9971 }, { "epoch": 0.7204291364892444, "grad_norm": 7.28099354304596, "learning_rate": 3.7013943532729767e-06, "loss": 0.8755, "step": 9972 }, { "epoch": 0.720501381689454, "grad_norm": 6.322993555078053, "learning_rate": 3.7011378312526802e-06, "loss": 0.8554, "step": 9973 }, { "epoch": 0.7205736268896635, "grad_norm": 6.358075583230378, "learning_rate": 3.7008812927897404e-06, "loss": 0.8891, "step": 9974 }, { "epoch": 0.7206458720898731, "grad_norm": 8.213681823141, "learning_rate": 3.7006247378876677e-06, "loss": 0.833, "step": 9975 }, { "epoch": 0.7207181172900825, "grad_norm": 7.374164339986923, "learning_rate": 3.7003681665499735e-06, "loss": 0.9184, "step": 9976 }, { "epoch": 0.720790362490292, "grad_norm": 7.5382414850069175, "learning_rate": 3.700111578780172e-06, "loss": 0.878, "step": 9977 }, { "epoch": 0.7208626076905016, "grad_norm": 5.689738744761712, "learning_rate": 3.6998549745817747e-06, "loss": 0.7638, "step": 9978 }, { "epoch": 0.720934852890711, "grad_norm": 6.715255041214346, "learning_rate": 3.699598353958294e-06, "loss": 0.913, "step": 9979 }, { "epoch": 0.7210070980909206, "grad_norm": 5.940501849620266, "learning_rate": 3.699341716913244e-06, "loss": 0.8899, "step": 9980 }, { "epoch": 0.7210793432911301, "grad_norm": 5.6939616962694775, "learning_rate": 3.699085063450137e-06, "loss": 0.753, "step": 9981 }, { "epoch": 0.7211515884913396, "grad_norm": 7.811671342771691, "learning_rate": 3.6988283935724855e-06, "loss": 0.7899, "step": 9982 }, { "epoch": 0.7212238336915491, "grad_norm": 5.019655313702704, "learning_rate": 3.698571707283805e-06, "loss": 0.841, "step": 9983 }, { "epoch": 0.7212960788917586, "grad_norm": 5.952191617588452, "learning_rate": 3.698315004587609e-06, "loss": 0.8157, "step": 9984 }, { "epoch": 0.7213683240919682, "grad_norm": 6.583599648541472, "learning_rate": 3.69805828548741e-06, "loss": 0.8183, "step": 9985 }, { "epoch": 0.7214405692921777, "grad_norm": 5.5991086386680715, "learning_rate": 3.6978015499867235e-06, "loss": 0.7524, "step": 9986 }, { "epoch": 0.7215128144923871, "grad_norm": 6.129835438845338, "learning_rate": 3.6975447980890632e-06, "loss": 0.8927, "step": 9987 }, { "epoch": 0.7215850596925967, "grad_norm": 5.2887925680634265, "learning_rate": 3.697288029797946e-06, "loss": 0.8077, "step": 9988 }, { "epoch": 0.7216573048928062, "grad_norm": 4.962150749773385, "learning_rate": 3.6970312451168843e-06, "loss": 0.7546, "step": 9989 }, { "epoch": 0.7217295500930156, "grad_norm": 6.455993458173763, "learning_rate": 3.6967744440493947e-06, "loss": 0.8671, "step": 9990 }, { "epoch": 0.7218017952932252, "grad_norm": 5.130620526118865, "learning_rate": 3.6965176265989924e-06, "loss": 0.7112, "step": 9991 }, { "epoch": 0.7218740404934347, "grad_norm": 6.054676159263573, "learning_rate": 3.6962607927691916e-06, "loss": 0.8912, "step": 9992 }, { "epoch": 0.7219462856936443, "grad_norm": 5.594159841174838, "learning_rate": 3.6960039425635107e-06, "loss": 0.8506, "step": 9993 }, { "epoch": 0.7220185308938537, "grad_norm": 7.26502538688409, "learning_rate": 3.695747075985464e-06, "loss": 0.8038, "step": 9994 }, { "epoch": 0.7220907760940632, "grad_norm": 5.289224055878258, "learning_rate": 3.6954901930385683e-06, "loss": 0.8237, "step": 9995 }, { "epoch": 0.7221630212942728, "grad_norm": 5.426202371841266, "learning_rate": 3.69523329372634e-06, "loss": 0.8208, "step": 9996 }, { "epoch": 0.7222352664944822, "grad_norm": 5.495108423248076, "learning_rate": 3.6949763780522957e-06, "loss": 0.8919, "step": 9997 }, { "epoch": 0.7223075116946918, "grad_norm": 5.5909892680529625, "learning_rate": 3.6947194460199527e-06, "loss": 0.7933, "step": 9998 }, { "epoch": 0.7223797568949013, "grad_norm": 6.91269241810202, "learning_rate": 3.6944624976328287e-06, "loss": 0.8731, "step": 9999 }, { "epoch": 0.7224520020951108, "grad_norm": 6.434630411793435, "learning_rate": 3.69420553289444e-06, "loss": 0.9572, "step": 10000 }, { "epoch": 0.7225242472953203, "grad_norm": 7.653735436876252, "learning_rate": 3.6939485518083053e-06, "loss": 0.7815, "step": 10001 }, { "epoch": 0.7225964924955298, "grad_norm": 7.486412710681813, "learning_rate": 3.693691554377942e-06, "loss": 0.9157, "step": 10002 }, { "epoch": 0.7226687376957394, "grad_norm": 7.723483097806933, "learning_rate": 3.6934345406068674e-06, "loss": 0.8962, "step": 10003 }, { "epoch": 0.7227409828959489, "grad_norm": 6.466544102297078, "learning_rate": 3.6931775104986013e-06, "loss": 0.8627, "step": 10004 }, { "epoch": 0.7228132280961583, "grad_norm": 5.397332217712689, "learning_rate": 3.6929204640566605e-06, "loss": 0.8482, "step": 10005 }, { "epoch": 0.7228854732963679, "grad_norm": 5.806522372029988, "learning_rate": 3.6926634012845653e-06, "loss": 0.8034, "step": 10006 }, { "epoch": 0.7229577184965774, "grad_norm": 5.849667740213835, "learning_rate": 3.692406322185835e-06, "loss": 0.8996, "step": 10007 }, { "epoch": 0.7230299636967868, "grad_norm": 6.4352469343988234, "learning_rate": 3.6921492267639867e-06, "loss": 0.8281, "step": 10008 }, { "epoch": 0.7231022088969964, "grad_norm": 6.0125017257339, "learning_rate": 3.691892115022543e-06, "loss": 0.7375, "step": 10009 }, { "epoch": 0.7231744540972059, "grad_norm": 5.397099330460884, "learning_rate": 3.691634986965019e-06, "loss": 0.858, "step": 10010 }, { "epoch": 0.7232466992974155, "grad_norm": 5.760932480779106, "learning_rate": 3.6913778425949397e-06, "loss": 0.817, "step": 10011 }, { "epoch": 0.7233189444976249, "grad_norm": 6.042199823817274, "learning_rate": 3.6911206819158214e-06, "loss": 0.8591, "step": 10012 }, { "epoch": 0.7233911896978344, "grad_norm": 5.490819204388205, "learning_rate": 3.690863504931187e-06, "loss": 0.8243, "step": 10013 }, { "epoch": 0.723463434898044, "grad_norm": 6.923587630255583, "learning_rate": 3.6906063116445544e-06, "loss": 0.8546, "step": 10014 }, { "epoch": 0.7235356800982534, "grad_norm": 6.579254270924221, "learning_rate": 3.6903491020594466e-06, "loss": 0.8571, "step": 10015 }, { "epoch": 0.723607925298463, "grad_norm": 5.582485172509995, "learning_rate": 3.690091876179384e-06, "loss": 0.7454, "step": 10016 }, { "epoch": 0.7236801704986725, "grad_norm": 6.113493094871779, "learning_rate": 3.689834634007887e-06, "loss": 0.7785, "step": 10017 }, { "epoch": 0.723752415698882, "grad_norm": 6.350143443011833, "learning_rate": 3.689577375548479e-06, "loss": 0.9742, "step": 10018 }, { "epoch": 0.7238246608990915, "grad_norm": 6.470744392224279, "learning_rate": 3.6893201008046792e-06, "loss": 0.8476, "step": 10019 }, { "epoch": 0.723896906099301, "grad_norm": 7.9072773985838065, "learning_rate": 3.6890628097800107e-06, "loss": 0.9054, "step": 10020 }, { "epoch": 0.7239691512995106, "grad_norm": 7.294799770982784, "learning_rate": 3.6888055024779955e-06, "loss": 0.8215, "step": 10021 }, { "epoch": 0.7240413964997201, "grad_norm": 5.686699779513969, "learning_rate": 3.688548178902157e-06, "loss": 0.8687, "step": 10022 }, { "epoch": 0.7241136416999295, "grad_norm": 6.949859606745074, "learning_rate": 3.6882908390560162e-06, "loss": 0.9461, "step": 10023 }, { "epoch": 0.7241858869001391, "grad_norm": 8.316975930171187, "learning_rate": 3.6880334829430964e-06, "loss": 0.8223, "step": 10024 }, { "epoch": 0.7242581321003486, "grad_norm": 5.850963689216444, "learning_rate": 3.687776110566921e-06, "loss": 0.828, "step": 10025 }, { "epoch": 0.724330377300558, "grad_norm": 6.898630763767178, "learning_rate": 3.687518721931012e-06, "loss": 0.8386, "step": 10026 }, { "epoch": 0.7244026225007676, "grad_norm": 5.831975324685809, "learning_rate": 3.687261317038895e-06, "loss": 0.8441, "step": 10027 }, { "epoch": 0.7244748677009771, "grad_norm": 6.499285145111033, "learning_rate": 3.6870038958940914e-06, "loss": 0.8728, "step": 10028 }, { "epoch": 0.7245471129011867, "grad_norm": 5.661195058120148, "learning_rate": 3.6867464585001268e-06, "loss": 0.8359, "step": 10029 }, { "epoch": 0.7246193581013961, "grad_norm": 6.417181745721185, "learning_rate": 3.6864890048605238e-06, "loss": 0.8734, "step": 10030 }, { "epoch": 0.7246916033016056, "grad_norm": 8.593872069445535, "learning_rate": 3.6862315349788086e-06, "loss": 0.8359, "step": 10031 }, { "epoch": 0.7247638485018152, "grad_norm": 5.340098778886184, "learning_rate": 3.6859740488585046e-06, "loss": 0.8486, "step": 10032 }, { "epoch": 0.7248360937020246, "grad_norm": 6.0481712667042355, "learning_rate": 3.6857165465031357e-06, "loss": 0.8081, "step": 10033 }, { "epoch": 0.7249083389022342, "grad_norm": 6.096671162320845, "learning_rate": 3.685459027916228e-06, "loss": 0.8782, "step": 10034 }, { "epoch": 0.7249805841024437, "grad_norm": 6.105163215123801, "learning_rate": 3.6852014931013074e-06, "loss": 0.8134, "step": 10035 }, { "epoch": 0.7250528293026532, "grad_norm": 7.328306411417248, "learning_rate": 3.6849439420618995e-06, "loss": 0.8018, "step": 10036 }, { "epoch": 0.7251250745028627, "grad_norm": 7.317764343554182, "learning_rate": 3.6846863748015273e-06, "loss": 0.9304, "step": 10037 }, { "epoch": 0.7251973197030722, "grad_norm": 6.470685144198531, "learning_rate": 3.6844287913237192e-06, "loss": 0.8356, "step": 10038 }, { "epoch": 0.7252695649032818, "grad_norm": 5.835143253558629, "learning_rate": 3.6841711916320005e-06, "loss": 0.8826, "step": 10039 }, { "epoch": 0.7253418101034913, "grad_norm": 7.1389914215416805, "learning_rate": 3.683913575729898e-06, "loss": 0.8306, "step": 10040 }, { "epoch": 0.7254140553037007, "grad_norm": 7.020160797538117, "learning_rate": 3.6836559436209378e-06, "loss": 0.8727, "step": 10041 }, { "epoch": 0.7254863005039103, "grad_norm": 7.5000033060702345, "learning_rate": 3.6833982953086465e-06, "loss": 0.7901, "step": 10042 }, { "epoch": 0.7255585457041198, "grad_norm": 5.933088410426521, "learning_rate": 3.683140630796551e-06, "loss": 0.8581, "step": 10043 }, { "epoch": 0.7256307909043292, "grad_norm": 6.0558302852982875, "learning_rate": 3.6828829500881796e-06, "loss": 0.7984, "step": 10044 }, { "epoch": 0.7257030361045388, "grad_norm": 5.822599107775929, "learning_rate": 3.6826252531870593e-06, "loss": 0.8489, "step": 10045 }, { "epoch": 0.7257752813047483, "grad_norm": 8.88482036728849, "learning_rate": 3.682367540096717e-06, "loss": 0.9326, "step": 10046 }, { "epoch": 0.7258475265049579, "grad_norm": 6.1382067240448475, "learning_rate": 3.6821098108206814e-06, "loss": 0.8622, "step": 10047 }, { "epoch": 0.7259197717051673, "grad_norm": 5.899971021160791, "learning_rate": 3.68185206536248e-06, "loss": 0.8536, "step": 10048 }, { "epoch": 0.7259920169053768, "grad_norm": 7.509030055816156, "learning_rate": 3.6815943037256415e-06, "loss": 0.8641, "step": 10049 }, { "epoch": 0.7260642621055864, "grad_norm": 7.57100558577485, "learning_rate": 3.6813365259136945e-06, "loss": 0.8059, "step": 10050 }, { "epoch": 0.7261365073057958, "grad_norm": 5.704436854151165, "learning_rate": 3.6810787319301678e-06, "loss": 0.8522, "step": 10051 }, { "epoch": 0.7262087525060054, "grad_norm": 6.479236303938558, "learning_rate": 3.6808209217785905e-06, "loss": 0.8851, "step": 10052 }, { "epoch": 0.7262809977062149, "grad_norm": 5.500254885229432, "learning_rate": 3.680563095462491e-06, "loss": 0.856, "step": 10053 }, { "epoch": 0.7263532429064244, "grad_norm": 7.011323489468633, "learning_rate": 3.6803052529854e-06, "loss": 0.8623, "step": 10054 }, { "epoch": 0.7264254881066339, "grad_norm": 6.456467028623257, "learning_rate": 3.6800473943508462e-06, "loss": 0.825, "step": 10055 }, { "epoch": 0.7264977333068434, "grad_norm": 7.346057545541169, "learning_rate": 3.67978951956236e-06, "loss": 0.7887, "step": 10056 }, { "epoch": 0.726569978507053, "grad_norm": 7.0287775058197965, "learning_rate": 3.6795316286234718e-06, "loss": 0.8373, "step": 10057 }, { "epoch": 0.7266422237072625, "grad_norm": 6.630006733656466, "learning_rate": 3.6792737215377104e-06, "loss": 0.8598, "step": 10058 }, { "epoch": 0.7267144689074719, "grad_norm": 6.676410071777587, "learning_rate": 3.679015798308608e-06, "loss": 0.8218, "step": 10059 }, { "epoch": 0.7267867141076815, "grad_norm": 7.218596749929029, "learning_rate": 3.678757858939695e-06, "loss": 0.8843, "step": 10060 }, { "epoch": 0.726858959307891, "grad_norm": 6.5352890448066585, "learning_rate": 3.678499903434502e-06, "loss": 0.7775, "step": 10061 }, { "epoch": 0.7269312045081004, "grad_norm": 8.634129654183674, "learning_rate": 3.6782419317965595e-06, "loss": 0.91, "step": 10062 }, { "epoch": 0.72700344970831, "grad_norm": 6.167274170240688, "learning_rate": 3.677983944029401e-06, "loss": 0.8301, "step": 10063 }, { "epoch": 0.7270756949085195, "grad_norm": 5.7656068052092655, "learning_rate": 3.677725940136556e-06, "loss": 0.8616, "step": 10064 }, { "epoch": 0.7271479401087291, "grad_norm": 6.283718857544795, "learning_rate": 3.677467920121558e-06, "loss": 0.8533, "step": 10065 }, { "epoch": 0.7272201853089385, "grad_norm": 7.928404154541073, "learning_rate": 3.6772098839879382e-06, "loss": 0.8117, "step": 10066 }, { "epoch": 0.727292430509148, "grad_norm": 9.864944940133979, "learning_rate": 3.6769518317392293e-06, "loss": 0.8268, "step": 10067 }, { "epoch": 0.7273646757093576, "grad_norm": 6.945503910203376, "learning_rate": 3.6766937633789636e-06, "loss": 0.9149, "step": 10068 }, { "epoch": 0.727436920909567, "grad_norm": 6.411155943527389, "learning_rate": 3.6764356789106736e-06, "loss": 0.8938, "step": 10069 }, { "epoch": 0.7275091661097766, "grad_norm": 6.070010851578542, "learning_rate": 3.6761775783378935e-06, "loss": 0.8685, "step": 10070 }, { "epoch": 0.7275814113099861, "grad_norm": 7.176629113457718, "learning_rate": 3.6759194616641547e-06, "loss": 0.7963, "step": 10071 }, { "epoch": 0.7276536565101956, "grad_norm": 5.830722915034984, "learning_rate": 3.6756613288929914e-06, "loss": 0.835, "step": 10072 }, { "epoch": 0.7277259017104051, "grad_norm": 8.92253599373913, "learning_rate": 3.6754031800279378e-06, "loss": 0.8347, "step": 10073 }, { "epoch": 0.7277981469106146, "grad_norm": 6.879751055006091, "learning_rate": 3.675145015072527e-06, "loss": 0.8169, "step": 10074 }, { "epoch": 0.7278703921108242, "grad_norm": 6.38029200492542, "learning_rate": 3.674886834030294e-06, "loss": 0.8315, "step": 10075 }, { "epoch": 0.7279426373110337, "grad_norm": 5.4998357921709555, "learning_rate": 3.6746286369047723e-06, "loss": 0.7331, "step": 10076 }, { "epoch": 0.7280148825112431, "grad_norm": 6.186656855926189, "learning_rate": 3.6743704236994958e-06, "loss": 0.7812, "step": 10077 }, { "epoch": 0.7280871277114527, "grad_norm": 6.731399905661631, "learning_rate": 3.6741121944180003e-06, "loss": 0.8523, "step": 10078 }, { "epoch": 0.7281593729116622, "grad_norm": 6.180331306342108, "learning_rate": 3.6738539490638216e-06, "loss": 0.8231, "step": 10079 }, { "epoch": 0.7282316181118716, "grad_norm": 5.09198510422367, "learning_rate": 3.673595687640493e-06, "loss": 0.7867, "step": 10080 }, { "epoch": 0.7283038633120812, "grad_norm": 5.428687485192866, "learning_rate": 3.6733374101515503e-06, "loss": 0.8958, "step": 10081 }, { "epoch": 0.7283761085122907, "grad_norm": 5.745590177521985, "learning_rate": 3.67307911660053e-06, "loss": 0.8476, "step": 10082 }, { "epoch": 0.7284483537125003, "grad_norm": 5.82968127051731, "learning_rate": 3.6728208069909672e-06, "loss": 0.852, "step": 10083 }, { "epoch": 0.7285205989127097, "grad_norm": 5.954153057249234, "learning_rate": 3.672562481326398e-06, "loss": 0.7996, "step": 10084 }, { "epoch": 0.7285928441129192, "grad_norm": 5.961397925677457, "learning_rate": 3.672304139610359e-06, "loss": 0.8299, "step": 10085 }, { "epoch": 0.7286650893131288, "grad_norm": 6.291164487624812, "learning_rate": 3.6720457818463868e-06, "loss": 0.8485, "step": 10086 }, { "epoch": 0.7287373345133382, "grad_norm": 8.632079826091939, "learning_rate": 3.671787408038018e-06, "loss": 0.7978, "step": 10087 }, { "epoch": 0.7288095797135478, "grad_norm": 6.606460830658973, "learning_rate": 3.671529018188789e-06, "loss": 0.9085, "step": 10088 }, { "epoch": 0.7288818249137573, "grad_norm": 6.313361382964253, "learning_rate": 3.671270612302238e-06, "loss": 0.8303, "step": 10089 }, { "epoch": 0.7289540701139668, "grad_norm": 7.820747843542547, "learning_rate": 3.6710121903819014e-06, "loss": 0.8179, "step": 10090 }, { "epoch": 0.7290263153141763, "grad_norm": 7.150182025433071, "learning_rate": 3.670753752431317e-06, "loss": 0.8099, "step": 10091 }, { "epoch": 0.7290985605143858, "grad_norm": 5.605327708101459, "learning_rate": 3.670495298454022e-06, "loss": 0.8893, "step": 10092 }, { "epoch": 0.7291708057145954, "grad_norm": 5.594163591662582, "learning_rate": 3.6702368284535567e-06, "loss": 0.8331, "step": 10093 }, { "epoch": 0.7292430509148049, "grad_norm": 8.020302759101819, "learning_rate": 3.669978342433457e-06, "loss": 0.8221, "step": 10094 }, { "epoch": 0.7293152961150143, "grad_norm": 6.014110503348303, "learning_rate": 3.6697198403972624e-06, "loss": 0.9556, "step": 10095 }, { "epoch": 0.7293875413152239, "grad_norm": 7.035135373979992, "learning_rate": 3.669461322348511e-06, "loss": 0.8951, "step": 10096 }, { "epoch": 0.7294597865154334, "grad_norm": 6.310425067978719, "learning_rate": 3.669202788290743e-06, "loss": 0.8728, "step": 10097 }, { "epoch": 0.7295320317156428, "grad_norm": 5.199690318056117, "learning_rate": 3.6689442382274964e-06, "loss": 0.9039, "step": 10098 }, { "epoch": 0.7296042769158524, "grad_norm": 7.975353422076745, "learning_rate": 3.6686856721623104e-06, "loss": 0.9308, "step": 10099 }, { "epoch": 0.7296765221160619, "grad_norm": 5.7138307185908905, "learning_rate": 3.668427090098725e-06, "loss": 0.8346, "step": 10100 }, { "epoch": 0.7297487673162715, "grad_norm": 5.372931148790078, "learning_rate": 3.6681684920402797e-06, "loss": 0.8194, "step": 10101 }, { "epoch": 0.7298210125164809, "grad_norm": 5.405391735240044, "learning_rate": 3.667909877990516e-06, "loss": 0.9185, "step": 10102 }, { "epoch": 0.7298932577166904, "grad_norm": 7.254209677451679, "learning_rate": 3.6676512479529717e-06, "loss": 0.8812, "step": 10103 }, { "epoch": 0.7299655029169, "grad_norm": 6.367469678371501, "learning_rate": 3.667392601931189e-06, "loss": 0.8617, "step": 10104 }, { "epoch": 0.7300377481171094, "grad_norm": 5.109562768314879, "learning_rate": 3.6671339399287077e-06, "loss": 0.8591, "step": 10105 }, { "epoch": 0.730109993317319, "grad_norm": 6.928115408361228, "learning_rate": 3.666875261949069e-06, "loss": 0.8322, "step": 10106 }, { "epoch": 0.7301822385175285, "grad_norm": 6.5235149812951025, "learning_rate": 3.6666165679958145e-06, "loss": 0.8302, "step": 10107 }, { "epoch": 0.730254483717738, "grad_norm": 5.3979845312405, "learning_rate": 3.6663578580724844e-06, "loss": 0.8349, "step": 10108 }, { "epoch": 0.7303267289179475, "grad_norm": 6.037705203531873, "learning_rate": 3.6660991321826213e-06, "loss": 0.8681, "step": 10109 }, { "epoch": 0.730398974118157, "grad_norm": 7.721790533402732, "learning_rate": 3.6658403903297655e-06, "loss": 0.8072, "step": 10110 }, { "epoch": 0.7304712193183666, "grad_norm": 6.800712817361041, "learning_rate": 3.6655816325174613e-06, "loss": 0.8917, "step": 10111 }, { "epoch": 0.7305434645185761, "grad_norm": 6.5436511421205354, "learning_rate": 3.665322858749249e-06, "loss": 0.804, "step": 10112 }, { "epoch": 0.7306157097187855, "grad_norm": 6.814101284715349, "learning_rate": 3.6650640690286715e-06, "loss": 0.8848, "step": 10113 }, { "epoch": 0.7306879549189951, "grad_norm": 5.276555068056285, "learning_rate": 3.6648052633592713e-06, "loss": 0.8767, "step": 10114 }, { "epoch": 0.7307602001192046, "grad_norm": 6.931129623757452, "learning_rate": 3.664546441744592e-06, "loss": 0.8414, "step": 10115 }, { "epoch": 0.730832445319414, "grad_norm": 5.452437480443899, "learning_rate": 3.6642876041881757e-06, "loss": 0.8246, "step": 10116 }, { "epoch": 0.7309046905196236, "grad_norm": 7.936966284883226, "learning_rate": 3.664028750693566e-06, "loss": 0.7765, "step": 10117 }, { "epoch": 0.7309769357198331, "grad_norm": 6.26437525283644, "learning_rate": 3.6637698812643076e-06, "loss": 0.773, "step": 10118 }, { "epoch": 0.7310491809200427, "grad_norm": 6.8804532704634545, "learning_rate": 3.6635109959039416e-06, "loss": 0.7572, "step": 10119 }, { "epoch": 0.7311214261202521, "grad_norm": 6.239695179169819, "learning_rate": 3.6632520946160142e-06, "loss": 0.7619, "step": 10120 }, { "epoch": 0.7311936713204616, "grad_norm": 5.425575774265788, "learning_rate": 3.662993177404069e-06, "loss": 0.7883, "step": 10121 }, { "epoch": 0.7312659165206712, "grad_norm": 6.123090485291268, "learning_rate": 3.66273424427165e-06, "loss": 0.8307, "step": 10122 }, { "epoch": 0.7313381617208806, "grad_norm": 6.4100387162632755, "learning_rate": 3.662475295222302e-06, "loss": 0.8187, "step": 10123 }, { "epoch": 0.7314104069210902, "grad_norm": 6.552200809592053, "learning_rate": 3.66221633025957e-06, "loss": 0.8464, "step": 10124 }, { "epoch": 0.7314826521212997, "grad_norm": 8.1115056584984, "learning_rate": 3.6619573493869988e-06, "loss": 0.8258, "step": 10125 }, { "epoch": 0.7315548973215092, "grad_norm": 6.398230916136208, "learning_rate": 3.6616983526081336e-06, "loss": 0.815, "step": 10126 }, { "epoch": 0.7316271425217187, "grad_norm": 6.357464777601013, "learning_rate": 3.66143933992652e-06, "loss": 0.7698, "step": 10127 }, { "epoch": 0.7316993877219282, "grad_norm": 7.30854781853552, "learning_rate": 3.6611803113457034e-06, "loss": 0.8168, "step": 10128 }, { "epoch": 0.7317716329221378, "grad_norm": 5.873997704250574, "learning_rate": 3.6609212668692297e-06, "loss": 0.782, "step": 10129 }, { "epoch": 0.7318438781223472, "grad_norm": 7.580554942246766, "learning_rate": 3.660662206500646e-06, "loss": 0.8445, "step": 10130 }, { "epoch": 0.7319161233225567, "grad_norm": 6.593155531277807, "learning_rate": 3.6604031302434973e-06, "loss": 0.856, "step": 10131 }, { "epoch": 0.7319883685227663, "grad_norm": 6.236349839944186, "learning_rate": 3.6601440381013316e-06, "loss": 0.8537, "step": 10132 }, { "epoch": 0.7320606137229758, "grad_norm": 6.4182038220606294, "learning_rate": 3.6598849300776933e-06, "loss": 0.8477, "step": 10133 }, { "epoch": 0.7321328589231852, "grad_norm": 6.522907678709008, "learning_rate": 3.659625806176132e-06, "loss": 0.8503, "step": 10134 }, { "epoch": 0.7322051041233948, "grad_norm": 5.574960532283923, "learning_rate": 3.6593666664001935e-06, "loss": 0.8399, "step": 10135 }, { "epoch": 0.7322773493236043, "grad_norm": 7.066044550928272, "learning_rate": 3.659107510753426e-06, "loss": 0.8593, "step": 10136 }, { "epoch": 0.7323495945238139, "grad_norm": 5.305558819905731, "learning_rate": 3.658848339239376e-06, "loss": 0.7879, "step": 10137 }, { "epoch": 0.7324218397240233, "grad_norm": 7.196473731028062, "learning_rate": 3.6585891518615923e-06, "loss": 0.8902, "step": 10138 }, { "epoch": 0.7324940849242328, "grad_norm": 6.231115268180453, "learning_rate": 3.658329948623623e-06, "loss": 0.9211, "step": 10139 }, { "epoch": 0.7325663301244424, "grad_norm": 7.636860112981899, "learning_rate": 3.658070729529016e-06, "loss": 0.8342, "step": 10140 }, { "epoch": 0.7326385753246518, "grad_norm": 5.938094902351758, "learning_rate": 3.65781149458132e-06, "loss": 0.8609, "step": 10141 }, { "epoch": 0.7327108205248614, "grad_norm": 7.528093238342348, "learning_rate": 3.6575522437840827e-06, "loss": 0.8249, "step": 10142 }, { "epoch": 0.7327830657250709, "grad_norm": 5.016087971172944, "learning_rate": 3.6572929771408543e-06, "loss": 0.8568, "step": 10143 }, { "epoch": 0.7328553109252804, "grad_norm": 5.979816503356801, "learning_rate": 3.6570336946551837e-06, "loss": 0.8174, "step": 10144 }, { "epoch": 0.7329275561254899, "grad_norm": 5.8095410106264325, "learning_rate": 3.656774396330621e-06, "loss": 0.8512, "step": 10145 }, { "epoch": 0.7329998013256994, "grad_norm": 6.889016030618392, "learning_rate": 3.656515082170714e-06, "loss": 0.9389, "step": 10146 }, { "epoch": 0.733072046525909, "grad_norm": 6.5569623561963635, "learning_rate": 3.6562557521790137e-06, "loss": 0.8607, "step": 10147 }, { "epoch": 0.7331442917261184, "grad_norm": 7.401364546051943, "learning_rate": 3.65599640635907e-06, "loss": 0.8574, "step": 10148 }, { "epoch": 0.7332165369263279, "grad_norm": 6.481435822018251, "learning_rate": 3.6557370447144334e-06, "loss": 0.8511, "step": 10149 }, { "epoch": 0.7332887821265375, "grad_norm": 7.637149074752321, "learning_rate": 3.655477667248654e-06, "loss": 0.8045, "step": 10150 }, { "epoch": 0.733361027326747, "grad_norm": 6.6369690648608515, "learning_rate": 3.6552182739652824e-06, "loss": 0.7891, "step": 10151 }, { "epoch": 0.7334332725269564, "grad_norm": 6.5826927871376455, "learning_rate": 3.654958864867869e-06, "loss": 0.885, "step": 10152 }, { "epoch": 0.733505517727166, "grad_norm": 6.992228690084683, "learning_rate": 3.6546994399599663e-06, "loss": 0.81, "step": 10153 }, { "epoch": 0.7335777629273755, "grad_norm": 6.301579462090517, "learning_rate": 3.654439999245125e-06, "loss": 0.7769, "step": 10154 }, { "epoch": 0.7336500081275851, "grad_norm": 5.564851928002291, "learning_rate": 3.6541805427268958e-06, "loss": 0.8448, "step": 10155 }, { "epoch": 0.7337222533277945, "grad_norm": 5.364028022937555, "learning_rate": 3.6539210704088313e-06, "loss": 0.8265, "step": 10156 }, { "epoch": 0.733794498528004, "grad_norm": 6.2462137579348385, "learning_rate": 3.6536615822944833e-06, "loss": 0.7927, "step": 10157 }, { "epoch": 0.7338667437282136, "grad_norm": 7.519902588233669, "learning_rate": 3.6534020783874042e-06, "loss": 0.7935, "step": 10158 }, { "epoch": 0.733938988928423, "grad_norm": 10.593307440861947, "learning_rate": 3.653142558691146e-06, "loss": 0.828, "step": 10159 }, { "epoch": 0.7340112341286326, "grad_norm": 6.416913048443859, "learning_rate": 3.6528830232092618e-06, "loss": 0.7939, "step": 10160 }, { "epoch": 0.7340834793288421, "grad_norm": 5.602865283093389, "learning_rate": 3.6526234719453037e-06, "loss": 0.8268, "step": 10161 }, { "epoch": 0.7341557245290516, "grad_norm": 5.2146725744378, "learning_rate": 3.652363904902825e-06, "loss": 0.8833, "step": 10162 }, { "epoch": 0.7342279697292611, "grad_norm": 6.553783040622774, "learning_rate": 3.6521043220853804e-06, "loss": 0.8223, "step": 10163 }, { "epoch": 0.7343002149294706, "grad_norm": 6.703129837283365, "learning_rate": 3.6518447234965214e-06, "loss": 0.8505, "step": 10164 }, { "epoch": 0.7343724601296802, "grad_norm": 6.78864921830761, "learning_rate": 3.6515851091398024e-06, "loss": 0.9144, "step": 10165 }, { "epoch": 0.7344447053298896, "grad_norm": 7.061537685493335, "learning_rate": 3.651325479018778e-06, "loss": 0.8669, "step": 10166 }, { "epoch": 0.7345169505300991, "grad_norm": 6.080738437381405, "learning_rate": 3.651065833137001e-06, "loss": 0.8068, "step": 10167 }, { "epoch": 0.7345891957303087, "grad_norm": 5.260990627690146, "learning_rate": 3.650806171498027e-06, "loss": 0.7128, "step": 10168 }, { "epoch": 0.7346614409305182, "grad_norm": 6.391837828067332, "learning_rate": 3.6505464941054098e-06, "loss": 0.8503, "step": 10169 }, { "epoch": 0.7347336861307276, "grad_norm": 6.7156177405331, "learning_rate": 3.6502868009627046e-06, "loss": 0.8799, "step": 10170 }, { "epoch": 0.7348059313309372, "grad_norm": 5.751454376721576, "learning_rate": 3.650027092073466e-06, "loss": 0.7673, "step": 10171 }, { "epoch": 0.7348781765311467, "grad_norm": 6.380022052977903, "learning_rate": 3.649767367441249e-06, "loss": 0.8085, "step": 10172 }, { "epoch": 0.7349504217313563, "grad_norm": 6.217763544562767, "learning_rate": 3.6495076270696106e-06, "loss": 0.7188, "step": 10173 }, { "epoch": 0.7350226669315657, "grad_norm": 7.360927280393829, "learning_rate": 3.649247870962105e-06, "loss": 0.8507, "step": 10174 }, { "epoch": 0.7350949121317752, "grad_norm": 7.4542163873517175, "learning_rate": 3.648988099122288e-06, "loss": 0.8665, "step": 10175 }, { "epoch": 0.7351671573319848, "grad_norm": 7.098331534761713, "learning_rate": 3.648728311553716e-06, "loss": 0.8065, "step": 10176 }, { "epoch": 0.7352394025321942, "grad_norm": 6.341207361151277, "learning_rate": 3.648468508259946e-06, "loss": 0.8331, "step": 10177 }, { "epoch": 0.7353116477324038, "grad_norm": 6.062678698236191, "learning_rate": 3.648208689244533e-06, "loss": 0.8682, "step": 10178 }, { "epoch": 0.7353838929326133, "grad_norm": 6.829277726907892, "learning_rate": 3.647948854511035e-06, "loss": 0.767, "step": 10179 }, { "epoch": 0.7354561381328228, "grad_norm": 8.35504642969057, "learning_rate": 3.647689004063009e-06, "loss": 0.8313, "step": 10180 }, { "epoch": 0.7355283833330323, "grad_norm": 7.0480192824412775, "learning_rate": 3.6474291379040105e-06, "loss": 0.8574, "step": 10181 }, { "epoch": 0.7356006285332418, "grad_norm": 7.133863323642398, "learning_rate": 3.647169256037599e-06, "loss": 0.9374, "step": 10182 }, { "epoch": 0.7356728737334514, "grad_norm": 8.436143554160648, "learning_rate": 3.6469093584673306e-06, "loss": 0.8555, "step": 10183 }, { "epoch": 0.7357451189336608, "grad_norm": 5.512467039533069, "learning_rate": 3.6466494451967637e-06, "loss": 0.8146, "step": 10184 }, { "epoch": 0.7358173641338703, "grad_norm": 6.631381434253537, "learning_rate": 3.6463895162294566e-06, "loss": 0.8491, "step": 10185 }, { "epoch": 0.7358896093340799, "grad_norm": 5.645194867996698, "learning_rate": 3.6461295715689664e-06, "loss": 0.8917, "step": 10186 }, { "epoch": 0.7359618545342894, "grad_norm": 7.114721011109148, "learning_rate": 3.6458696112188522e-06, "loss": 0.8208, "step": 10187 }, { "epoch": 0.7360340997344988, "grad_norm": 7.5233784293988135, "learning_rate": 3.6456096351826743e-06, "loss": 0.8427, "step": 10188 }, { "epoch": 0.7361063449347084, "grad_norm": 6.528942814024245, "learning_rate": 3.645349643463989e-06, "loss": 0.8443, "step": 10189 }, { "epoch": 0.7361785901349179, "grad_norm": 5.211023881873977, "learning_rate": 3.645089636066356e-06, "loss": 0.8304, "step": 10190 }, { "epoch": 0.7362508353351275, "grad_norm": 6.218961012437588, "learning_rate": 3.644829612993335e-06, "loss": 0.8596, "step": 10191 }, { "epoch": 0.7363230805353369, "grad_norm": 5.537598151376896, "learning_rate": 3.6445695742484853e-06, "loss": 0.7752, "step": 10192 }, { "epoch": 0.7363953257355464, "grad_norm": 6.545213292713931, "learning_rate": 3.644309519835368e-06, "loss": 0.7764, "step": 10193 }, { "epoch": 0.736467570935756, "grad_norm": 5.875426581296632, "learning_rate": 3.644049449757541e-06, "loss": 0.8452, "step": 10194 }, { "epoch": 0.7365398161359654, "grad_norm": 6.7852979259820225, "learning_rate": 3.643789364018565e-06, "loss": 0.8134, "step": 10195 }, { "epoch": 0.736612061336175, "grad_norm": 5.652978277542916, "learning_rate": 3.6435292626220013e-06, "loss": 0.8294, "step": 10196 }, { "epoch": 0.7366843065363845, "grad_norm": 7.268952950869411, "learning_rate": 3.6432691455714102e-06, "loss": 0.8459, "step": 10197 }, { "epoch": 0.736756551736594, "grad_norm": 7.617685029825206, "learning_rate": 3.643009012870352e-06, "loss": 0.799, "step": 10198 }, { "epoch": 0.7368287969368035, "grad_norm": 5.7660725737737115, "learning_rate": 3.6427488645223878e-06, "loss": 0.8831, "step": 10199 }, { "epoch": 0.736901042137013, "grad_norm": 6.112275900760021, "learning_rate": 3.642488700531079e-06, "loss": 0.7668, "step": 10200 }, { "epoch": 0.7369732873372226, "grad_norm": 6.879242853597301, "learning_rate": 3.6422285208999862e-06, "loss": 0.8775, "step": 10201 }, { "epoch": 0.737045532537432, "grad_norm": 5.6493614781206585, "learning_rate": 3.641968325632673e-06, "loss": 0.83, "step": 10202 }, { "epoch": 0.7371177777376415, "grad_norm": 6.046480387741757, "learning_rate": 3.6417081147326994e-06, "loss": 0.8756, "step": 10203 }, { "epoch": 0.7371900229378511, "grad_norm": 6.330026298576794, "learning_rate": 3.641447888203628e-06, "loss": 0.8385, "step": 10204 }, { "epoch": 0.7372622681380606, "grad_norm": 5.640602851464912, "learning_rate": 3.641187646049022e-06, "loss": 0.9488, "step": 10205 }, { "epoch": 0.73733451333827, "grad_norm": 7.144339298696036, "learning_rate": 3.6409273882724426e-06, "loss": 0.9385, "step": 10206 }, { "epoch": 0.7374067585384796, "grad_norm": 5.981218825431428, "learning_rate": 3.6406671148774538e-06, "loss": 0.8876, "step": 10207 }, { "epoch": 0.7374790037386891, "grad_norm": 6.22802380270504, "learning_rate": 3.640406825867617e-06, "loss": 0.7354, "step": 10208 }, { "epoch": 0.7375512489388987, "grad_norm": 5.226030034653045, "learning_rate": 3.6401465212464965e-06, "loss": 0.8569, "step": 10209 }, { "epoch": 0.7376234941391081, "grad_norm": 6.256885245557426, "learning_rate": 3.6398862010176554e-06, "loss": 0.8759, "step": 10210 }, { "epoch": 0.7376957393393176, "grad_norm": 5.269726341713841, "learning_rate": 3.639625865184658e-06, "loss": 0.8067, "step": 10211 }, { "epoch": 0.7377679845395272, "grad_norm": 5.546471465570141, "learning_rate": 3.6393655137510662e-06, "loss": 0.8108, "step": 10212 }, { "epoch": 0.7378402297397366, "grad_norm": 5.923618512921059, "learning_rate": 3.6391051467204463e-06, "loss": 0.9281, "step": 10213 }, { "epoch": 0.7379124749399462, "grad_norm": 8.426089561031697, "learning_rate": 3.638844764096361e-06, "loss": 0.8597, "step": 10214 }, { "epoch": 0.7379847201401557, "grad_norm": 6.603714920461998, "learning_rate": 3.6385843658823743e-06, "loss": 0.8511, "step": 10215 }, { "epoch": 0.7380569653403652, "grad_norm": 5.52947607528184, "learning_rate": 3.638323952082053e-06, "loss": 0.7751, "step": 10216 }, { "epoch": 0.7381292105405747, "grad_norm": 5.893320291657208, "learning_rate": 3.6380635226989597e-06, "loss": 0.7567, "step": 10217 }, { "epoch": 0.7382014557407842, "grad_norm": 8.599504909015156, "learning_rate": 3.63780307773666e-06, "loss": 0.8363, "step": 10218 }, { "epoch": 0.7382737009409938, "grad_norm": 7.4765099470788865, "learning_rate": 3.6375426171987205e-06, "loss": 0.8288, "step": 10219 }, { "epoch": 0.7383459461412032, "grad_norm": 7.790969599857787, "learning_rate": 3.637282141088706e-06, "loss": 0.8106, "step": 10220 }, { "epoch": 0.7384181913414127, "grad_norm": 7.927835422951728, "learning_rate": 3.6370216494101818e-06, "loss": 0.8518, "step": 10221 }, { "epoch": 0.7384904365416223, "grad_norm": 6.422457109414249, "learning_rate": 3.6367611421667142e-06, "loss": 0.8751, "step": 10222 }, { "epoch": 0.7385626817418318, "grad_norm": 7.868416107826176, "learning_rate": 3.636500619361869e-06, "loss": 0.8802, "step": 10223 }, { "epoch": 0.7386349269420412, "grad_norm": 7.148796897983817, "learning_rate": 3.6362400809992127e-06, "loss": 0.9282, "step": 10224 }, { "epoch": 0.7387071721422508, "grad_norm": 8.734177152136448, "learning_rate": 3.6359795270823117e-06, "loss": 0.9333, "step": 10225 }, { "epoch": 0.7387794173424603, "grad_norm": 6.727425018198886, "learning_rate": 3.6357189576147336e-06, "loss": 0.862, "step": 10226 }, { "epoch": 0.7388516625426699, "grad_norm": 4.759318396869787, "learning_rate": 3.635458372600045e-06, "loss": 0.8036, "step": 10227 }, { "epoch": 0.7389239077428793, "grad_norm": 5.828908095533986, "learning_rate": 3.635197772041811e-06, "loss": 0.8469, "step": 10228 }, { "epoch": 0.7389961529430888, "grad_norm": 5.981104024064427, "learning_rate": 3.634937155943603e-06, "loss": 0.8184, "step": 10229 }, { "epoch": 0.7390683981432984, "grad_norm": 5.3366943537517875, "learning_rate": 3.634676524308986e-06, "loss": 0.7728, "step": 10230 }, { "epoch": 0.7391406433435078, "grad_norm": 8.199679335860488, "learning_rate": 3.6344158771415284e-06, "loss": 0.8582, "step": 10231 }, { "epoch": 0.7392128885437174, "grad_norm": 6.1037809317338425, "learning_rate": 3.634155214444798e-06, "loss": 0.8464, "step": 10232 }, { "epoch": 0.7392851337439269, "grad_norm": 5.94610197462396, "learning_rate": 3.633894536222363e-06, "loss": 0.9242, "step": 10233 }, { "epoch": 0.7393573789441364, "grad_norm": 6.087165600876178, "learning_rate": 3.6336338424777926e-06, "loss": 0.8536, "step": 10234 }, { "epoch": 0.7394296241443459, "grad_norm": 6.500627634117429, "learning_rate": 3.6333731332146547e-06, "loss": 0.8626, "step": 10235 }, { "epoch": 0.7395018693445554, "grad_norm": 5.061731350790767, "learning_rate": 3.63311240843652e-06, "loss": 0.7589, "step": 10236 }, { "epoch": 0.739574114544765, "grad_norm": 6.275076342114511, "learning_rate": 3.632851668146955e-06, "loss": 0.8536, "step": 10237 }, { "epoch": 0.7396463597449744, "grad_norm": 6.2194765519553386, "learning_rate": 3.63259091234953e-06, "loss": 0.799, "step": 10238 }, { "epoch": 0.7397186049451839, "grad_norm": 6.86543718583788, "learning_rate": 3.632330141047816e-06, "loss": 0.8603, "step": 10239 }, { "epoch": 0.7397908501453935, "grad_norm": 7.147414437549417, "learning_rate": 3.6320693542453807e-06, "loss": 0.8221, "step": 10240 }, { "epoch": 0.739863095345603, "grad_norm": 6.371638589900757, "learning_rate": 3.6318085519457956e-06, "loss": 0.7588, "step": 10241 }, { "epoch": 0.7399353405458124, "grad_norm": 6.566914263606359, "learning_rate": 3.631547734152629e-06, "loss": 0.7961, "step": 10242 }, { "epoch": 0.740007585746022, "grad_norm": 6.2638777201520766, "learning_rate": 3.6312869008694533e-06, "loss": 0.7899, "step": 10243 }, { "epoch": 0.7400798309462315, "grad_norm": 7.229515338444798, "learning_rate": 3.6310260520998386e-06, "loss": 0.8236, "step": 10244 }, { "epoch": 0.7401520761464411, "grad_norm": 7.327824112626199, "learning_rate": 3.6307651878473553e-06, "loss": 0.8084, "step": 10245 }, { "epoch": 0.7402243213466505, "grad_norm": 7.18337602477284, "learning_rate": 3.6305043081155744e-06, "loss": 0.8375, "step": 10246 }, { "epoch": 0.74029656654686, "grad_norm": 7.821152191355299, "learning_rate": 3.630243412908067e-06, "loss": 0.9676, "step": 10247 }, { "epoch": 0.7403688117470696, "grad_norm": 5.665482378604261, "learning_rate": 3.6299825022284052e-06, "loss": 0.7689, "step": 10248 }, { "epoch": 0.740441056947279, "grad_norm": 8.091363857062838, "learning_rate": 3.6297215760801603e-06, "loss": 0.8856, "step": 10249 }, { "epoch": 0.7405133021474886, "grad_norm": 8.307012367137384, "learning_rate": 3.6294606344669048e-06, "loss": 0.7867, "step": 10250 }, { "epoch": 0.7405855473476981, "grad_norm": 5.469114978055268, "learning_rate": 3.6291996773922088e-06, "loss": 0.854, "step": 10251 }, { "epoch": 0.7406577925479076, "grad_norm": 6.139644631902948, "learning_rate": 3.628938704859647e-06, "loss": 0.8101, "step": 10252 }, { "epoch": 0.7407300377481171, "grad_norm": 8.49059268747302, "learning_rate": 3.6286777168727905e-06, "loss": 0.7733, "step": 10253 }, { "epoch": 0.7408022829483266, "grad_norm": 8.171168688853463, "learning_rate": 3.6284167134352125e-06, "loss": 0.9165, "step": 10254 }, { "epoch": 0.7408745281485362, "grad_norm": 6.384486003551188, "learning_rate": 3.6281556945504866e-06, "loss": 0.8455, "step": 10255 }, { "epoch": 0.7409467733487456, "grad_norm": 6.298464742425114, "learning_rate": 3.6278946602221837e-06, "loss": 0.8131, "step": 10256 }, { "epoch": 0.7410190185489551, "grad_norm": 6.443667818094844, "learning_rate": 3.62763361045388e-06, "loss": 0.9077, "step": 10257 }, { "epoch": 0.7410912637491647, "grad_norm": 7.337945210403841, "learning_rate": 3.627372545249147e-06, "loss": 0.8185, "step": 10258 }, { "epoch": 0.7411635089493742, "grad_norm": 6.794496317336468, "learning_rate": 3.6271114646115595e-06, "loss": 0.7487, "step": 10259 }, { "epoch": 0.7412357541495836, "grad_norm": 5.165211421335975, "learning_rate": 3.626850368544691e-06, "loss": 0.7669, "step": 10260 }, { "epoch": 0.7413079993497932, "grad_norm": 8.730895630643147, "learning_rate": 3.6265892570521154e-06, "loss": 0.8391, "step": 10261 }, { "epoch": 0.7413802445500027, "grad_norm": 6.219043207205264, "learning_rate": 3.6263281301374083e-06, "loss": 0.7662, "step": 10262 }, { "epoch": 0.7414524897502123, "grad_norm": 6.003664169444578, "learning_rate": 3.626066987804144e-06, "loss": 0.7949, "step": 10263 }, { "epoch": 0.7415247349504217, "grad_norm": 6.473860486478579, "learning_rate": 3.625805830055897e-06, "loss": 0.8578, "step": 10264 }, { "epoch": 0.7415969801506312, "grad_norm": 6.521417106196938, "learning_rate": 3.6255446568962414e-06, "loss": 0.8583, "step": 10265 }, { "epoch": 0.7416692253508408, "grad_norm": 6.150351239850872, "learning_rate": 3.6252834683287534e-06, "loss": 0.8709, "step": 10266 }, { "epoch": 0.7417414705510502, "grad_norm": 6.117943260306369, "learning_rate": 3.6250222643570086e-06, "loss": 0.8749, "step": 10267 }, { "epoch": 0.7418137157512598, "grad_norm": 5.645849962198278, "learning_rate": 3.624761044984583e-06, "loss": 0.7926, "step": 10268 }, { "epoch": 0.7418859609514693, "grad_norm": 7.177928887670529, "learning_rate": 3.624499810215052e-06, "loss": 0.6999, "step": 10269 }, { "epoch": 0.7419582061516788, "grad_norm": 7.175211344407499, "learning_rate": 3.6242385600519914e-06, "loss": 0.8763, "step": 10270 }, { "epoch": 0.7420304513518883, "grad_norm": 5.9194411457318505, "learning_rate": 3.623977294498978e-06, "loss": 0.8311, "step": 10271 }, { "epoch": 0.7421026965520978, "grad_norm": 5.572225576551667, "learning_rate": 3.6237160135595878e-06, "loss": 0.8195, "step": 10272 }, { "epoch": 0.7421749417523074, "grad_norm": 6.5756683474068165, "learning_rate": 3.6234547172373984e-06, "loss": 0.8793, "step": 10273 }, { "epoch": 0.7422471869525168, "grad_norm": 5.508301452144328, "learning_rate": 3.6231934055359864e-06, "loss": 0.8248, "step": 10274 }, { "epoch": 0.7423194321527263, "grad_norm": 4.6332585600896, "learning_rate": 3.6229320784589277e-06, "loss": 0.7755, "step": 10275 }, { "epoch": 0.7423916773529359, "grad_norm": 7.271926083649672, "learning_rate": 3.6226707360098012e-06, "loss": 0.885, "step": 10276 }, { "epoch": 0.7424639225531454, "grad_norm": 5.508542795666514, "learning_rate": 3.6224093781921848e-06, "loss": 0.8753, "step": 10277 }, { "epoch": 0.7425361677533548, "grad_norm": 5.31034412396841, "learning_rate": 3.622148005009655e-06, "loss": 0.8475, "step": 10278 }, { "epoch": 0.7426084129535644, "grad_norm": 6.241599579243864, "learning_rate": 3.62188661646579e-06, "loss": 0.8122, "step": 10279 }, { "epoch": 0.7426806581537739, "grad_norm": 5.727397779720598, "learning_rate": 3.6216252125641686e-06, "loss": 0.8242, "step": 10280 }, { "epoch": 0.7427529033539835, "grad_norm": 6.531520710130622, "learning_rate": 3.621363793308369e-06, "loss": 0.8329, "step": 10281 }, { "epoch": 0.7428251485541929, "grad_norm": 7.409077937406811, "learning_rate": 3.6211023587019695e-06, "loss": 0.8496, "step": 10282 }, { "epoch": 0.7428973937544024, "grad_norm": 7.10388078608449, "learning_rate": 3.620840908748549e-06, "loss": 0.8306, "step": 10283 }, { "epoch": 0.742969638954612, "grad_norm": 7.274419765339589, "learning_rate": 3.6205794434516877e-06, "loss": 0.8508, "step": 10284 }, { "epoch": 0.7430418841548214, "grad_norm": 4.813414449633486, "learning_rate": 3.6203179628149626e-06, "loss": 0.7913, "step": 10285 }, { "epoch": 0.743114129355031, "grad_norm": 5.1832250422279476, "learning_rate": 3.620056466841955e-06, "loss": 0.7614, "step": 10286 }, { "epoch": 0.7431863745552405, "grad_norm": 6.700312809260725, "learning_rate": 3.6197949555362448e-06, "loss": 0.8246, "step": 10287 }, { "epoch": 0.74325861975545, "grad_norm": 7.1686536414963085, "learning_rate": 3.6195334289014107e-06, "loss": 0.8786, "step": 10288 }, { "epoch": 0.7433308649556595, "grad_norm": 5.409191588185365, "learning_rate": 3.6192718869410325e-06, "loss": 0.8378, "step": 10289 }, { "epoch": 0.743403110155869, "grad_norm": 5.98241709261562, "learning_rate": 3.6190103296586914e-06, "loss": 0.9293, "step": 10290 }, { "epoch": 0.7434753553560786, "grad_norm": 7.704633027384881, "learning_rate": 3.6187487570579683e-06, "loss": 0.8558, "step": 10291 }, { "epoch": 0.743547600556288, "grad_norm": 5.031395572428191, "learning_rate": 3.6184871691424427e-06, "loss": 0.8458, "step": 10292 }, { "epoch": 0.7436198457564975, "grad_norm": 7.484509811302175, "learning_rate": 3.6182255659156975e-06, "loss": 0.8037, "step": 10293 }, { "epoch": 0.7436920909567071, "grad_norm": 5.517900467630665, "learning_rate": 3.617963947381311e-06, "loss": 0.831, "step": 10294 }, { "epoch": 0.7437643361569166, "grad_norm": 8.379385184447361, "learning_rate": 3.6177023135428667e-06, "loss": 0.9099, "step": 10295 }, { "epoch": 0.743836581357126, "grad_norm": 7.394912991995219, "learning_rate": 3.617440664403946e-06, "loss": 0.8394, "step": 10296 }, { "epoch": 0.7439088265573356, "grad_norm": 6.135291348155995, "learning_rate": 3.61717899996813e-06, "loss": 0.7743, "step": 10297 }, { "epoch": 0.7439810717575451, "grad_norm": 6.137388818048458, "learning_rate": 3.616917320239001e-06, "loss": 0.7656, "step": 10298 }, { "epoch": 0.7440533169577547, "grad_norm": 6.20942671533022, "learning_rate": 3.61665562522014e-06, "loss": 0.8837, "step": 10299 }, { "epoch": 0.7441255621579641, "grad_norm": 7.549064618708178, "learning_rate": 3.616393914915132e-06, "loss": 0.8106, "step": 10300 }, { "epoch": 0.7441978073581736, "grad_norm": 6.423980442003095, "learning_rate": 3.6161321893275576e-06, "loss": 0.7821, "step": 10301 }, { "epoch": 0.7442700525583832, "grad_norm": 7.099662568576515, "learning_rate": 3.615870448461e-06, "loss": 0.8867, "step": 10302 }, { "epoch": 0.7443422977585926, "grad_norm": 6.485702342027791, "learning_rate": 3.6156086923190425e-06, "loss": 0.7988, "step": 10303 }, { "epoch": 0.7444145429588022, "grad_norm": 7.473225932283925, "learning_rate": 3.6153469209052685e-06, "loss": 0.8699, "step": 10304 }, { "epoch": 0.7444867881590117, "grad_norm": 7.27345087396394, "learning_rate": 3.6150851342232605e-06, "loss": 0.8922, "step": 10305 }, { "epoch": 0.7445590333592212, "grad_norm": 7.997558936581324, "learning_rate": 3.6148233322766037e-06, "loss": 0.9018, "step": 10306 }, { "epoch": 0.7446312785594307, "grad_norm": 5.714507984198188, "learning_rate": 3.6145615150688806e-06, "loss": 0.8514, "step": 10307 }, { "epoch": 0.7447035237596402, "grad_norm": 6.378348873382981, "learning_rate": 3.614299682603676e-06, "loss": 0.8393, "step": 10308 }, { "epoch": 0.7447757689598498, "grad_norm": 7.134689435074307, "learning_rate": 3.614037834884573e-06, "loss": 0.7894, "step": 10309 }, { "epoch": 0.7448480141600592, "grad_norm": 7.336437724225928, "learning_rate": 3.613775971915158e-06, "loss": 0.8902, "step": 10310 }, { "epoch": 0.7449202593602687, "grad_norm": 5.737044625089946, "learning_rate": 3.6135140936990144e-06, "loss": 0.8418, "step": 10311 }, { "epoch": 0.7449925045604783, "grad_norm": 6.274780281749195, "learning_rate": 3.613252200239728e-06, "loss": 0.8243, "step": 10312 }, { "epoch": 0.7450647497606878, "grad_norm": 5.413230666955004, "learning_rate": 3.612990291540882e-06, "loss": 0.8229, "step": 10313 }, { "epoch": 0.7451369949608972, "grad_norm": 6.748941126528346, "learning_rate": 3.612728367606064e-06, "loss": 0.9578, "step": 10314 }, { "epoch": 0.7452092401611068, "grad_norm": 5.178011201518568, "learning_rate": 3.6124664284388587e-06, "loss": 0.8085, "step": 10315 }, { "epoch": 0.7452814853613163, "grad_norm": 5.467482763332134, "learning_rate": 3.612204474042852e-06, "loss": 0.7916, "step": 10316 }, { "epoch": 0.7453537305615259, "grad_norm": 5.209259357782124, "learning_rate": 3.611942504421629e-06, "loss": 0.8854, "step": 10317 }, { "epoch": 0.7454259757617353, "grad_norm": 6.075016589985867, "learning_rate": 3.611680519578776e-06, "loss": 0.9329, "step": 10318 }, { "epoch": 0.7454982209619448, "grad_norm": 5.883092622812746, "learning_rate": 3.611418519517881e-06, "loss": 0.7863, "step": 10319 }, { "epoch": 0.7455704661621544, "grad_norm": 6.03677415611302, "learning_rate": 3.6111565042425297e-06, "loss": 0.8525, "step": 10320 }, { "epoch": 0.7456427113623638, "grad_norm": 6.118753790537641, "learning_rate": 3.610894473756308e-06, "loss": 0.8343, "step": 10321 }, { "epoch": 0.7457149565625734, "grad_norm": 6.201993363402407, "learning_rate": 3.6106324280628034e-06, "loss": 0.8955, "step": 10322 }, { "epoch": 0.7457872017627829, "grad_norm": 6.861463780504976, "learning_rate": 3.6103703671656034e-06, "loss": 0.8709, "step": 10323 }, { "epoch": 0.7458594469629924, "grad_norm": 5.527841670925313, "learning_rate": 3.6101082910682942e-06, "loss": 0.7704, "step": 10324 }, { "epoch": 0.7459316921632019, "grad_norm": 7.527381758304404, "learning_rate": 3.609846199774466e-06, "loss": 0.926, "step": 10325 }, { "epoch": 0.7460039373634114, "grad_norm": 5.85160128856887, "learning_rate": 3.609584093287704e-06, "loss": 0.8408, "step": 10326 }, { "epoch": 0.746076182563621, "grad_norm": 7.880017574310069, "learning_rate": 3.609321971611598e-06, "loss": 0.9078, "step": 10327 }, { "epoch": 0.7461484277638304, "grad_norm": 6.226379927922479, "learning_rate": 3.6090598347497348e-06, "loss": 0.8652, "step": 10328 }, { "epoch": 0.7462206729640399, "grad_norm": 6.644382553634534, "learning_rate": 3.608797682705704e-06, "loss": 0.898, "step": 10329 }, { "epoch": 0.7462929181642495, "grad_norm": 6.997126398174557, "learning_rate": 3.6085355154830947e-06, "loss": 0.8088, "step": 10330 }, { "epoch": 0.746365163364459, "grad_norm": 7.16514334642404, "learning_rate": 3.6082733330854935e-06, "loss": 0.8928, "step": 10331 }, { "epoch": 0.7464374085646684, "grad_norm": 7.793826315597193, "learning_rate": 3.6080111355164913e-06, "loss": 0.9048, "step": 10332 }, { "epoch": 0.746509653764878, "grad_norm": 5.915074774558902, "learning_rate": 3.607748922779677e-06, "loss": 0.8183, "step": 10333 }, { "epoch": 0.7465818989650875, "grad_norm": 5.371740905614833, "learning_rate": 3.607486694878641e-06, "loss": 0.8565, "step": 10334 }, { "epoch": 0.7466541441652971, "grad_norm": 6.140682152246758, "learning_rate": 3.6072244518169707e-06, "loss": 0.8, "step": 10335 }, { "epoch": 0.7467263893655065, "grad_norm": 6.480130270131234, "learning_rate": 3.606962193598258e-06, "loss": 0.913, "step": 10336 }, { "epoch": 0.746798634565716, "grad_norm": 5.652045273034587, "learning_rate": 3.606699920226092e-06, "loss": 0.827, "step": 10337 }, { "epoch": 0.7468708797659256, "grad_norm": 7.237221341212331, "learning_rate": 3.606437631704064e-06, "loss": 0.7938, "step": 10338 }, { "epoch": 0.746943124966135, "grad_norm": 5.116219418805005, "learning_rate": 3.6061753280357636e-06, "loss": 0.8058, "step": 10339 }, { "epoch": 0.7470153701663446, "grad_norm": 7.7014698161520805, "learning_rate": 3.605913009224782e-06, "loss": 0.7614, "step": 10340 }, { "epoch": 0.7470876153665541, "grad_norm": 7.3843340239354545, "learning_rate": 3.6056506752747093e-06, "loss": 0.9002, "step": 10341 }, { "epoch": 0.7471598605667636, "grad_norm": 6.342367087277068, "learning_rate": 3.6053883261891374e-06, "loss": 0.8039, "step": 10342 }, { "epoch": 0.7472321057669731, "grad_norm": 6.369429604280647, "learning_rate": 3.605125961971659e-06, "loss": 0.8101, "step": 10343 }, { "epoch": 0.7473043509671826, "grad_norm": 5.642692039240404, "learning_rate": 3.604863582625863e-06, "loss": 0.7834, "step": 10344 }, { "epoch": 0.7473765961673922, "grad_norm": 6.1857214018637, "learning_rate": 3.604601188155343e-06, "loss": 0.8676, "step": 10345 }, { "epoch": 0.7474488413676016, "grad_norm": 5.534441872897611, "learning_rate": 3.60433877856369e-06, "loss": 0.9013, "step": 10346 }, { "epoch": 0.7475210865678111, "grad_norm": 6.848201888825082, "learning_rate": 3.6040763538544966e-06, "loss": 0.8206, "step": 10347 }, { "epoch": 0.7475933317680207, "grad_norm": 6.271512050799258, "learning_rate": 3.6038139140313555e-06, "loss": 0.7998, "step": 10348 }, { "epoch": 0.7476655769682302, "grad_norm": 6.731344085290847, "learning_rate": 3.603551459097859e-06, "loss": 0.8587, "step": 10349 }, { "epoch": 0.7477378221684396, "grad_norm": 6.535540034214339, "learning_rate": 3.6032889890575996e-06, "loss": 0.8724, "step": 10350 }, { "epoch": 0.7478100673686492, "grad_norm": 6.174820363756196, "learning_rate": 3.60302650391417e-06, "loss": 0.7308, "step": 10351 }, { "epoch": 0.7478823125688587, "grad_norm": 6.587467038616837, "learning_rate": 3.602764003671165e-06, "loss": 0.7887, "step": 10352 }, { "epoch": 0.7479545577690682, "grad_norm": 5.966091023102754, "learning_rate": 3.6025014883321772e-06, "loss": 0.8199, "step": 10353 }, { "epoch": 0.7480268029692777, "grad_norm": 6.744367156545168, "learning_rate": 3.6022389579007994e-06, "loss": 0.8562, "step": 10354 }, { "epoch": 0.7480990481694872, "grad_norm": 6.182812080450769, "learning_rate": 3.601976412380626e-06, "loss": 0.841, "step": 10355 }, { "epoch": 0.7481712933696968, "grad_norm": 5.006940792586215, "learning_rate": 3.6017138517752513e-06, "loss": 0.7711, "step": 10356 }, { "epoch": 0.7482435385699062, "grad_norm": 6.420349682467927, "learning_rate": 3.6014512760882697e-06, "loss": 0.7515, "step": 10357 }, { "epoch": 0.7483157837701158, "grad_norm": 6.743604668013764, "learning_rate": 3.6011886853232746e-06, "loss": 0.859, "step": 10358 }, { "epoch": 0.7483880289703253, "grad_norm": 7.36118250718271, "learning_rate": 3.6009260794838624e-06, "loss": 0.8064, "step": 10359 }, { "epoch": 0.7484602741705348, "grad_norm": 6.367875849486441, "learning_rate": 3.6006634585736267e-06, "loss": 0.8877, "step": 10360 }, { "epoch": 0.7485325193707443, "grad_norm": 10.905586812751928, "learning_rate": 3.6004008225961622e-06, "loss": 0.8275, "step": 10361 }, { "epoch": 0.7486047645709538, "grad_norm": 6.64522244347337, "learning_rate": 3.600138171555066e-06, "loss": 0.8564, "step": 10362 }, { "epoch": 0.7486770097711634, "grad_norm": 5.768688845491127, "learning_rate": 3.5998755054539313e-06, "loss": 0.8387, "step": 10363 }, { "epoch": 0.7487492549713728, "grad_norm": 6.068710449592738, "learning_rate": 3.599612824296356e-06, "loss": 0.8749, "step": 10364 }, { "epoch": 0.7488215001715823, "grad_norm": 9.23819412287139, "learning_rate": 3.5993501280859338e-06, "loss": 0.8363, "step": 10365 }, { "epoch": 0.7488937453717919, "grad_norm": 4.938933345526414, "learning_rate": 3.5990874168262625e-06, "loss": 0.7156, "step": 10366 }, { "epoch": 0.7489659905720014, "grad_norm": 8.451826126238286, "learning_rate": 3.5988246905209377e-06, "loss": 0.9045, "step": 10367 }, { "epoch": 0.7490382357722108, "grad_norm": 5.150263229605698, "learning_rate": 3.5985619491735563e-06, "loss": 0.8206, "step": 10368 }, { "epoch": 0.7491104809724204, "grad_norm": 7.257059803542169, "learning_rate": 3.5982991927877148e-06, "loss": 0.8784, "step": 10369 }, { "epoch": 0.7491827261726299, "grad_norm": 7.234409010871052, "learning_rate": 3.598036421367009e-06, "loss": 1.0376, "step": 10370 }, { "epoch": 0.7492549713728394, "grad_norm": 6.31415498397221, "learning_rate": 3.5977736349150395e-06, "loss": 0.8008, "step": 10371 }, { "epoch": 0.7493272165730489, "grad_norm": 6.57250968854037, "learning_rate": 3.597510833435399e-06, "loss": 0.8568, "step": 10372 }, { "epoch": 0.7493994617732584, "grad_norm": 6.479243957778122, "learning_rate": 3.5972480169316894e-06, "loss": 0.8337, "step": 10373 }, { "epoch": 0.749471706973468, "grad_norm": 5.966561280639377, "learning_rate": 3.596985185407505e-06, "loss": 0.7875, "step": 10374 }, { "epoch": 0.7495439521736774, "grad_norm": 5.953836874062743, "learning_rate": 3.596722338866445e-06, "loss": 0.7788, "step": 10375 }, { "epoch": 0.749616197373887, "grad_norm": 6.198913220554616, "learning_rate": 3.596459477312108e-06, "loss": 0.8878, "step": 10376 }, { "epoch": 0.7496884425740965, "grad_norm": 5.761958747649895, "learning_rate": 3.596196600748093e-06, "loss": 0.7645, "step": 10377 }, { "epoch": 0.749760687774306, "grad_norm": 7.435754138331424, "learning_rate": 3.595933709177997e-06, "loss": 0.8017, "step": 10378 }, { "epoch": 0.7498329329745155, "grad_norm": 6.3123510362026005, "learning_rate": 3.5956708026054192e-06, "loss": 0.7727, "step": 10379 }, { "epoch": 0.749905178174725, "grad_norm": 5.261019993796077, "learning_rate": 3.5954078810339593e-06, "loss": 0.8077, "step": 10380 }, { "epoch": 0.7499774233749346, "grad_norm": 6.328478541918079, "learning_rate": 3.595144944467216e-06, "loss": 0.8477, "step": 10381 }, { "epoch": 0.750049668575144, "grad_norm": 8.725571892913292, "learning_rate": 3.5948819929087884e-06, "loss": 0.7962, "step": 10382 }, { "epoch": 0.7501219137753535, "grad_norm": 6.25471105406741, "learning_rate": 3.5946190263622765e-06, "loss": 0.8302, "step": 10383 }, { "epoch": 0.7501941589755631, "grad_norm": 7.093594587194878, "learning_rate": 3.59435604483128e-06, "loss": 0.9058, "step": 10384 }, { "epoch": 0.7502664041757726, "grad_norm": 6.4003626124654005, "learning_rate": 3.5940930483193994e-06, "loss": 0.7755, "step": 10385 }, { "epoch": 0.750338649375982, "grad_norm": 5.512618242236946, "learning_rate": 3.593830036830234e-06, "loss": 0.7454, "step": 10386 }, { "epoch": 0.7504108945761916, "grad_norm": 7.1702230042662, "learning_rate": 3.5935670103673855e-06, "loss": 0.8348, "step": 10387 }, { "epoch": 0.7504831397764011, "grad_norm": 7.031861138805249, "learning_rate": 3.593303968934453e-06, "loss": 0.8441, "step": 10388 }, { "epoch": 0.7505553849766106, "grad_norm": 6.918888179567395, "learning_rate": 3.593040912535038e-06, "loss": 0.8794, "step": 10389 }, { "epoch": 0.7506276301768201, "grad_norm": 6.297090058168423, "learning_rate": 3.5927778411727415e-06, "loss": 0.8438, "step": 10390 }, { "epoch": 0.7506998753770296, "grad_norm": 9.031797399068056, "learning_rate": 3.592514754851165e-06, "loss": 0.9037, "step": 10391 }, { "epoch": 0.7507721205772392, "grad_norm": 6.861514928605673, "learning_rate": 3.5922516535739103e-06, "loss": 0.8747, "step": 10392 }, { "epoch": 0.7508443657774486, "grad_norm": 5.267132008593221, "learning_rate": 3.5919885373445784e-06, "loss": 0.8582, "step": 10393 }, { "epoch": 0.7509166109776582, "grad_norm": 5.628718185724583, "learning_rate": 3.5917254061667705e-06, "loss": 0.8659, "step": 10394 }, { "epoch": 0.7509888561778677, "grad_norm": 5.6801388231962475, "learning_rate": 3.5914622600440903e-06, "loss": 0.7812, "step": 10395 }, { "epoch": 0.7510611013780772, "grad_norm": 5.247294864361511, "learning_rate": 3.5911990989801394e-06, "loss": 0.7765, "step": 10396 }, { "epoch": 0.7511333465782867, "grad_norm": 5.451910282586289, "learning_rate": 3.590935922978519e-06, "loss": 0.829, "step": 10397 }, { "epoch": 0.7512055917784962, "grad_norm": 5.893644575873129, "learning_rate": 3.5906727320428336e-06, "loss": 0.8306, "step": 10398 }, { "epoch": 0.7512778369787058, "grad_norm": 6.37621520184921, "learning_rate": 3.590409526176685e-06, "loss": 0.7894, "step": 10399 }, { "epoch": 0.7513500821789152, "grad_norm": 6.469447886097216, "learning_rate": 3.590146305383677e-06, "loss": 0.8033, "step": 10400 }, { "epoch": 0.7514223273791247, "grad_norm": 6.706959301357685, "learning_rate": 3.5898830696674124e-06, "loss": 0.8954, "step": 10401 }, { "epoch": 0.7514945725793343, "grad_norm": 6.375008676560425, "learning_rate": 3.589619819031495e-06, "loss": 0.8889, "step": 10402 }, { "epoch": 0.7515668177795438, "grad_norm": 7.427649634673069, "learning_rate": 3.5893565534795284e-06, "loss": 0.8624, "step": 10403 }, { "epoch": 0.7516390629797532, "grad_norm": 7.57859212477819, "learning_rate": 3.589093273015116e-06, "loss": 0.9158, "step": 10404 }, { "epoch": 0.7517113081799628, "grad_norm": 6.306470049373308, "learning_rate": 3.588829977641863e-06, "loss": 0.9048, "step": 10405 }, { "epoch": 0.7517835533801723, "grad_norm": 5.520446571862027, "learning_rate": 3.588566667363372e-06, "loss": 0.8396, "step": 10406 }, { "epoch": 0.7518557985803818, "grad_norm": 7.009859770543598, "learning_rate": 3.5883033421832493e-06, "loss": 0.8404, "step": 10407 }, { "epoch": 0.7519280437805913, "grad_norm": 5.724051930915066, "learning_rate": 3.588040002105098e-06, "loss": 0.8918, "step": 10408 }, { "epoch": 0.7520002889808008, "grad_norm": 6.22939137677581, "learning_rate": 3.587776647132525e-06, "loss": 0.8722, "step": 10409 }, { "epoch": 0.7520725341810104, "grad_norm": 8.852192976636099, "learning_rate": 3.5875132772691334e-06, "loss": 0.7867, "step": 10410 }, { "epoch": 0.7521447793812198, "grad_norm": 6.489036704401834, "learning_rate": 3.58724989251853e-06, "loss": 0.8779, "step": 10411 }, { "epoch": 0.7522170245814294, "grad_norm": 7.469367731956002, "learning_rate": 3.5869864928843196e-06, "loss": 0.868, "step": 10412 }, { "epoch": 0.7522892697816389, "grad_norm": 6.63124423588566, "learning_rate": 3.586723078370108e-06, "loss": 0.9208, "step": 10413 }, { "epoch": 0.7523615149818484, "grad_norm": 5.884674871728026, "learning_rate": 3.5864596489795017e-06, "loss": 0.8712, "step": 10414 }, { "epoch": 0.7524337601820579, "grad_norm": 6.749771962022616, "learning_rate": 3.5861962047161056e-06, "loss": 0.8116, "step": 10415 }, { "epoch": 0.7525060053822674, "grad_norm": 7.368403701474978, "learning_rate": 3.585932745583528e-06, "loss": 0.9096, "step": 10416 }, { "epoch": 0.752578250582477, "grad_norm": 7.6608842798477745, "learning_rate": 3.585669271585373e-06, "loss": 0.7975, "step": 10417 }, { "epoch": 0.7526504957826864, "grad_norm": 6.06744937808025, "learning_rate": 3.585405782725249e-06, "loss": 0.9163, "step": 10418 }, { "epoch": 0.7527227409828959, "grad_norm": 6.209242717867818, "learning_rate": 3.5851422790067635e-06, "loss": 0.8443, "step": 10419 }, { "epoch": 0.7527949861831055, "grad_norm": 6.036219945703615, "learning_rate": 3.584878760433522e-06, "loss": 0.7442, "step": 10420 }, { "epoch": 0.752867231383315, "grad_norm": 6.900964155421622, "learning_rate": 3.584615227009133e-06, "loss": 0.8348, "step": 10421 }, { "epoch": 0.7529394765835244, "grad_norm": 8.87765377288563, "learning_rate": 3.584351678737202e-06, "loss": 0.9392, "step": 10422 }, { "epoch": 0.753011721783734, "grad_norm": 6.234218050479766, "learning_rate": 3.5840881156213405e-06, "loss": 0.8032, "step": 10423 }, { "epoch": 0.7530839669839435, "grad_norm": 7.629082009224097, "learning_rate": 3.5838245376651537e-06, "loss": 0.8018, "step": 10424 }, { "epoch": 0.753156212184153, "grad_norm": 5.800877590214293, "learning_rate": 3.583560944872251e-06, "loss": 0.8363, "step": 10425 }, { "epoch": 0.7532284573843625, "grad_norm": 6.000661495619442, "learning_rate": 3.5832973372462393e-06, "loss": 0.8739, "step": 10426 }, { "epoch": 0.753300702584572, "grad_norm": 5.119638383135303, "learning_rate": 3.583033714790729e-06, "loss": 0.7968, "step": 10427 }, { "epoch": 0.7533729477847816, "grad_norm": 7.336074519071166, "learning_rate": 3.5827700775093277e-06, "loss": 0.8153, "step": 10428 }, { "epoch": 0.753445192984991, "grad_norm": 6.630647087912563, "learning_rate": 3.5825064254056453e-06, "loss": 0.7896, "step": 10429 }, { "epoch": 0.7535174381852006, "grad_norm": 7.575405490207403, "learning_rate": 3.5822427584832896e-06, "loss": 0.7714, "step": 10430 }, { "epoch": 0.7535896833854101, "grad_norm": 7.85572903285846, "learning_rate": 3.581979076745871e-06, "loss": 0.789, "step": 10431 }, { "epoch": 0.7536619285856196, "grad_norm": 7.237308574773784, "learning_rate": 3.5817153801969994e-06, "loss": 0.8421, "step": 10432 }, { "epoch": 0.7537341737858291, "grad_norm": 6.5362731015732205, "learning_rate": 3.581451668840284e-06, "loss": 0.8304, "step": 10433 }, { "epoch": 0.7538064189860386, "grad_norm": 6.036935922876278, "learning_rate": 3.581187942679335e-06, "loss": 0.8493, "step": 10434 }, { "epoch": 0.7538786641862482, "grad_norm": 5.335537137249515, "learning_rate": 3.5809242017177625e-06, "loss": 0.8401, "step": 10435 }, { "epoch": 0.7539509093864576, "grad_norm": 7.790183458236929, "learning_rate": 3.5806604459591766e-06, "loss": 0.7791, "step": 10436 }, { "epoch": 0.7540231545866671, "grad_norm": 7.927807755188231, "learning_rate": 3.580396675407189e-06, "loss": 0.7906, "step": 10437 }, { "epoch": 0.7540953997868767, "grad_norm": 8.141421112553607, "learning_rate": 3.580132890065409e-06, "loss": 0.8852, "step": 10438 }, { "epoch": 0.7541676449870862, "grad_norm": 7.030879710301722, "learning_rate": 3.579869089937449e-06, "loss": 0.8231, "step": 10439 }, { "epoch": 0.7542398901872956, "grad_norm": 6.5911663471587465, "learning_rate": 3.5796052750269193e-06, "loss": 0.8244, "step": 10440 }, { "epoch": 0.7543121353875052, "grad_norm": 5.943336589567727, "learning_rate": 3.5793414453374313e-06, "loss": 0.7886, "step": 10441 }, { "epoch": 0.7543843805877147, "grad_norm": 7.9323040633794575, "learning_rate": 3.5790776008725975e-06, "loss": 0.7652, "step": 10442 }, { "epoch": 0.7544566257879242, "grad_norm": 7.124078523711801, "learning_rate": 3.578813741636029e-06, "loss": 0.8355, "step": 10443 }, { "epoch": 0.7545288709881337, "grad_norm": 11.843865950125037, "learning_rate": 3.5785498676313393e-06, "loss": 0.8499, "step": 10444 }, { "epoch": 0.7546011161883432, "grad_norm": 6.227289671896984, "learning_rate": 3.5782859788621375e-06, "loss": 0.7846, "step": 10445 }, { "epoch": 0.7546733613885528, "grad_norm": 9.140208073424164, "learning_rate": 3.578022075332038e-06, "loss": 0.8407, "step": 10446 }, { "epoch": 0.7547456065887622, "grad_norm": 5.642844146658364, "learning_rate": 3.577758157044654e-06, "loss": 0.8171, "step": 10447 }, { "epoch": 0.7548178517889718, "grad_norm": 6.5411757104738415, "learning_rate": 3.577494224003598e-06, "loss": 0.8808, "step": 10448 }, { "epoch": 0.7548900969891813, "grad_norm": 8.826589589238614, "learning_rate": 3.5772302762124824e-06, "loss": 0.7447, "step": 10449 }, { "epoch": 0.7549623421893908, "grad_norm": 8.845502376489447, "learning_rate": 3.57696631367492e-06, "loss": 0.9079, "step": 10450 }, { "epoch": 0.7550345873896003, "grad_norm": 5.967721261252234, "learning_rate": 3.576702336394525e-06, "loss": 0.8447, "step": 10451 }, { "epoch": 0.7551068325898098, "grad_norm": 5.996736274613519, "learning_rate": 3.5764383443749124e-06, "loss": 0.7914, "step": 10452 }, { "epoch": 0.7551790777900194, "grad_norm": 5.809574826781109, "learning_rate": 3.576174337619694e-06, "loss": 0.9108, "step": 10453 }, { "epoch": 0.7552513229902288, "grad_norm": 5.643254647214288, "learning_rate": 3.575910316132484e-06, "loss": 0.8117, "step": 10454 }, { "epoch": 0.7553235681904383, "grad_norm": 6.233551047427836, "learning_rate": 3.575646279916898e-06, "loss": 0.91, "step": 10455 }, { "epoch": 0.7553958133906479, "grad_norm": 8.14848150605327, "learning_rate": 3.575382228976548e-06, "loss": 0.8034, "step": 10456 }, { "epoch": 0.7554680585908574, "grad_norm": 9.9298246480743, "learning_rate": 3.5751181633150524e-06, "loss": 0.8659, "step": 10457 }, { "epoch": 0.7555403037910668, "grad_norm": 5.597332707469724, "learning_rate": 3.574854082936022e-06, "loss": 0.8045, "step": 10458 }, { "epoch": 0.7556125489912764, "grad_norm": 5.863763981210682, "learning_rate": 3.5745899878430754e-06, "loss": 0.8686, "step": 10459 }, { "epoch": 0.7556847941914859, "grad_norm": 6.708715992630437, "learning_rate": 3.5743258780398252e-06, "loss": 0.7978, "step": 10460 }, { "epoch": 0.7557570393916954, "grad_norm": 6.504270177975059, "learning_rate": 3.5740617535298884e-06, "loss": 0.8092, "step": 10461 }, { "epoch": 0.7558292845919049, "grad_norm": 5.21717947896162, "learning_rate": 3.57379761431688e-06, "loss": 0.8351, "step": 10462 }, { "epoch": 0.7559015297921144, "grad_norm": 5.427610326600637, "learning_rate": 3.573533460404416e-06, "loss": 0.8526, "step": 10463 }, { "epoch": 0.755973774992324, "grad_norm": 5.2907239820185525, "learning_rate": 3.5732692917961125e-06, "loss": 0.7997, "step": 10464 }, { "epoch": 0.7560460201925334, "grad_norm": 6.418761006525069, "learning_rate": 3.5730051084955852e-06, "loss": 0.856, "step": 10465 }, { "epoch": 0.756118265392743, "grad_norm": 7.79960605898099, "learning_rate": 3.572740910506452e-06, "loss": 0.8134, "step": 10466 }, { "epoch": 0.7561905105929525, "grad_norm": 4.82136511786148, "learning_rate": 3.5724766978323278e-06, "loss": 0.8138, "step": 10467 }, { "epoch": 0.756262755793162, "grad_norm": 6.498766855495729, "learning_rate": 3.5722124704768313e-06, "loss": 0.8544, "step": 10468 }, { "epoch": 0.7563350009933715, "grad_norm": 6.321327307184824, "learning_rate": 3.571948228443578e-06, "loss": 0.8388, "step": 10469 }, { "epoch": 0.756407246193581, "grad_norm": 6.042671418226573, "learning_rate": 3.5716839717361856e-06, "loss": 0.8865, "step": 10470 }, { "epoch": 0.7564794913937906, "grad_norm": 5.843708874562843, "learning_rate": 3.5714197003582718e-06, "loss": 0.8275, "step": 10471 }, { "epoch": 0.756551736594, "grad_norm": 5.65725277605376, "learning_rate": 3.5711554143134548e-06, "loss": 0.8668, "step": 10472 }, { "epoch": 0.7566239817942095, "grad_norm": 5.64783927313802, "learning_rate": 3.5708911136053514e-06, "loss": 0.8162, "step": 10473 }, { "epoch": 0.7566962269944191, "grad_norm": 7.916722053200235, "learning_rate": 3.5706267982375802e-06, "loss": 0.8037, "step": 10474 }, { "epoch": 0.7567684721946286, "grad_norm": 6.431632321396098, "learning_rate": 3.5703624682137593e-06, "loss": 0.7871, "step": 10475 }, { "epoch": 0.756840717394838, "grad_norm": 5.398105550406105, "learning_rate": 3.570098123537507e-06, "loss": 0.8909, "step": 10476 }, { "epoch": 0.7569129625950476, "grad_norm": 5.375918287746145, "learning_rate": 3.5698337642124433e-06, "loss": 0.8707, "step": 10477 }, { "epoch": 0.7569852077952571, "grad_norm": 6.707647189794191, "learning_rate": 3.5695693902421856e-06, "loss": 0.8757, "step": 10478 }, { "epoch": 0.7570574529954666, "grad_norm": 5.603026811853188, "learning_rate": 3.5693050016303523e-06, "loss": 0.8601, "step": 10479 }, { "epoch": 0.7571296981956761, "grad_norm": 6.626453743986402, "learning_rate": 3.5690405983805653e-06, "loss": 0.9214, "step": 10480 }, { "epoch": 0.7572019433958856, "grad_norm": 7.36744250991909, "learning_rate": 3.568776180496442e-06, "loss": 0.8406, "step": 10481 }, { "epoch": 0.7572741885960952, "grad_norm": 5.757868814387091, "learning_rate": 3.568511747981602e-06, "loss": 0.8879, "step": 10482 }, { "epoch": 0.7573464337963046, "grad_norm": 6.3748130022934, "learning_rate": 3.5682473008396668e-06, "loss": 0.8347, "step": 10483 }, { "epoch": 0.7574186789965142, "grad_norm": 9.00254361977639, "learning_rate": 3.567982839074255e-06, "loss": 0.8419, "step": 10484 }, { "epoch": 0.7574909241967237, "grad_norm": 8.365496809229825, "learning_rate": 3.5677183626889877e-06, "loss": 0.8856, "step": 10485 }, { "epoch": 0.7575631693969332, "grad_norm": 6.5301737651517495, "learning_rate": 3.5674538716874848e-06, "loss": 0.9059, "step": 10486 }, { "epoch": 0.7576354145971427, "grad_norm": 6.751426546034115, "learning_rate": 3.5671893660733675e-06, "loss": 0.8193, "step": 10487 }, { "epoch": 0.7577076597973522, "grad_norm": 7.585198504893763, "learning_rate": 3.566924845850256e-06, "loss": 0.848, "step": 10488 }, { "epoch": 0.7577799049975618, "grad_norm": 6.57220177810741, "learning_rate": 3.566660311021772e-06, "loss": 0.8532, "step": 10489 }, { "epoch": 0.7578521501977712, "grad_norm": 5.499737993415306, "learning_rate": 3.566395761591536e-06, "loss": 0.8488, "step": 10490 }, { "epoch": 0.7579243953979807, "grad_norm": 6.118633464759449, "learning_rate": 3.5661311975631706e-06, "loss": 0.8521, "step": 10491 }, { "epoch": 0.7579966405981903, "grad_norm": 6.636509811767738, "learning_rate": 3.565866618940297e-06, "loss": 0.8563, "step": 10492 }, { "epoch": 0.7580688857983998, "grad_norm": 6.757220609465865, "learning_rate": 3.565602025726537e-06, "loss": 0.7759, "step": 10493 }, { "epoch": 0.7581411309986092, "grad_norm": 6.530931382872064, "learning_rate": 3.5653374179255123e-06, "loss": 0.8153, "step": 10494 }, { "epoch": 0.7582133761988188, "grad_norm": 6.733837699255314, "learning_rate": 3.5650727955408454e-06, "loss": 0.7907, "step": 10495 }, { "epoch": 0.7582856213990283, "grad_norm": 5.503701265090289, "learning_rate": 3.5648081585761597e-06, "loss": 0.8149, "step": 10496 }, { "epoch": 0.7583578665992378, "grad_norm": 5.972637089161547, "learning_rate": 3.5645435070350764e-06, "loss": 0.8546, "step": 10497 }, { "epoch": 0.7584301117994473, "grad_norm": 5.430567388800791, "learning_rate": 3.5642788409212193e-06, "loss": 0.7948, "step": 10498 }, { "epoch": 0.7585023569996568, "grad_norm": 5.949314930690025, "learning_rate": 3.5640141602382104e-06, "loss": 0.8826, "step": 10499 }, { "epoch": 0.7585746021998664, "grad_norm": 7.328943788379241, "learning_rate": 3.563749464989675e-06, "loss": 0.9075, "step": 10500 }, { "epoch": 0.7586468474000758, "grad_norm": 8.210538874422216, "learning_rate": 3.5634847551792353e-06, "loss": 0.8049, "step": 10501 }, { "epoch": 0.7587190926002854, "grad_norm": 6.00585397611546, "learning_rate": 3.563220030810515e-06, "loss": 0.8557, "step": 10502 }, { "epoch": 0.7587913378004949, "grad_norm": 5.155006397439444, "learning_rate": 3.5629552918871367e-06, "loss": 0.7559, "step": 10503 }, { "epoch": 0.7588635830007044, "grad_norm": 5.263951791316455, "learning_rate": 3.562690538412727e-06, "loss": 0.8152, "step": 10504 }, { "epoch": 0.7589358282009139, "grad_norm": 5.443470625601118, "learning_rate": 3.5624257703909087e-06, "loss": 0.8577, "step": 10505 }, { "epoch": 0.7590080734011234, "grad_norm": 5.7361000231291115, "learning_rate": 3.5621609878253066e-06, "loss": 0.8514, "step": 10506 }, { "epoch": 0.759080318601333, "grad_norm": 6.239262627292124, "learning_rate": 3.561896190719545e-06, "loss": 0.9443, "step": 10507 }, { "epoch": 0.7591525638015424, "grad_norm": 5.624222426182123, "learning_rate": 3.5616313790772493e-06, "loss": 0.7847, "step": 10508 }, { "epoch": 0.7592248090017519, "grad_norm": 6.050694878135963, "learning_rate": 3.561366552902045e-06, "loss": 0.7872, "step": 10509 }, { "epoch": 0.7592970542019615, "grad_norm": 6.650336705912111, "learning_rate": 3.5611017121975556e-06, "loss": 0.7142, "step": 10510 }, { "epoch": 0.759369299402171, "grad_norm": 6.592484628473559, "learning_rate": 3.560836856967408e-06, "loss": 0.8429, "step": 10511 }, { "epoch": 0.7594415446023804, "grad_norm": 5.012269415748212, "learning_rate": 3.5605719872152272e-06, "loss": 0.825, "step": 10512 }, { "epoch": 0.75951378980259, "grad_norm": 6.397917599451794, "learning_rate": 3.56030710294464e-06, "loss": 0.8652, "step": 10513 }, { "epoch": 0.7595860350027995, "grad_norm": 6.129196286556715, "learning_rate": 3.560042204159272e-06, "loss": 0.8195, "step": 10514 }, { "epoch": 0.759658280203009, "grad_norm": 5.195660159302839, "learning_rate": 3.559777290862748e-06, "loss": 0.7837, "step": 10515 }, { "epoch": 0.7597305254032185, "grad_norm": 6.647856528945503, "learning_rate": 3.559512363058697e-06, "loss": 0.7927, "step": 10516 }, { "epoch": 0.759802770603428, "grad_norm": 6.28217942682554, "learning_rate": 3.5592474207507437e-06, "loss": 0.76, "step": 10517 }, { "epoch": 0.7598750158036376, "grad_norm": 5.203018141557078, "learning_rate": 3.558982463942516e-06, "loss": 0.8928, "step": 10518 }, { "epoch": 0.759947261003847, "grad_norm": 7.65848430807119, "learning_rate": 3.558717492637641e-06, "loss": 0.9122, "step": 10519 }, { "epoch": 0.7600195062040566, "grad_norm": 5.435662320796119, "learning_rate": 3.5584525068397453e-06, "loss": 0.8218, "step": 10520 }, { "epoch": 0.7600917514042661, "grad_norm": 5.983650502305448, "learning_rate": 3.5581875065524564e-06, "loss": 0.8329, "step": 10521 }, { "epoch": 0.7601639966044756, "grad_norm": 6.616675274872965, "learning_rate": 3.557922491779402e-06, "loss": 0.8653, "step": 10522 }, { "epoch": 0.7602362418046851, "grad_norm": 6.395290919196882, "learning_rate": 3.5576574625242104e-06, "loss": 0.816, "step": 10523 }, { "epoch": 0.7603084870048946, "grad_norm": 6.026196828661792, "learning_rate": 3.5573924187905094e-06, "loss": 0.7575, "step": 10524 }, { "epoch": 0.7603807322051042, "grad_norm": 6.22370628793616, "learning_rate": 3.5571273605819272e-06, "loss": 0.8164, "step": 10525 }, { "epoch": 0.7604529774053136, "grad_norm": 6.244129322890147, "learning_rate": 3.556862287902092e-06, "loss": 0.894, "step": 10526 }, { "epoch": 0.7605252226055231, "grad_norm": 6.121209353467222, "learning_rate": 3.556597200754633e-06, "loss": 0.7327, "step": 10527 }, { "epoch": 0.7605974678057327, "grad_norm": 6.363606462779951, "learning_rate": 3.556332099143179e-06, "loss": 0.8706, "step": 10528 }, { "epoch": 0.7606697130059422, "grad_norm": 6.691164527887341, "learning_rate": 3.5560669830713578e-06, "loss": 0.8136, "step": 10529 }, { "epoch": 0.7607419582061516, "grad_norm": 4.956335525448243, "learning_rate": 3.5558018525428006e-06, "loss": 0.8064, "step": 10530 }, { "epoch": 0.7608142034063612, "grad_norm": 5.589741384020765, "learning_rate": 3.5555367075611347e-06, "loss": 0.8641, "step": 10531 }, { "epoch": 0.7608864486065707, "grad_norm": 6.709837213807129, "learning_rate": 3.5552715481299914e-06, "loss": 0.7754, "step": 10532 }, { "epoch": 0.7609586938067802, "grad_norm": 6.039711506503196, "learning_rate": 3.555006374253e-06, "loss": 0.8692, "step": 10533 }, { "epoch": 0.7610309390069897, "grad_norm": 5.444352841201337, "learning_rate": 3.554741185933791e-06, "loss": 0.9027, "step": 10534 }, { "epoch": 0.7611031842071992, "grad_norm": 5.50689351957725, "learning_rate": 3.5544759831759934e-06, "loss": 0.7378, "step": 10535 }, { "epoch": 0.7611754294074088, "grad_norm": 6.208633552136638, "learning_rate": 3.554210765983238e-06, "loss": 0.8857, "step": 10536 }, { "epoch": 0.7612476746076182, "grad_norm": 6.691144574020892, "learning_rate": 3.5539455343591566e-06, "loss": 0.8971, "step": 10537 }, { "epoch": 0.7613199198078278, "grad_norm": 6.657865373051906, "learning_rate": 3.553680288307379e-06, "loss": 0.8908, "step": 10538 }, { "epoch": 0.7613921650080373, "grad_norm": 6.1894374087643635, "learning_rate": 3.5534150278315366e-06, "loss": 0.756, "step": 10539 }, { "epoch": 0.7614644102082468, "grad_norm": 6.597001995850233, "learning_rate": 3.55314975293526e-06, "loss": 0.8363, "step": 10540 }, { "epoch": 0.7615366554084563, "grad_norm": 5.363451416694586, "learning_rate": 3.552884463622181e-06, "loss": 0.8343, "step": 10541 }, { "epoch": 0.7616089006086658, "grad_norm": 7.338509495711806, "learning_rate": 3.5526191598959307e-06, "loss": 0.8819, "step": 10542 }, { "epoch": 0.7616811458088754, "grad_norm": 6.359780188546681, "learning_rate": 3.552353841760143e-06, "loss": 0.8982, "step": 10543 }, { "epoch": 0.7617533910090848, "grad_norm": 6.60597433707842, "learning_rate": 3.5520885092184472e-06, "loss": 0.8839, "step": 10544 }, { "epoch": 0.7618256362092943, "grad_norm": 5.646738052607508, "learning_rate": 3.551823162274476e-06, "loss": 0.7699, "step": 10545 }, { "epoch": 0.7618978814095039, "grad_norm": 5.459058097109536, "learning_rate": 3.5515578009318635e-06, "loss": 0.805, "step": 10546 }, { "epoch": 0.7619701266097134, "grad_norm": 7.4426174990265626, "learning_rate": 3.5512924251942405e-06, "loss": 0.9882, "step": 10547 }, { "epoch": 0.7620423718099228, "grad_norm": 8.273687400321709, "learning_rate": 3.551027035065241e-06, "loss": 0.9147, "step": 10548 }, { "epoch": 0.7621146170101324, "grad_norm": 5.982226750613182, "learning_rate": 3.550761630548497e-06, "loss": 0.9244, "step": 10549 }, { "epoch": 0.7621868622103419, "grad_norm": 6.365251455082178, "learning_rate": 3.5504962116476427e-06, "loss": 0.8166, "step": 10550 }, { "epoch": 0.7622591074105514, "grad_norm": 5.427612786511464, "learning_rate": 3.5502307783663104e-06, "loss": 0.7612, "step": 10551 }, { "epoch": 0.7623313526107609, "grad_norm": 5.072101671156527, "learning_rate": 3.5499653307081345e-06, "loss": 0.9005, "step": 10552 }, { "epoch": 0.7624035978109704, "grad_norm": 5.53552718659393, "learning_rate": 3.549699868676749e-06, "loss": 0.7242, "step": 10553 }, { "epoch": 0.76247584301118, "grad_norm": 5.396789034107421, "learning_rate": 3.5494343922757864e-06, "loss": 0.779, "step": 10554 }, { "epoch": 0.7625480882113894, "grad_norm": 6.138810139530235, "learning_rate": 3.5491689015088813e-06, "loss": 0.8245, "step": 10555 }, { "epoch": 0.762620333411599, "grad_norm": 6.996164088318941, "learning_rate": 3.5489033963796694e-06, "loss": 0.8745, "step": 10556 }, { "epoch": 0.7626925786118085, "grad_norm": 7.465661850938508, "learning_rate": 3.548637876891785e-06, "loss": 0.8221, "step": 10557 }, { "epoch": 0.7627648238120179, "grad_norm": 6.3342044967340065, "learning_rate": 3.5483723430488614e-06, "loss": 0.9252, "step": 10558 }, { "epoch": 0.7628370690122275, "grad_norm": 6.820006268629846, "learning_rate": 3.548106794854535e-06, "loss": 0.8288, "step": 10559 }, { "epoch": 0.762909314212437, "grad_norm": 5.694418252197237, "learning_rate": 3.54784123231244e-06, "loss": 0.8387, "step": 10560 }, { "epoch": 0.7629815594126466, "grad_norm": 6.641500689045202, "learning_rate": 3.5475756554262118e-06, "loss": 0.8814, "step": 10561 }, { "epoch": 0.763053804612856, "grad_norm": 6.4993772942030645, "learning_rate": 3.547310064199487e-06, "loss": 0.8843, "step": 10562 }, { "epoch": 0.7631260498130655, "grad_norm": 7.053414771770263, "learning_rate": 3.5470444586359e-06, "loss": 0.8171, "step": 10563 }, { "epoch": 0.7631982950132751, "grad_norm": 5.552061766772821, "learning_rate": 3.5467788387390877e-06, "loss": 0.792, "step": 10564 }, { "epoch": 0.7632705402134846, "grad_norm": 5.451552899506092, "learning_rate": 3.5465132045126856e-06, "loss": 0.8616, "step": 10565 }, { "epoch": 0.763342785413694, "grad_norm": 6.012735202812496, "learning_rate": 3.5462475559603302e-06, "loss": 0.8862, "step": 10566 }, { "epoch": 0.7634150306139036, "grad_norm": 5.927288389318212, "learning_rate": 3.545981893085658e-06, "loss": 0.8018, "step": 10567 }, { "epoch": 0.7634872758141131, "grad_norm": 5.800008497560787, "learning_rate": 3.545716215892307e-06, "loss": 0.9079, "step": 10568 }, { "epoch": 0.7635595210143226, "grad_norm": 5.303506756306112, "learning_rate": 3.545450524383912e-06, "loss": 0.7976, "step": 10569 }, { "epoch": 0.7636317662145321, "grad_norm": 6.196592489738753, "learning_rate": 3.5451848185641114e-06, "loss": 0.747, "step": 10570 }, { "epoch": 0.7637040114147416, "grad_norm": 8.325207168704901, "learning_rate": 3.5449190984365423e-06, "loss": 0.8978, "step": 10571 }, { "epoch": 0.7637762566149512, "grad_norm": 6.439358341166243, "learning_rate": 3.5446533640048416e-06, "loss": 0.8297, "step": 10572 }, { "epoch": 0.7638485018151606, "grad_norm": 5.504948210788799, "learning_rate": 3.5443876152726476e-06, "loss": 0.8406, "step": 10573 }, { "epoch": 0.7639207470153702, "grad_norm": 4.908988431381112, "learning_rate": 3.544121852243598e-06, "loss": 0.7458, "step": 10574 }, { "epoch": 0.7639929922155797, "grad_norm": 7.2629384313928425, "learning_rate": 3.5438560749213306e-06, "loss": 0.8144, "step": 10575 }, { "epoch": 0.7640652374157891, "grad_norm": 5.697044329028349, "learning_rate": 3.543590283309485e-06, "loss": 0.8543, "step": 10576 }, { "epoch": 0.7641374826159987, "grad_norm": 7.312746483976866, "learning_rate": 3.543324477411698e-06, "loss": 0.8752, "step": 10577 }, { "epoch": 0.7642097278162082, "grad_norm": 6.54128651415407, "learning_rate": 3.5430586572316096e-06, "loss": 0.7252, "step": 10578 }, { "epoch": 0.7642819730164178, "grad_norm": 8.734695128441313, "learning_rate": 3.5427928227728568e-06, "loss": 0.8541, "step": 10579 }, { "epoch": 0.7643542182166272, "grad_norm": 6.40183519039124, "learning_rate": 3.542526974039081e-06, "loss": 0.8018, "step": 10580 }, { "epoch": 0.7644264634168367, "grad_norm": 5.862947803955302, "learning_rate": 3.5422611110339207e-06, "loss": 0.8342, "step": 10581 }, { "epoch": 0.7644987086170463, "grad_norm": 6.536021848494993, "learning_rate": 3.5419952337610147e-06, "loss": 0.8159, "step": 10582 }, { "epoch": 0.7645709538172558, "grad_norm": 8.345973943578697, "learning_rate": 3.5417293422240028e-06, "loss": 0.8684, "step": 10583 }, { "epoch": 0.7646431990174652, "grad_norm": 7.866895350133442, "learning_rate": 3.5414634364265245e-06, "loss": 0.7834, "step": 10584 }, { "epoch": 0.7647154442176748, "grad_norm": 7.393656522235721, "learning_rate": 3.5411975163722214e-06, "loss": 0.8873, "step": 10585 }, { "epoch": 0.7647876894178843, "grad_norm": 5.947575102272183, "learning_rate": 3.5409315820647322e-06, "loss": 0.8042, "step": 10586 }, { "epoch": 0.7648599346180938, "grad_norm": 6.775307860189217, "learning_rate": 3.5406656335076984e-06, "loss": 0.8111, "step": 10587 }, { "epoch": 0.7649321798183033, "grad_norm": 7.032340139015642, "learning_rate": 3.540399670704759e-06, "loss": 0.7652, "step": 10588 }, { "epoch": 0.7650044250185128, "grad_norm": 8.419301844849578, "learning_rate": 3.540133693659557e-06, "loss": 0.8302, "step": 10589 }, { "epoch": 0.7650766702187224, "grad_norm": 7.140903058470863, "learning_rate": 3.539867702375732e-06, "loss": 0.856, "step": 10590 }, { "epoch": 0.7651489154189318, "grad_norm": 10.425840890027056, "learning_rate": 3.5396016968569256e-06, "loss": 0.8022, "step": 10591 }, { "epoch": 0.7652211606191414, "grad_norm": 5.418930969384289, "learning_rate": 3.539335677106779e-06, "loss": 0.8049, "step": 10592 }, { "epoch": 0.7652934058193509, "grad_norm": 6.8877090893143444, "learning_rate": 3.539069643128934e-06, "loss": 0.8738, "step": 10593 }, { "epoch": 0.7653656510195603, "grad_norm": 7.643463294078783, "learning_rate": 3.5388035949270327e-06, "loss": 0.7744, "step": 10594 }, { "epoch": 0.7654378962197699, "grad_norm": 6.679774876073259, "learning_rate": 3.5385375325047167e-06, "loss": 0.784, "step": 10595 }, { "epoch": 0.7655101414199794, "grad_norm": 5.7324195458896465, "learning_rate": 3.5382714558656283e-06, "loss": 0.8695, "step": 10596 }, { "epoch": 0.765582386620189, "grad_norm": 5.557466436877643, "learning_rate": 3.5380053650134095e-06, "loss": 0.8842, "step": 10597 }, { "epoch": 0.7656546318203984, "grad_norm": 5.90507762111776, "learning_rate": 3.537739259951703e-06, "loss": 0.7624, "step": 10598 }, { "epoch": 0.7657268770206079, "grad_norm": 5.728745360633725, "learning_rate": 3.537473140684152e-06, "loss": 0.8139, "step": 10599 }, { "epoch": 0.7657991222208175, "grad_norm": 5.424982153515972, "learning_rate": 3.537207007214399e-06, "loss": 0.9093, "step": 10600 }, { "epoch": 0.765871367421027, "grad_norm": 6.143867548985138, "learning_rate": 3.5369408595460886e-06, "loss": 0.7784, "step": 10601 }, { "epoch": 0.7659436126212364, "grad_norm": 6.42538734782426, "learning_rate": 3.5366746976828614e-06, "loss": 0.8832, "step": 10602 }, { "epoch": 0.766015857821446, "grad_norm": 6.510030196862569, "learning_rate": 3.536408521628364e-06, "loss": 0.7811, "step": 10603 }, { "epoch": 0.7660881030216555, "grad_norm": 6.060112866650503, "learning_rate": 3.5361423313862375e-06, "loss": 0.8757, "step": 10604 }, { "epoch": 0.766160348221865, "grad_norm": 6.299575718716288, "learning_rate": 3.535876126960128e-06, "loss": 0.7758, "step": 10605 }, { "epoch": 0.7662325934220745, "grad_norm": 7.5679480735013565, "learning_rate": 3.5356099083536778e-06, "loss": 0.858, "step": 10606 }, { "epoch": 0.766304838622284, "grad_norm": 5.687315214739432, "learning_rate": 3.5353436755705317e-06, "loss": 0.7494, "step": 10607 }, { "epoch": 0.7663770838224936, "grad_norm": 6.171946078506851, "learning_rate": 3.5350774286143353e-06, "loss": 0.8186, "step": 10608 }, { "epoch": 0.766449329022703, "grad_norm": 7.618527025647894, "learning_rate": 3.5348111674887325e-06, "loss": 0.8172, "step": 10609 }, { "epoch": 0.7665215742229126, "grad_norm": 6.160830579967168, "learning_rate": 3.5345448921973684e-06, "loss": 0.8811, "step": 10610 }, { "epoch": 0.7665938194231221, "grad_norm": 5.902542807695653, "learning_rate": 3.534278602743888e-06, "loss": 0.9042, "step": 10611 }, { "epoch": 0.7666660646233315, "grad_norm": 6.131197227072039, "learning_rate": 3.5340122991319358e-06, "loss": 0.8464, "step": 10612 }, { "epoch": 0.7667383098235411, "grad_norm": 7.531193523274366, "learning_rate": 3.533745981365159e-06, "loss": 0.7866, "step": 10613 }, { "epoch": 0.7668105550237506, "grad_norm": 6.838688899419033, "learning_rate": 3.5334796494472024e-06, "loss": 0.8498, "step": 10614 }, { "epoch": 0.7668828002239602, "grad_norm": 6.23568642956032, "learning_rate": 3.533213303381711e-06, "loss": 0.8847, "step": 10615 }, { "epoch": 0.7669550454241696, "grad_norm": 6.5122254648753595, "learning_rate": 3.5329469431723325e-06, "loss": 0.8592, "step": 10616 }, { "epoch": 0.7670272906243791, "grad_norm": 6.341869958868229, "learning_rate": 3.5326805688227124e-06, "loss": 0.8843, "step": 10617 }, { "epoch": 0.7670995358245887, "grad_norm": 6.4453204900518655, "learning_rate": 3.5324141803364966e-06, "loss": 0.8272, "step": 10618 }, { "epoch": 0.7671717810247982, "grad_norm": 6.898292064753749, "learning_rate": 3.532147777717333e-06, "loss": 0.7771, "step": 10619 }, { "epoch": 0.7672440262250076, "grad_norm": 6.592485496438498, "learning_rate": 3.531881360968867e-06, "loss": 0.8498, "step": 10620 }, { "epoch": 0.7673162714252172, "grad_norm": 6.6331290988375695, "learning_rate": 3.5316149300947465e-06, "loss": 0.771, "step": 10621 }, { "epoch": 0.7673885166254267, "grad_norm": 6.09627445456172, "learning_rate": 3.5313484850986183e-06, "loss": 0.789, "step": 10622 }, { "epoch": 0.7674607618256362, "grad_norm": 8.21275104853156, "learning_rate": 3.531082025984131e-06, "loss": 0.7957, "step": 10623 }, { "epoch": 0.7675330070258457, "grad_norm": 8.401258610484168, "learning_rate": 3.530815552754931e-06, "loss": 0.8273, "step": 10624 }, { "epoch": 0.7676052522260552, "grad_norm": 5.994980938064266, "learning_rate": 3.530549065414667e-06, "loss": 0.8729, "step": 10625 }, { "epoch": 0.7676774974262648, "grad_norm": 5.885882423898645, "learning_rate": 3.5302825639669854e-06, "loss": 0.8322, "step": 10626 }, { "epoch": 0.7677497426264742, "grad_norm": 7.9570815881139705, "learning_rate": 3.5300160484155356e-06, "loss": 0.8047, "step": 10627 }, { "epoch": 0.7678219878266838, "grad_norm": 6.973921836736637, "learning_rate": 3.5297495187639662e-06, "loss": 0.755, "step": 10628 }, { "epoch": 0.7678942330268933, "grad_norm": 5.883103970099292, "learning_rate": 3.5294829750159255e-06, "loss": 0.7767, "step": 10629 }, { "epoch": 0.7679664782271027, "grad_norm": 7.362582340006726, "learning_rate": 3.529216417175062e-06, "loss": 0.9031, "step": 10630 }, { "epoch": 0.7680387234273123, "grad_norm": 5.5577181718278785, "learning_rate": 3.528949845245025e-06, "loss": 0.8295, "step": 10631 }, { "epoch": 0.7681109686275218, "grad_norm": 5.7538777419948905, "learning_rate": 3.528683259229464e-06, "loss": 0.9054, "step": 10632 }, { "epoch": 0.7681832138277314, "grad_norm": 5.861122135354874, "learning_rate": 3.528416659132027e-06, "loss": 0.7916, "step": 10633 }, { "epoch": 0.7682554590279408, "grad_norm": 6.562172436486383, "learning_rate": 3.5281500449563654e-06, "loss": 0.9045, "step": 10634 }, { "epoch": 0.7683277042281503, "grad_norm": 5.836603864545791, "learning_rate": 3.527883416706127e-06, "loss": 0.8442, "step": 10635 }, { "epoch": 0.7683999494283599, "grad_norm": 6.293012395100045, "learning_rate": 3.5276167743849633e-06, "loss": 0.8578, "step": 10636 }, { "epoch": 0.7684721946285694, "grad_norm": 5.924554465238063, "learning_rate": 3.527350117996524e-06, "loss": 0.8434, "step": 10637 }, { "epoch": 0.7685444398287788, "grad_norm": 6.566946503280891, "learning_rate": 3.5270834475444587e-06, "loss": 0.8394, "step": 10638 }, { "epoch": 0.7686166850289884, "grad_norm": 5.863196345050644, "learning_rate": 3.526816763032419e-06, "loss": 0.7872, "step": 10639 }, { "epoch": 0.7686889302291979, "grad_norm": 5.583012168339432, "learning_rate": 3.526550064464055e-06, "loss": 0.8849, "step": 10640 }, { "epoch": 0.7687611754294074, "grad_norm": 6.868146219695471, "learning_rate": 3.5262833518430175e-06, "loss": 0.8929, "step": 10641 }, { "epoch": 0.7688334206296169, "grad_norm": 6.481917244123683, "learning_rate": 3.5260166251729585e-06, "loss": 0.7789, "step": 10642 }, { "epoch": 0.7689056658298264, "grad_norm": 5.920488582760189, "learning_rate": 3.525749884457528e-06, "loss": 0.8758, "step": 10643 }, { "epoch": 0.768977911030036, "grad_norm": 5.958645721284922, "learning_rate": 3.5254831297003773e-06, "loss": 0.8314, "step": 10644 }, { "epoch": 0.7690501562302454, "grad_norm": 5.706615143005603, "learning_rate": 3.525216360905159e-06, "loss": 0.8552, "step": 10645 }, { "epoch": 0.769122401430455, "grad_norm": 5.307007766732325, "learning_rate": 3.5249495780755257e-06, "loss": 0.8474, "step": 10646 }, { "epoch": 0.7691946466306645, "grad_norm": 6.0440768290596845, "learning_rate": 3.524682781215128e-06, "loss": 0.9122, "step": 10647 }, { "epoch": 0.7692668918308739, "grad_norm": 7.33157000871741, "learning_rate": 3.5244159703276186e-06, "loss": 0.8607, "step": 10648 }, { "epoch": 0.7693391370310835, "grad_norm": 5.869195546228059, "learning_rate": 3.5241491454166497e-06, "loss": 0.8287, "step": 10649 }, { "epoch": 0.769411382231293, "grad_norm": 7.444158058288213, "learning_rate": 3.5238823064858744e-06, "loss": 0.8436, "step": 10650 }, { "epoch": 0.7694836274315026, "grad_norm": 6.265464571375854, "learning_rate": 3.523615453538946e-06, "loss": 0.7694, "step": 10651 }, { "epoch": 0.769555872631712, "grad_norm": 8.030449615850587, "learning_rate": 3.523348586579516e-06, "loss": 0.8184, "step": 10652 }, { "epoch": 0.7696281178319215, "grad_norm": 7.796132757599827, "learning_rate": 3.5230817056112387e-06, "loss": 0.908, "step": 10653 }, { "epoch": 0.7697003630321311, "grad_norm": 4.960979022956148, "learning_rate": 3.5228148106377672e-06, "loss": 0.8452, "step": 10654 }, { "epoch": 0.7697726082323406, "grad_norm": 6.585248483475117, "learning_rate": 3.522547901662755e-06, "loss": 0.862, "step": 10655 }, { "epoch": 0.76984485343255, "grad_norm": 5.437921617319735, "learning_rate": 3.5222809786898558e-06, "loss": 0.8518, "step": 10656 }, { "epoch": 0.7699170986327596, "grad_norm": 6.612718246117784, "learning_rate": 3.5220140417227244e-06, "loss": 0.8626, "step": 10657 }, { "epoch": 0.7699893438329691, "grad_norm": 7.023477420544918, "learning_rate": 3.5217470907650143e-06, "loss": 0.8247, "step": 10658 }, { "epoch": 0.7700615890331786, "grad_norm": 6.072633120513491, "learning_rate": 3.521480125820379e-06, "loss": 0.7633, "step": 10659 }, { "epoch": 0.7701338342333881, "grad_norm": 7.272303245927828, "learning_rate": 3.521213146892475e-06, "loss": 0.8663, "step": 10660 }, { "epoch": 0.7702060794335976, "grad_norm": 6.364304789822976, "learning_rate": 3.520946153984955e-06, "loss": 0.7622, "step": 10661 }, { "epoch": 0.7702783246338072, "grad_norm": 5.174047284724544, "learning_rate": 3.5206791471014757e-06, "loss": 0.855, "step": 10662 }, { "epoch": 0.7703505698340166, "grad_norm": 6.290803694330754, "learning_rate": 3.5204121262456903e-06, "loss": 0.7619, "step": 10663 }, { "epoch": 0.7704228150342262, "grad_norm": 5.681697694367903, "learning_rate": 3.5201450914212555e-06, "loss": 0.7897, "step": 10664 }, { "epoch": 0.7704950602344357, "grad_norm": 6.777202189173601, "learning_rate": 3.519878042631827e-06, "loss": 0.8137, "step": 10665 }, { "epoch": 0.7705673054346451, "grad_norm": 6.313074973330863, "learning_rate": 3.51961097988106e-06, "loss": 0.818, "step": 10666 }, { "epoch": 0.7706395506348547, "grad_norm": 6.508041103052703, "learning_rate": 3.5193439031726105e-06, "loss": 0.7999, "step": 10667 }, { "epoch": 0.7707117958350642, "grad_norm": 5.375243425401702, "learning_rate": 3.5190768125101337e-06, "loss": 0.8418, "step": 10668 }, { "epoch": 0.7707840410352738, "grad_norm": 8.758881775017315, "learning_rate": 3.5188097078972864e-06, "loss": 0.79, "step": 10669 }, { "epoch": 0.7708562862354832, "grad_norm": 6.427330799583555, "learning_rate": 3.518542589337726e-06, "loss": 0.8901, "step": 10670 }, { "epoch": 0.7709285314356927, "grad_norm": 6.417096738728443, "learning_rate": 3.5182754568351086e-06, "loss": 0.8532, "step": 10671 }, { "epoch": 0.7710007766359023, "grad_norm": 5.579537178178909, "learning_rate": 3.51800831039309e-06, "loss": 0.7419, "step": 10672 }, { "epoch": 0.7710730218361118, "grad_norm": 5.9462524152313385, "learning_rate": 3.5177411500153285e-06, "loss": 0.9244, "step": 10673 }, { "epoch": 0.7711452670363212, "grad_norm": 7.309651920574001, "learning_rate": 3.517473975705481e-06, "loss": 0.8429, "step": 10674 }, { "epoch": 0.7712175122365308, "grad_norm": 7.550506665742626, "learning_rate": 3.517206787467205e-06, "loss": 0.7823, "step": 10675 }, { "epoch": 0.7712897574367403, "grad_norm": 6.105115727757862, "learning_rate": 3.5169395853041577e-06, "loss": 0.885, "step": 10676 }, { "epoch": 0.7713620026369498, "grad_norm": 6.427144730911997, "learning_rate": 3.5166723692199967e-06, "loss": 0.7526, "step": 10677 }, { "epoch": 0.7714342478371593, "grad_norm": 7.19284681388989, "learning_rate": 3.5164051392183808e-06, "loss": 0.808, "step": 10678 }, { "epoch": 0.7715064930373688, "grad_norm": 6.859631648725839, "learning_rate": 3.5161378953029668e-06, "loss": 0.8124, "step": 10679 }, { "epoch": 0.7715787382375784, "grad_norm": 5.918095089410501, "learning_rate": 3.5158706374774155e-06, "loss": 0.8274, "step": 10680 }, { "epoch": 0.7716509834377878, "grad_norm": 6.869933602402283, "learning_rate": 3.5156033657453827e-06, "loss": 0.8375, "step": 10681 }, { "epoch": 0.7717232286379974, "grad_norm": 6.454438969396795, "learning_rate": 3.51533608011053e-06, "loss": 0.7488, "step": 10682 }, { "epoch": 0.7717954738382069, "grad_norm": 7.355527872505353, "learning_rate": 3.5150687805765125e-06, "loss": 0.8136, "step": 10683 }, { "epoch": 0.7718677190384163, "grad_norm": 5.8333111717166455, "learning_rate": 3.5148014671469925e-06, "loss": 0.7554, "step": 10684 }, { "epoch": 0.7719399642386259, "grad_norm": 6.256584361284761, "learning_rate": 3.5145341398256287e-06, "loss": 0.8494, "step": 10685 }, { "epoch": 0.7720122094388354, "grad_norm": 6.310665657250244, "learning_rate": 3.51426679861608e-06, "loss": 0.8092, "step": 10686 }, { "epoch": 0.772084454639045, "grad_norm": 6.214581343159968, "learning_rate": 3.5139994435220064e-06, "loss": 0.8539, "step": 10687 }, { "epoch": 0.7721566998392544, "grad_norm": 7.842974742252916, "learning_rate": 3.5137320745470677e-06, "loss": 0.8685, "step": 10688 }, { "epoch": 0.7722289450394639, "grad_norm": 6.1772406709734815, "learning_rate": 3.5134646916949243e-06, "loss": 0.79, "step": 10689 }, { "epoch": 0.7723011902396735, "grad_norm": 7.8637576311856945, "learning_rate": 3.5131972949692355e-06, "loss": 0.8096, "step": 10690 }, { "epoch": 0.772373435439883, "grad_norm": 6.002363693043304, "learning_rate": 3.5129298843736638e-06, "loss": 0.8608, "step": 10691 }, { "epoch": 0.7724456806400924, "grad_norm": 6.9108462681916265, "learning_rate": 3.512662459911867e-06, "loss": 0.8836, "step": 10692 }, { "epoch": 0.772517925840302, "grad_norm": 6.038372992352774, "learning_rate": 3.512395021587508e-06, "loss": 0.8082, "step": 10693 }, { "epoch": 0.7725901710405115, "grad_norm": 5.520585290866843, "learning_rate": 3.512127569404247e-06, "loss": 0.8239, "step": 10694 }, { "epoch": 0.772662416240721, "grad_norm": 5.267257301594557, "learning_rate": 3.511860103365746e-06, "loss": 0.6621, "step": 10695 }, { "epoch": 0.7727346614409305, "grad_norm": 4.896012041874956, "learning_rate": 3.5115926234756653e-06, "loss": 0.7684, "step": 10696 }, { "epoch": 0.77280690664114, "grad_norm": 5.796199461038126, "learning_rate": 3.5113251297376667e-06, "loss": 0.7914, "step": 10697 }, { "epoch": 0.7728791518413496, "grad_norm": 6.499596216424718, "learning_rate": 3.511057622155413e-06, "loss": 0.8679, "step": 10698 }, { "epoch": 0.772951397041559, "grad_norm": 8.258390841266195, "learning_rate": 3.510790100732565e-06, "loss": 0.8002, "step": 10699 }, { "epoch": 0.7730236422417686, "grad_norm": 6.449840383994313, "learning_rate": 3.510522565472786e-06, "loss": 0.7941, "step": 10700 }, { "epoch": 0.7730958874419781, "grad_norm": 7.2400360039084495, "learning_rate": 3.510255016379737e-06, "loss": 0.9143, "step": 10701 }, { "epoch": 0.7731681326421875, "grad_norm": 5.912850057872664, "learning_rate": 3.5099874534570814e-06, "loss": 0.8202, "step": 10702 }, { "epoch": 0.7732403778423971, "grad_norm": 5.370829361390548, "learning_rate": 3.5097198767084826e-06, "loss": 0.8047, "step": 10703 }, { "epoch": 0.7733126230426066, "grad_norm": 6.05189010477983, "learning_rate": 3.5094522861376017e-06, "loss": 0.7982, "step": 10704 }, { "epoch": 0.7733848682428162, "grad_norm": 6.391135346943391, "learning_rate": 3.5091846817481033e-06, "loss": 0.7919, "step": 10705 }, { "epoch": 0.7734571134430256, "grad_norm": 6.033654558822959, "learning_rate": 3.5089170635436496e-06, "loss": 0.7923, "step": 10706 }, { "epoch": 0.7735293586432351, "grad_norm": 5.908902334658875, "learning_rate": 3.5086494315279053e-06, "loss": 0.82, "step": 10707 }, { "epoch": 0.7736016038434447, "grad_norm": 6.812141339021287, "learning_rate": 3.5083817857045337e-06, "loss": 0.8198, "step": 10708 }, { "epoch": 0.7736738490436542, "grad_norm": 7.039134034736402, "learning_rate": 3.508114126077198e-06, "loss": 0.7494, "step": 10709 }, { "epoch": 0.7737460942438636, "grad_norm": 5.485244030040213, "learning_rate": 3.5078464526495625e-06, "loss": 0.7941, "step": 10710 }, { "epoch": 0.7738183394440732, "grad_norm": 5.675056342966967, "learning_rate": 3.5075787654252915e-06, "loss": 0.8092, "step": 10711 }, { "epoch": 0.7738905846442827, "grad_norm": 7.134118920441129, "learning_rate": 3.50731106440805e-06, "loss": 0.8541, "step": 10712 }, { "epoch": 0.7739628298444922, "grad_norm": 6.7151033663090285, "learning_rate": 3.5070433496015015e-06, "loss": 0.9155, "step": 10713 }, { "epoch": 0.7740350750447017, "grad_norm": 7.909955871304613, "learning_rate": 3.5067756210093123e-06, "loss": 0.8793, "step": 10714 }, { "epoch": 0.7741073202449112, "grad_norm": 6.360802792439539, "learning_rate": 3.5065078786351458e-06, "loss": 0.8361, "step": 10715 }, { "epoch": 0.7741795654451208, "grad_norm": 7.38176843435947, "learning_rate": 3.5062401224826682e-06, "loss": 0.8488, "step": 10716 }, { "epoch": 0.7742518106453302, "grad_norm": 6.340419552756779, "learning_rate": 3.505972352555545e-06, "loss": 0.8348, "step": 10717 }, { "epoch": 0.7743240558455398, "grad_norm": 8.009572024681137, "learning_rate": 3.505704568857441e-06, "loss": 0.9695, "step": 10718 }, { "epoch": 0.7743963010457493, "grad_norm": 5.870157396848021, "learning_rate": 3.505436771392022e-06, "loss": 0.9116, "step": 10719 }, { "epoch": 0.7744685462459587, "grad_norm": 6.599077576420749, "learning_rate": 3.505168960162955e-06, "loss": 0.8197, "step": 10720 }, { "epoch": 0.7745407914461683, "grad_norm": 6.955814964215642, "learning_rate": 3.504901135173905e-06, "loss": 0.8349, "step": 10721 }, { "epoch": 0.7746130366463778, "grad_norm": 6.368352079080192, "learning_rate": 3.5046332964285385e-06, "loss": 0.7805, "step": 10722 }, { "epoch": 0.7746852818465874, "grad_norm": 7.132955560158311, "learning_rate": 3.504365443930523e-06, "loss": 0.8641, "step": 10723 }, { "epoch": 0.7747575270467968, "grad_norm": 6.578950445074065, "learning_rate": 3.504097577683524e-06, "loss": 0.8808, "step": 10724 }, { "epoch": 0.7748297722470063, "grad_norm": 7.394785832830301, "learning_rate": 3.503829697691208e-06, "loss": 0.8171, "step": 10725 }, { "epoch": 0.7749020174472159, "grad_norm": 5.513068884734892, "learning_rate": 3.503561803957244e-06, "loss": 0.8009, "step": 10726 }, { "epoch": 0.7749742626474254, "grad_norm": 7.3606329164788775, "learning_rate": 3.5032938964852977e-06, "loss": 0.8237, "step": 10727 }, { "epoch": 0.7750465078476348, "grad_norm": 6.574916754946229, "learning_rate": 3.5030259752790373e-06, "loss": 0.882, "step": 10728 }, { "epoch": 0.7751187530478444, "grad_norm": 7.3587363709105995, "learning_rate": 3.5027580403421297e-06, "loss": 0.7892, "step": 10729 }, { "epoch": 0.7751909982480539, "grad_norm": 5.751569119185101, "learning_rate": 3.5024900916782427e-06, "loss": 0.7979, "step": 10730 }, { "epoch": 0.7752632434482634, "grad_norm": 6.314394827000447, "learning_rate": 3.5022221292910454e-06, "loss": 0.7975, "step": 10731 }, { "epoch": 0.7753354886484729, "grad_norm": 7.111092630335847, "learning_rate": 3.501954153184205e-06, "loss": 0.7689, "step": 10732 }, { "epoch": 0.7754077338486824, "grad_norm": 5.866254750153727, "learning_rate": 3.5016861633613906e-06, "loss": 0.8651, "step": 10733 }, { "epoch": 0.775479979048892, "grad_norm": 5.993592655793684, "learning_rate": 3.5014181598262696e-06, "loss": 0.826, "step": 10734 }, { "epoch": 0.7755522242491014, "grad_norm": 6.239717799432338, "learning_rate": 3.5011501425825124e-06, "loss": 0.7401, "step": 10735 }, { "epoch": 0.775624469449311, "grad_norm": 5.520351902345757, "learning_rate": 3.5008821116337867e-06, "loss": 0.7204, "step": 10736 }, { "epoch": 0.7756967146495205, "grad_norm": 6.93324465450405, "learning_rate": 3.5006140669837623e-06, "loss": 0.8439, "step": 10737 }, { "epoch": 0.7757689598497299, "grad_norm": 5.742861753568988, "learning_rate": 3.500346008636108e-06, "loss": 0.8466, "step": 10738 }, { "epoch": 0.7758412050499395, "grad_norm": 5.6687699322839, "learning_rate": 3.5000779365944932e-06, "loss": 0.8153, "step": 10739 }, { "epoch": 0.775913450250149, "grad_norm": 6.831064390294217, "learning_rate": 3.499809850862588e-06, "loss": 0.8649, "step": 10740 }, { "epoch": 0.7759856954503586, "grad_norm": 7.0480328135251575, "learning_rate": 3.4995417514440634e-06, "loss": 0.92, "step": 10741 }, { "epoch": 0.776057940650568, "grad_norm": 6.221424543627511, "learning_rate": 3.4992736383425875e-06, "loss": 0.827, "step": 10742 }, { "epoch": 0.7761301858507775, "grad_norm": 6.5107186778908455, "learning_rate": 3.4990055115618315e-06, "loss": 0.7781, "step": 10743 }, { "epoch": 0.7762024310509871, "grad_norm": 7.661703355209118, "learning_rate": 3.498737371105465e-06, "loss": 0.744, "step": 10744 }, { "epoch": 0.7762746762511966, "grad_norm": 5.063934311366891, "learning_rate": 3.49846921697716e-06, "loss": 0.826, "step": 10745 }, { "epoch": 0.776346921451406, "grad_norm": 9.164579489093542, "learning_rate": 3.4982010491805867e-06, "loss": 0.9183, "step": 10746 }, { "epoch": 0.7764191666516156, "grad_norm": 6.274496974768332, "learning_rate": 3.497932867719416e-06, "loss": 0.8265, "step": 10747 }, { "epoch": 0.7764914118518251, "grad_norm": 5.328129474716321, "learning_rate": 3.4976646725973197e-06, "loss": 0.7872, "step": 10748 }, { "epoch": 0.7765636570520346, "grad_norm": 6.025377962912141, "learning_rate": 3.497396463817968e-06, "loss": 0.8283, "step": 10749 }, { "epoch": 0.7766359022522441, "grad_norm": 7.837093857458594, "learning_rate": 3.4971282413850334e-06, "loss": 0.8334, "step": 10750 }, { "epoch": 0.7767081474524536, "grad_norm": 5.874074660130987, "learning_rate": 3.4968600053021875e-06, "loss": 0.827, "step": 10751 }, { "epoch": 0.7767803926526632, "grad_norm": 7.28049789277151, "learning_rate": 3.4965917555731016e-06, "loss": 0.8141, "step": 10752 }, { "epoch": 0.7768526378528726, "grad_norm": 7.610131447417492, "learning_rate": 3.496323492201449e-06, "loss": 0.7595, "step": 10753 }, { "epoch": 0.7769248830530822, "grad_norm": 7.096131046998895, "learning_rate": 3.4960552151909006e-06, "loss": 0.8712, "step": 10754 }, { "epoch": 0.7769971282532917, "grad_norm": 7.568057957707558, "learning_rate": 3.4957869245451306e-06, "loss": 0.8321, "step": 10755 }, { "epoch": 0.7770693734535011, "grad_norm": 7.574572298672216, "learning_rate": 3.4955186202678102e-06, "loss": 0.8597, "step": 10756 }, { "epoch": 0.7771416186537107, "grad_norm": 6.166614772818848, "learning_rate": 3.4952503023626133e-06, "loss": 0.8095, "step": 10757 }, { "epoch": 0.7772138638539202, "grad_norm": 9.39192238983893, "learning_rate": 3.4949819708332124e-06, "loss": 0.8174, "step": 10758 }, { "epoch": 0.7772861090541298, "grad_norm": 5.856974117493459, "learning_rate": 3.4947136256832803e-06, "loss": 0.7703, "step": 10759 }, { "epoch": 0.7773583542543392, "grad_norm": 7.386626825512103, "learning_rate": 3.494445266916492e-06, "loss": 0.8338, "step": 10760 }, { "epoch": 0.7774305994545487, "grad_norm": 5.6229303579127246, "learning_rate": 3.494176894536519e-06, "loss": 0.822, "step": 10761 }, { "epoch": 0.7775028446547583, "grad_norm": 5.279762555187511, "learning_rate": 3.493908508547037e-06, "loss": 0.8991, "step": 10762 }, { "epoch": 0.7775750898549678, "grad_norm": 5.269609070380712, "learning_rate": 3.493640108951719e-06, "loss": 0.7853, "step": 10763 }, { "epoch": 0.7776473350551772, "grad_norm": 5.860188420101507, "learning_rate": 3.4933716957542394e-06, "loss": 0.7821, "step": 10764 }, { "epoch": 0.7777195802553868, "grad_norm": 5.932871409805138, "learning_rate": 3.493103268958273e-06, "loss": 0.7959, "step": 10765 }, { "epoch": 0.7777918254555963, "grad_norm": 6.288011849350964, "learning_rate": 3.4928348285674934e-06, "loss": 0.8926, "step": 10766 }, { "epoch": 0.7778640706558058, "grad_norm": 6.358876801642472, "learning_rate": 3.4925663745855763e-06, "loss": 0.8274, "step": 10767 }, { "epoch": 0.7779363158560153, "grad_norm": 6.448111075663306, "learning_rate": 3.4922979070161954e-06, "loss": 0.763, "step": 10768 }, { "epoch": 0.7780085610562248, "grad_norm": 5.254780182600986, "learning_rate": 3.4920294258630276e-06, "loss": 0.8259, "step": 10769 }, { "epoch": 0.7780808062564344, "grad_norm": 6.793587005540996, "learning_rate": 3.491760931129747e-06, "loss": 0.8494, "step": 10770 }, { "epoch": 0.7781530514566438, "grad_norm": 5.067716948085989, "learning_rate": 3.4914924228200293e-06, "loss": 0.8714, "step": 10771 }, { "epoch": 0.7782252966568534, "grad_norm": 6.183252591465376, "learning_rate": 3.49122390093755e-06, "loss": 0.7658, "step": 10772 }, { "epoch": 0.7782975418570629, "grad_norm": 5.828265068273293, "learning_rate": 3.490955365485985e-06, "loss": 0.8373, "step": 10773 }, { "epoch": 0.7783697870572723, "grad_norm": 4.823560019550605, "learning_rate": 3.490686816469011e-06, "loss": 0.7522, "step": 10774 }, { "epoch": 0.7784420322574819, "grad_norm": 5.754058400801471, "learning_rate": 3.490418253890303e-06, "loss": 0.7513, "step": 10775 }, { "epoch": 0.7785142774576914, "grad_norm": 7.053708706670179, "learning_rate": 3.4901496777535387e-06, "loss": 0.924, "step": 10776 }, { "epoch": 0.778586522657901, "grad_norm": 5.800911785673526, "learning_rate": 3.489881088062394e-06, "loss": 0.879, "step": 10777 }, { "epoch": 0.7786587678581104, "grad_norm": 8.230773717356596, "learning_rate": 3.4896124848205454e-06, "loss": 0.8347, "step": 10778 }, { "epoch": 0.7787310130583199, "grad_norm": 7.823170691241923, "learning_rate": 3.4893438680316707e-06, "loss": 0.8933, "step": 10779 }, { "epoch": 0.7788032582585295, "grad_norm": 5.569121973220941, "learning_rate": 3.4890752376994464e-06, "loss": 0.7761, "step": 10780 }, { "epoch": 0.7788755034587389, "grad_norm": 7.872094723495987, "learning_rate": 3.48880659382755e-06, "loss": 0.8722, "step": 10781 }, { "epoch": 0.7789477486589484, "grad_norm": 6.640452088741481, "learning_rate": 3.488537936419659e-06, "loss": 0.7815, "step": 10782 }, { "epoch": 0.779019993859158, "grad_norm": 6.161800457787626, "learning_rate": 3.4882692654794515e-06, "loss": 0.8782, "step": 10783 }, { "epoch": 0.7790922390593675, "grad_norm": 5.778921947326423, "learning_rate": 3.488000581010605e-06, "loss": 0.8721, "step": 10784 }, { "epoch": 0.779164484259577, "grad_norm": 5.796168857555219, "learning_rate": 3.487731883016798e-06, "loss": 0.8364, "step": 10785 }, { "epoch": 0.7792367294597865, "grad_norm": 6.182134903224769, "learning_rate": 3.487463171501708e-06, "loss": 0.8099, "step": 10786 }, { "epoch": 0.779308974659996, "grad_norm": 7.4905055667170775, "learning_rate": 3.487194446469014e-06, "loss": 0.8094, "step": 10787 }, { "epoch": 0.7793812198602056, "grad_norm": 7.488279212310625, "learning_rate": 3.486925707922394e-06, "loss": 0.8606, "step": 10788 }, { "epoch": 0.779453465060415, "grad_norm": 6.307854784506526, "learning_rate": 3.486656955865528e-06, "loss": 0.8116, "step": 10789 }, { "epoch": 0.7795257102606246, "grad_norm": 5.636201175182961, "learning_rate": 3.486388190302095e-06, "loss": 0.8255, "step": 10790 }, { "epoch": 0.7795979554608341, "grad_norm": 7.147666881276933, "learning_rate": 3.4861194112357726e-06, "loss": 0.8888, "step": 10791 }, { "epoch": 0.7796702006610435, "grad_norm": 9.05478355481838, "learning_rate": 3.4858506186702413e-06, "loss": 0.8611, "step": 10792 }, { "epoch": 0.7797424458612531, "grad_norm": 7.812755611052807, "learning_rate": 3.4855818126091804e-06, "loss": 0.8666, "step": 10793 }, { "epoch": 0.7798146910614626, "grad_norm": 7.08285694390286, "learning_rate": 3.48531299305627e-06, "loss": 0.8833, "step": 10794 }, { "epoch": 0.7798869362616722, "grad_norm": 5.983306869122449, "learning_rate": 3.4850441600151896e-06, "loss": 0.8114, "step": 10795 }, { "epoch": 0.7799591814618816, "grad_norm": 6.980645261303582, "learning_rate": 3.4847753134896196e-06, "loss": 0.88, "step": 10796 }, { "epoch": 0.7800314266620911, "grad_norm": 6.9798290640550595, "learning_rate": 3.48450645348324e-06, "loss": 0.8571, "step": 10797 }, { "epoch": 0.7801036718623007, "grad_norm": 5.682157921083919, "learning_rate": 3.484237579999732e-06, "loss": 0.7503, "step": 10798 }, { "epoch": 0.7801759170625101, "grad_norm": 6.419784316958751, "learning_rate": 3.483968693042776e-06, "loss": 0.7537, "step": 10799 }, { "epoch": 0.7802481622627196, "grad_norm": 6.763106725414416, "learning_rate": 3.4836997926160516e-06, "loss": 0.8262, "step": 10800 }, { "epoch": 0.7803204074629292, "grad_norm": 6.001416674895427, "learning_rate": 3.4834308787232407e-06, "loss": 0.8825, "step": 10801 }, { "epoch": 0.7803926526631387, "grad_norm": 5.852827722237466, "learning_rate": 3.483161951368025e-06, "loss": 0.8146, "step": 10802 }, { "epoch": 0.7804648978633482, "grad_norm": 6.83021551817058, "learning_rate": 3.4828930105540857e-06, "loss": 0.9095, "step": 10803 }, { "epoch": 0.7805371430635577, "grad_norm": 6.06524658909458, "learning_rate": 3.4826240562851044e-06, "loss": 0.8603, "step": 10804 }, { "epoch": 0.7806093882637672, "grad_norm": 6.8946272457021, "learning_rate": 3.4823550885647626e-06, "loss": 0.7434, "step": 10805 }, { "epoch": 0.7806816334639768, "grad_norm": 7.187324322751142, "learning_rate": 3.482086107396742e-06, "loss": 0.9029, "step": 10806 }, { "epoch": 0.7807538786641862, "grad_norm": 6.393371439565968, "learning_rate": 3.481817112784726e-06, "loss": 0.8569, "step": 10807 }, { "epoch": 0.7808261238643958, "grad_norm": 6.138531743051364, "learning_rate": 3.4815481047323964e-06, "loss": 0.7915, "step": 10808 }, { "epoch": 0.7808983690646053, "grad_norm": 7.183879703474588, "learning_rate": 3.481279083243434e-06, "loss": 0.7465, "step": 10809 }, { "epoch": 0.7809706142648147, "grad_norm": 6.5232304888561305, "learning_rate": 3.481010048321523e-06, "loss": 0.8465, "step": 10810 }, { "epoch": 0.7810428594650243, "grad_norm": 6.374658014436962, "learning_rate": 3.4807409999703467e-06, "loss": 0.8044, "step": 10811 }, { "epoch": 0.7811151046652338, "grad_norm": 7.971864577839497, "learning_rate": 3.4804719381935877e-06, "loss": 0.8386, "step": 10812 }, { "epoch": 0.7811873498654434, "grad_norm": 7.126556494170211, "learning_rate": 3.4802028629949284e-06, "loss": 0.8035, "step": 10813 }, { "epoch": 0.7812595950656528, "grad_norm": 7.166292698292309, "learning_rate": 3.4799337743780535e-06, "loss": 0.844, "step": 10814 }, { "epoch": 0.7813318402658623, "grad_norm": 7.7883768191172225, "learning_rate": 3.4796646723466466e-06, "loss": 0.8647, "step": 10815 }, { "epoch": 0.7814040854660719, "grad_norm": 6.461658663557382, "learning_rate": 3.4793955569043903e-06, "loss": 0.8369, "step": 10816 }, { "epoch": 0.7814763306662813, "grad_norm": 7.5561610365711305, "learning_rate": 3.4791264280549695e-06, "loss": 0.9064, "step": 10817 }, { "epoch": 0.7815485758664908, "grad_norm": 6.548652207412039, "learning_rate": 3.478857285802068e-06, "loss": 0.8226, "step": 10818 }, { "epoch": 0.7816208210667004, "grad_norm": 6.086189783863553, "learning_rate": 3.478588130149371e-06, "loss": 0.7664, "step": 10819 }, { "epoch": 0.7816930662669099, "grad_norm": 7.688924765947219, "learning_rate": 3.478318961100561e-06, "loss": 0.7886, "step": 10820 }, { "epoch": 0.7817653114671194, "grad_norm": 6.439156032254138, "learning_rate": 3.478049778659325e-06, "loss": 0.7908, "step": 10821 }, { "epoch": 0.7818375566673289, "grad_norm": 7.561586435167154, "learning_rate": 3.4777805828293465e-06, "loss": 0.8983, "step": 10822 }, { "epoch": 0.7819098018675384, "grad_norm": 6.601640220049158, "learning_rate": 3.4775113736143113e-06, "loss": 0.831, "step": 10823 }, { "epoch": 0.781982047067748, "grad_norm": 4.7712071479150255, "learning_rate": 3.477242151017904e-06, "loss": 0.766, "step": 10824 }, { "epoch": 0.7820542922679574, "grad_norm": 5.495733166587519, "learning_rate": 3.4769729150438107e-06, "loss": 0.803, "step": 10825 }, { "epoch": 0.782126537468167, "grad_norm": 5.747199662016113, "learning_rate": 3.476703665695717e-06, "loss": 0.8342, "step": 10826 }, { "epoch": 0.7821987826683765, "grad_norm": 5.733751642506782, "learning_rate": 3.4764344029773082e-06, "loss": 0.8246, "step": 10827 }, { "epoch": 0.7822710278685859, "grad_norm": 5.925455827522134, "learning_rate": 3.4761651268922715e-06, "loss": 0.8875, "step": 10828 }, { "epoch": 0.7823432730687955, "grad_norm": 6.038467752994553, "learning_rate": 3.475895837444291e-06, "loss": 0.732, "step": 10829 }, { "epoch": 0.782415518269005, "grad_norm": 8.566878709754022, "learning_rate": 3.4756265346370545e-06, "loss": 0.8676, "step": 10830 }, { "epoch": 0.7824877634692146, "grad_norm": 5.133097694405165, "learning_rate": 3.475357218474248e-06, "loss": 0.7598, "step": 10831 }, { "epoch": 0.782560008669424, "grad_norm": 7.4592275527425596, "learning_rate": 3.4750878889595584e-06, "loss": 0.7929, "step": 10832 }, { "epoch": 0.7826322538696335, "grad_norm": 6.56947406585589, "learning_rate": 3.474818546096674e-06, "loss": 0.8469, "step": 10833 }, { "epoch": 0.7827044990698431, "grad_norm": 6.392968678395759, "learning_rate": 3.4745491898892787e-06, "loss": 0.8819, "step": 10834 }, { "epoch": 0.7827767442700525, "grad_norm": 5.5052281586558625, "learning_rate": 3.4742798203410633e-06, "loss": 0.8737, "step": 10835 }, { "epoch": 0.782848989470262, "grad_norm": 6.1475790701765085, "learning_rate": 3.474010437455713e-06, "loss": 0.7917, "step": 10836 }, { "epoch": 0.7829212346704716, "grad_norm": 6.079957974941174, "learning_rate": 3.473741041236916e-06, "loss": 0.7464, "step": 10837 }, { "epoch": 0.7829934798706811, "grad_norm": 7.242667074281197, "learning_rate": 3.47347163168836e-06, "loss": 0.9053, "step": 10838 }, { "epoch": 0.7830657250708906, "grad_norm": 6.057129673802097, "learning_rate": 3.4732022088137336e-06, "loss": 0.7316, "step": 10839 }, { "epoch": 0.7831379702711001, "grad_norm": 5.896699480073644, "learning_rate": 3.472932772616725e-06, "loss": 0.7898, "step": 10840 }, { "epoch": 0.7832102154713096, "grad_norm": 6.212725457565623, "learning_rate": 3.472663323101021e-06, "loss": 0.8257, "step": 10841 }, { "epoch": 0.7832824606715192, "grad_norm": 7.598723866586729, "learning_rate": 3.472393860270313e-06, "loss": 0.8208, "step": 10842 }, { "epoch": 0.7833547058717286, "grad_norm": 5.596275569268581, "learning_rate": 3.4721243841282866e-06, "loss": 0.7521, "step": 10843 }, { "epoch": 0.7834269510719382, "grad_norm": 5.504378569935221, "learning_rate": 3.4718548946786324e-06, "loss": 0.9345, "step": 10844 }, { "epoch": 0.7834991962721477, "grad_norm": 10.030492070641362, "learning_rate": 3.471585391925039e-06, "loss": 0.9532, "step": 10845 }, { "epoch": 0.7835714414723571, "grad_norm": 6.067831939180474, "learning_rate": 3.4713158758711973e-06, "loss": 0.7707, "step": 10846 }, { "epoch": 0.7836436866725667, "grad_norm": 6.654371972719766, "learning_rate": 3.4710463465207945e-06, "loss": 0.7826, "step": 10847 }, { "epoch": 0.7837159318727762, "grad_norm": 6.354843587203565, "learning_rate": 3.470776803877521e-06, "loss": 0.7858, "step": 10848 }, { "epoch": 0.7837881770729858, "grad_norm": 6.955394314544703, "learning_rate": 3.4705072479450675e-06, "loss": 0.8171, "step": 10849 }, { "epoch": 0.7838604222731952, "grad_norm": 5.672886445628892, "learning_rate": 3.470237678727123e-06, "loss": 0.8019, "step": 10850 }, { "epoch": 0.7839326674734047, "grad_norm": 6.5783639171280175, "learning_rate": 3.4699680962273785e-06, "loss": 0.8594, "step": 10851 }, { "epoch": 0.7840049126736143, "grad_norm": 6.135941366957497, "learning_rate": 3.469698500449523e-06, "loss": 0.8289, "step": 10852 }, { "epoch": 0.7840771578738237, "grad_norm": 7.899016799586293, "learning_rate": 3.4694288913972484e-06, "loss": 0.8463, "step": 10853 }, { "epoch": 0.7841494030740332, "grad_norm": 6.822275969341571, "learning_rate": 3.4691592690742447e-06, "loss": 0.871, "step": 10854 }, { "epoch": 0.7842216482742428, "grad_norm": 6.730399035658972, "learning_rate": 3.4688896334842038e-06, "loss": 0.8032, "step": 10855 }, { "epoch": 0.7842938934744523, "grad_norm": 5.917063346065971, "learning_rate": 3.4686199846308157e-06, "loss": 0.8344, "step": 10856 }, { "epoch": 0.7843661386746618, "grad_norm": 7.449510381598049, "learning_rate": 3.468350322517772e-06, "loss": 0.8456, "step": 10857 }, { "epoch": 0.7844383838748713, "grad_norm": 7.612386557950536, "learning_rate": 3.4680806471487636e-06, "loss": 0.8168, "step": 10858 }, { "epoch": 0.7845106290750808, "grad_norm": 6.778018304416842, "learning_rate": 3.4678109585274836e-06, "loss": 0.8801, "step": 10859 }, { "epoch": 0.7845828742752904, "grad_norm": 5.262591566435418, "learning_rate": 3.4675412566576237e-06, "loss": 0.8597, "step": 10860 }, { "epoch": 0.7846551194754998, "grad_norm": 4.843545232566934, "learning_rate": 3.4672715415428742e-06, "loss": 0.8101, "step": 10861 }, { "epoch": 0.7847273646757094, "grad_norm": 6.255557526670378, "learning_rate": 3.467001813186929e-06, "loss": 0.7853, "step": 10862 }, { "epoch": 0.7847996098759189, "grad_norm": 5.171790730230662, "learning_rate": 3.466732071593479e-06, "loss": 0.8247, "step": 10863 }, { "epoch": 0.7848718550761283, "grad_norm": 6.749917489006695, "learning_rate": 3.4664623167662182e-06, "loss": 0.8203, "step": 10864 }, { "epoch": 0.7849441002763379, "grad_norm": 4.958629161226854, "learning_rate": 3.4661925487088387e-06, "loss": 0.8384, "step": 10865 }, { "epoch": 0.7850163454765474, "grad_norm": 5.53873760093725, "learning_rate": 3.465922767425033e-06, "loss": 0.82, "step": 10866 }, { "epoch": 0.785088590676757, "grad_norm": 5.264741999000525, "learning_rate": 3.4656529729184944e-06, "loss": 0.7563, "step": 10867 }, { "epoch": 0.7851608358769664, "grad_norm": 5.344786604786762, "learning_rate": 3.465383165192917e-06, "loss": 0.8251, "step": 10868 }, { "epoch": 0.7852330810771759, "grad_norm": 5.331583371757399, "learning_rate": 3.4651133442519934e-06, "loss": 0.8202, "step": 10869 }, { "epoch": 0.7853053262773855, "grad_norm": 5.175992404301302, "learning_rate": 3.464843510099418e-06, "loss": 0.7863, "step": 10870 }, { "epoch": 0.7853775714775949, "grad_norm": 6.811140852270833, "learning_rate": 3.4645736627388836e-06, "loss": 0.852, "step": 10871 }, { "epoch": 0.7854498166778044, "grad_norm": 6.076594692075308, "learning_rate": 3.4643038021740844e-06, "loss": 0.842, "step": 10872 }, { "epoch": 0.785522061878014, "grad_norm": 6.736520943076174, "learning_rate": 3.4640339284087155e-06, "loss": 0.8668, "step": 10873 }, { "epoch": 0.7855943070782235, "grad_norm": 6.954098751162365, "learning_rate": 3.4637640414464703e-06, "loss": 0.7651, "step": 10874 }, { "epoch": 0.785666552278433, "grad_norm": 5.798988635577469, "learning_rate": 3.4634941412910437e-06, "loss": 0.822, "step": 10875 }, { "epoch": 0.7857387974786425, "grad_norm": 7.29907951299755, "learning_rate": 3.4632242279461303e-06, "loss": 0.8651, "step": 10876 }, { "epoch": 0.785811042678852, "grad_norm": 6.634000027122647, "learning_rate": 3.462954301415425e-06, "loss": 0.8315, "step": 10877 }, { "epoch": 0.7858832878790616, "grad_norm": 6.2470244381669175, "learning_rate": 3.4626843617026234e-06, "loss": 0.8274, "step": 10878 }, { "epoch": 0.785955533079271, "grad_norm": 6.837798295845525, "learning_rate": 3.46241440881142e-06, "loss": 0.7843, "step": 10879 }, { "epoch": 0.7860277782794806, "grad_norm": 5.410330054180245, "learning_rate": 3.4621444427455113e-06, "loss": 0.7867, "step": 10880 }, { "epoch": 0.7861000234796901, "grad_norm": 8.854850138295943, "learning_rate": 3.4618744635085917e-06, "loss": 0.9524, "step": 10881 }, { "epoch": 0.7861722686798995, "grad_norm": 5.999468144050321, "learning_rate": 3.4616044711043573e-06, "loss": 0.8041, "step": 10882 }, { "epoch": 0.7862445138801091, "grad_norm": 6.894142273906965, "learning_rate": 3.461334465536505e-06, "loss": 0.8613, "step": 10883 }, { "epoch": 0.7863167590803186, "grad_norm": 7.452313712887627, "learning_rate": 3.4610644468087295e-06, "loss": 0.8075, "step": 10884 }, { "epoch": 0.7863890042805282, "grad_norm": 6.144103484593544, "learning_rate": 3.460794414924729e-06, "loss": 0.8724, "step": 10885 }, { "epoch": 0.7864612494807376, "grad_norm": 5.832977647382449, "learning_rate": 3.460524369888198e-06, "loss": 0.8495, "step": 10886 }, { "epoch": 0.7865334946809471, "grad_norm": 5.018841529097074, "learning_rate": 3.460254311702834e-06, "loss": 0.8431, "step": 10887 }, { "epoch": 0.7866057398811567, "grad_norm": 7.1368374117803395, "learning_rate": 3.459984240372335e-06, "loss": 0.9079, "step": 10888 }, { "epoch": 0.7866779850813661, "grad_norm": 5.911969034818463, "learning_rate": 3.4597141559003968e-06, "loss": 0.8568, "step": 10889 }, { "epoch": 0.7867502302815756, "grad_norm": 6.120303163187766, "learning_rate": 3.459444058290717e-06, "loss": 0.9451, "step": 10890 }, { "epoch": 0.7868224754817852, "grad_norm": 5.359326420777918, "learning_rate": 3.4591739475469926e-06, "loss": 0.7852, "step": 10891 }, { "epoch": 0.7868947206819947, "grad_norm": 7.790510557629401, "learning_rate": 3.4589038236729224e-06, "loss": 0.7859, "step": 10892 }, { "epoch": 0.7869669658822042, "grad_norm": 6.247214345030423, "learning_rate": 3.4586336866722026e-06, "loss": 0.845, "step": 10893 }, { "epoch": 0.7870392110824137, "grad_norm": 7.40476775326116, "learning_rate": 3.4583635365485323e-06, "loss": 0.9222, "step": 10894 }, { "epoch": 0.7871114562826232, "grad_norm": 6.049904234577486, "learning_rate": 3.4580933733056095e-06, "loss": 0.8278, "step": 10895 }, { "epoch": 0.7871837014828328, "grad_norm": 7.118010187598899, "learning_rate": 3.4578231969471315e-06, "loss": 0.8716, "step": 10896 }, { "epoch": 0.7872559466830422, "grad_norm": 4.93029491231136, "learning_rate": 3.4575530074767983e-06, "loss": 0.7258, "step": 10897 }, { "epoch": 0.7873281918832518, "grad_norm": 5.085627158170263, "learning_rate": 3.4572828048983083e-06, "loss": 0.7785, "step": 10898 }, { "epoch": 0.7874004370834613, "grad_norm": 5.884160144548196, "learning_rate": 3.4570125892153593e-06, "loss": 0.7981, "step": 10899 }, { "epoch": 0.7874726822836707, "grad_norm": 5.563614122920878, "learning_rate": 3.456742360431651e-06, "loss": 0.7205, "step": 10900 }, { "epoch": 0.7875449274838803, "grad_norm": 5.908563070130583, "learning_rate": 3.4564721185508833e-06, "loss": 0.7953, "step": 10901 }, { "epoch": 0.7876171726840898, "grad_norm": 6.533974695189967, "learning_rate": 3.456201863576754e-06, "loss": 0.879, "step": 10902 }, { "epoch": 0.7876894178842994, "grad_norm": 5.764550747335261, "learning_rate": 3.4559315955129646e-06, "loss": 0.8742, "step": 10903 }, { "epoch": 0.7877616630845088, "grad_norm": 7.437706183132106, "learning_rate": 3.4556613143632135e-06, "loss": 0.9027, "step": 10904 }, { "epoch": 0.7878339082847183, "grad_norm": 6.770653950197874, "learning_rate": 3.4553910201312008e-06, "loss": 0.814, "step": 10905 }, { "epoch": 0.7879061534849279, "grad_norm": 6.083347930193942, "learning_rate": 3.455120712820627e-06, "loss": 0.7928, "step": 10906 }, { "epoch": 0.7879783986851373, "grad_norm": 6.158474128294053, "learning_rate": 3.454850392435192e-06, "loss": 0.8639, "step": 10907 }, { "epoch": 0.7880506438853468, "grad_norm": 5.836305496889907, "learning_rate": 3.4545800589785977e-06, "loss": 0.8495, "step": 10908 }, { "epoch": 0.7881228890855564, "grad_norm": 5.299089447113142, "learning_rate": 3.4543097124545422e-06, "loss": 0.8523, "step": 10909 }, { "epoch": 0.7881951342857659, "grad_norm": 5.536992944102161, "learning_rate": 3.454039352866728e-06, "loss": 0.8749, "step": 10910 }, { "epoch": 0.7882673794859754, "grad_norm": 5.3316483021251235, "learning_rate": 3.4537689802188555e-06, "loss": 0.828, "step": 10911 }, { "epoch": 0.7883396246861849, "grad_norm": 7.248101873865031, "learning_rate": 3.453498594514628e-06, "loss": 0.8331, "step": 10912 }, { "epoch": 0.7884118698863944, "grad_norm": 5.7776332780097786, "learning_rate": 3.453228195757743e-06, "loss": 0.828, "step": 10913 }, { "epoch": 0.788484115086604, "grad_norm": 6.297809086658009, "learning_rate": 3.452957783951906e-06, "loss": 0.709, "step": 10914 }, { "epoch": 0.7885563602868134, "grad_norm": 5.676608209822329, "learning_rate": 3.452687359100815e-06, "loss": 0.8414, "step": 10915 }, { "epoch": 0.788628605487023, "grad_norm": 7.513596734905021, "learning_rate": 3.452416921208175e-06, "loss": 0.8935, "step": 10916 }, { "epoch": 0.7887008506872325, "grad_norm": 6.462423723313001, "learning_rate": 3.4521464702776864e-06, "loss": 0.8358, "step": 10917 }, { "epoch": 0.7887730958874419, "grad_norm": 7.803410972698919, "learning_rate": 3.451876006313052e-06, "loss": 0.8898, "step": 10918 }, { "epoch": 0.7888453410876515, "grad_norm": 5.78712321758385, "learning_rate": 3.4516055293179734e-06, "loss": 0.8194, "step": 10919 }, { "epoch": 0.788917586287861, "grad_norm": 6.150222538589146, "learning_rate": 3.451335039296155e-06, "loss": 0.8302, "step": 10920 }, { "epoch": 0.7889898314880706, "grad_norm": 6.0249525012917955, "learning_rate": 3.4510645362512986e-06, "loss": 0.8328, "step": 10921 }, { "epoch": 0.78906207668828, "grad_norm": 5.802672579400005, "learning_rate": 3.450794020187107e-06, "loss": 0.8418, "step": 10922 }, { "epoch": 0.7891343218884895, "grad_norm": 6.37553672775042, "learning_rate": 3.4505234911072827e-06, "loss": 0.8932, "step": 10923 }, { "epoch": 0.7892065670886991, "grad_norm": 6.469604730574585, "learning_rate": 3.4502529490155305e-06, "loss": 0.8821, "step": 10924 }, { "epoch": 0.7892788122889085, "grad_norm": 6.466674766666544, "learning_rate": 3.449982393915553e-06, "loss": 0.7714, "step": 10925 }, { "epoch": 0.789351057489118, "grad_norm": 5.472298130513691, "learning_rate": 3.449711825811055e-06, "loss": 0.8308, "step": 10926 }, { "epoch": 0.7894233026893276, "grad_norm": 5.744008135053989, "learning_rate": 3.4494412447057386e-06, "loss": 0.8138, "step": 10927 }, { "epoch": 0.7894955478895371, "grad_norm": 9.540259733682978, "learning_rate": 3.4491706506033094e-06, "loss": 0.8271, "step": 10928 }, { "epoch": 0.7895677930897466, "grad_norm": 7.201449979373593, "learning_rate": 3.4489000435074697e-06, "loss": 0.7663, "step": 10929 }, { "epoch": 0.7896400382899561, "grad_norm": 7.061071740951183, "learning_rate": 3.4486294234219266e-06, "loss": 0.8867, "step": 10930 }, { "epoch": 0.7897122834901656, "grad_norm": 6.79155008726952, "learning_rate": 3.448358790350383e-06, "loss": 0.8199, "step": 10931 }, { "epoch": 0.7897845286903752, "grad_norm": 6.4623936185068125, "learning_rate": 3.448088144296543e-06, "loss": 0.8448, "step": 10932 }, { "epoch": 0.7898567738905846, "grad_norm": 6.167161904459015, "learning_rate": 3.4478174852641134e-06, "loss": 0.7884, "step": 10933 }, { "epoch": 0.7899290190907942, "grad_norm": 7.168146764223461, "learning_rate": 3.447546813256798e-06, "loss": 0.8145, "step": 10934 }, { "epoch": 0.7900012642910037, "grad_norm": 5.156547399093851, "learning_rate": 3.447276128278303e-06, "loss": 0.7998, "step": 10935 }, { "epoch": 0.7900735094912131, "grad_norm": 7.466981558744659, "learning_rate": 3.447005430332332e-06, "loss": 0.785, "step": 10936 }, { "epoch": 0.7901457546914227, "grad_norm": 5.8763237942993936, "learning_rate": 3.446734719422593e-06, "loss": 0.7942, "step": 10937 }, { "epoch": 0.7902179998916322, "grad_norm": 7.200241074234918, "learning_rate": 3.4464639955527905e-06, "loss": 0.853, "step": 10938 }, { "epoch": 0.7902902450918418, "grad_norm": 5.257359206000942, "learning_rate": 3.446193258726631e-06, "loss": 0.7918, "step": 10939 }, { "epoch": 0.7903624902920512, "grad_norm": 5.485962032218716, "learning_rate": 3.4459225089478204e-06, "loss": 0.8618, "step": 10940 }, { "epoch": 0.7904347354922607, "grad_norm": 6.655846525330193, "learning_rate": 3.445651746220065e-06, "loss": 0.8561, "step": 10941 }, { "epoch": 0.7905069806924703, "grad_norm": 5.511958386722274, "learning_rate": 3.445380970547072e-06, "loss": 0.7722, "step": 10942 }, { "epoch": 0.7905792258926797, "grad_norm": 8.206922484107928, "learning_rate": 3.4451101819325467e-06, "loss": 0.8604, "step": 10943 }, { "epoch": 0.7906514710928892, "grad_norm": 7.9459740767460545, "learning_rate": 3.4448393803801973e-06, "loss": 0.8923, "step": 10944 }, { "epoch": 0.7907237162930988, "grad_norm": 5.631297845668949, "learning_rate": 3.4445685658937293e-06, "loss": 0.8876, "step": 10945 }, { "epoch": 0.7907959614933083, "grad_norm": 5.370815688825123, "learning_rate": 3.4442977384768527e-06, "loss": 0.8629, "step": 10946 }, { "epoch": 0.7908682066935178, "grad_norm": 5.96497261392809, "learning_rate": 3.444026898133273e-06, "loss": 0.7976, "step": 10947 }, { "epoch": 0.7909404518937273, "grad_norm": 6.598725525307869, "learning_rate": 3.443756044866697e-06, "loss": 0.8072, "step": 10948 }, { "epoch": 0.7910126970939368, "grad_norm": 6.203367905941485, "learning_rate": 3.4434851786808345e-06, "loss": 0.8744, "step": 10949 }, { "epoch": 0.7910849422941464, "grad_norm": 6.537357085144684, "learning_rate": 3.4432142995793915e-06, "loss": 0.8891, "step": 10950 }, { "epoch": 0.7911571874943558, "grad_norm": 7.138920085886645, "learning_rate": 3.442943407566078e-06, "loss": 0.8565, "step": 10951 }, { "epoch": 0.7912294326945654, "grad_norm": 7.363644926363744, "learning_rate": 3.442672502644601e-06, "loss": 0.738, "step": 10952 }, { "epoch": 0.7913016778947749, "grad_norm": 6.735276730522567, "learning_rate": 3.4424015848186693e-06, "loss": 0.9392, "step": 10953 }, { "epoch": 0.7913739230949843, "grad_norm": 6.046718861537927, "learning_rate": 3.442130654091992e-06, "loss": 0.7512, "step": 10954 }, { "epoch": 0.7914461682951939, "grad_norm": 5.592121761049671, "learning_rate": 3.4418597104682777e-06, "loss": 0.8264, "step": 10955 }, { "epoch": 0.7915184134954034, "grad_norm": 7.335132233741571, "learning_rate": 3.441588753951235e-06, "loss": 0.8121, "step": 10956 }, { "epoch": 0.791590658695613, "grad_norm": 6.3081943444579425, "learning_rate": 3.4413177845445733e-06, "loss": 0.8675, "step": 10957 }, { "epoch": 0.7916629038958224, "grad_norm": 5.871491378750563, "learning_rate": 3.4410468022520015e-06, "loss": 0.8772, "step": 10958 }, { "epoch": 0.7917351490960319, "grad_norm": 5.064606216653052, "learning_rate": 3.44077580707723e-06, "loss": 0.7705, "step": 10959 }, { "epoch": 0.7918073942962415, "grad_norm": 5.028751108130082, "learning_rate": 3.4405047990239694e-06, "loss": 0.7912, "step": 10960 }, { "epoch": 0.7918796394964509, "grad_norm": 5.554273372616251, "learning_rate": 3.4402337780959265e-06, "loss": 0.8812, "step": 10961 }, { "epoch": 0.7919518846966604, "grad_norm": 6.728728225959075, "learning_rate": 3.4399627442968144e-06, "loss": 0.7472, "step": 10962 }, { "epoch": 0.79202412989687, "grad_norm": 5.820369520484113, "learning_rate": 3.4396916976303417e-06, "loss": 0.7688, "step": 10963 }, { "epoch": 0.7920963750970795, "grad_norm": 5.569687218367762, "learning_rate": 3.4394206381002194e-06, "loss": 0.8699, "step": 10964 }, { "epoch": 0.792168620297289, "grad_norm": 7.906957059462607, "learning_rate": 3.4391495657101583e-06, "loss": 0.8608, "step": 10965 }, { "epoch": 0.7922408654974985, "grad_norm": 5.8269372297329625, "learning_rate": 3.438878480463868e-06, "loss": 0.7832, "step": 10966 }, { "epoch": 0.792313110697708, "grad_norm": 6.093128661732635, "learning_rate": 3.4386073823650605e-06, "loss": 0.8184, "step": 10967 }, { "epoch": 0.7923853558979176, "grad_norm": 6.568189788362336, "learning_rate": 3.438336271417447e-06, "loss": 0.9302, "step": 10968 }, { "epoch": 0.792457601098127, "grad_norm": 5.709210553486617, "learning_rate": 3.438065147624739e-06, "loss": 0.7511, "step": 10969 }, { "epoch": 0.7925298462983366, "grad_norm": 7.2091796319398345, "learning_rate": 3.4377940109906466e-06, "loss": 0.9033, "step": 10970 }, { "epoch": 0.7926020914985461, "grad_norm": 5.750706836543926, "learning_rate": 3.4375228615188834e-06, "loss": 0.8756, "step": 10971 }, { "epoch": 0.7926743366987555, "grad_norm": 6.825519963952952, "learning_rate": 3.4372516992131587e-06, "loss": 0.774, "step": 10972 }, { "epoch": 0.7927465818989651, "grad_norm": 6.6506186290761, "learning_rate": 3.436980524077187e-06, "loss": 0.8718, "step": 10973 }, { "epoch": 0.7928188270991746, "grad_norm": 6.844715346442694, "learning_rate": 3.43670933611468e-06, "loss": 0.8387, "step": 10974 }, { "epoch": 0.7928910722993842, "grad_norm": 5.825485327153009, "learning_rate": 3.436438135329348e-06, "loss": 0.8411, "step": 10975 }, { "epoch": 0.7929633174995936, "grad_norm": 5.1576447421171485, "learning_rate": 3.4361669217249056e-06, "loss": 0.8186, "step": 10976 }, { "epoch": 0.7930355626998031, "grad_norm": 6.342790202176651, "learning_rate": 3.435895695305065e-06, "loss": 0.8597, "step": 10977 }, { "epoch": 0.7931078079000127, "grad_norm": 5.471711844425184, "learning_rate": 3.43562445607354e-06, "loss": 0.7698, "step": 10978 }, { "epoch": 0.7931800531002221, "grad_norm": 5.88043253910151, "learning_rate": 3.435353204034041e-06, "loss": 0.8072, "step": 10979 }, { "epoch": 0.7932522983004316, "grad_norm": 6.775685079361181, "learning_rate": 3.435081939190284e-06, "loss": 0.8412, "step": 10980 }, { "epoch": 0.7933245435006412, "grad_norm": 7.152191743844865, "learning_rate": 3.4348106615459807e-06, "loss": 0.8635, "step": 10981 }, { "epoch": 0.7933967887008507, "grad_norm": 5.531898589820374, "learning_rate": 3.4345393711048454e-06, "loss": 0.768, "step": 10982 }, { "epoch": 0.7934690339010602, "grad_norm": 6.708984943595318, "learning_rate": 3.434268067870592e-06, "loss": 0.9403, "step": 10983 }, { "epoch": 0.7935412791012697, "grad_norm": 7.059595101392534, "learning_rate": 3.433996751846934e-06, "loss": 0.8295, "step": 10984 }, { "epoch": 0.7936135243014792, "grad_norm": 5.850490008315743, "learning_rate": 3.433725423037586e-06, "loss": 0.8324, "step": 10985 }, { "epoch": 0.7936857695016888, "grad_norm": 5.510939555847806, "learning_rate": 3.4334540814462606e-06, "loss": 0.8045, "step": 10986 }, { "epoch": 0.7937580147018982, "grad_norm": 5.280684943132655, "learning_rate": 3.433182727076675e-06, "loss": 0.7555, "step": 10987 }, { "epoch": 0.7938302599021078, "grad_norm": 6.4600822131709545, "learning_rate": 3.4329113599325426e-06, "loss": 0.7502, "step": 10988 }, { "epoch": 0.7939025051023173, "grad_norm": 5.68362059701563, "learning_rate": 3.4326399800175774e-06, "loss": 0.8823, "step": 10989 }, { "epoch": 0.7939747503025267, "grad_norm": 5.876618831884851, "learning_rate": 3.432368587335495e-06, "loss": 0.7787, "step": 10990 }, { "epoch": 0.7940469955027363, "grad_norm": 6.277528789762642, "learning_rate": 3.4320971818900105e-06, "loss": 0.8696, "step": 10991 }, { "epoch": 0.7941192407029458, "grad_norm": 6.683774106274908, "learning_rate": 3.4318257636848396e-06, "loss": 0.8076, "step": 10992 }, { "epoch": 0.7941914859031554, "grad_norm": 4.99390650421607, "learning_rate": 3.431554332723697e-06, "loss": 0.8099, "step": 10993 }, { "epoch": 0.7942637311033648, "grad_norm": 6.005330261245493, "learning_rate": 3.4312828890103e-06, "loss": 0.7473, "step": 10994 }, { "epoch": 0.7943359763035743, "grad_norm": 6.133800801140057, "learning_rate": 3.431011432548362e-06, "loss": 0.8251, "step": 10995 }, { "epoch": 0.7944082215037839, "grad_norm": 6.935440995126049, "learning_rate": 3.430739963341601e-06, "loss": 0.8025, "step": 10996 }, { "epoch": 0.7944804667039933, "grad_norm": 6.179282540460806, "learning_rate": 3.4304684813937335e-06, "loss": 0.8754, "step": 10997 }, { "epoch": 0.7945527119042028, "grad_norm": 9.181470903890311, "learning_rate": 3.430196986708474e-06, "loss": 0.9057, "step": 10998 }, { "epoch": 0.7946249571044124, "grad_norm": 7.254731574531608, "learning_rate": 3.4299254792895398e-06, "loss": 0.8063, "step": 10999 }, { "epoch": 0.7946972023046219, "grad_norm": 6.629105933019279, "learning_rate": 3.4296539591406476e-06, "loss": 0.8914, "step": 11000 }, { "epoch": 0.7947694475048314, "grad_norm": 6.878467864278506, "learning_rate": 3.429382426265515e-06, "loss": 0.7659, "step": 11001 }, { "epoch": 0.7948416927050409, "grad_norm": 7.281387262319399, "learning_rate": 3.4291108806678584e-06, "loss": 0.8016, "step": 11002 }, { "epoch": 0.7949139379052504, "grad_norm": 7.20614567306447, "learning_rate": 3.428839322351396e-06, "loss": 0.8205, "step": 11003 }, { "epoch": 0.7949861831054599, "grad_norm": 5.411919239769574, "learning_rate": 3.428567751319844e-06, "loss": 0.8017, "step": 11004 }, { "epoch": 0.7950584283056694, "grad_norm": 6.154654823869212, "learning_rate": 3.42829616757692e-06, "loss": 0.8229, "step": 11005 }, { "epoch": 0.795130673505879, "grad_norm": 4.986544815417484, "learning_rate": 3.4280245711263434e-06, "loss": 0.8604, "step": 11006 }, { "epoch": 0.7952029187060885, "grad_norm": 8.773806203294482, "learning_rate": 3.4277529619718297e-06, "loss": 0.7431, "step": 11007 }, { "epoch": 0.7952751639062979, "grad_norm": 5.589537670144714, "learning_rate": 3.427481340117099e-06, "loss": 0.8321, "step": 11008 }, { "epoch": 0.7953474091065075, "grad_norm": 8.433766082798092, "learning_rate": 3.4272097055658688e-06, "loss": 0.8738, "step": 11009 }, { "epoch": 0.795419654306717, "grad_norm": 6.846096524894594, "learning_rate": 3.4269380583218576e-06, "loss": 0.8456, "step": 11010 }, { "epoch": 0.7954918995069266, "grad_norm": 5.231477852329032, "learning_rate": 3.426666398388784e-06, "loss": 0.8211, "step": 11011 }, { "epoch": 0.795564144707136, "grad_norm": 6.481322523721076, "learning_rate": 3.4263947257703673e-06, "loss": 0.8999, "step": 11012 }, { "epoch": 0.7956363899073455, "grad_norm": 6.395828329906515, "learning_rate": 3.426123040470326e-06, "loss": 0.8722, "step": 11013 }, { "epoch": 0.7957086351075551, "grad_norm": 7.16234132813993, "learning_rate": 3.4258513424923785e-06, "loss": 0.9548, "step": 11014 }, { "epoch": 0.7957808803077645, "grad_norm": 6.25311201819617, "learning_rate": 3.4255796318402463e-06, "loss": 0.8477, "step": 11015 }, { "epoch": 0.795853125507974, "grad_norm": 7.191172316774977, "learning_rate": 3.4253079085176468e-06, "loss": 0.7575, "step": 11016 }, { "epoch": 0.7959253707081836, "grad_norm": 6.408044479996779, "learning_rate": 3.425036172528301e-06, "loss": 0.7991, "step": 11017 }, { "epoch": 0.7959976159083931, "grad_norm": 7.110175789807372, "learning_rate": 3.4247644238759274e-06, "loss": 0.8475, "step": 11018 }, { "epoch": 0.7960698611086026, "grad_norm": 7.063974412260446, "learning_rate": 3.424492662564247e-06, "loss": 0.847, "step": 11019 }, { "epoch": 0.7961421063088121, "grad_norm": 5.948526523766628, "learning_rate": 3.42422088859698e-06, "loss": 0.8495, "step": 11020 }, { "epoch": 0.7962143515090216, "grad_norm": 5.262732370642163, "learning_rate": 3.4239491019778475e-06, "loss": 0.8312, "step": 11021 }, { "epoch": 0.7962865967092311, "grad_norm": 4.96020598537585, "learning_rate": 3.423677302710569e-06, "loss": 0.8214, "step": 11022 }, { "epoch": 0.7963588419094406, "grad_norm": 5.5272519592643325, "learning_rate": 3.423405490798865e-06, "loss": 0.721, "step": 11023 }, { "epoch": 0.7964310871096502, "grad_norm": 5.46811327906085, "learning_rate": 3.4231336662464563e-06, "loss": 0.7903, "step": 11024 }, { "epoch": 0.7965033323098597, "grad_norm": 7.543078239511544, "learning_rate": 3.422861829057065e-06, "loss": 0.8251, "step": 11025 }, { "epoch": 0.7965755775100691, "grad_norm": 8.668080972544155, "learning_rate": 3.422589979234412e-06, "loss": 0.8406, "step": 11026 }, { "epoch": 0.7966478227102787, "grad_norm": 5.493337931276144, "learning_rate": 3.4223181167822183e-06, "loss": 0.812, "step": 11027 }, { "epoch": 0.7967200679104882, "grad_norm": 7.222269892739017, "learning_rate": 3.4220462417042053e-06, "loss": 0.8902, "step": 11028 }, { "epoch": 0.7967923131106978, "grad_norm": 6.2380938230101775, "learning_rate": 3.4217743540040966e-06, "loss": 0.8558, "step": 11029 }, { "epoch": 0.7968645583109072, "grad_norm": 6.191333854564914, "learning_rate": 3.421502453685611e-06, "loss": 0.817, "step": 11030 }, { "epoch": 0.7969368035111167, "grad_norm": 5.831477533053339, "learning_rate": 3.4212305407524738e-06, "loss": 0.7897, "step": 11031 }, { "epoch": 0.7970090487113263, "grad_norm": 8.898409634915765, "learning_rate": 3.420958615208405e-06, "loss": 0.787, "step": 11032 }, { "epoch": 0.7970812939115357, "grad_norm": 7.828865671047914, "learning_rate": 3.4206866770571273e-06, "loss": 0.8215, "step": 11033 }, { "epoch": 0.7971535391117452, "grad_norm": 5.723499431269833, "learning_rate": 3.4204147263023647e-06, "loss": 0.7277, "step": 11034 }, { "epoch": 0.7972257843119548, "grad_norm": 6.04468490801688, "learning_rate": 3.420142762947839e-06, "loss": 0.7764, "step": 11035 }, { "epoch": 0.7972980295121643, "grad_norm": 5.450753732059319, "learning_rate": 3.419870786997273e-06, "loss": 0.7975, "step": 11036 }, { "epoch": 0.7973702747123738, "grad_norm": 6.512093371328785, "learning_rate": 3.419598798454391e-06, "loss": 0.8568, "step": 11037 }, { "epoch": 0.7974425199125833, "grad_norm": 6.595956442240917, "learning_rate": 3.4193267973229145e-06, "loss": 0.8569, "step": 11038 }, { "epoch": 0.7975147651127928, "grad_norm": 8.716588343699364, "learning_rate": 3.419054783606568e-06, "loss": 0.8576, "step": 11039 }, { "epoch": 0.7975870103130023, "grad_norm": 6.947396582526604, "learning_rate": 3.4187827573090758e-06, "loss": 0.8458, "step": 11040 }, { "epoch": 0.7976592555132118, "grad_norm": 7.248964696165912, "learning_rate": 3.4185107184341604e-06, "loss": 0.8081, "step": 11041 }, { "epoch": 0.7977315007134214, "grad_norm": 5.000423604187297, "learning_rate": 3.418238666985547e-06, "loss": 0.8, "step": 11042 }, { "epoch": 0.7978037459136309, "grad_norm": 6.646989137918714, "learning_rate": 3.4179666029669583e-06, "loss": 0.7919, "step": 11043 }, { "epoch": 0.7978759911138403, "grad_norm": 6.575298218436709, "learning_rate": 3.4176945263821204e-06, "loss": 0.8284, "step": 11044 }, { "epoch": 0.7979482363140499, "grad_norm": 5.289204763192434, "learning_rate": 3.4174224372347565e-06, "loss": 0.7927, "step": 11045 }, { "epoch": 0.7980204815142594, "grad_norm": 5.766980846571003, "learning_rate": 3.417150335528592e-06, "loss": 0.7748, "step": 11046 }, { "epoch": 0.798092726714469, "grad_norm": 6.817511998422358, "learning_rate": 3.416878221267352e-06, "loss": 0.8119, "step": 11047 }, { "epoch": 0.7981649719146784, "grad_norm": 8.753795563501477, "learning_rate": 3.41660609445476e-06, "loss": 0.8469, "step": 11048 }, { "epoch": 0.7982372171148879, "grad_norm": 7.110844521028856, "learning_rate": 3.4163339550945435e-06, "loss": 0.8742, "step": 11049 }, { "epoch": 0.7983094623150975, "grad_norm": 7.432395914723807, "learning_rate": 3.416061803190426e-06, "loss": 0.889, "step": 11050 }, { "epoch": 0.7983817075153069, "grad_norm": 7.236171557010671, "learning_rate": 3.415789638746134e-06, "loss": 0.834, "step": 11051 }, { "epoch": 0.7984539527155164, "grad_norm": 7.945506705852317, "learning_rate": 3.415517461765392e-06, "loss": 0.8368, "step": 11052 }, { "epoch": 0.798526197915726, "grad_norm": 5.938127344016779, "learning_rate": 3.4152452722519273e-06, "loss": 0.8134, "step": 11053 }, { "epoch": 0.7985984431159355, "grad_norm": 5.447548887288166, "learning_rate": 3.4149730702094664e-06, "loss": 0.8534, "step": 11054 }, { "epoch": 0.798670688316145, "grad_norm": 6.646974503490203, "learning_rate": 3.4147008556417333e-06, "loss": 0.8802, "step": 11055 }, { "epoch": 0.7987429335163545, "grad_norm": 6.1382269217121355, "learning_rate": 3.4144286285524564e-06, "loss": 0.8238, "step": 11056 }, { "epoch": 0.798815178716564, "grad_norm": 10.186618134880629, "learning_rate": 3.4141563889453612e-06, "loss": 0.9405, "step": 11057 }, { "epoch": 0.7988874239167735, "grad_norm": 5.719341977023798, "learning_rate": 3.413884136824175e-06, "loss": 0.8713, "step": 11058 }, { "epoch": 0.798959669116983, "grad_norm": 6.566240389134439, "learning_rate": 3.4136118721926243e-06, "loss": 0.8032, "step": 11059 }, { "epoch": 0.7990319143171926, "grad_norm": 7.1508318117101055, "learning_rate": 3.413339595054437e-06, "loss": 0.8984, "step": 11060 }, { "epoch": 0.7991041595174021, "grad_norm": 7.223402233396632, "learning_rate": 3.413067305413339e-06, "loss": 0.8766, "step": 11061 }, { "epoch": 0.7991764047176115, "grad_norm": 5.637646509537815, "learning_rate": 3.4127950032730596e-06, "loss": 0.7118, "step": 11062 }, { "epoch": 0.7992486499178211, "grad_norm": 6.316497141526782, "learning_rate": 3.4125226886373246e-06, "loss": 0.9031, "step": 11063 }, { "epoch": 0.7993208951180306, "grad_norm": 6.157539658598782, "learning_rate": 3.4122503615098623e-06, "loss": 0.8319, "step": 11064 }, { "epoch": 0.7993931403182402, "grad_norm": 5.3804417941416744, "learning_rate": 3.411978021894402e-06, "loss": 0.8245, "step": 11065 }, { "epoch": 0.7994653855184496, "grad_norm": 6.62728871092538, "learning_rate": 3.4117056697946694e-06, "loss": 0.8634, "step": 11066 }, { "epoch": 0.7995376307186591, "grad_norm": 5.324937689331554, "learning_rate": 3.4114333052143943e-06, "loss": 0.862, "step": 11067 }, { "epoch": 0.7996098759188687, "grad_norm": 5.699234479394846, "learning_rate": 3.4111609281573057e-06, "loss": 0.8047, "step": 11068 }, { "epoch": 0.7996821211190781, "grad_norm": 6.628782811811088, "learning_rate": 3.4108885386271313e-06, "loss": 0.9097, "step": 11069 }, { "epoch": 0.7997543663192876, "grad_norm": 5.461052409042294, "learning_rate": 3.410616136627599e-06, "loss": 0.7523, "step": 11070 }, { "epoch": 0.7998266115194972, "grad_norm": 6.072172962363909, "learning_rate": 3.4103437221624404e-06, "loss": 0.789, "step": 11071 }, { "epoch": 0.7998988567197067, "grad_norm": 5.44678852758965, "learning_rate": 3.410071295235382e-06, "loss": 0.7837, "step": 11072 }, { "epoch": 0.7999711019199162, "grad_norm": 6.08301745426371, "learning_rate": 3.4097988558501544e-06, "loss": 0.8344, "step": 11073 }, { "epoch": 0.8000433471201257, "grad_norm": 6.007486758683844, "learning_rate": 3.4095264040104873e-06, "loss": 0.8723, "step": 11074 }, { "epoch": 0.8001155923203352, "grad_norm": 5.0587888702113295, "learning_rate": 3.4092539397201097e-06, "loss": 0.8579, "step": 11075 }, { "epoch": 0.8001878375205447, "grad_norm": 5.247878599716583, "learning_rate": 3.408981462982751e-06, "loss": 0.7996, "step": 11076 }, { "epoch": 0.8002600827207542, "grad_norm": 7.165620890174083, "learning_rate": 3.4087089738021422e-06, "loss": 0.8701, "step": 11077 }, { "epoch": 0.8003323279209638, "grad_norm": 5.687126398915897, "learning_rate": 3.408436472182014e-06, "loss": 0.8512, "step": 11078 }, { "epoch": 0.8004045731211733, "grad_norm": 6.227229639016919, "learning_rate": 3.4081639581260954e-06, "loss": 0.8512, "step": 11079 }, { "epoch": 0.8004768183213827, "grad_norm": 5.968822219172056, "learning_rate": 3.4078914316381173e-06, "loss": 0.8815, "step": 11080 }, { "epoch": 0.8005490635215923, "grad_norm": 6.064829585235616, "learning_rate": 3.4076188927218103e-06, "loss": 0.862, "step": 11081 }, { "epoch": 0.8006213087218018, "grad_norm": 6.220746050676072, "learning_rate": 3.407346341380905e-06, "loss": 0.748, "step": 11082 }, { "epoch": 0.8006935539220114, "grad_norm": 6.988936536930088, "learning_rate": 3.4070737776191332e-06, "loss": 0.8137, "step": 11083 }, { "epoch": 0.8007657991222208, "grad_norm": 8.137294446748621, "learning_rate": 3.406801201440225e-06, "loss": 0.8297, "step": 11084 }, { "epoch": 0.8008380443224303, "grad_norm": 7.860635402321027, "learning_rate": 3.4065286128479134e-06, "loss": 0.8754, "step": 11085 }, { "epoch": 0.8009102895226399, "grad_norm": 6.794114808620387, "learning_rate": 3.406256011845928e-06, "loss": 0.8727, "step": 11086 }, { "epoch": 0.8009825347228493, "grad_norm": 7.063988452802575, "learning_rate": 3.405983398438003e-06, "loss": 0.8457, "step": 11087 }, { "epoch": 0.8010547799230588, "grad_norm": 6.348941576598922, "learning_rate": 3.4057107726278682e-06, "loss": 0.8392, "step": 11088 }, { "epoch": 0.8011270251232684, "grad_norm": 6.177898934241254, "learning_rate": 3.4054381344192556e-06, "loss": 0.8234, "step": 11089 }, { "epoch": 0.8011992703234779, "grad_norm": 6.8167895889702335, "learning_rate": 3.4051654838158976e-06, "loss": 0.9577, "step": 11090 }, { "epoch": 0.8012715155236874, "grad_norm": 5.018530268627351, "learning_rate": 3.404892820821527e-06, "loss": 0.8784, "step": 11091 }, { "epoch": 0.8013437607238969, "grad_norm": 7.29975680638561, "learning_rate": 3.4046201454398774e-06, "loss": 0.8411, "step": 11092 }, { "epoch": 0.8014160059241064, "grad_norm": 6.331514030290976, "learning_rate": 3.4043474576746794e-06, "loss": 0.8674, "step": 11093 }, { "epoch": 0.8014882511243159, "grad_norm": 6.374957813796539, "learning_rate": 3.404074757529668e-06, "loss": 0.8196, "step": 11094 }, { "epoch": 0.8015604963245254, "grad_norm": 5.683023220935095, "learning_rate": 3.403802045008574e-06, "loss": 0.789, "step": 11095 }, { "epoch": 0.801632741524735, "grad_norm": 6.144128319393711, "learning_rate": 3.403529320115132e-06, "loss": 0.8844, "step": 11096 }, { "epoch": 0.8017049867249445, "grad_norm": 8.634471613053174, "learning_rate": 3.4032565828530756e-06, "loss": 0.8484, "step": 11097 }, { "epoch": 0.8017772319251539, "grad_norm": 6.4933302478404356, "learning_rate": 3.402983833226138e-06, "loss": 0.8658, "step": 11098 }, { "epoch": 0.8018494771253635, "grad_norm": 6.59426950152851, "learning_rate": 3.402711071238052e-06, "loss": 0.809, "step": 11099 }, { "epoch": 0.801921722325573, "grad_norm": 6.349349834168093, "learning_rate": 3.4024382968925523e-06, "loss": 0.7569, "step": 11100 }, { "epoch": 0.8019939675257826, "grad_norm": 5.061727394206136, "learning_rate": 3.4021655101933736e-06, "loss": 0.8199, "step": 11101 }, { "epoch": 0.802066212725992, "grad_norm": 6.123557095967046, "learning_rate": 3.4018927111442496e-06, "loss": 0.7759, "step": 11102 }, { "epoch": 0.8021384579262015, "grad_norm": 5.836274776900448, "learning_rate": 3.401619899748915e-06, "loss": 0.8873, "step": 11103 }, { "epoch": 0.8022107031264111, "grad_norm": 7.676408659552152, "learning_rate": 3.4013470760111034e-06, "loss": 0.7621, "step": 11104 }, { "epoch": 0.8022829483266205, "grad_norm": 6.562742101654023, "learning_rate": 3.4010742399345503e-06, "loss": 0.8713, "step": 11105 }, { "epoch": 0.80235519352683, "grad_norm": 6.212985793832562, "learning_rate": 3.400801391522991e-06, "loss": 0.851, "step": 11106 }, { "epoch": 0.8024274387270396, "grad_norm": 6.890278433390659, "learning_rate": 3.4005285307801595e-06, "loss": 0.8213, "step": 11107 }, { "epoch": 0.8024996839272491, "grad_norm": 8.06090772415876, "learning_rate": 3.400255657709792e-06, "loss": 0.8911, "step": 11108 }, { "epoch": 0.8025719291274586, "grad_norm": 6.2211912337937045, "learning_rate": 3.3999827723156225e-06, "loss": 0.8418, "step": 11109 }, { "epoch": 0.8026441743276681, "grad_norm": 6.683804355426712, "learning_rate": 3.399709874601389e-06, "loss": 0.8981, "step": 11110 }, { "epoch": 0.8027164195278776, "grad_norm": 6.472689548514982, "learning_rate": 3.3994369645708247e-06, "loss": 0.7351, "step": 11111 }, { "epoch": 0.8027886647280871, "grad_norm": 7.351972675941009, "learning_rate": 3.399164042227668e-06, "loss": 0.7798, "step": 11112 }, { "epoch": 0.8028609099282966, "grad_norm": 6.093334633780465, "learning_rate": 3.3988911075756527e-06, "loss": 0.8413, "step": 11113 }, { "epoch": 0.8029331551285062, "grad_norm": 6.572785663860997, "learning_rate": 3.3986181606185163e-06, "loss": 0.8555, "step": 11114 }, { "epoch": 0.8030054003287157, "grad_norm": 8.793819893001384, "learning_rate": 3.3983452013599955e-06, "loss": 0.8485, "step": 11115 }, { "epoch": 0.8030776455289251, "grad_norm": 6.286437667985486, "learning_rate": 3.398072229803826e-06, "loss": 0.8626, "step": 11116 }, { "epoch": 0.8031498907291347, "grad_norm": 6.442965659386763, "learning_rate": 3.3977992459537456e-06, "loss": 0.8564, "step": 11117 }, { "epoch": 0.8032221359293442, "grad_norm": 5.817845337522981, "learning_rate": 3.39752624981349e-06, "loss": 0.7627, "step": 11118 }, { "epoch": 0.8032943811295538, "grad_norm": 6.9655480662530245, "learning_rate": 3.3972532413867965e-06, "loss": 0.8189, "step": 11119 }, { "epoch": 0.8033666263297632, "grad_norm": 5.529718909129944, "learning_rate": 3.3969802206774037e-06, "loss": 0.7838, "step": 11120 }, { "epoch": 0.8034388715299727, "grad_norm": 6.181882523900518, "learning_rate": 3.3967071876890477e-06, "loss": 0.8686, "step": 11121 }, { "epoch": 0.8035111167301823, "grad_norm": 6.8200001158952, "learning_rate": 3.3964341424254665e-06, "loss": 0.8728, "step": 11122 }, { "epoch": 0.8035833619303917, "grad_norm": 6.814066295627096, "learning_rate": 3.3961610848903977e-06, "loss": 0.8219, "step": 11123 }, { "epoch": 0.8036556071306012, "grad_norm": 6.053581363173096, "learning_rate": 3.39588801508758e-06, "loss": 0.8499, "step": 11124 }, { "epoch": 0.8037278523308108, "grad_norm": 7.102978857830365, "learning_rate": 3.3956149330207506e-06, "loss": 0.8221, "step": 11125 }, { "epoch": 0.8038000975310203, "grad_norm": 7.120530985631197, "learning_rate": 3.3953418386936487e-06, "loss": 0.8437, "step": 11126 }, { "epoch": 0.8038723427312298, "grad_norm": 4.778419604303804, "learning_rate": 3.3950687321100118e-06, "loss": 0.748, "step": 11127 }, { "epoch": 0.8039445879314393, "grad_norm": 5.2859164055728405, "learning_rate": 3.3947956132735787e-06, "loss": 0.8963, "step": 11128 }, { "epoch": 0.8040168331316488, "grad_norm": 5.070782514749538, "learning_rate": 3.394522482188089e-06, "loss": 0.7922, "step": 11129 }, { "epoch": 0.8040890783318583, "grad_norm": 6.77849498282456, "learning_rate": 3.3942493388572804e-06, "loss": 0.8081, "step": 11130 }, { "epoch": 0.8041613235320678, "grad_norm": 6.740017326174237, "learning_rate": 3.3939761832848937e-06, "loss": 0.856, "step": 11131 }, { "epoch": 0.8042335687322774, "grad_norm": 6.288606956521713, "learning_rate": 3.3937030154746665e-06, "loss": 0.7607, "step": 11132 }, { "epoch": 0.8043058139324869, "grad_norm": 6.84823753905758, "learning_rate": 3.3934298354303384e-06, "loss": 0.9005, "step": 11133 }, { "epoch": 0.8043780591326963, "grad_norm": 6.554790800642348, "learning_rate": 3.3931566431556503e-06, "loss": 0.8444, "step": 11134 }, { "epoch": 0.8044503043329059, "grad_norm": 5.8890142287504945, "learning_rate": 3.3928834386543414e-06, "loss": 0.8533, "step": 11135 }, { "epoch": 0.8045225495331154, "grad_norm": 5.46801002945779, "learning_rate": 3.392610221930151e-06, "loss": 0.8047, "step": 11136 }, { "epoch": 0.804594794733325, "grad_norm": 5.526023334996495, "learning_rate": 3.39233699298682e-06, "loss": 0.7787, "step": 11137 }, { "epoch": 0.8046670399335344, "grad_norm": 6.384631790620869, "learning_rate": 3.3920637518280886e-06, "loss": 0.8735, "step": 11138 }, { "epoch": 0.8047392851337439, "grad_norm": 7.723033133665258, "learning_rate": 3.3917904984576967e-06, "loss": 0.8438, "step": 11139 }, { "epoch": 0.8048115303339535, "grad_norm": 7.923095609088768, "learning_rate": 3.3915172328793857e-06, "loss": 0.8689, "step": 11140 }, { "epoch": 0.8048837755341629, "grad_norm": 6.976410218015372, "learning_rate": 3.3912439550968956e-06, "loss": 0.8692, "step": 11141 }, { "epoch": 0.8049560207343724, "grad_norm": 6.3215475679270545, "learning_rate": 3.3909706651139674e-06, "loss": 0.8317, "step": 11142 }, { "epoch": 0.805028265934582, "grad_norm": 5.9722972934154495, "learning_rate": 3.390697362934343e-06, "loss": 0.8343, "step": 11143 }, { "epoch": 0.8051005111347915, "grad_norm": 5.114060239852559, "learning_rate": 3.3904240485617634e-06, "loss": 0.7569, "step": 11144 }, { "epoch": 0.805172756335001, "grad_norm": 6.608281182576017, "learning_rate": 3.3901507219999707e-06, "loss": 0.8612, "step": 11145 }, { "epoch": 0.8052450015352105, "grad_norm": 6.321434119516914, "learning_rate": 3.389877383252705e-06, "loss": 0.8824, "step": 11146 }, { "epoch": 0.80531724673542, "grad_norm": 5.848570768084873, "learning_rate": 3.3896040323237084e-06, "loss": 0.833, "step": 11147 }, { "epoch": 0.8053894919356295, "grad_norm": 6.072638145939018, "learning_rate": 3.3893306692167236e-06, "loss": 0.8166, "step": 11148 }, { "epoch": 0.805461737135839, "grad_norm": 5.1540731747946396, "learning_rate": 3.3890572939354927e-06, "loss": 0.9002, "step": 11149 }, { "epoch": 0.8055339823360486, "grad_norm": 6.928097238171007, "learning_rate": 3.388783906483758e-06, "loss": 0.8295, "step": 11150 }, { "epoch": 0.8056062275362581, "grad_norm": 8.467252141378783, "learning_rate": 3.388510506865261e-06, "loss": 0.8545, "step": 11151 }, { "epoch": 0.8056784727364675, "grad_norm": 5.804283323117466, "learning_rate": 3.3882370950837453e-06, "loss": 0.8529, "step": 11152 }, { "epoch": 0.8057507179366771, "grad_norm": 7.480601336945091, "learning_rate": 3.3879636711429542e-06, "loss": 0.8175, "step": 11153 }, { "epoch": 0.8058229631368866, "grad_norm": 5.398930175205377, "learning_rate": 3.3876902350466296e-06, "loss": 0.8308, "step": 11154 }, { "epoch": 0.8058952083370962, "grad_norm": 5.892075094538233, "learning_rate": 3.387416786798514e-06, "loss": 0.9056, "step": 11155 }, { "epoch": 0.8059674535373056, "grad_norm": 5.949518508021882, "learning_rate": 3.3871433264023525e-06, "loss": 0.7877, "step": 11156 }, { "epoch": 0.8060396987375151, "grad_norm": 6.664786296145662, "learning_rate": 3.3868698538618873e-06, "loss": 0.808, "step": 11157 }, { "epoch": 0.8061119439377247, "grad_norm": 5.691640834647334, "learning_rate": 3.3865963691808625e-06, "loss": 0.7602, "step": 11158 }, { "epoch": 0.8061841891379341, "grad_norm": 6.662645017036918, "learning_rate": 3.3863228723630214e-06, "loss": 0.8016, "step": 11159 }, { "epoch": 0.8062564343381436, "grad_norm": 6.345008161583139, "learning_rate": 3.3860493634121094e-06, "loss": 0.8088, "step": 11160 }, { "epoch": 0.8063286795383532, "grad_norm": 6.387249416504931, "learning_rate": 3.385775842331869e-06, "loss": 0.8512, "step": 11161 }, { "epoch": 0.8064009247385627, "grad_norm": 7.226463259453713, "learning_rate": 3.3855023091260447e-06, "loss": 0.8734, "step": 11162 }, { "epoch": 0.8064731699387722, "grad_norm": 5.85282446338678, "learning_rate": 3.385228763798382e-06, "loss": 0.7996, "step": 11163 }, { "epoch": 0.8065454151389817, "grad_norm": 11.561827603970979, "learning_rate": 3.3849552063526246e-06, "loss": 0.9146, "step": 11164 }, { "epoch": 0.8066176603391912, "grad_norm": 6.815830641437892, "learning_rate": 3.3846816367925178e-06, "loss": 0.8161, "step": 11165 }, { "epoch": 0.8066899055394007, "grad_norm": 6.817147166036423, "learning_rate": 3.384408055121805e-06, "loss": 0.8036, "step": 11166 }, { "epoch": 0.8067621507396102, "grad_norm": 5.540121431913445, "learning_rate": 3.3841344613442345e-06, "loss": 0.93, "step": 11167 }, { "epoch": 0.8068343959398198, "grad_norm": 8.918351153923437, "learning_rate": 3.383860855463549e-06, "loss": 0.8436, "step": 11168 }, { "epoch": 0.8069066411400293, "grad_norm": 5.714380147698621, "learning_rate": 3.3835872374834943e-06, "loss": 0.8617, "step": 11169 }, { "epoch": 0.8069788863402387, "grad_norm": 5.837290738457629, "learning_rate": 3.3833136074078165e-06, "loss": 0.8198, "step": 11170 }, { "epoch": 0.8070511315404483, "grad_norm": 7.799248282571427, "learning_rate": 3.3830399652402614e-06, "loss": 0.7595, "step": 11171 }, { "epoch": 0.8071233767406578, "grad_norm": 5.8045284613980215, "learning_rate": 3.382766310984575e-06, "loss": 0.8343, "step": 11172 }, { "epoch": 0.8071956219408674, "grad_norm": 7.173749691155083, "learning_rate": 3.382492644644503e-06, "loss": 0.8269, "step": 11173 }, { "epoch": 0.8072678671410768, "grad_norm": 7.204022597782122, "learning_rate": 3.382218966223792e-06, "loss": 0.77, "step": 11174 }, { "epoch": 0.8073401123412863, "grad_norm": 6.671770653366544, "learning_rate": 3.3819452757261882e-06, "loss": 0.7891, "step": 11175 }, { "epoch": 0.8074123575414959, "grad_norm": 8.293775076350325, "learning_rate": 3.3816715731554384e-06, "loss": 0.8479, "step": 11176 }, { "epoch": 0.8074846027417053, "grad_norm": 8.210165319601995, "learning_rate": 3.381397858515289e-06, "loss": 0.9174, "step": 11177 }, { "epoch": 0.8075568479419148, "grad_norm": 6.589694687944371, "learning_rate": 3.381124131809488e-06, "loss": 0.7838, "step": 11178 }, { "epoch": 0.8076290931421244, "grad_norm": 4.417805986585131, "learning_rate": 3.3808503930417812e-06, "loss": 0.7794, "step": 11179 }, { "epoch": 0.8077013383423339, "grad_norm": 5.178101447816742, "learning_rate": 3.3805766422159164e-06, "loss": 0.7508, "step": 11180 }, { "epoch": 0.8077735835425434, "grad_norm": 5.807196053258109, "learning_rate": 3.380302879335642e-06, "loss": 0.8896, "step": 11181 }, { "epoch": 0.8078458287427529, "grad_norm": 5.874676634634747, "learning_rate": 3.3800291044047034e-06, "loss": 0.8719, "step": 11182 }, { "epoch": 0.8079180739429624, "grad_norm": 7.6515543337235625, "learning_rate": 3.379755317426851e-06, "loss": 0.756, "step": 11183 }, { "epoch": 0.8079903191431719, "grad_norm": 5.379865528964568, "learning_rate": 3.3794815184058305e-06, "loss": 0.7991, "step": 11184 }, { "epoch": 0.8080625643433814, "grad_norm": 6.7492525605234555, "learning_rate": 3.379207707345391e-06, "loss": 0.8335, "step": 11185 }, { "epoch": 0.808134809543591, "grad_norm": 7.852296345136606, "learning_rate": 3.378933884249281e-06, "loss": 0.9868, "step": 11186 }, { "epoch": 0.8082070547438005, "grad_norm": 7.390507574136511, "learning_rate": 3.378660049121248e-06, "loss": 0.8746, "step": 11187 }, { "epoch": 0.8082792999440099, "grad_norm": 6.385953879562888, "learning_rate": 3.378386201965042e-06, "loss": 0.8566, "step": 11188 }, { "epoch": 0.8083515451442195, "grad_norm": 6.253558252243996, "learning_rate": 3.3781123427844097e-06, "loss": 0.8734, "step": 11189 }, { "epoch": 0.808423790344429, "grad_norm": 6.2223610673278404, "learning_rate": 3.377838471583102e-06, "loss": 0.814, "step": 11190 }, { "epoch": 0.8084960355446386, "grad_norm": 7.187845238396732, "learning_rate": 3.3775645883648674e-06, "loss": 0.827, "step": 11191 }, { "epoch": 0.808568280744848, "grad_norm": 7.031579853373991, "learning_rate": 3.377290693133455e-06, "loss": 0.7874, "step": 11192 }, { "epoch": 0.8086405259450575, "grad_norm": 5.910360853406539, "learning_rate": 3.3770167858926133e-06, "loss": 0.9004, "step": 11193 }, { "epoch": 0.8087127711452671, "grad_norm": 8.458124117463978, "learning_rate": 3.3767428666460935e-06, "loss": 0.8542, "step": 11194 }, { "epoch": 0.8087850163454765, "grad_norm": 5.254225347535595, "learning_rate": 3.376468935397645e-06, "loss": 0.8289, "step": 11195 }, { "epoch": 0.808857261545686, "grad_norm": 6.317395417676596, "learning_rate": 3.3761949921510162e-06, "loss": 0.8568, "step": 11196 }, { "epoch": 0.8089295067458956, "grad_norm": 5.710649934513632, "learning_rate": 3.3759210369099593e-06, "loss": 0.7864, "step": 11197 }, { "epoch": 0.8090017519461051, "grad_norm": 6.7405300821652165, "learning_rate": 3.375647069678223e-06, "loss": 0.7975, "step": 11198 }, { "epoch": 0.8090739971463146, "grad_norm": 7.025677587880592, "learning_rate": 3.3753730904595585e-06, "loss": 0.9106, "step": 11199 }, { "epoch": 0.8091462423465241, "grad_norm": 5.131934150314962, "learning_rate": 3.3750990992577156e-06, "loss": 0.8568, "step": 11200 }, { "epoch": 0.8092184875467336, "grad_norm": 5.244321385967531, "learning_rate": 3.3748250960764454e-06, "loss": 0.8713, "step": 11201 }, { "epoch": 0.8092907327469431, "grad_norm": 7.91933663836486, "learning_rate": 3.3745510809194994e-06, "loss": 0.7631, "step": 11202 }, { "epoch": 0.8093629779471526, "grad_norm": 6.8714063095126425, "learning_rate": 3.3742770537906283e-06, "loss": 0.7616, "step": 11203 }, { "epoch": 0.8094352231473622, "grad_norm": 7.280145520443127, "learning_rate": 3.3740030146935827e-06, "loss": 0.8165, "step": 11204 }, { "epoch": 0.8095074683475717, "grad_norm": 6.267692548230999, "learning_rate": 3.3737289636321146e-06, "loss": 0.8558, "step": 11205 }, { "epoch": 0.8095797135477811, "grad_norm": 6.497953826449066, "learning_rate": 3.373454900609976e-06, "loss": 0.8336, "step": 11206 }, { "epoch": 0.8096519587479907, "grad_norm": 7.487764423267442, "learning_rate": 3.3731808256309167e-06, "loss": 0.8558, "step": 11207 }, { "epoch": 0.8097242039482002, "grad_norm": 6.230814976175587, "learning_rate": 3.372906738698691e-06, "loss": 0.8213, "step": 11208 }, { "epoch": 0.8097964491484096, "grad_norm": 6.502540385389625, "learning_rate": 3.372632639817049e-06, "loss": 0.7591, "step": 11209 }, { "epoch": 0.8098686943486192, "grad_norm": 5.581994859431328, "learning_rate": 3.3723585289897445e-06, "loss": 0.7324, "step": 11210 }, { "epoch": 0.8099409395488287, "grad_norm": 6.461573651326252, "learning_rate": 3.3720844062205287e-06, "loss": 0.7496, "step": 11211 }, { "epoch": 0.8100131847490383, "grad_norm": 5.65638151068628, "learning_rate": 3.3718102715131547e-06, "loss": 0.8051, "step": 11212 }, { "epoch": 0.8100854299492477, "grad_norm": 6.980074179181408, "learning_rate": 3.3715361248713746e-06, "loss": 0.8054, "step": 11213 }, { "epoch": 0.8101576751494572, "grad_norm": 6.528661479609126, "learning_rate": 3.3712619662989413e-06, "loss": 0.7962, "step": 11214 }, { "epoch": 0.8102299203496668, "grad_norm": 5.46264901034334, "learning_rate": 3.3709877957996096e-06, "loss": 0.8043, "step": 11215 }, { "epoch": 0.8103021655498763, "grad_norm": 8.470882583627604, "learning_rate": 3.37071361337713e-06, "loss": 0.8721, "step": 11216 }, { "epoch": 0.8103744107500858, "grad_norm": 7.173623131685217, "learning_rate": 3.370439419035258e-06, "loss": 0.7935, "step": 11217 }, { "epoch": 0.8104466559502953, "grad_norm": 7.282123300465572, "learning_rate": 3.3701652127777448e-06, "loss": 0.8223, "step": 11218 }, { "epoch": 0.8105189011505048, "grad_norm": 5.161066014084048, "learning_rate": 3.369890994608347e-06, "loss": 0.6912, "step": 11219 }, { "epoch": 0.8105911463507143, "grad_norm": 6.605274127639937, "learning_rate": 3.3696167645308166e-06, "loss": 0.9514, "step": 11220 }, { "epoch": 0.8106633915509238, "grad_norm": 5.508061483179925, "learning_rate": 3.3693425225489074e-06, "loss": 0.7874, "step": 11221 }, { "epoch": 0.8107356367511334, "grad_norm": 7.861571473329835, "learning_rate": 3.369068268666374e-06, "loss": 0.8024, "step": 11222 }, { "epoch": 0.8108078819513429, "grad_norm": 6.248252624386621, "learning_rate": 3.3687940028869704e-06, "loss": 0.7714, "step": 11223 }, { "epoch": 0.8108801271515523, "grad_norm": 5.44761296064562, "learning_rate": 3.368519725214453e-06, "loss": 0.8005, "step": 11224 }, { "epoch": 0.8109523723517619, "grad_norm": 6.650917460318151, "learning_rate": 3.3682454356525736e-06, "loss": 0.9325, "step": 11225 }, { "epoch": 0.8110246175519714, "grad_norm": 6.701997533202962, "learning_rate": 3.367971134205089e-06, "loss": 0.8781, "step": 11226 }, { "epoch": 0.8110968627521808, "grad_norm": 6.51382794743679, "learning_rate": 3.367696820875753e-06, "loss": 0.7803, "step": 11227 }, { "epoch": 0.8111691079523904, "grad_norm": 7.131080308478288, "learning_rate": 3.3674224956683214e-06, "loss": 0.875, "step": 11228 }, { "epoch": 0.8112413531525999, "grad_norm": 5.942761790118446, "learning_rate": 3.36714815858655e-06, "loss": 0.8045, "step": 11229 }, { "epoch": 0.8113135983528095, "grad_norm": 7.119649919625778, "learning_rate": 3.366873809634193e-06, "loss": 0.7952, "step": 11230 }, { "epoch": 0.8113858435530189, "grad_norm": 6.541134012813373, "learning_rate": 3.3665994488150073e-06, "loss": 0.8796, "step": 11231 }, { "epoch": 0.8114580887532284, "grad_norm": 5.2966076268594104, "learning_rate": 3.366325076132747e-06, "loss": 0.7319, "step": 11232 }, { "epoch": 0.811530333953438, "grad_norm": 6.73029276240823, "learning_rate": 3.3660506915911695e-06, "loss": 0.7501, "step": 11233 }, { "epoch": 0.8116025791536475, "grad_norm": 7.001063674765785, "learning_rate": 3.36577629519403e-06, "loss": 0.8711, "step": 11234 }, { "epoch": 0.811674824353857, "grad_norm": 6.213398379911772, "learning_rate": 3.365501886945086e-06, "loss": 0.8098, "step": 11235 }, { "epoch": 0.8117470695540665, "grad_norm": 6.29930624396635, "learning_rate": 3.3652274668480933e-06, "loss": 0.7675, "step": 11236 }, { "epoch": 0.811819314754276, "grad_norm": 5.555323396706279, "learning_rate": 3.3649530349068073e-06, "loss": 0.8576, "step": 11237 }, { "epoch": 0.8118915599544855, "grad_norm": 6.4156368799931105, "learning_rate": 3.364678591124987e-06, "loss": 0.7982, "step": 11238 }, { "epoch": 0.811963805154695, "grad_norm": 4.908041851583433, "learning_rate": 3.364404135506388e-06, "loss": 0.7528, "step": 11239 }, { "epoch": 0.8120360503549046, "grad_norm": 4.902186174633873, "learning_rate": 3.3641296680547676e-06, "loss": 0.8001, "step": 11240 }, { "epoch": 0.8121082955551141, "grad_norm": 6.146903596781472, "learning_rate": 3.3638551887738825e-06, "loss": 0.7777, "step": 11241 }, { "epoch": 0.8121805407553235, "grad_norm": 5.818700293176015, "learning_rate": 3.36358069766749e-06, "loss": 0.8004, "step": 11242 }, { "epoch": 0.8122527859555331, "grad_norm": 7.805273513253666, "learning_rate": 3.363306194739349e-06, "loss": 0.7925, "step": 11243 }, { "epoch": 0.8123250311557426, "grad_norm": 7.139441060219193, "learning_rate": 3.3630316799932173e-06, "loss": 0.8027, "step": 11244 }, { "epoch": 0.812397276355952, "grad_norm": 6.779134816018912, "learning_rate": 3.3627571534328513e-06, "loss": 0.9045, "step": 11245 }, { "epoch": 0.8124695215561616, "grad_norm": 6.964363667816854, "learning_rate": 3.3624826150620093e-06, "loss": 0.796, "step": 11246 }, { "epoch": 0.8125417667563711, "grad_norm": 6.243061028424973, "learning_rate": 3.36220806488445e-06, "loss": 0.8756, "step": 11247 }, { "epoch": 0.8126140119565807, "grad_norm": 6.709519117608265, "learning_rate": 3.3619335029039325e-06, "loss": 0.8508, "step": 11248 }, { "epoch": 0.8126862571567901, "grad_norm": 6.189134786405679, "learning_rate": 3.361658929124214e-06, "loss": 0.777, "step": 11249 }, { "epoch": 0.8127585023569996, "grad_norm": 8.136347900964372, "learning_rate": 3.3613843435490546e-06, "loss": 0.8584, "step": 11250 }, { "epoch": 0.8128307475572092, "grad_norm": 6.548341427541798, "learning_rate": 3.361109746182211e-06, "loss": 0.9019, "step": 11251 }, { "epoch": 0.8129029927574187, "grad_norm": 5.120294411062417, "learning_rate": 3.360835137027445e-06, "loss": 0.8202, "step": 11252 }, { "epoch": 0.8129752379576282, "grad_norm": 5.923195402349484, "learning_rate": 3.3605605160885137e-06, "loss": 0.7361, "step": 11253 }, { "epoch": 0.8130474831578377, "grad_norm": 9.472945388628476, "learning_rate": 3.3602858833691776e-06, "loss": 0.8898, "step": 11254 }, { "epoch": 0.8131197283580472, "grad_norm": 7.085205309470132, "learning_rate": 3.3600112388731947e-06, "loss": 0.8119, "step": 11255 }, { "epoch": 0.8131919735582567, "grad_norm": 8.00381521803406, "learning_rate": 3.359736582604326e-06, "loss": 0.8809, "step": 11256 }, { "epoch": 0.8132642187584662, "grad_norm": 6.477913820216984, "learning_rate": 3.3594619145663316e-06, "loss": 0.8672, "step": 11257 }, { "epoch": 0.8133364639586758, "grad_norm": 4.483457252085093, "learning_rate": 3.3591872347629705e-06, "loss": 0.711, "step": 11258 }, { "epoch": 0.8134087091588853, "grad_norm": 7.670202877141756, "learning_rate": 3.3589125431980037e-06, "loss": 0.8135, "step": 11259 }, { "epoch": 0.8134809543590947, "grad_norm": 8.153097076777595, "learning_rate": 3.3586378398751905e-06, "loss": 0.8127, "step": 11260 }, { "epoch": 0.8135531995593043, "grad_norm": 6.578991033308787, "learning_rate": 3.358363124798292e-06, "loss": 0.8254, "step": 11261 }, { "epoch": 0.8136254447595138, "grad_norm": 6.37414485207348, "learning_rate": 3.3580883979710693e-06, "loss": 0.8749, "step": 11262 }, { "epoch": 0.8136976899597232, "grad_norm": 6.157034422185453, "learning_rate": 3.3578136593972825e-06, "loss": 0.8423, "step": 11263 }, { "epoch": 0.8137699351599328, "grad_norm": 5.057951402568738, "learning_rate": 3.3575389090806926e-06, "loss": 0.8191, "step": 11264 }, { "epoch": 0.8138421803601423, "grad_norm": 5.895996882278674, "learning_rate": 3.3572641470250613e-06, "loss": 0.8218, "step": 11265 }, { "epoch": 0.8139144255603519, "grad_norm": 8.241561272863017, "learning_rate": 3.3569893732341484e-06, "loss": 0.7856, "step": 11266 }, { "epoch": 0.8139866707605613, "grad_norm": 7.126514742272422, "learning_rate": 3.3567145877117174e-06, "loss": 0.8109, "step": 11267 }, { "epoch": 0.8140589159607708, "grad_norm": 6.326898451251701, "learning_rate": 3.3564397904615286e-06, "loss": 0.8569, "step": 11268 }, { "epoch": 0.8141311611609804, "grad_norm": 6.490299028228983, "learning_rate": 3.3561649814873443e-06, "loss": 0.8171, "step": 11269 }, { "epoch": 0.8142034063611899, "grad_norm": 5.878611651671836, "learning_rate": 3.3558901607929256e-06, "loss": 0.7776, "step": 11270 }, { "epoch": 0.8142756515613994, "grad_norm": 7.793547568181628, "learning_rate": 3.355615328382036e-06, "loss": 0.8974, "step": 11271 }, { "epoch": 0.8143478967616089, "grad_norm": 5.123844481669878, "learning_rate": 3.3553404842584363e-06, "loss": 0.7929, "step": 11272 }, { "epoch": 0.8144201419618184, "grad_norm": 6.071423756573475, "learning_rate": 3.355065628425889e-06, "loss": 0.8006, "step": 11273 }, { "epoch": 0.8144923871620279, "grad_norm": 7.899187514619038, "learning_rate": 3.354790760888158e-06, "loss": 0.7992, "step": 11274 }, { "epoch": 0.8145646323622374, "grad_norm": 6.7059061465318575, "learning_rate": 3.354515881649005e-06, "loss": 0.8997, "step": 11275 }, { "epoch": 0.814636877562447, "grad_norm": 5.0179120610096755, "learning_rate": 3.354240990712193e-06, "loss": 0.789, "step": 11276 }, { "epoch": 0.8147091227626565, "grad_norm": 5.347120148095966, "learning_rate": 3.353966088081485e-06, "loss": 0.7797, "step": 11277 }, { "epoch": 0.8147813679628659, "grad_norm": 6.543032007623316, "learning_rate": 3.3536911737606447e-06, "loss": 0.9313, "step": 11278 }, { "epoch": 0.8148536131630755, "grad_norm": 6.489020244075938, "learning_rate": 3.3534162477534342e-06, "loss": 0.6895, "step": 11279 }, { "epoch": 0.814925858363285, "grad_norm": 5.566318016022186, "learning_rate": 3.3531413100636184e-06, "loss": 0.812, "step": 11280 }, { "epoch": 0.8149981035634944, "grad_norm": 6.821049356583719, "learning_rate": 3.352866360694961e-06, "loss": 0.9714, "step": 11281 }, { "epoch": 0.815070348763704, "grad_norm": 5.94187332514423, "learning_rate": 3.352591399651225e-06, "loss": 0.7803, "step": 11282 }, { "epoch": 0.8151425939639135, "grad_norm": 6.9542186091338145, "learning_rate": 3.352316426936175e-06, "loss": 0.9381, "step": 11283 }, { "epoch": 0.8152148391641231, "grad_norm": 7.35996148431071, "learning_rate": 3.352041442553574e-06, "loss": 0.8092, "step": 11284 }, { "epoch": 0.8152870843643325, "grad_norm": 5.245090914248339, "learning_rate": 3.3517664465071875e-06, "loss": 0.7674, "step": 11285 }, { "epoch": 0.815359329564542, "grad_norm": 4.870029948179681, "learning_rate": 3.3514914388007804e-06, "loss": 0.8457, "step": 11286 }, { "epoch": 0.8154315747647516, "grad_norm": 5.44222256136872, "learning_rate": 3.3512164194381157e-06, "loss": 0.84, "step": 11287 }, { "epoch": 0.8155038199649611, "grad_norm": 6.1917139981678275, "learning_rate": 3.35094138842296e-06, "loss": 0.84, "step": 11288 }, { "epoch": 0.8155760651651706, "grad_norm": 6.5785996363244985, "learning_rate": 3.3506663457590764e-06, "loss": 0.8314, "step": 11289 }, { "epoch": 0.8156483103653801, "grad_norm": 6.689110962964624, "learning_rate": 3.3503912914502323e-06, "loss": 0.7925, "step": 11290 }, { "epoch": 0.8157205555655896, "grad_norm": 7.198030138754489, "learning_rate": 3.3501162255001905e-06, "loss": 0.8682, "step": 11291 }, { "epoch": 0.8157928007657991, "grad_norm": 5.686074885306943, "learning_rate": 3.349841147912719e-06, "loss": 0.8238, "step": 11292 }, { "epoch": 0.8158650459660086, "grad_norm": 6.9259822878675825, "learning_rate": 3.3495660586915812e-06, "loss": 0.7917, "step": 11293 }, { "epoch": 0.8159372911662182, "grad_norm": 6.515108010252317, "learning_rate": 3.3492909578405434e-06, "loss": 0.777, "step": 11294 }, { "epoch": 0.8160095363664277, "grad_norm": 4.8970738996803655, "learning_rate": 3.3490158453633727e-06, "loss": 0.7439, "step": 11295 }, { "epoch": 0.8160817815666371, "grad_norm": 6.039027126351769, "learning_rate": 3.3487407212638334e-06, "loss": 0.7945, "step": 11296 }, { "epoch": 0.8161540267668467, "grad_norm": 4.821289952621703, "learning_rate": 3.348465585545694e-06, "loss": 0.7613, "step": 11297 }, { "epoch": 0.8162262719670562, "grad_norm": 7.105187962600759, "learning_rate": 3.3481904382127177e-06, "loss": 0.8573, "step": 11298 }, { "epoch": 0.8162985171672656, "grad_norm": 5.938712869542945, "learning_rate": 3.3479152792686736e-06, "loss": 0.8111, "step": 11299 }, { "epoch": 0.8163707623674752, "grad_norm": 6.273870735838424, "learning_rate": 3.347640108717328e-06, "loss": 0.8091, "step": 11300 }, { "epoch": 0.8164430075676847, "grad_norm": 6.794316092930495, "learning_rate": 3.3473649265624476e-06, "loss": 0.8084, "step": 11301 }, { "epoch": 0.8165152527678943, "grad_norm": 6.648541639074969, "learning_rate": 3.347089732807799e-06, "loss": 0.8165, "step": 11302 }, { "epoch": 0.8165874979681037, "grad_norm": 6.8675782805301155, "learning_rate": 3.346814527457149e-06, "loss": 0.7647, "step": 11303 }, { "epoch": 0.8166597431683132, "grad_norm": 5.8458189540186805, "learning_rate": 3.346539310514267e-06, "loss": 0.7401, "step": 11304 }, { "epoch": 0.8167319883685228, "grad_norm": 6.35086847809222, "learning_rate": 3.346264081982918e-06, "loss": 0.8217, "step": 11305 }, { "epoch": 0.8168042335687323, "grad_norm": 6.387198949837455, "learning_rate": 3.3459888418668716e-06, "loss": 0.8561, "step": 11306 }, { "epoch": 0.8168764787689418, "grad_norm": 7.6035038349992075, "learning_rate": 3.345713590169894e-06, "loss": 0.7938, "step": 11307 }, { "epoch": 0.8169487239691513, "grad_norm": 7.148813973607654, "learning_rate": 3.345438326895755e-06, "loss": 0.8103, "step": 11308 }, { "epoch": 0.8170209691693608, "grad_norm": 6.484740318210815, "learning_rate": 3.3451630520482203e-06, "loss": 0.8732, "step": 11309 }, { "epoch": 0.8170932143695703, "grad_norm": 5.9481045436637645, "learning_rate": 3.3448877656310606e-06, "loss": 0.8769, "step": 11310 }, { "epoch": 0.8171654595697798, "grad_norm": 5.036715553866094, "learning_rate": 3.344612467648044e-06, "loss": 0.8543, "step": 11311 }, { "epoch": 0.8172377047699894, "grad_norm": 5.265061164348526, "learning_rate": 3.3443371581029376e-06, "loss": 0.7439, "step": 11312 }, { "epoch": 0.8173099499701989, "grad_norm": 6.81914147530355, "learning_rate": 3.3440618369995115e-06, "loss": 0.8275, "step": 11313 }, { "epoch": 0.8173821951704083, "grad_norm": 6.601159150029898, "learning_rate": 3.3437865043415337e-06, "loss": 0.8997, "step": 11314 }, { "epoch": 0.8174544403706179, "grad_norm": 5.182644697525677, "learning_rate": 3.343511160132774e-06, "loss": 0.7792, "step": 11315 }, { "epoch": 0.8175266855708274, "grad_norm": 6.127865179565845, "learning_rate": 3.343235804377002e-06, "loss": 0.7674, "step": 11316 }, { "epoch": 0.8175989307710368, "grad_norm": 6.627534777284922, "learning_rate": 3.3429604370779854e-06, "loss": 0.7202, "step": 11317 }, { "epoch": 0.8176711759712464, "grad_norm": 7.767779721029272, "learning_rate": 3.3426850582394964e-06, "loss": 0.7436, "step": 11318 }, { "epoch": 0.8177434211714559, "grad_norm": 6.906390296581199, "learning_rate": 3.342409667865303e-06, "loss": 0.8846, "step": 11319 }, { "epoch": 0.8178156663716655, "grad_norm": 5.829938754615554, "learning_rate": 3.342134265959175e-06, "loss": 0.8487, "step": 11320 }, { "epoch": 0.8178879115718749, "grad_norm": 5.719545069389807, "learning_rate": 3.3418588525248826e-06, "loss": 0.8571, "step": 11321 }, { "epoch": 0.8179601567720844, "grad_norm": 6.58478851975168, "learning_rate": 3.341583427566196e-06, "loss": 0.8773, "step": 11322 }, { "epoch": 0.818032401972294, "grad_norm": 8.295525450064979, "learning_rate": 3.3413079910868856e-06, "loss": 0.8591, "step": 11323 }, { "epoch": 0.8181046471725035, "grad_norm": 5.8172689583295805, "learning_rate": 3.341032543090723e-06, "loss": 0.8587, "step": 11324 }, { "epoch": 0.818176892372713, "grad_norm": 5.975316455074262, "learning_rate": 3.340757083581477e-06, "loss": 0.7819, "step": 11325 }, { "epoch": 0.8182491375729225, "grad_norm": 4.913465503034668, "learning_rate": 3.34048161256292e-06, "loss": 0.7441, "step": 11326 }, { "epoch": 0.818321382773132, "grad_norm": 7.354483823628394, "learning_rate": 3.340206130038822e-06, "loss": 0.8677, "step": 11327 }, { "epoch": 0.8183936279733415, "grad_norm": 5.2920420155605346, "learning_rate": 3.3399306360129543e-06, "loss": 0.8743, "step": 11328 }, { "epoch": 0.818465873173551, "grad_norm": 6.726189615190903, "learning_rate": 3.3396551304890883e-06, "loss": 0.8536, "step": 11329 }, { "epoch": 0.8185381183737606, "grad_norm": 6.252506211379549, "learning_rate": 3.3393796134709954e-06, "loss": 0.7993, "step": 11330 }, { "epoch": 0.8186103635739701, "grad_norm": 5.506413968055585, "learning_rate": 3.3391040849624483e-06, "loss": 0.7562, "step": 11331 }, { "epoch": 0.8186826087741795, "grad_norm": 7.920806872948909, "learning_rate": 3.3388285449672162e-06, "loss": 0.8834, "step": 11332 }, { "epoch": 0.8187548539743891, "grad_norm": 5.819063480181064, "learning_rate": 3.3385529934890744e-06, "loss": 0.7967, "step": 11333 }, { "epoch": 0.8188270991745986, "grad_norm": 5.545844527686342, "learning_rate": 3.3382774305317922e-06, "loss": 0.8035, "step": 11334 }, { "epoch": 0.818899344374808, "grad_norm": 5.248410302126054, "learning_rate": 3.3380018560991435e-06, "loss": 0.7744, "step": 11335 }, { "epoch": 0.8189715895750176, "grad_norm": 6.496076720324942, "learning_rate": 3.3377262701948994e-06, "loss": 0.8332, "step": 11336 }, { "epoch": 0.8190438347752271, "grad_norm": 5.114495467643882, "learning_rate": 3.337450672822833e-06, "loss": 0.8485, "step": 11337 }, { "epoch": 0.8191160799754367, "grad_norm": 5.326167040529985, "learning_rate": 3.3371750639867174e-06, "loss": 0.8645, "step": 11338 }, { "epoch": 0.8191883251756461, "grad_norm": 5.583870458625097, "learning_rate": 3.336899443690325e-06, "loss": 0.7246, "step": 11339 }, { "epoch": 0.8192605703758556, "grad_norm": 7.460982493319831, "learning_rate": 3.33662381193743e-06, "loss": 0.8018, "step": 11340 }, { "epoch": 0.8193328155760652, "grad_norm": 7.586335506664448, "learning_rate": 3.3363481687318034e-06, "loss": 0.9752, "step": 11341 }, { "epoch": 0.8194050607762747, "grad_norm": 5.856458909777653, "learning_rate": 3.33607251407722e-06, "loss": 0.8631, "step": 11342 }, { "epoch": 0.8194773059764842, "grad_norm": 6.617631088935681, "learning_rate": 3.335796847977454e-06, "loss": 0.8517, "step": 11343 }, { "epoch": 0.8195495511766937, "grad_norm": 6.63674289099494, "learning_rate": 3.3355211704362766e-06, "loss": 0.8455, "step": 11344 }, { "epoch": 0.8196217963769032, "grad_norm": 10.813995434316494, "learning_rate": 3.3352454814574647e-06, "loss": 0.864, "step": 11345 }, { "epoch": 0.8196940415771127, "grad_norm": 7.690083139513314, "learning_rate": 3.3349697810447888e-06, "loss": 0.7788, "step": 11346 }, { "epoch": 0.8197662867773222, "grad_norm": 7.091126078506805, "learning_rate": 3.3346940692020267e-06, "loss": 0.8192, "step": 11347 }, { "epoch": 0.8198385319775318, "grad_norm": 7.312063611695033, "learning_rate": 3.3344183459329498e-06, "loss": 0.8429, "step": 11348 }, { "epoch": 0.8199107771777413, "grad_norm": 6.519639203438913, "learning_rate": 3.3341426112413346e-06, "loss": 0.8723, "step": 11349 }, { "epoch": 0.8199830223779507, "grad_norm": 6.614148735927895, "learning_rate": 3.3338668651309538e-06, "loss": 0.8091, "step": 11350 }, { "epoch": 0.8200552675781603, "grad_norm": 4.7384054320211995, "learning_rate": 3.3335911076055834e-06, "loss": 0.784, "step": 11351 }, { "epoch": 0.8201275127783698, "grad_norm": 5.3756442016641826, "learning_rate": 3.3333153386689983e-06, "loss": 0.7946, "step": 11352 }, { "epoch": 0.8201997579785792, "grad_norm": 6.634256769565185, "learning_rate": 3.3330395583249725e-06, "loss": 0.883, "step": 11353 }, { "epoch": 0.8202720031787888, "grad_norm": 6.519997280096879, "learning_rate": 3.3327637665772828e-06, "loss": 0.8101, "step": 11354 }, { "epoch": 0.8203442483789983, "grad_norm": 8.734407755836388, "learning_rate": 3.332487963429703e-06, "loss": 0.851, "step": 11355 }, { "epoch": 0.8204164935792079, "grad_norm": 5.139079409438394, "learning_rate": 3.3322121488860097e-06, "loss": 0.8581, "step": 11356 }, { "epoch": 0.8204887387794173, "grad_norm": 5.777226548105481, "learning_rate": 3.331936322949978e-06, "loss": 0.8054, "step": 11357 }, { "epoch": 0.8205609839796268, "grad_norm": 9.607086554234165, "learning_rate": 3.3316604856253843e-06, "loss": 0.7881, "step": 11358 }, { "epoch": 0.8206332291798364, "grad_norm": 8.911362208091903, "learning_rate": 3.3313846369160045e-06, "loss": 0.878, "step": 11359 }, { "epoch": 0.8207054743800459, "grad_norm": 5.570459564536111, "learning_rate": 3.331108776825614e-06, "loss": 0.7213, "step": 11360 }, { "epoch": 0.8207777195802554, "grad_norm": 5.631729847302519, "learning_rate": 3.33083290535799e-06, "loss": 0.8186, "step": 11361 }, { "epoch": 0.8208499647804649, "grad_norm": 6.708597150397269, "learning_rate": 3.3305570225169087e-06, "loss": 0.8091, "step": 11362 }, { "epoch": 0.8209222099806744, "grad_norm": 7.01257176218016, "learning_rate": 3.3302811283061463e-06, "loss": 0.7812, "step": 11363 }, { "epoch": 0.8209944551808839, "grad_norm": 8.032481058373246, "learning_rate": 3.3300052227294804e-06, "loss": 0.8887, "step": 11364 }, { "epoch": 0.8210667003810934, "grad_norm": 6.305073817257372, "learning_rate": 3.329729305790687e-06, "loss": 0.7606, "step": 11365 }, { "epoch": 0.821138945581303, "grad_norm": 7.555502183887741, "learning_rate": 3.3294533774935435e-06, "loss": 0.8236, "step": 11366 }, { "epoch": 0.8212111907815125, "grad_norm": 8.124686836663162, "learning_rate": 3.329177437841828e-06, "loss": 0.8566, "step": 11367 }, { "epoch": 0.8212834359817219, "grad_norm": 6.229945853841499, "learning_rate": 3.328901486839317e-06, "loss": 0.866, "step": 11368 }, { "epoch": 0.8213556811819315, "grad_norm": 6.39404026539772, "learning_rate": 3.328625524489788e-06, "loss": 0.7499, "step": 11369 }, { "epoch": 0.821427926382141, "grad_norm": 7.2352405137457625, "learning_rate": 3.3283495507970185e-06, "loss": 0.7754, "step": 11370 }, { "epoch": 0.8215001715823504, "grad_norm": 7.239740939906146, "learning_rate": 3.328073565764788e-06, "loss": 0.8481, "step": 11371 }, { "epoch": 0.82157241678256, "grad_norm": 7.239044066911428, "learning_rate": 3.3277975693968727e-06, "loss": 0.7812, "step": 11372 }, { "epoch": 0.8216446619827695, "grad_norm": 7.170520928903681, "learning_rate": 3.3275215616970514e-06, "loss": 0.8122, "step": 11373 }, { "epoch": 0.8217169071829791, "grad_norm": 6.966336914484076, "learning_rate": 3.3272455426691017e-06, "loss": 0.9223, "step": 11374 }, { "epoch": 0.8217891523831885, "grad_norm": 5.09060759239897, "learning_rate": 3.326969512316804e-06, "loss": 0.8003, "step": 11375 }, { "epoch": 0.821861397583398, "grad_norm": 6.577128935220715, "learning_rate": 3.3266934706439357e-06, "loss": 0.7849, "step": 11376 }, { "epoch": 0.8219336427836076, "grad_norm": 6.4159585471205425, "learning_rate": 3.3264174176542754e-06, "loss": 0.7488, "step": 11377 }, { "epoch": 0.8220058879838171, "grad_norm": 5.832597013959227, "learning_rate": 3.326141353351602e-06, "loss": 0.812, "step": 11378 }, { "epoch": 0.8220781331840266, "grad_norm": 7.64789633156333, "learning_rate": 3.3258652777396947e-06, "loss": 0.8093, "step": 11379 }, { "epoch": 0.8221503783842361, "grad_norm": 5.410283871464068, "learning_rate": 3.325589190822334e-06, "loss": 0.7977, "step": 11380 }, { "epoch": 0.8222226235844456, "grad_norm": 5.048932580217818, "learning_rate": 3.3253130926032977e-06, "loss": 0.7586, "step": 11381 }, { "epoch": 0.8222948687846551, "grad_norm": 5.706404237031079, "learning_rate": 3.325036983086366e-06, "loss": 0.8648, "step": 11382 }, { "epoch": 0.8223671139848646, "grad_norm": 5.1742032161008895, "learning_rate": 3.324760862275319e-06, "loss": 0.8243, "step": 11383 }, { "epoch": 0.8224393591850742, "grad_norm": 5.43076319296213, "learning_rate": 3.3244847301739357e-06, "loss": 0.7542, "step": 11384 }, { "epoch": 0.8225116043852837, "grad_norm": 5.5603254546822445, "learning_rate": 3.3242085867859965e-06, "loss": 0.7587, "step": 11385 }, { "epoch": 0.8225838495854931, "grad_norm": 5.858137564646153, "learning_rate": 3.323932432115282e-06, "loss": 0.8244, "step": 11386 }, { "epoch": 0.8226560947857027, "grad_norm": 7.243448388417223, "learning_rate": 3.3236562661655725e-06, "loss": 0.8902, "step": 11387 }, { "epoch": 0.8227283399859122, "grad_norm": 5.629340637515516, "learning_rate": 3.3233800889406475e-06, "loss": 0.7866, "step": 11388 }, { "epoch": 0.8228005851861216, "grad_norm": 5.986196215920261, "learning_rate": 3.323103900444289e-06, "loss": 0.7757, "step": 11389 }, { "epoch": 0.8228728303863312, "grad_norm": 8.149446302456449, "learning_rate": 3.3228277006802774e-06, "loss": 0.7958, "step": 11390 }, { "epoch": 0.8229450755865407, "grad_norm": 6.89655683550152, "learning_rate": 3.3225514896523927e-06, "loss": 0.8654, "step": 11391 }, { "epoch": 0.8230173207867503, "grad_norm": 5.09375299558961, "learning_rate": 3.322275267364418e-06, "loss": 0.7877, "step": 11392 }, { "epoch": 0.8230895659869597, "grad_norm": 6.349511447645346, "learning_rate": 3.321999033820132e-06, "loss": 0.8139, "step": 11393 }, { "epoch": 0.8231618111871692, "grad_norm": 6.189915041240343, "learning_rate": 3.321722789023318e-06, "loss": 0.9134, "step": 11394 }, { "epoch": 0.8232340563873788, "grad_norm": 6.329317861144465, "learning_rate": 3.321446532977757e-06, "loss": 0.7969, "step": 11395 }, { "epoch": 0.8233063015875883, "grad_norm": 6.503469274922677, "learning_rate": 3.321170265687231e-06, "loss": 0.8145, "step": 11396 }, { "epoch": 0.8233785467877978, "grad_norm": 5.779601815839756, "learning_rate": 3.3208939871555218e-06, "loss": 0.7862, "step": 11397 }, { "epoch": 0.8234507919880073, "grad_norm": 6.982757039595709, "learning_rate": 3.3206176973864102e-06, "loss": 0.879, "step": 11398 }, { "epoch": 0.8235230371882168, "grad_norm": 8.051958630095022, "learning_rate": 3.3203413963836805e-06, "loss": 0.8713, "step": 11399 }, { "epoch": 0.8235952823884263, "grad_norm": 6.4432220213080615, "learning_rate": 3.320065084151114e-06, "loss": 0.8132, "step": 11400 }, { "epoch": 0.8236675275886358, "grad_norm": 6.513498229038486, "learning_rate": 3.319788760692493e-06, "loss": 0.7659, "step": 11401 }, { "epoch": 0.8237397727888454, "grad_norm": 5.385007369267514, "learning_rate": 3.3195124260116002e-06, "loss": 0.7342, "step": 11402 }, { "epoch": 0.8238120179890549, "grad_norm": 6.454338790867343, "learning_rate": 3.3192360801122187e-06, "loss": 0.8599, "step": 11403 }, { "epoch": 0.8238842631892643, "grad_norm": 7.019784488684686, "learning_rate": 3.318959722998132e-06, "loss": 0.8666, "step": 11404 }, { "epoch": 0.8239565083894739, "grad_norm": 7.316036314810031, "learning_rate": 3.318683354673122e-06, "loss": 0.8089, "step": 11405 }, { "epoch": 0.8240287535896834, "grad_norm": 6.045875959642636, "learning_rate": 3.3184069751409732e-06, "loss": 0.8353, "step": 11406 }, { "epoch": 0.8241009987898928, "grad_norm": 8.870877020139853, "learning_rate": 3.3181305844054675e-06, "loss": 0.8995, "step": 11407 }, { "epoch": 0.8241732439901024, "grad_norm": 5.431615341769635, "learning_rate": 3.3178541824703892e-06, "loss": 0.7225, "step": 11408 }, { "epoch": 0.8242454891903119, "grad_norm": 7.104871191028961, "learning_rate": 3.317577769339523e-06, "loss": 0.7835, "step": 11409 }, { "epoch": 0.8243177343905215, "grad_norm": 6.379297640415421, "learning_rate": 3.3173013450166515e-06, "loss": 0.7706, "step": 11410 }, { "epoch": 0.8243899795907309, "grad_norm": 5.713126664461386, "learning_rate": 3.3170249095055595e-06, "loss": 0.755, "step": 11411 }, { "epoch": 0.8244622247909404, "grad_norm": 6.385826342469068, "learning_rate": 3.31674846281003e-06, "loss": 0.7898, "step": 11412 }, { "epoch": 0.82453446999115, "grad_norm": 6.8038981315975215, "learning_rate": 3.3164720049338488e-06, "loss": 0.786, "step": 11413 }, { "epoch": 0.8246067151913595, "grad_norm": 4.94214980973071, "learning_rate": 3.3161955358807996e-06, "loss": 0.7663, "step": 11414 }, { "epoch": 0.824678960391569, "grad_norm": 5.961289461676763, "learning_rate": 3.3159190556546676e-06, "loss": 0.7968, "step": 11415 }, { "epoch": 0.8247512055917785, "grad_norm": 7.305971801786937, "learning_rate": 3.3156425642592373e-06, "loss": 0.8662, "step": 11416 }, { "epoch": 0.824823450791988, "grad_norm": 7.339636376960107, "learning_rate": 3.315366061698293e-06, "loss": 0.8161, "step": 11417 }, { "epoch": 0.8248956959921975, "grad_norm": 8.131297326738231, "learning_rate": 3.3150895479756207e-06, "loss": 0.8697, "step": 11418 }, { "epoch": 0.824967941192407, "grad_norm": 7.917147518077331, "learning_rate": 3.3148130230950053e-06, "loss": 0.8376, "step": 11419 }, { "epoch": 0.8250401863926166, "grad_norm": 6.520438997900509, "learning_rate": 3.314536487060233e-06, "loss": 0.7796, "step": 11420 }, { "epoch": 0.8251124315928261, "grad_norm": 5.521864865460511, "learning_rate": 3.314259939875088e-06, "loss": 0.8931, "step": 11421 }, { "epoch": 0.8251846767930355, "grad_norm": 6.43703007140224, "learning_rate": 3.3139833815433565e-06, "loss": 0.791, "step": 11422 }, { "epoch": 0.8252569219932451, "grad_norm": 7.707127272273089, "learning_rate": 3.3137068120688243e-06, "loss": 0.7622, "step": 11423 }, { "epoch": 0.8253291671934546, "grad_norm": 7.540765220201539, "learning_rate": 3.3134302314552785e-06, "loss": 0.7377, "step": 11424 }, { "epoch": 0.825401412393664, "grad_norm": 6.074021236238106, "learning_rate": 3.3131536397065046e-06, "loss": 0.7955, "step": 11425 }, { "epoch": 0.8254736575938736, "grad_norm": 5.137076872318951, "learning_rate": 3.312877036826288e-06, "loss": 0.7216, "step": 11426 }, { "epoch": 0.8255459027940831, "grad_norm": 5.88234008675461, "learning_rate": 3.3126004228184163e-06, "loss": 0.8223, "step": 11427 }, { "epoch": 0.8256181479942927, "grad_norm": 7.383456538128971, "learning_rate": 3.312323797686675e-06, "loss": 0.8327, "step": 11428 }, { "epoch": 0.8256903931945021, "grad_norm": 5.74188887669506, "learning_rate": 3.3120471614348524e-06, "loss": 0.7838, "step": 11429 }, { "epoch": 0.8257626383947116, "grad_norm": 6.737138150323397, "learning_rate": 3.3117705140667345e-06, "loss": 0.7729, "step": 11430 }, { "epoch": 0.8258348835949212, "grad_norm": 5.247179636197096, "learning_rate": 3.311493855586108e-06, "loss": 0.8495, "step": 11431 }, { "epoch": 0.8259071287951306, "grad_norm": 7.578462258925681, "learning_rate": 3.3112171859967614e-06, "loss": 0.7664, "step": 11432 }, { "epoch": 0.8259793739953402, "grad_norm": 6.099338257963613, "learning_rate": 3.310940505302481e-06, "loss": 0.8137, "step": 11433 }, { "epoch": 0.8260516191955497, "grad_norm": 7.640487950505921, "learning_rate": 3.3106638135070558e-06, "loss": 0.8539, "step": 11434 }, { "epoch": 0.8261238643957592, "grad_norm": 6.15308186296538, "learning_rate": 3.310387110614271e-06, "loss": 0.7892, "step": 11435 }, { "epoch": 0.8261961095959687, "grad_norm": 5.439555338750144, "learning_rate": 3.3101103966279164e-06, "loss": 0.7547, "step": 11436 }, { "epoch": 0.8262683547961782, "grad_norm": 6.492671650118308, "learning_rate": 3.309833671551779e-06, "loss": 0.8263, "step": 11437 }, { "epoch": 0.8263405999963878, "grad_norm": 5.660675172468663, "learning_rate": 3.3095569353896485e-06, "loss": 0.7068, "step": 11438 }, { "epoch": 0.8264128451965973, "grad_norm": 6.352949115440711, "learning_rate": 3.309280188145312e-06, "loss": 0.7906, "step": 11439 }, { "epoch": 0.8264850903968067, "grad_norm": 7.425784332258697, "learning_rate": 3.3090034298225576e-06, "loss": 0.7775, "step": 11440 }, { "epoch": 0.8265573355970163, "grad_norm": 6.026253800105108, "learning_rate": 3.308726660425174e-06, "loss": 0.7823, "step": 11441 }, { "epoch": 0.8266295807972258, "grad_norm": 7.687332523661619, "learning_rate": 3.308449879956951e-06, "loss": 0.8376, "step": 11442 }, { "epoch": 0.8267018259974352, "grad_norm": 6.375880760236189, "learning_rate": 3.308173088421677e-06, "loss": 0.8607, "step": 11443 }, { "epoch": 0.8267740711976448, "grad_norm": 7.605065976269895, "learning_rate": 3.30789628582314e-06, "loss": 0.9042, "step": 11444 }, { "epoch": 0.8268463163978543, "grad_norm": 6.029835589493973, "learning_rate": 3.307619472165131e-06, "loss": 0.9088, "step": 11445 }, { "epoch": 0.8269185615980639, "grad_norm": 5.5759396158941374, "learning_rate": 3.307342647451438e-06, "loss": 0.7395, "step": 11446 }, { "epoch": 0.8269908067982733, "grad_norm": 6.549695411181198, "learning_rate": 3.3070658116858517e-06, "loss": 0.7701, "step": 11447 }, { "epoch": 0.8270630519984828, "grad_norm": 6.349268725482382, "learning_rate": 3.3067889648721606e-06, "loss": 0.7561, "step": 11448 }, { "epoch": 0.8271352971986924, "grad_norm": 7.109709276997053, "learning_rate": 3.306512107014155e-06, "loss": 0.8537, "step": 11449 }, { "epoch": 0.8272075423989018, "grad_norm": 9.007088307022853, "learning_rate": 3.306235238115625e-06, "loss": 0.7677, "step": 11450 }, { "epoch": 0.8272797875991114, "grad_norm": 5.125693669709801, "learning_rate": 3.30595835818036e-06, "loss": 0.7584, "step": 11451 }, { "epoch": 0.8273520327993209, "grad_norm": 6.122705769505947, "learning_rate": 3.305681467212152e-06, "loss": 0.8021, "step": 11452 }, { "epoch": 0.8274242779995304, "grad_norm": 6.549769669770898, "learning_rate": 3.30540456521479e-06, "loss": 0.8702, "step": 11453 }, { "epoch": 0.8274965231997399, "grad_norm": 6.499217353065873, "learning_rate": 3.3051276521920646e-06, "loss": 0.8069, "step": 11454 }, { "epoch": 0.8275687683999494, "grad_norm": 6.474568427000982, "learning_rate": 3.304850728147766e-06, "loss": 0.8687, "step": 11455 }, { "epoch": 0.827641013600159, "grad_norm": 6.480573528010706, "learning_rate": 3.304573793085687e-06, "loss": 0.7499, "step": 11456 }, { "epoch": 0.8277132588003685, "grad_norm": 5.952117914836629, "learning_rate": 3.3042968470096163e-06, "loss": 0.8592, "step": 11457 }, { "epoch": 0.8277855040005779, "grad_norm": 4.9891218106035495, "learning_rate": 3.3040198899233477e-06, "loss": 0.8395, "step": 11458 }, { "epoch": 0.8278577492007875, "grad_norm": 5.744775139746551, "learning_rate": 3.3037429218306704e-06, "loss": 0.7696, "step": 11459 }, { "epoch": 0.827929994400997, "grad_norm": 5.909648583512833, "learning_rate": 3.3034659427353768e-06, "loss": 0.7802, "step": 11460 }, { "epoch": 0.8280022396012064, "grad_norm": 7.223285257730777, "learning_rate": 3.303188952641258e-06, "loss": 0.7946, "step": 11461 }, { "epoch": 0.828074484801416, "grad_norm": 8.09807597727333, "learning_rate": 3.302911951552106e-06, "loss": 0.8491, "step": 11462 }, { "epoch": 0.8281467300016255, "grad_norm": 5.3708217260660955, "learning_rate": 3.3026349394717132e-06, "loss": 0.8884, "step": 11463 }, { "epoch": 0.8282189752018351, "grad_norm": 5.901358056964846, "learning_rate": 3.302357916403871e-06, "loss": 0.8783, "step": 11464 }, { "epoch": 0.8282912204020445, "grad_norm": 6.11597260046261, "learning_rate": 3.3020808823523716e-06, "loss": 0.8766, "step": 11465 }, { "epoch": 0.828363465602254, "grad_norm": 7.149664234733936, "learning_rate": 3.3018038373210083e-06, "loss": 0.8096, "step": 11466 }, { "epoch": 0.8284357108024636, "grad_norm": 6.768670434035038, "learning_rate": 3.3015267813135726e-06, "loss": 0.8222, "step": 11467 }, { "epoch": 0.828507956002673, "grad_norm": 6.979863768726469, "learning_rate": 3.3012497143338584e-06, "loss": 0.8518, "step": 11468 }, { "epoch": 0.8285802012028826, "grad_norm": 7.8237007110549746, "learning_rate": 3.3009726363856563e-06, "loss": 0.888, "step": 11469 }, { "epoch": 0.8286524464030921, "grad_norm": 6.6958500436013715, "learning_rate": 3.300695547472762e-06, "loss": 0.7668, "step": 11470 }, { "epoch": 0.8287246916033016, "grad_norm": 7.130756395192457, "learning_rate": 3.3004184475989665e-06, "loss": 0.8211, "step": 11471 }, { "epoch": 0.8287969368035111, "grad_norm": 7.15298294052976, "learning_rate": 3.3001413367680645e-06, "loss": 0.8437, "step": 11472 }, { "epoch": 0.8288691820037206, "grad_norm": 6.516021643259015, "learning_rate": 3.299864214983849e-06, "loss": 0.7933, "step": 11473 }, { "epoch": 0.8289414272039302, "grad_norm": 8.555148121874163, "learning_rate": 3.2995870822501132e-06, "loss": 0.7827, "step": 11474 }, { "epoch": 0.8290136724041397, "grad_norm": 6.935556500362935, "learning_rate": 3.2993099385706505e-06, "loss": 0.8539, "step": 11475 }, { "epoch": 0.8290859176043491, "grad_norm": 6.853332499690815, "learning_rate": 3.2990327839492564e-06, "loss": 0.8411, "step": 11476 }, { "epoch": 0.8291581628045587, "grad_norm": 4.9197636995712655, "learning_rate": 3.2987556183897235e-06, "loss": 0.7658, "step": 11477 }, { "epoch": 0.8292304080047682, "grad_norm": 5.529544200910616, "learning_rate": 3.298478441895846e-06, "loss": 0.7458, "step": 11478 }, { "epoch": 0.8293026532049776, "grad_norm": 7.016191013672022, "learning_rate": 3.298201254471419e-06, "loss": 0.8148, "step": 11479 }, { "epoch": 0.8293748984051872, "grad_norm": 7.806622056363956, "learning_rate": 3.2979240561202366e-06, "loss": 0.8909, "step": 11480 }, { "epoch": 0.8294471436053967, "grad_norm": 5.16775431516101, "learning_rate": 3.297646846846093e-06, "loss": 0.7619, "step": 11481 }, { "epoch": 0.8295193888056063, "grad_norm": 5.88619772080402, "learning_rate": 3.297369626652784e-06, "loss": 0.8419, "step": 11482 }, { "epoch": 0.8295916340058157, "grad_norm": 4.475972561135959, "learning_rate": 3.2970923955441033e-06, "loss": 0.8222, "step": 11483 }, { "epoch": 0.8296638792060252, "grad_norm": 7.105847500127508, "learning_rate": 3.296815153523847e-06, "loss": 0.8349, "step": 11484 }, { "epoch": 0.8297361244062348, "grad_norm": 6.879549775140999, "learning_rate": 3.2965379005958097e-06, "loss": 0.8247, "step": 11485 }, { "epoch": 0.8298083696064442, "grad_norm": 6.795317653391786, "learning_rate": 3.296260636763788e-06, "loss": 0.8702, "step": 11486 }, { "epoch": 0.8298806148066538, "grad_norm": 6.424216481685876, "learning_rate": 3.295983362031575e-06, "loss": 0.84, "step": 11487 }, { "epoch": 0.8299528600068633, "grad_norm": 7.571377674353282, "learning_rate": 3.295706076402968e-06, "loss": 0.8233, "step": 11488 }, { "epoch": 0.8300251052070728, "grad_norm": 6.158340021868813, "learning_rate": 3.295428779881763e-06, "loss": 0.8013, "step": 11489 }, { "epoch": 0.8300973504072823, "grad_norm": 6.029759988895217, "learning_rate": 3.2951514724717566e-06, "loss": 0.8013, "step": 11490 }, { "epoch": 0.8301695956074918, "grad_norm": 7.916590024342892, "learning_rate": 3.2948741541767432e-06, "loss": 0.8656, "step": 11491 }, { "epoch": 0.8302418408077014, "grad_norm": 6.198793219977031, "learning_rate": 3.2945968250005197e-06, "loss": 0.9329, "step": 11492 }, { "epoch": 0.8303140860079109, "grad_norm": 6.210048395972633, "learning_rate": 3.294319484946882e-06, "loss": 0.773, "step": 11493 }, { "epoch": 0.8303863312081203, "grad_norm": 5.146362812843263, "learning_rate": 3.2940421340196278e-06, "loss": 0.7592, "step": 11494 }, { "epoch": 0.8304585764083299, "grad_norm": 8.29839579694379, "learning_rate": 3.2937647722225535e-06, "loss": 0.8261, "step": 11495 }, { "epoch": 0.8305308216085394, "grad_norm": 8.52888159661409, "learning_rate": 3.2934873995594555e-06, "loss": 0.8148, "step": 11496 }, { "epoch": 0.8306030668087488, "grad_norm": 6.586334831949761, "learning_rate": 3.293210016034131e-06, "loss": 0.83, "step": 11497 }, { "epoch": 0.8306753120089584, "grad_norm": 5.596934855709216, "learning_rate": 3.2929326216503776e-06, "loss": 0.8686, "step": 11498 }, { "epoch": 0.8307475572091679, "grad_norm": 6.030551375806381, "learning_rate": 3.292655216411992e-06, "loss": 0.7803, "step": 11499 }, { "epoch": 0.8308198024093775, "grad_norm": 8.135243223900053, "learning_rate": 3.292377800322773e-06, "loss": 0.8807, "step": 11500 }, { "epoch": 0.8308920476095869, "grad_norm": 6.144364865833749, "learning_rate": 3.2921003733865166e-06, "loss": 0.7987, "step": 11501 }, { "epoch": 0.8309642928097964, "grad_norm": 6.586326723374057, "learning_rate": 3.29182293560702e-06, "loss": 0.8224, "step": 11502 }, { "epoch": 0.831036538010006, "grad_norm": 6.810062357299141, "learning_rate": 3.291545486988083e-06, "loss": 0.7869, "step": 11503 }, { "epoch": 0.8311087832102154, "grad_norm": 7.298946503171012, "learning_rate": 3.2912680275335036e-06, "loss": 0.8519, "step": 11504 }, { "epoch": 0.831181028410425, "grad_norm": 6.961592620950206, "learning_rate": 3.2909905572470785e-06, "loss": 0.8845, "step": 11505 }, { "epoch": 0.8312532736106345, "grad_norm": 5.07895482252337, "learning_rate": 3.2907130761326073e-06, "loss": 0.7546, "step": 11506 }, { "epoch": 0.831325518810844, "grad_norm": 5.6028993253689405, "learning_rate": 3.2904355841938873e-06, "loss": 0.8311, "step": 11507 }, { "epoch": 0.8313977640110535, "grad_norm": 5.113984714549363, "learning_rate": 3.290158081434718e-06, "loss": 0.7281, "step": 11508 }, { "epoch": 0.831470009211263, "grad_norm": 6.329938011890401, "learning_rate": 3.2898805678588986e-06, "loss": 0.815, "step": 11509 }, { "epoch": 0.8315422544114726, "grad_norm": 6.585924467569401, "learning_rate": 3.2896030434702273e-06, "loss": 0.822, "step": 11510 }, { "epoch": 0.8316144996116821, "grad_norm": 9.232537772635677, "learning_rate": 3.2893255082725034e-06, "loss": 0.7827, "step": 11511 }, { "epoch": 0.8316867448118915, "grad_norm": 5.81509833556955, "learning_rate": 3.2890479622695258e-06, "loss": 0.8371, "step": 11512 }, { "epoch": 0.8317589900121011, "grad_norm": 5.6302540295275945, "learning_rate": 3.2887704054650946e-06, "loss": 0.8429, "step": 11513 }, { "epoch": 0.8318312352123106, "grad_norm": 8.26332634329901, "learning_rate": 3.2884928378630094e-06, "loss": 0.8338, "step": 11514 }, { "epoch": 0.83190348041252, "grad_norm": 6.497907741973054, "learning_rate": 3.288215259467069e-06, "loss": 0.8494, "step": 11515 }, { "epoch": 0.8319757256127296, "grad_norm": 5.4124542093432355, "learning_rate": 3.287937670281074e-06, "loss": 0.7068, "step": 11516 }, { "epoch": 0.8320479708129391, "grad_norm": 6.32005839550226, "learning_rate": 3.2876600703088236e-06, "loss": 0.868, "step": 11517 }, { "epoch": 0.8321202160131487, "grad_norm": 6.242831276936364, "learning_rate": 3.2873824595541193e-06, "loss": 0.8102, "step": 11518 }, { "epoch": 0.8321924612133581, "grad_norm": 7.287472926722599, "learning_rate": 3.2871048380207593e-06, "loss": 0.7913, "step": 11519 }, { "epoch": 0.8322647064135676, "grad_norm": 6.717854284861123, "learning_rate": 3.2868272057125465e-06, "loss": 0.8224, "step": 11520 }, { "epoch": 0.8323369516137772, "grad_norm": 6.613976862781291, "learning_rate": 3.2865495626332793e-06, "loss": 0.8045, "step": 11521 }, { "epoch": 0.8324091968139866, "grad_norm": 5.878497117719554, "learning_rate": 3.2862719087867593e-06, "loss": 0.7492, "step": 11522 }, { "epoch": 0.8324814420141962, "grad_norm": 5.821255560133632, "learning_rate": 3.285994244176787e-06, "loss": 0.8902, "step": 11523 }, { "epoch": 0.8325536872144057, "grad_norm": 6.599289144901713, "learning_rate": 3.2857165688071653e-06, "loss": 0.7644, "step": 11524 }, { "epoch": 0.8326259324146152, "grad_norm": 6.528778922863969, "learning_rate": 3.285438882681693e-06, "loss": 0.7089, "step": 11525 }, { "epoch": 0.8326981776148247, "grad_norm": 7.5582800827197385, "learning_rate": 3.2851611858041714e-06, "loss": 0.8247, "step": 11526 }, { "epoch": 0.8327704228150342, "grad_norm": 5.955029761098294, "learning_rate": 3.284883478178404e-06, "loss": 0.8233, "step": 11527 }, { "epoch": 0.8328426680152438, "grad_norm": 6.221254697387467, "learning_rate": 3.2846057598081902e-06, "loss": 0.7908, "step": 11528 }, { "epoch": 0.8329149132154533, "grad_norm": 5.054161645813933, "learning_rate": 3.284328030697334e-06, "loss": 0.7594, "step": 11529 }, { "epoch": 0.8329871584156627, "grad_norm": 6.292210974316778, "learning_rate": 3.2840502908496354e-06, "loss": 0.8054, "step": 11530 }, { "epoch": 0.8330594036158723, "grad_norm": 7.106951960884245, "learning_rate": 3.283772540268897e-06, "loss": 0.7805, "step": 11531 }, { "epoch": 0.8331316488160818, "grad_norm": 7.9073989696724025, "learning_rate": 3.2834947789589212e-06, "loss": 0.9024, "step": 11532 }, { "epoch": 0.8332038940162912, "grad_norm": 5.370632082431616, "learning_rate": 3.28321700692351e-06, "loss": 0.8062, "step": 11533 }, { "epoch": 0.8332761392165008, "grad_norm": 7.901639174364121, "learning_rate": 3.2829392241664665e-06, "loss": 0.8643, "step": 11534 }, { "epoch": 0.8333483844167103, "grad_norm": 6.475756697459186, "learning_rate": 3.2826614306915926e-06, "loss": 0.7635, "step": 11535 }, { "epoch": 0.8334206296169199, "grad_norm": 4.944892082001054, "learning_rate": 3.2823836265026914e-06, "loss": 0.7205, "step": 11536 }, { "epoch": 0.8334928748171293, "grad_norm": 5.111875709527112, "learning_rate": 3.282105811603566e-06, "loss": 0.7717, "step": 11537 }, { "epoch": 0.8335651200173388, "grad_norm": 8.206613376587992, "learning_rate": 3.2818279859980194e-06, "loss": 0.7974, "step": 11538 }, { "epoch": 0.8336373652175484, "grad_norm": 6.48241245950657, "learning_rate": 3.2815501496898545e-06, "loss": 0.8463, "step": 11539 }, { "epoch": 0.8337096104177578, "grad_norm": 6.535339243459333, "learning_rate": 3.2812723026828745e-06, "loss": 0.8187, "step": 11540 }, { "epoch": 0.8337818556179674, "grad_norm": 6.517750199069515, "learning_rate": 3.280994444980884e-06, "loss": 0.8316, "step": 11541 }, { "epoch": 0.8338541008181769, "grad_norm": 6.15904273187375, "learning_rate": 3.2807165765876856e-06, "loss": 0.8753, "step": 11542 }, { "epoch": 0.8339263460183864, "grad_norm": 7.8353664384074015, "learning_rate": 3.280438697507084e-06, "loss": 0.8784, "step": 11543 }, { "epoch": 0.8339985912185959, "grad_norm": 7.129195467164104, "learning_rate": 3.280160807742882e-06, "loss": 0.8384, "step": 11544 }, { "epoch": 0.8340708364188054, "grad_norm": 5.695009409266227, "learning_rate": 3.279882907298884e-06, "loss": 0.8428, "step": 11545 }, { "epoch": 0.834143081619015, "grad_norm": 7.430294104360534, "learning_rate": 3.279604996178895e-06, "loss": 0.8645, "step": 11546 }, { "epoch": 0.8342153268192245, "grad_norm": 5.1828638524480555, "learning_rate": 3.2793270743867195e-06, "loss": 0.7696, "step": 11547 }, { "epoch": 0.8342875720194339, "grad_norm": 6.195264164277868, "learning_rate": 3.2790491419261604e-06, "loss": 0.813, "step": 11548 }, { "epoch": 0.8343598172196435, "grad_norm": 6.584222500143543, "learning_rate": 3.2787711988010244e-06, "loss": 0.8239, "step": 11549 }, { "epoch": 0.834432062419853, "grad_norm": 7.236429602442995, "learning_rate": 3.278493245015115e-06, "loss": 0.8006, "step": 11550 }, { "epoch": 0.8345043076200624, "grad_norm": 5.40031239347721, "learning_rate": 3.2782152805722374e-06, "loss": 0.7628, "step": 11551 }, { "epoch": 0.834576552820272, "grad_norm": 7.918677653499308, "learning_rate": 3.277937305476197e-06, "loss": 0.8147, "step": 11552 }, { "epoch": 0.8346487980204815, "grad_norm": 7.380883924691244, "learning_rate": 3.2776593197307983e-06, "loss": 0.9229, "step": 11553 }, { "epoch": 0.8347210432206911, "grad_norm": 8.21206172429749, "learning_rate": 3.2773813233398477e-06, "loss": 0.8647, "step": 11554 }, { "epoch": 0.8347932884209005, "grad_norm": 7.1789436162511056, "learning_rate": 3.2771033163071508e-06, "loss": 0.8669, "step": 11555 }, { "epoch": 0.83486553362111, "grad_norm": 7.991396330662164, "learning_rate": 3.2768252986365124e-06, "loss": 0.8435, "step": 11556 }, { "epoch": 0.8349377788213196, "grad_norm": 7.091321621968038, "learning_rate": 3.2765472703317397e-06, "loss": 0.8232, "step": 11557 }, { "epoch": 0.835010024021529, "grad_norm": 6.3470678813857, "learning_rate": 3.276269231396637e-06, "loss": 0.7398, "step": 11558 }, { "epoch": 0.8350822692217386, "grad_norm": 7.120176857884138, "learning_rate": 3.275991181835011e-06, "loss": 0.8881, "step": 11559 }, { "epoch": 0.8351545144219481, "grad_norm": 5.914544956377931, "learning_rate": 3.275713121650669e-06, "loss": 0.7927, "step": 11560 }, { "epoch": 0.8352267596221576, "grad_norm": 5.146540707708772, "learning_rate": 3.2754350508474165e-06, "loss": 0.7465, "step": 11561 }, { "epoch": 0.8352990048223671, "grad_norm": 5.723672717960402, "learning_rate": 3.2751569694290595e-06, "loss": 0.8673, "step": 11562 }, { "epoch": 0.8353712500225766, "grad_norm": 9.141742970493878, "learning_rate": 3.2748788773994066e-06, "loss": 0.8792, "step": 11563 }, { "epoch": 0.8354434952227862, "grad_norm": 5.7658185228573915, "learning_rate": 3.2746007747622623e-06, "loss": 0.8922, "step": 11564 }, { "epoch": 0.8355157404229957, "grad_norm": 5.712996126162596, "learning_rate": 3.2743226615214353e-06, "loss": 0.8799, "step": 11565 }, { "epoch": 0.8355879856232051, "grad_norm": 7.2665106777048925, "learning_rate": 3.274044537680733e-06, "loss": 0.7749, "step": 11566 }, { "epoch": 0.8356602308234147, "grad_norm": 5.715361711831572, "learning_rate": 3.2737664032439613e-06, "loss": 0.7872, "step": 11567 }, { "epoch": 0.8357324760236242, "grad_norm": 6.252763671196284, "learning_rate": 3.2734882582149287e-06, "loss": 0.7866, "step": 11568 }, { "epoch": 0.8358047212238336, "grad_norm": 6.431395367609716, "learning_rate": 3.273210102597442e-06, "loss": 0.7927, "step": 11569 }, { "epoch": 0.8358769664240432, "grad_norm": 6.046671546056448, "learning_rate": 3.2729319363953093e-06, "loss": 0.7249, "step": 11570 }, { "epoch": 0.8359492116242527, "grad_norm": 6.2134619231008585, "learning_rate": 3.272653759612339e-06, "loss": 0.7491, "step": 11571 }, { "epoch": 0.8360214568244623, "grad_norm": 5.716322086429418, "learning_rate": 3.272375572252339e-06, "loss": 0.7713, "step": 11572 }, { "epoch": 0.8360937020246717, "grad_norm": 5.909131190022645, "learning_rate": 3.272097374319116e-06, "loss": 0.7934, "step": 11573 }, { "epoch": 0.8361659472248812, "grad_norm": 6.503228779468355, "learning_rate": 3.2718191658164797e-06, "loss": 0.8186, "step": 11574 }, { "epoch": 0.8362381924250908, "grad_norm": 6.937551687237072, "learning_rate": 3.271540946748239e-06, "loss": 0.8134, "step": 11575 }, { "epoch": 0.8363104376253002, "grad_norm": 5.875697642359061, "learning_rate": 3.271262717118201e-06, "loss": 0.8263, "step": 11576 }, { "epoch": 0.8363826828255098, "grad_norm": 6.0410591960116395, "learning_rate": 3.2709844769301757e-06, "loss": 0.8234, "step": 11577 }, { "epoch": 0.8364549280257193, "grad_norm": 7.055796454748383, "learning_rate": 3.2707062261879712e-06, "loss": 0.8886, "step": 11578 }, { "epoch": 0.8365271732259288, "grad_norm": 5.071199454153681, "learning_rate": 3.2704279648953975e-06, "loss": 0.8318, "step": 11579 }, { "epoch": 0.8365994184261383, "grad_norm": 5.3046520833109865, "learning_rate": 3.2701496930562625e-06, "loss": 0.8593, "step": 11580 }, { "epoch": 0.8366716636263478, "grad_norm": 7.039719020698259, "learning_rate": 3.269871410674377e-06, "loss": 0.811, "step": 11581 }, { "epoch": 0.8367439088265574, "grad_norm": 6.321662824599231, "learning_rate": 3.269593117753549e-06, "loss": 0.9437, "step": 11582 }, { "epoch": 0.8368161540267669, "grad_norm": 6.900843925282147, "learning_rate": 3.2693148142975884e-06, "loss": 0.8931, "step": 11583 }, { "epoch": 0.8368883992269763, "grad_norm": 6.21200548502624, "learning_rate": 3.2690365003103065e-06, "loss": 0.7997, "step": 11584 }, { "epoch": 0.8369606444271859, "grad_norm": 6.077063303502391, "learning_rate": 3.2687581757955113e-06, "loss": 0.7806, "step": 11585 }, { "epoch": 0.8370328896273954, "grad_norm": 5.876189740425212, "learning_rate": 3.268479840757014e-06, "loss": 0.775, "step": 11586 }, { "epoch": 0.8371051348276048, "grad_norm": 6.730631130604303, "learning_rate": 3.2682014951986236e-06, "loss": 0.8075, "step": 11587 }, { "epoch": 0.8371773800278144, "grad_norm": 5.596382075956563, "learning_rate": 3.2679231391241516e-06, "loss": 0.7684, "step": 11588 }, { "epoch": 0.8372496252280239, "grad_norm": 6.287513456842117, "learning_rate": 3.2676447725374077e-06, "loss": 0.8405, "step": 11589 }, { "epoch": 0.8373218704282335, "grad_norm": 6.444228128478615, "learning_rate": 3.267366395442204e-06, "loss": 0.7527, "step": 11590 }, { "epoch": 0.8373941156284429, "grad_norm": 5.459701987716705, "learning_rate": 3.2670880078423495e-06, "loss": 0.7485, "step": 11591 }, { "epoch": 0.8374663608286524, "grad_norm": 5.583470964396064, "learning_rate": 3.266809609741655e-06, "loss": 0.7833, "step": 11592 }, { "epoch": 0.837538606028862, "grad_norm": 7.664781449219974, "learning_rate": 3.2665312011439337e-06, "loss": 0.8433, "step": 11593 }, { "epoch": 0.8376108512290714, "grad_norm": 5.199275611893231, "learning_rate": 3.266252782052994e-06, "loss": 0.8576, "step": 11594 }, { "epoch": 0.837683096429281, "grad_norm": 5.399132602397941, "learning_rate": 3.2659743524726506e-06, "loss": 0.8157, "step": 11595 }, { "epoch": 0.8377553416294905, "grad_norm": 6.238487932941212, "learning_rate": 3.2656959124067117e-06, "loss": 0.8295, "step": 11596 }, { "epoch": 0.8378275868297, "grad_norm": 7.805768584409826, "learning_rate": 3.26541746185899e-06, "loss": 0.7925, "step": 11597 }, { "epoch": 0.8378998320299095, "grad_norm": 5.6044658973344434, "learning_rate": 3.265139000833298e-06, "loss": 0.8061, "step": 11598 }, { "epoch": 0.837972077230119, "grad_norm": 6.042529691245222, "learning_rate": 3.264860529333448e-06, "loss": 0.8264, "step": 11599 }, { "epoch": 0.8380443224303286, "grad_norm": 7.164284805625964, "learning_rate": 3.2645820473632508e-06, "loss": 0.8212, "step": 11600 }, { "epoch": 0.8381165676305381, "grad_norm": 7.441129167659631, "learning_rate": 3.2643035549265183e-06, "loss": 0.7826, "step": 11601 }, { "epoch": 0.8381888128307475, "grad_norm": 6.804679651617173, "learning_rate": 3.264025052027064e-06, "loss": 0.8012, "step": 11602 }, { "epoch": 0.8382610580309571, "grad_norm": 6.456840720022465, "learning_rate": 3.2637465386686993e-06, "loss": 0.8893, "step": 11603 }, { "epoch": 0.8383333032311666, "grad_norm": 5.769761005390619, "learning_rate": 3.263468014855239e-06, "loss": 0.8209, "step": 11604 }, { "epoch": 0.838405548431376, "grad_norm": 6.723303666712089, "learning_rate": 3.263189480590493e-06, "loss": 0.813, "step": 11605 }, { "epoch": 0.8384777936315856, "grad_norm": 8.857271782424561, "learning_rate": 3.2629109358782763e-06, "loss": 0.873, "step": 11606 }, { "epoch": 0.8385500388317951, "grad_norm": 7.016658579382989, "learning_rate": 3.2626323807224014e-06, "loss": 0.7405, "step": 11607 }, { "epoch": 0.8386222840320047, "grad_norm": 7.626984134860135, "learning_rate": 3.2623538151266803e-06, "loss": 0.9326, "step": 11608 }, { "epoch": 0.8386945292322141, "grad_norm": 6.8819852541848965, "learning_rate": 3.2620752390949284e-06, "loss": 0.865, "step": 11609 }, { "epoch": 0.8387667744324236, "grad_norm": 6.968841962677963, "learning_rate": 3.261796652630958e-06, "loss": 0.8184, "step": 11610 }, { "epoch": 0.8388390196326332, "grad_norm": 7.771827231928745, "learning_rate": 3.2615180557385826e-06, "loss": 0.8434, "step": 11611 }, { "epoch": 0.8389112648328426, "grad_norm": 6.995258360229963, "learning_rate": 3.2612394484216163e-06, "loss": 0.8235, "step": 11612 }, { "epoch": 0.8389835100330522, "grad_norm": 7.939186457698763, "learning_rate": 3.2609608306838734e-06, "loss": 0.7352, "step": 11613 }, { "epoch": 0.8390557552332617, "grad_norm": 6.019598740800266, "learning_rate": 3.2606822025291673e-06, "loss": 0.7951, "step": 11614 }, { "epoch": 0.8391280004334712, "grad_norm": 6.926448232653508, "learning_rate": 3.260403563961313e-06, "loss": 0.7761, "step": 11615 }, { "epoch": 0.8392002456336807, "grad_norm": 7.005468956921905, "learning_rate": 3.2601249149841243e-06, "loss": 0.7521, "step": 11616 }, { "epoch": 0.8392724908338902, "grad_norm": 6.9267792216687685, "learning_rate": 3.259846255601415e-06, "loss": 0.8, "step": 11617 }, { "epoch": 0.8393447360340998, "grad_norm": 6.7069058370445624, "learning_rate": 3.2595675858170007e-06, "loss": 0.7933, "step": 11618 }, { "epoch": 0.8394169812343093, "grad_norm": 6.262649552621104, "learning_rate": 3.259288905634696e-06, "loss": 0.8941, "step": 11619 }, { "epoch": 0.8394892264345187, "grad_norm": 7.189888996652152, "learning_rate": 3.2590102150583156e-06, "loss": 0.8544, "step": 11620 }, { "epoch": 0.8395614716347283, "grad_norm": 6.952058105910723, "learning_rate": 3.2587315140916744e-06, "loss": 0.8114, "step": 11621 }, { "epoch": 0.8396337168349378, "grad_norm": 7.438754448765159, "learning_rate": 3.2584528027385885e-06, "loss": 0.8239, "step": 11622 }, { "epoch": 0.8397059620351472, "grad_norm": 5.296428830420823, "learning_rate": 3.2581740810028726e-06, "loss": 0.806, "step": 11623 }, { "epoch": 0.8397782072353568, "grad_norm": 5.390149966331725, "learning_rate": 3.2578953488883426e-06, "loss": 0.7966, "step": 11624 }, { "epoch": 0.8398504524355663, "grad_norm": 6.45533755151911, "learning_rate": 3.2576166063988126e-06, "loss": 0.7744, "step": 11625 }, { "epoch": 0.8399226976357759, "grad_norm": 5.062044099601326, "learning_rate": 3.2573378535381002e-06, "loss": 0.8584, "step": 11626 }, { "epoch": 0.8399949428359853, "grad_norm": 6.581487308292023, "learning_rate": 3.2570590903100206e-06, "loss": 0.8672, "step": 11627 }, { "epoch": 0.8400671880361948, "grad_norm": 7.271823265504761, "learning_rate": 3.25678031671839e-06, "loss": 0.8339, "step": 11628 }, { "epoch": 0.8401394332364044, "grad_norm": 5.96657374787393, "learning_rate": 3.256501532767024e-06, "loss": 0.8826, "step": 11629 }, { "epoch": 0.8402116784366138, "grad_norm": 5.489620297890798, "learning_rate": 3.25622273845974e-06, "loss": 0.8067, "step": 11630 }, { "epoch": 0.8402839236368234, "grad_norm": 5.804581693811943, "learning_rate": 3.255943933800353e-06, "loss": 0.7998, "step": 11631 }, { "epoch": 0.8403561688370329, "grad_norm": 5.183361010871089, "learning_rate": 3.2556651187926813e-06, "loss": 0.8616, "step": 11632 }, { "epoch": 0.8404284140372424, "grad_norm": 5.3465465344435845, "learning_rate": 3.2553862934405405e-06, "loss": 0.7519, "step": 11633 }, { "epoch": 0.8405006592374519, "grad_norm": 6.81875882799397, "learning_rate": 3.2551074577477482e-06, "loss": 0.7523, "step": 11634 }, { "epoch": 0.8405729044376614, "grad_norm": 7.884377361090114, "learning_rate": 3.2548286117181203e-06, "loss": 0.8021, "step": 11635 }, { "epoch": 0.840645149637871, "grad_norm": 5.702997577236624, "learning_rate": 3.2545497553554757e-06, "loss": 0.8478, "step": 11636 }, { "epoch": 0.8407173948380804, "grad_norm": 6.412012997105419, "learning_rate": 3.2542708886636306e-06, "loss": 0.7689, "step": 11637 }, { "epoch": 0.8407896400382899, "grad_norm": 6.650041290384471, "learning_rate": 3.2539920116464026e-06, "loss": 0.8308, "step": 11638 }, { "epoch": 0.8408618852384995, "grad_norm": 6.688487327634971, "learning_rate": 3.2537131243076094e-06, "loss": 0.8993, "step": 11639 }, { "epoch": 0.840934130438709, "grad_norm": 4.528003871606041, "learning_rate": 3.2534342266510684e-06, "loss": 0.7351, "step": 11640 }, { "epoch": 0.8410063756389184, "grad_norm": 6.4249292985759165, "learning_rate": 3.2531553186805985e-06, "loss": 0.8561, "step": 11641 }, { "epoch": 0.841078620839128, "grad_norm": 6.715330025295318, "learning_rate": 3.252876400400016e-06, "loss": 0.816, "step": 11642 }, { "epoch": 0.8411508660393375, "grad_norm": 5.852914081119489, "learning_rate": 3.2525974718131413e-06, "loss": 0.7914, "step": 11643 }, { "epoch": 0.8412231112395471, "grad_norm": 6.600797385395713, "learning_rate": 3.2523185329237916e-06, "loss": 0.7818, "step": 11644 }, { "epoch": 0.8412953564397565, "grad_norm": 6.697235436648206, "learning_rate": 3.252039583735784e-06, "loss": 0.8399, "step": 11645 }, { "epoch": 0.841367601639966, "grad_norm": 6.058890930010031, "learning_rate": 3.25176062425294e-06, "loss": 0.7463, "step": 11646 }, { "epoch": 0.8414398468401756, "grad_norm": 5.627382918113897, "learning_rate": 3.251481654479076e-06, "loss": 0.847, "step": 11647 }, { "epoch": 0.841512092040385, "grad_norm": 6.983956889837689, "learning_rate": 3.251202674418012e-06, "loss": 0.911, "step": 11648 }, { "epoch": 0.8415843372405946, "grad_norm": 6.215149110093286, "learning_rate": 3.2509236840735657e-06, "loss": 0.8401, "step": 11649 }, { "epoch": 0.8416565824408041, "grad_norm": 5.882873453784485, "learning_rate": 3.250644683449558e-06, "loss": 0.8055, "step": 11650 }, { "epoch": 0.8417288276410136, "grad_norm": 5.7126933058872424, "learning_rate": 3.250365672549807e-06, "loss": 0.7502, "step": 11651 }, { "epoch": 0.8418010728412231, "grad_norm": 6.625468687357026, "learning_rate": 3.2500866513781333e-06, "loss": 0.8519, "step": 11652 }, { "epoch": 0.8418733180414326, "grad_norm": 6.599826417432081, "learning_rate": 3.2498076199383554e-06, "loss": 0.8531, "step": 11653 }, { "epoch": 0.8419455632416422, "grad_norm": 7.123190666820516, "learning_rate": 3.249528578234293e-06, "loss": 0.8396, "step": 11654 }, { "epoch": 0.8420178084418516, "grad_norm": 5.7420217477140145, "learning_rate": 3.2492495262697665e-06, "loss": 0.7699, "step": 11655 }, { "epoch": 0.8420900536420611, "grad_norm": 6.906760080410323, "learning_rate": 3.2489704640485957e-06, "loss": 0.8548, "step": 11656 }, { "epoch": 0.8421622988422707, "grad_norm": 7.027839884862605, "learning_rate": 3.2486913915746014e-06, "loss": 0.8013, "step": 11657 }, { "epoch": 0.8422345440424802, "grad_norm": 4.717608383219391, "learning_rate": 3.248412308851603e-06, "loss": 0.8035, "step": 11658 }, { "epoch": 0.8423067892426896, "grad_norm": 7.1738618911708745, "learning_rate": 3.2481332158834204e-06, "loss": 0.802, "step": 11659 }, { "epoch": 0.8423790344428992, "grad_norm": 6.638348357445165, "learning_rate": 3.2478541126738755e-06, "loss": 0.905, "step": 11660 }, { "epoch": 0.8424512796431087, "grad_norm": 5.180966962899329, "learning_rate": 3.247574999226789e-06, "loss": 0.8547, "step": 11661 }, { "epoch": 0.8425235248433183, "grad_norm": 6.311227500242283, "learning_rate": 3.2472958755459803e-06, "loss": 0.8299, "step": 11662 }, { "epoch": 0.8425957700435277, "grad_norm": 6.41193535830349, "learning_rate": 3.2470167416352714e-06, "loss": 0.85, "step": 11663 }, { "epoch": 0.8426680152437372, "grad_norm": 6.208753976930067, "learning_rate": 3.2467375974984845e-06, "loss": 0.7287, "step": 11664 }, { "epoch": 0.8427402604439468, "grad_norm": 5.893147138585756, "learning_rate": 3.2464584431394384e-06, "loss": 0.7652, "step": 11665 }, { "epoch": 0.8428125056441562, "grad_norm": 6.327909489482295, "learning_rate": 3.2461792785619568e-06, "loss": 0.7552, "step": 11666 }, { "epoch": 0.8428847508443658, "grad_norm": 7.53716631351582, "learning_rate": 3.2459001037698595e-06, "loss": 0.7991, "step": 11667 }, { "epoch": 0.8429569960445753, "grad_norm": 6.789579136969959, "learning_rate": 3.2456209187669686e-06, "loss": 0.7851, "step": 11668 }, { "epoch": 0.8430292412447848, "grad_norm": 7.161913367409256, "learning_rate": 3.2453417235571066e-06, "loss": 0.8827, "step": 11669 }, { "epoch": 0.8431014864449943, "grad_norm": 9.084912177476507, "learning_rate": 3.245062518144096e-06, "loss": 0.9136, "step": 11670 }, { "epoch": 0.8431737316452038, "grad_norm": 6.648459016471499, "learning_rate": 3.244783302531757e-06, "loss": 0.742, "step": 11671 }, { "epoch": 0.8432459768454134, "grad_norm": 6.087519664066835, "learning_rate": 3.2445040767239133e-06, "loss": 0.8171, "step": 11672 }, { "epoch": 0.8433182220456228, "grad_norm": 7.196291381653345, "learning_rate": 3.244224840724387e-06, "loss": 0.8727, "step": 11673 }, { "epoch": 0.8433904672458323, "grad_norm": 5.937690169651937, "learning_rate": 3.2439455945370002e-06, "loss": 0.8295, "step": 11674 }, { "epoch": 0.8434627124460419, "grad_norm": 7.521967188639079, "learning_rate": 3.2436663381655763e-06, "loss": 0.9085, "step": 11675 }, { "epoch": 0.8435349576462514, "grad_norm": 5.979568343897886, "learning_rate": 3.243387071613937e-06, "loss": 0.8551, "step": 11676 }, { "epoch": 0.8436072028464608, "grad_norm": 5.689682761736301, "learning_rate": 3.243107794885906e-06, "loss": 0.7917, "step": 11677 }, { "epoch": 0.8436794480466704, "grad_norm": 6.79649586990378, "learning_rate": 3.2428285079853063e-06, "loss": 0.8582, "step": 11678 }, { "epoch": 0.8437516932468799, "grad_norm": 6.5043907008014115, "learning_rate": 3.2425492109159614e-06, "loss": 0.8355, "step": 11679 }, { "epoch": 0.8438239384470895, "grad_norm": 7.260173368459454, "learning_rate": 3.242269903681694e-06, "loss": 0.83, "step": 11680 }, { "epoch": 0.8438961836472989, "grad_norm": 6.281517212910727, "learning_rate": 3.241990586286329e-06, "loss": 0.8123, "step": 11681 }, { "epoch": 0.8439684288475084, "grad_norm": 5.994011751730456, "learning_rate": 3.2417112587336874e-06, "loss": 0.8966, "step": 11682 }, { "epoch": 0.844040674047718, "grad_norm": 6.546943463898434, "learning_rate": 3.241431921027595e-06, "loss": 0.8307, "step": 11683 }, { "epoch": 0.8441129192479274, "grad_norm": 6.661455151932717, "learning_rate": 3.2411525731718763e-06, "loss": 0.8247, "step": 11684 }, { "epoch": 0.844185164448137, "grad_norm": 6.240363359386834, "learning_rate": 3.2408732151703533e-06, "loss": 0.8511, "step": 11685 }, { "epoch": 0.8442574096483465, "grad_norm": 8.410666968062417, "learning_rate": 3.2405938470268515e-06, "loss": 0.8687, "step": 11686 }, { "epoch": 0.844329654848556, "grad_norm": 7.188279350155303, "learning_rate": 3.2403144687451947e-06, "loss": 0.7789, "step": 11687 }, { "epoch": 0.8444019000487655, "grad_norm": 7.535190422016216, "learning_rate": 3.240035080329208e-06, "loss": 0.7997, "step": 11688 }, { "epoch": 0.844474145248975, "grad_norm": 6.245286003020317, "learning_rate": 3.2397556817827164e-06, "loss": 0.7332, "step": 11689 }, { "epoch": 0.8445463904491846, "grad_norm": 6.404153024745385, "learning_rate": 3.2394762731095433e-06, "loss": 0.7529, "step": 11690 }, { "epoch": 0.844618635649394, "grad_norm": 6.294372212660582, "learning_rate": 3.2391968543135132e-06, "loss": 0.817, "step": 11691 }, { "epoch": 0.8446908808496035, "grad_norm": 6.158996588987065, "learning_rate": 3.238917425398453e-06, "loss": 0.9345, "step": 11692 }, { "epoch": 0.8447631260498131, "grad_norm": 6.173363460250297, "learning_rate": 3.238637986368187e-06, "loss": 0.8587, "step": 11693 }, { "epoch": 0.8448353712500226, "grad_norm": 6.983808319520148, "learning_rate": 3.2383585372265403e-06, "loss": 0.7946, "step": 11694 }, { "epoch": 0.844907616450232, "grad_norm": 5.76106127074969, "learning_rate": 3.238079077977339e-06, "loss": 0.7399, "step": 11695 }, { "epoch": 0.8449798616504416, "grad_norm": 8.641996140362426, "learning_rate": 3.2377996086244077e-06, "loss": 0.8426, "step": 11696 }, { "epoch": 0.8450521068506511, "grad_norm": 6.380986712457608, "learning_rate": 3.2375201291715724e-06, "loss": 0.7465, "step": 11697 }, { "epoch": 0.8451243520508607, "grad_norm": 6.307744718409044, "learning_rate": 3.2372406396226597e-06, "loss": 0.8029, "step": 11698 }, { "epoch": 0.8451965972510701, "grad_norm": 5.48433639643198, "learning_rate": 3.236961139981495e-06, "loss": 0.8225, "step": 11699 }, { "epoch": 0.8452688424512796, "grad_norm": 5.939314073203761, "learning_rate": 3.2366816302519046e-06, "loss": 0.883, "step": 11700 }, { "epoch": 0.8453410876514892, "grad_norm": 5.604209965913217, "learning_rate": 3.2364021104377135e-06, "loss": 0.778, "step": 11701 }, { "epoch": 0.8454133328516986, "grad_norm": 7.466475519503804, "learning_rate": 3.236122580542751e-06, "loss": 0.7767, "step": 11702 }, { "epoch": 0.8454855780519082, "grad_norm": 6.822373820445376, "learning_rate": 3.2358430405708408e-06, "loss": 0.788, "step": 11703 }, { "epoch": 0.8455578232521177, "grad_norm": 5.032944103795383, "learning_rate": 3.2355634905258117e-06, "loss": 0.8681, "step": 11704 }, { "epoch": 0.8456300684523272, "grad_norm": 5.68435481373546, "learning_rate": 3.2352839304114887e-06, "loss": 0.7869, "step": 11705 }, { "epoch": 0.8457023136525367, "grad_norm": 6.42379516704452, "learning_rate": 3.2350043602316996e-06, "loss": 0.7986, "step": 11706 }, { "epoch": 0.8457745588527462, "grad_norm": 5.840235015111831, "learning_rate": 3.234724779990272e-06, "loss": 0.8376, "step": 11707 }, { "epoch": 0.8458468040529558, "grad_norm": 7.412310088685583, "learning_rate": 3.234445189691032e-06, "loss": 0.8416, "step": 11708 }, { "epoch": 0.8459190492531652, "grad_norm": 6.21002658905873, "learning_rate": 3.234165589337809e-06, "loss": 0.8348, "step": 11709 }, { "epoch": 0.8459912944533747, "grad_norm": 6.177018043554704, "learning_rate": 3.233885978934428e-06, "loss": 0.8176, "step": 11710 }, { "epoch": 0.8460635396535843, "grad_norm": 7.910822695690775, "learning_rate": 3.233606358484717e-06, "loss": 0.8103, "step": 11711 }, { "epoch": 0.8461357848537938, "grad_norm": 6.261341638090571, "learning_rate": 3.233326727992506e-06, "loss": 0.867, "step": 11712 }, { "epoch": 0.8462080300540032, "grad_norm": 7.353660357141425, "learning_rate": 3.233047087461621e-06, "loss": 0.7917, "step": 11713 }, { "epoch": 0.8462802752542128, "grad_norm": 6.570473323128358, "learning_rate": 3.2327674368958905e-06, "loss": 0.8808, "step": 11714 }, { "epoch": 0.8463525204544223, "grad_norm": 8.861569840832114, "learning_rate": 3.232487776299143e-06, "loss": 0.7705, "step": 11715 }, { "epoch": 0.8464247656546319, "grad_norm": 6.976680331714198, "learning_rate": 3.2322081056752058e-06, "loss": 0.7995, "step": 11716 }, { "epoch": 0.8464970108548413, "grad_norm": 7.197422774318954, "learning_rate": 3.231928425027909e-06, "loss": 0.7511, "step": 11717 }, { "epoch": 0.8465692560550508, "grad_norm": 5.466324622610767, "learning_rate": 3.2316487343610805e-06, "loss": 0.7874, "step": 11718 }, { "epoch": 0.8466415012552604, "grad_norm": 7.210677748741197, "learning_rate": 3.2313690336785482e-06, "loss": 0.8816, "step": 11719 }, { "epoch": 0.8467137464554698, "grad_norm": 6.060716504333332, "learning_rate": 3.2310893229841416e-06, "loss": 0.8462, "step": 11720 }, { "epoch": 0.8467859916556794, "grad_norm": 6.208276564916719, "learning_rate": 3.2308096022816896e-06, "loss": 0.8431, "step": 11721 }, { "epoch": 0.8468582368558889, "grad_norm": 7.973569123386947, "learning_rate": 3.2305298715750226e-06, "loss": 0.8598, "step": 11722 }, { "epoch": 0.8469304820560984, "grad_norm": 6.278825434285342, "learning_rate": 3.230250130867969e-06, "loss": 0.8542, "step": 11723 }, { "epoch": 0.8470027272563079, "grad_norm": 5.845967081306792, "learning_rate": 3.229970380164357e-06, "loss": 0.8103, "step": 11724 }, { "epoch": 0.8470749724565174, "grad_norm": 6.444646035618563, "learning_rate": 3.2296906194680176e-06, "loss": 0.8217, "step": 11725 }, { "epoch": 0.847147217656727, "grad_norm": 5.13549400204299, "learning_rate": 3.2294108487827807e-06, "loss": 0.7905, "step": 11726 }, { "epoch": 0.8472194628569364, "grad_norm": 5.84035291206372, "learning_rate": 3.2291310681124756e-06, "loss": 0.7593, "step": 11727 }, { "epoch": 0.8472917080571459, "grad_norm": 5.352429218136028, "learning_rate": 3.228851277460932e-06, "loss": 0.7756, "step": 11728 }, { "epoch": 0.8473639532573555, "grad_norm": 7.181139445304635, "learning_rate": 3.228571476831981e-06, "loss": 0.8346, "step": 11729 }, { "epoch": 0.847436198457565, "grad_norm": 5.5913994821462465, "learning_rate": 3.228291666229451e-06, "loss": 0.8155, "step": 11730 }, { "epoch": 0.8475084436577744, "grad_norm": 6.285794108986263, "learning_rate": 3.2280118456571743e-06, "loss": 0.7788, "step": 11731 }, { "epoch": 0.847580688857984, "grad_norm": 6.836091795133689, "learning_rate": 3.2277320151189804e-06, "loss": 0.8714, "step": 11732 }, { "epoch": 0.8476529340581935, "grad_norm": 6.028413570096128, "learning_rate": 3.2274521746187004e-06, "loss": 0.8928, "step": 11733 }, { "epoch": 0.8477251792584031, "grad_norm": 7.69818538735309, "learning_rate": 3.227172324160165e-06, "loss": 0.8296, "step": 11734 }, { "epoch": 0.8477974244586125, "grad_norm": 6.33113715927022, "learning_rate": 3.226892463747205e-06, "loss": 0.843, "step": 11735 }, { "epoch": 0.847869669658822, "grad_norm": 5.5368661763170115, "learning_rate": 3.2266125933836517e-06, "loss": 0.8055, "step": 11736 }, { "epoch": 0.8479419148590316, "grad_norm": 8.629936243133368, "learning_rate": 3.2263327130733364e-06, "loss": 0.8303, "step": 11737 }, { "epoch": 0.848014160059241, "grad_norm": 6.111343730005644, "learning_rate": 3.2260528228200898e-06, "loss": 0.8592, "step": 11738 }, { "epoch": 0.8480864052594506, "grad_norm": 5.939450234811036, "learning_rate": 3.225772922627744e-06, "loss": 0.7409, "step": 11739 }, { "epoch": 0.8481586504596601, "grad_norm": 6.805977314792686, "learning_rate": 3.22549301250013e-06, "loss": 0.8383, "step": 11740 }, { "epoch": 0.8482308956598696, "grad_norm": 5.612104766455093, "learning_rate": 3.2252130924410807e-06, "loss": 0.8064, "step": 11741 }, { "epoch": 0.8483031408600791, "grad_norm": 7.148497801145936, "learning_rate": 3.224933162454427e-06, "loss": 0.7755, "step": 11742 }, { "epoch": 0.8483753860602886, "grad_norm": 6.034629388916295, "learning_rate": 3.2246532225440007e-06, "loss": 0.8026, "step": 11743 }, { "epoch": 0.8484476312604982, "grad_norm": 6.2657714217811815, "learning_rate": 3.2243732727136346e-06, "loss": 0.8491, "step": 11744 }, { "epoch": 0.8485198764607076, "grad_norm": 6.588047255308935, "learning_rate": 3.2240933129671613e-06, "loss": 0.7798, "step": 11745 }, { "epoch": 0.8485921216609171, "grad_norm": 5.603975465294704, "learning_rate": 3.2238133433084125e-06, "loss": 0.8465, "step": 11746 }, { "epoch": 0.8486643668611267, "grad_norm": 8.244209338066524, "learning_rate": 3.2235333637412213e-06, "loss": 0.8596, "step": 11747 }, { "epoch": 0.8487366120613362, "grad_norm": 8.272946437710718, "learning_rate": 3.2232533742694193e-06, "loss": 0.7418, "step": 11748 }, { "epoch": 0.8488088572615456, "grad_norm": 6.259013276258711, "learning_rate": 3.2229733748968407e-06, "loss": 0.88, "step": 11749 }, { "epoch": 0.8488811024617552, "grad_norm": 6.2196446067620155, "learning_rate": 3.2226933656273186e-06, "loss": 0.7556, "step": 11750 }, { "epoch": 0.8489533476619647, "grad_norm": 6.361975735572226, "learning_rate": 3.2224133464646846e-06, "loss": 0.7689, "step": 11751 }, { "epoch": 0.8490255928621743, "grad_norm": 8.338967313432292, "learning_rate": 3.2221333174127732e-06, "loss": 0.806, "step": 11752 }, { "epoch": 0.8490978380623837, "grad_norm": 8.681544176559305, "learning_rate": 3.2218532784754177e-06, "loss": 0.7933, "step": 11753 }, { "epoch": 0.8491700832625932, "grad_norm": 5.850390898857408, "learning_rate": 3.22157322965645e-06, "loss": 0.7988, "step": 11754 }, { "epoch": 0.8492423284628028, "grad_norm": 5.130467173699697, "learning_rate": 3.221293170959706e-06, "loss": 0.9204, "step": 11755 }, { "epoch": 0.8493145736630122, "grad_norm": 6.809005541011076, "learning_rate": 3.221013102389019e-06, "loss": 0.7734, "step": 11756 }, { "epoch": 0.8493868188632218, "grad_norm": 6.215981945684227, "learning_rate": 3.220733023948222e-06, "loss": 0.8094, "step": 11757 }, { "epoch": 0.8494590640634313, "grad_norm": 4.881845216690771, "learning_rate": 3.2204529356411484e-06, "loss": 0.8224, "step": 11758 }, { "epoch": 0.8495313092636408, "grad_norm": 5.285141273133344, "learning_rate": 3.2201728374716353e-06, "loss": 0.7461, "step": 11759 }, { "epoch": 0.8496035544638503, "grad_norm": 6.675304665916943, "learning_rate": 3.219892729443514e-06, "loss": 0.8215, "step": 11760 }, { "epoch": 0.8496757996640598, "grad_norm": 5.768709014383283, "learning_rate": 3.2196126115606208e-06, "loss": 0.7984, "step": 11761 }, { "epoch": 0.8497480448642694, "grad_norm": 5.49149393013566, "learning_rate": 3.2193324838267893e-06, "loss": 0.7521, "step": 11762 }, { "epoch": 0.8498202900644788, "grad_norm": 5.402684441648002, "learning_rate": 3.219052346245855e-06, "loss": 0.7472, "step": 11763 }, { "epoch": 0.8498925352646883, "grad_norm": 7.35683285487315, "learning_rate": 3.2187721988216526e-06, "loss": 0.8016, "step": 11764 }, { "epoch": 0.8499647804648979, "grad_norm": 5.384990899123468, "learning_rate": 3.218492041558016e-06, "loss": 0.7845, "step": 11765 }, { "epoch": 0.8500370256651074, "grad_norm": 5.34211250781398, "learning_rate": 3.218211874458782e-06, "loss": 0.8541, "step": 11766 }, { "epoch": 0.8501092708653168, "grad_norm": 7.630211721860027, "learning_rate": 3.217931697527785e-06, "loss": 0.7766, "step": 11767 }, { "epoch": 0.8501815160655264, "grad_norm": 5.631378456877584, "learning_rate": 3.21765151076886e-06, "loss": 0.8412, "step": 11768 }, { "epoch": 0.8502537612657359, "grad_norm": 5.177231518513448, "learning_rate": 3.217371314185843e-06, "loss": 0.7644, "step": 11769 }, { "epoch": 0.8503260064659455, "grad_norm": 6.38619640209131, "learning_rate": 3.2170911077825705e-06, "loss": 0.8778, "step": 11770 }, { "epoch": 0.8503982516661549, "grad_norm": 5.296813424351718, "learning_rate": 3.2168108915628776e-06, "loss": 0.7983, "step": 11771 }, { "epoch": 0.8504704968663644, "grad_norm": 7.900568552925331, "learning_rate": 3.2165306655305994e-06, "loss": 0.8997, "step": 11772 }, { "epoch": 0.850542742066574, "grad_norm": 6.450260293129348, "learning_rate": 3.216250429689573e-06, "loss": 0.7782, "step": 11773 }, { "epoch": 0.8506149872667834, "grad_norm": 7.346792297027599, "learning_rate": 3.215970184043634e-06, "loss": 0.7214, "step": 11774 }, { "epoch": 0.850687232466993, "grad_norm": 5.83287693236412, "learning_rate": 3.2156899285966202e-06, "loss": 0.7359, "step": 11775 }, { "epoch": 0.8507594776672025, "grad_norm": 5.50389169511028, "learning_rate": 3.215409663352366e-06, "loss": 0.7934, "step": 11776 }, { "epoch": 0.850831722867412, "grad_norm": 6.1139963140105325, "learning_rate": 3.215129388314709e-06, "loss": 0.8224, "step": 11777 }, { "epoch": 0.8509039680676215, "grad_norm": 6.6731559006269485, "learning_rate": 3.214849103487486e-06, "loss": 0.771, "step": 11778 }, { "epoch": 0.850976213267831, "grad_norm": 6.652412275318672, "learning_rate": 3.214568808874534e-06, "loss": 0.6574, "step": 11779 }, { "epoch": 0.8510484584680406, "grad_norm": 7.322672493030502, "learning_rate": 3.2142885044796905e-06, "loss": 0.8225, "step": 11780 }, { "epoch": 0.85112070366825, "grad_norm": 7.451612914945306, "learning_rate": 3.214008190306791e-06, "loss": 0.8451, "step": 11781 }, { "epoch": 0.8511929488684595, "grad_norm": 7.670922742740776, "learning_rate": 3.213727866359674e-06, "loss": 0.82, "step": 11782 }, { "epoch": 0.8512651940686691, "grad_norm": 5.959891731251683, "learning_rate": 3.2134475326421764e-06, "loss": 0.7642, "step": 11783 }, { "epoch": 0.8513374392688786, "grad_norm": 7.259308988194674, "learning_rate": 3.2131671891581367e-06, "loss": 0.9033, "step": 11784 }, { "epoch": 0.851409684469088, "grad_norm": 6.5827802916707014, "learning_rate": 3.2128868359113918e-06, "loss": 0.8549, "step": 11785 }, { "epoch": 0.8514819296692976, "grad_norm": 11.79521860784428, "learning_rate": 3.2126064729057795e-06, "loss": 0.8422, "step": 11786 }, { "epoch": 0.8515541748695071, "grad_norm": 5.419380251653479, "learning_rate": 3.2123261001451374e-06, "loss": 0.7304, "step": 11787 }, { "epoch": 0.8516264200697167, "grad_norm": 5.223897439811575, "learning_rate": 3.2120457176333046e-06, "loss": 0.7566, "step": 11788 }, { "epoch": 0.8516986652699261, "grad_norm": 7.660584013266639, "learning_rate": 3.2117653253741186e-06, "loss": 0.8733, "step": 11789 }, { "epoch": 0.8517709104701356, "grad_norm": 6.726662026974931, "learning_rate": 3.2114849233714186e-06, "loss": 0.7144, "step": 11790 }, { "epoch": 0.8518431556703452, "grad_norm": 6.839664441628132, "learning_rate": 3.211204511629041e-06, "loss": 0.8141, "step": 11791 }, { "epoch": 0.8519154008705546, "grad_norm": 7.154596795771135, "learning_rate": 3.210924090150827e-06, "loss": 0.7293, "step": 11792 }, { "epoch": 0.8519876460707642, "grad_norm": 7.848092507084499, "learning_rate": 3.2106436589406144e-06, "loss": 0.8429, "step": 11793 }, { "epoch": 0.8520598912709737, "grad_norm": 6.08093134171617, "learning_rate": 3.210363218002241e-06, "loss": 0.7883, "step": 11794 }, { "epoch": 0.8521321364711832, "grad_norm": 5.042720631967462, "learning_rate": 3.2100827673395474e-06, "loss": 0.7595, "step": 11795 }, { "epoch": 0.8522043816713927, "grad_norm": 7.883836662671395, "learning_rate": 3.2098023069563716e-06, "loss": 0.8881, "step": 11796 }, { "epoch": 0.8522766268716022, "grad_norm": 5.301841185710497, "learning_rate": 3.2095218368565535e-06, "loss": 0.7328, "step": 11797 }, { "epoch": 0.8523488720718118, "grad_norm": 5.104397557025316, "learning_rate": 3.2092413570439327e-06, "loss": 0.7303, "step": 11798 }, { "epoch": 0.8524211172720212, "grad_norm": 7.006742499921421, "learning_rate": 3.2089608675223476e-06, "loss": 0.8151, "step": 11799 }, { "epoch": 0.8524933624722307, "grad_norm": 5.986332904259919, "learning_rate": 3.2086803682956393e-06, "loss": 0.813, "step": 11800 }, { "epoch": 0.8525656076724403, "grad_norm": 9.898700328248985, "learning_rate": 3.2083998593676467e-06, "loss": 0.8025, "step": 11801 }, { "epoch": 0.8526378528726498, "grad_norm": 5.247019512101701, "learning_rate": 3.2081193407422106e-06, "loss": 0.8719, "step": 11802 }, { "epoch": 0.8527100980728592, "grad_norm": 7.407899310768629, "learning_rate": 3.2078388124231702e-06, "loss": 0.8054, "step": 11803 }, { "epoch": 0.8527823432730688, "grad_norm": 5.855068078561499, "learning_rate": 3.2075582744143664e-06, "loss": 0.8124, "step": 11804 }, { "epoch": 0.8528545884732783, "grad_norm": 7.970169479501827, "learning_rate": 3.207277726719639e-06, "loss": 0.8305, "step": 11805 }, { "epoch": 0.8529268336734879, "grad_norm": 7.316033968438347, "learning_rate": 3.2069971693428283e-06, "loss": 0.862, "step": 11806 }, { "epoch": 0.8529990788736973, "grad_norm": 8.523575345471047, "learning_rate": 3.2067166022877757e-06, "loss": 0.886, "step": 11807 }, { "epoch": 0.8530713240739068, "grad_norm": 6.674625160465181, "learning_rate": 3.206436025558321e-06, "loss": 0.8015, "step": 11808 }, { "epoch": 0.8531435692741164, "grad_norm": 5.215584897297987, "learning_rate": 3.206155439158306e-06, "loss": 0.7482, "step": 11809 }, { "epoch": 0.8532158144743258, "grad_norm": 6.493917406878033, "learning_rate": 3.2058748430915715e-06, "loss": 0.8118, "step": 11810 }, { "epoch": 0.8532880596745354, "grad_norm": 6.761269098820144, "learning_rate": 3.205594237361958e-06, "loss": 0.7938, "step": 11811 }, { "epoch": 0.8533603048747449, "grad_norm": 5.425028386898748, "learning_rate": 3.2053136219733076e-06, "loss": 0.7691, "step": 11812 }, { "epoch": 0.8534325500749544, "grad_norm": 7.2715761812630495, "learning_rate": 3.205032996929462e-06, "loss": 0.7974, "step": 11813 }, { "epoch": 0.8535047952751639, "grad_norm": 6.344243974667701, "learning_rate": 3.2047523622342614e-06, "loss": 0.8043, "step": 11814 }, { "epoch": 0.8535770404753734, "grad_norm": 6.817792883674856, "learning_rate": 3.2044717178915473e-06, "loss": 0.7964, "step": 11815 }, { "epoch": 0.853649285675583, "grad_norm": 6.223707513796351, "learning_rate": 3.2041910639051636e-06, "loss": 0.8915, "step": 11816 }, { "epoch": 0.8537215308757924, "grad_norm": 6.012011901965514, "learning_rate": 3.203910400278951e-06, "loss": 0.7916, "step": 11817 }, { "epoch": 0.8537937760760019, "grad_norm": 5.476483615192835, "learning_rate": 3.2036297270167514e-06, "loss": 0.8908, "step": 11818 }, { "epoch": 0.8538660212762115, "grad_norm": 5.353511705922498, "learning_rate": 3.2033490441224068e-06, "loss": 0.8038, "step": 11819 }, { "epoch": 0.853938266476421, "grad_norm": 7.488729018791809, "learning_rate": 3.2030683515997603e-06, "loss": 0.8453, "step": 11820 }, { "epoch": 0.8540105116766304, "grad_norm": 6.7350835936299704, "learning_rate": 3.202787649452654e-06, "loss": 0.7741, "step": 11821 }, { "epoch": 0.85408275687684, "grad_norm": 7.337395438857842, "learning_rate": 3.20250693768493e-06, "loss": 0.8094, "step": 11822 }, { "epoch": 0.8541550020770495, "grad_norm": 6.24930843341854, "learning_rate": 3.202226216300432e-06, "loss": 0.7921, "step": 11823 }, { "epoch": 0.8542272472772591, "grad_norm": 6.125078784669484, "learning_rate": 3.201945485303002e-06, "loss": 0.8807, "step": 11824 }, { "epoch": 0.8542994924774685, "grad_norm": 5.954931750927388, "learning_rate": 3.201664744696483e-06, "loss": 0.7781, "step": 11825 }, { "epoch": 0.854371737677678, "grad_norm": 6.219165577253505, "learning_rate": 3.2013839944847185e-06, "loss": 0.8766, "step": 11826 }, { "epoch": 0.8544439828778876, "grad_norm": 6.010418429669189, "learning_rate": 3.2011032346715525e-06, "loss": 0.7877, "step": 11827 }, { "epoch": 0.854516228078097, "grad_norm": 6.5613553456711955, "learning_rate": 3.2008224652608273e-06, "loss": 0.8273, "step": 11828 }, { "epoch": 0.8545884732783066, "grad_norm": 5.234517109777997, "learning_rate": 3.2005416862563858e-06, "loss": 0.7794, "step": 11829 }, { "epoch": 0.8546607184785161, "grad_norm": 6.0484282792853055, "learning_rate": 3.200260897662074e-06, "loss": 0.7987, "step": 11830 }, { "epoch": 0.8547329636787256, "grad_norm": 7.877904689300396, "learning_rate": 3.1999800994817332e-06, "loss": 0.7378, "step": 11831 }, { "epoch": 0.8548052088789351, "grad_norm": 6.905690442369962, "learning_rate": 3.1996992917192094e-06, "loss": 0.7411, "step": 11832 }, { "epoch": 0.8548774540791446, "grad_norm": 6.528712897811542, "learning_rate": 3.199418474378344e-06, "loss": 0.8274, "step": 11833 }, { "epoch": 0.8549496992793542, "grad_norm": 9.75105671169176, "learning_rate": 3.199137647462984e-06, "loss": 0.6573, "step": 11834 }, { "epoch": 0.8550219444795636, "grad_norm": 5.907463796716954, "learning_rate": 3.198856810976972e-06, "loss": 0.7886, "step": 11835 }, { "epoch": 0.8550941896797731, "grad_norm": 7.635429378844153, "learning_rate": 3.1985759649241534e-06, "loss": 0.8621, "step": 11836 }, { "epoch": 0.8551664348799827, "grad_norm": 6.770354487775121, "learning_rate": 3.1982951093083715e-06, "loss": 0.8115, "step": 11837 }, { "epoch": 0.8552386800801922, "grad_norm": 5.477208685743371, "learning_rate": 3.198014244133472e-06, "loss": 0.761, "step": 11838 }, { "epoch": 0.8553109252804016, "grad_norm": 6.5752784931106065, "learning_rate": 3.197733369403299e-06, "loss": 0.819, "step": 11839 }, { "epoch": 0.8553831704806112, "grad_norm": 7.382928756525136, "learning_rate": 3.1974524851216985e-06, "loss": 0.8628, "step": 11840 }, { "epoch": 0.8554554156808207, "grad_norm": 8.002532081432443, "learning_rate": 3.1971715912925157e-06, "loss": 0.8658, "step": 11841 }, { "epoch": 0.8555276608810303, "grad_norm": 6.207710627448002, "learning_rate": 3.1968906879195936e-06, "loss": 0.8326, "step": 11842 }, { "epoch": 0.8555999060812397, "grad_norm": 8.020237597515766, "learning_rate": 3.1966097750067797e-06, "loss": 0.7756, "step": 11843 }, { "epoch": 0.8556721512814492, "grad_norm": 5.60300298282422, "learning_rate": 3.196328852557919e-06, "loss": 0.7712, "step": 11844 }, { "epoch": 0.8557443964816588, "grad_norm": 6.1658727117584435, "learning_rate": 3.1960479205768576e-06, "loss": 0.8772, "step": 11845 }, { "epoch": 0.8558166416818682, "grad_norm": 5.686528720293142, "learning_rate": 3.19576697906744e-06, "loss": 0.8437, "step": 11846 }, { "epoch": 0.8558888868820778, "grad_norm": 5.694047449844793, "learning_rate": 3.1954860280335127e-06, "loss": 0.7675, "step": 11847 }, { "epoch": 0.8559611320822873, "grad_norm": 5.849679804459152, "learning_rate": 3.1952050674789215e-06, "loss": 0.78, "step": 11848 }, { "epoch": 0.8560333772824968, "grad_norm": 6.583149999971073, "learning_rate": 3.1949240974075124e-06, "loss": 0.8117, "step": 11849 }, { "epoch": 0.8561056224827063, "grad_norm": 5.94603108338085, "learning_rate": 3.194643117823133e-06, "loss": 0.773, "step": 11850 }, { "epoch": 0.8561778676829158, "grad_norm": 4.689372388088308, "learning_rate": 3.194362128729628e-06, "loss": 0.7757, "step": 11851 }, { "epoch": 0.8562501128831254, "grad_norm": 5.788840429909017, "learning_rate": 3.194081130130845e-06, "loss": 0.7504, "step": 11852 }, { "epoch": 0.8563223580833348, "grad_norm": 6.936075150167387, "learning_rate": 3.193800122030629e-06, "loss": 0.7986, "step": 11853 }, { "epoch": 0.8563946032835443, "grad_norm": 5.653286826884116, "learning_rate": 3.1935191044328294e-06, "loss": 0.7788, "step": 11854 }, { "epoch": 0.8564668484837539, "grad_norm": 7.249298324830658, "learning_rate": 3.1932380773412917e-06, "loss": 0.8462, "step": 11855 }, { "epoch": 0.8565390936839634, "grad_norm": 5.7132602043917045, "learning_rate": 3.1929570407598633e-06, "loss": 0.7955, "step": 11856 }, { "epoch": 0.8566113388841728, "grad_norm": 6.325036922543077, "learning_rate": 3.1926759946923896e-06, "loss": 0.9241, "step": 11857 }, { "epoch": 0.8566835840843824, "grad_norm": 6.4073789020574745, "learning_rate": 3.19239493914272e-06, "loss": 0.8587, "step": 11858 }, { "epoch": 0.8567558292845919, "grad_norm": 8.090265768658007, "learning_rate": 3.192113874114702e-06, "loss": 0.8365, "step": 11859 }, { "epoch": 0.8568280744848014, "grad_norm": 5.463740730356327, "learning_rate": 3.191832799612182e-06, "loss": 0.8066, "step": 11860 }, { "epoch": 0.8569003196850109, "grad_norm": 5.5959313257896675, "learning_rate": 3.191551715639008e-06, "loss": 0.8538, "step": 11861 }, { "epoch": 0.8569725648852204, "grad_norm": 5.656276808195707, "learning_rate": 3.191270622199028e-06, "loss": 0.808, "step": 11862 }, { "epoch": 0.85704481008543, "grad_norm": 6.592938269237219, "learning_rate": 3.1909895192960895e-06, "loss": 0.8161, "step": 11863 }, { "epoch": 0.8571170552856394, "grad_norm": 5.706139172985059, "learning_rate": 3.1907084069340423e-06, "loss": 0.8474, "step": 11864 }, { "epoch": 0.857189300485849, "grad_norm": 8.234094256658656, "learning_rate": 3.190427285116732e-06, "loss": 0.884, "step": 11865 }, { "epoch": 0.8572615456860585, "grad_norm": 6.203411566507301, "learning_rate": 3.190146153848009e-06, "loss": 0.9281, "step": 11866 }, { "epoch": 0.857333790886268, "grad_norm": 6.296703856913545, "learning_rate": 3.1898650131317197e-06, "loss": 0.8232, "step": 11867 }, { "epoch": 0.8574060360864775, "grad_norm": 6.764930601363163, "learning_rate": 3.189583862971716e-06, "loss": 0.8365, "step": 11868 }, { "epoch": 0.857478281286687, "grad_norm": 6.4473582217487175, "learning_rate": 3.189302703371843e-06, "loss": 0.7354, "step": 11869 }, { "epoch": 0.8575505264868966, "grad_norm": 6.186638049574689, "learning_rate": 3.1890215343359526e-06, "loss": 0.8286, "step": 11870 }, { "epoch": 0.857622771687106, "grad_norm": 6.224711718644054, "learning_rate": 3.1887403558678916e-06, "loss": 0.8408, "step": 11871 }, { "epoch": 0.8576950168873155, "grad_norm": 7.127131093197943, "learning_rate": 3.1884591679715094e-06, "loss": 0.8283, "step": 11872 }, { "epoch": 0.8577672620875251, "grad_norm": 5.002326996045562, "learning_rate": 3.1881779706506566e-06, "loss": 0.7822, "step": 11873 }, { "epoch": 0.8578395072877346, "grad_norm": 5.818859600014227, "learning_rate": 3.1878967639091813e-06, "loss": 0.8843, "step": 11874 }, { "epoch": 0.857911752487944, "grad_norm": 7.004738974653327, "learning_rate": 3.187615547750934e-06, "loss": 0.7804, "step": 11875 }, { "epoch": 0.8579839976881536, "grad_norm": 7.860611865662106, "learning_rate": 3.187334322179763e-06, "loss": 0.8209, "step": 11876 }, { "epoch": 0.8580562428883631, "grad_norm": 6.982602434291322, "learning_rate": 3.187053087199519e-06, "loss": 0.807, "step": 11877 }, { "epoch": 0.8581284880885726, "grad_norm": 6.423242873442547, "learning_rate": 3.1867718428140514e-06, "loss": 0.7731, "step": 11878 }, { "epoch": 0.8582007332887821, "grad_norm": 5.876133261606165, "learning_rate": 3.1864905890272113e-06, "loss": 0.8251, "step": 11879 }, { "epoch": 0.8582729784889916, "grad_norm": 7.091569607433203, "learning_rate": 3.1862093258428485e-06, "loss": 0.8146, "step": 11880 }, { "epoch": 0.8583452236892012, "grad_norm": 6.454951362692562, "learning_rate": 3.185928053264811e-06, "loss": 0.7994, "step": 11881 }, { "epoch": 0.8584174688894106, "grad_norm": 7.070184358731749, "learning_rate": 3.1856467712969524e-06, "loss": 0.8859, "step": 11882 }, { "epoch": 0.8584897140896202, "grad_norm": 6.522433668554504, "learning_rate": 3.1853654799431215e-06, "loss": 0.95, "step": 11883 }, { "epoch": 0.8585619592898297, "grad_norm": 7.075269919055402, "learning_rate": 3.1850841792071695e-06, "loss": 0.8607, "step": 11884 }, { "epoch": 0.8586342044900392, "grad_norm": 7.676060546763432, "learning_rate": 3.1848028690929467e-06, "loss": 0.87, "step": 11885 }, { "epoch": 0.8587064496902487, "grad_norm": 7.095438079965948, "learning_rate": 3.1845215496043045e-06, "loss": 0.8155, "step": 11886 }, { "epoch": 0.8587786948904582, "grad_norm": 5.578040883688256, "learning_rate": 3.184240220745094e-06, "loss": 0.8395, "step": 11887 }, { "epoch": 0.8588509400906678, "grad_norm": 6.679167217493381, "learning_rate": 3.183958882519166e-06, "loss": 0.9021, "step": 11888 }, { "epoch": 0.8589231852908772, "grad_norm": 6.296606015501774, "learning_rate": 3.1836775349303722e-06, "loss": 0.7594, "step": 11889 }, { "epoch": 0.8589954304910867, "grad_norm": 5.749317958020923, "learning_rate": 3.1833961779825636e-06, "loss": 0.7761, "step": 11890 }, { "epoch": 0.8590676756912963, "grad_norm": 5.641475032254489, "learning_rate": 3.183114811679591e-06, "loss": 0.8302, "step": 11891 }, { "epoch": 0.8591399208915058, "grad_norm": 6.562257598759779, "learning_rate": 3.182833436025308e-06, "loss": 0.7851, "step": 11892 }, { "epoch": 0.8592121660917152, "grad_norm": 7.976750206303302, "learning_rate": 3.1825520510235658e-06, "loss": 0.8432, "step": 11893 }, { "epoch": 0.8592844112919248, "grad_norm": 6.829476578457046, "learning_rate": 3.1822706566782153e-06, "loss": 0.8689, "step": 11894 }, { "epoch": 0.8593566564921343, "grad_norm": 5.977907515626255, "learning_rate": 3.1819892529931095e-06, "loss": 0.8298, "step": 11895 }, { "epoch": 0.8594289016923438, "grad_norm": 7.103063980693005, "learning_rate": 3.181707839972101e-06, "loss": 0.7798, "step": 11896 }, { "epoch": 0.8595011468925533, "grad_norm": 6.612090867190675, "learning_rate": 3.1814264176190402e-06, "loss": 0.8073, "step": 11897 }, { "epoch": 0.8595733920927628, "grad_norm": 7.8838006147916415, "learning_rate": 3.181144985937782e-06, "loss": 0.7453, "step": 11898 }, { "epoch": 0.8596456372929724, "grad_norm": 6.079386680688281, "learning_rate": 3.180863544932177e-06, "loss": 0.7369, "step": 11899 }, { "epoch": 0.8597178824931818, "grad_norm": 5.949935579351889, "learning_rate": 3.1805820946060785e-06, "loss": 0.8119, "step": 11900 }, { "epoch": 0.8597901276933914, "grad_norm": 6.595912199177443, "learning_rate": 3.18030063496334e-06, "loss": 0.8122, "step": 11901 }, { "epoch": 0.8598623728936009, "grad_norm": 6.938863259478474, "learning_rate": 3.1800191660078146e-06, "loss": 0.8248, "step": 11902 }, { "epoch": 0.8599346180938104, "grad_norm": 5.986704400388975, "learning_rate": 3.1797376877433543e-06, "loss": 0.7971, "step": 11903 }, { "epoch": 0.8600068632940199, "grad_norm": 6.476087159313063, "learning_rate": 3.1794562001738126e-06, "loss": 0.6817, "step": 11904 }, { "epoch": 0.8600791084942294, "grad_norm": 6.404872243710405, "learning_rate": 3.1791747033030436e-06, "loss": 0.8221, "step": 11905 }, { "epoch": 0.860151353694439, "grad_norm": 6.1996931676926685, "learning_rate": 3.1788931971348997e-06, "loss": 0.9214, "step": 11906 }, { "epoch": 0.8602235988946484, "grad_norm": 8.34547709331462, "learning_rate": 3.178611681673236e-06, "loss": 0.9349, "step": 11907 }, { "epoch": 0.8602958440948579, "grad_norm": 6.606501538586493, "learning_rate": 3.1783301569219037e-06, "loss": 0.838, "step": 11908 }, { "epoch": 0.8603680892950675, "grad_norm": 5.712433208183618, "learning_rate": 3.17804862288476e-06, "loss": 0.8745, "step": 11909 }, { "epoch": 0.860440334495277, "grad_norm": 7.251370267615401, "learning_rate": 3.177767079565656e-06, "loss": 0.8786, "step": 11910 }, { "epoch": 0.8605125796954864, "grad_norm": 6.246092833900026, "learning_rate": 3.177485526968447e-06, "loss": 0.761, "step": 11911 }, { "epoch": 0.860584824895696, "grad_norm": 7.093389426322708, "learning_rate": 3.1772039650969875e-06, "loss": 0.8138, "step": 11912 }, { "epoch": 0.8606570700959055, "grad_norm": 6.908379826161107, "learning_rate": 3.176922393955132e-06, "loss": 0.8153, "step": 11913 }, { "epoch": 0.860729315296115, "grad_norm": 5.732633820081021, "learning_rate": 3.176640813546733e-06, "loss": 0.8883, "step": 11914 }, { "epoch": 0.8608015604963245, "grad_norm": 5.281739703165254, "learning_rate": 3.176359223875648e-06, "loss": 0.8153, "step": 11915 }, { "epoch": 0.860873805696534, "grad_norm": 7.189237964978614, "learning_rate": 3.17607762494573e-06, "loss": 0.8065, "step": 11916 }, { "epoch": 0.8609460508967436, "grad_norm": 5.81878551959906, "learning_rate": 3.1757960167608343e-06, "loss": 0.7443, "step": 11917 }, { "epoch": 0.861018296096953, "grad_norm": 5.591793634571625, "learning_rate": 3.175514399324816e-06, "loss": 0.7965, "step": 11918 }, { "epoch": 0.8610905412971626, "grad_norm": 7.432030213178604, "learning_rate": 3.17523277264153e-06, "loss": 0.8006, "step": 11919 }, { "epoch": 0.8611627864973721, "grad_norm": 6.717518112447815, "learning_rate": 3.1749511367148313e-06, "loss": 0.8232, "step": 11920 }, { "epoch": 0.8612350316975816, "grad_norm": 6.414763362382226, "learning_rate": 3.174669491548576e-06, "loss": 0.8633, "step": 11921 }, { "epoch": 0.8613072768977911, "grad_norm": 7.876491587023945, "learning_rate": 3.1743878371466187e-06, "loss": 0.8786, "step": 11922 }, { "epoch": 0.8613795220980006, "grad_norm": 5.655102186552993, "learning_rate": 3.174106173512816e-06, "loss": 0.8209, "step": 11923 }, { "epoch": 0.8614517672982102, "grad_norm": 5.620807526427631, "learning_rate": 3.1738245006510227e-06, "loss": 0.7693, "step": 11924 }, { "epoch": 0.8615240124984196, "grad_norm": 5.933535245967342, "learning_rate": 3.1735428185650958e-06, "loss": 0.8391, "step": 11925 }, { "epoch": 0.8615962576986291, "grad_norm": 5.490600182917378, "learning_rate": 3.17326112725889e-06, "loss": 0.7571, "step": 11926 }, { "epoch": 0.8616685028988387, "grad_norm": 6.276889179247527, "learning_rate": 3.172979426736263e-06, "loss": 0.7738, "step": 11927 }, { "epoch": 0.8617407480990482, "grad_norm": 8.08774417467324, "learning_rate": 3.172697717001069e-06, "loss": 0.7624, "step": 11928 }, { "epoch": 0.8618129932992576, "grad_norm": 9.037610622377949, "learning_rate": 3.172415998057167e-06, "loss": 0.845, "step": 11929 }, { "epoch": 0.8618852384994672, "grad_norm": 6.704171479173841, "learning_rate": 3.1721342699084113e-06, "loss": 0.7853, "step": 11930 }, { "epoch": 0.8619574836996767, "grad_norm": 7.231289100376409, "learning_rate": 3.171852532558659e-06, "loss": 0.8627, "step": 11931 }, { "epoch": 0.8620297288998862, "grad_norm": 6.054890684594775, "learning_rate": 3.1715707860117678e-06, "loss": 0.848, "step": 11932 }, { "epoch": 0.8621019741000957, "grad_norm": 6.990353886237376, "learning_rate": 3.1712890302715937e-06, "loss": 0.7995, "step": 11933 }, { "epoch": 0.8621742193003052, "grad_norm": 8.502124296533834, "learning_rate": 3.1710072653419936e-06, "loss": 0.8144, "step": 11934 }, { "epoch": 0.8622464645005148, "grad_norm": 6.780520351550681, "learning_rate": 3.1707254912268255e-06, "loss": 0.7989, "step": 11935 }, { "epoch": 0.8623187097007242, "grad_norm": 7.475405995242076, "learning_rate": 3.1704437079299465e-06, "loss": 0.8756, "step": 11936 }, { "epoch": 0.8623909549009338, "grad_norm": 6.217753728299413, "learning_rate": 3.170161915455214e-06, "loss": 0.7581, "step": 11937 }, { "epoch": 0.8624632001011433, "grad_norm": 7.47060457451338, "learning_rate": 3.169880113806484e-06, "loss": 0.8221, "step": 11938 }, { "epoch": 0.8625354453013528, "grad_norm": 6.6464975756554185, "learning_rate": 3.169598302987616e-06, "loss": 0.8097, "step": 11939 }, { "epoch": 0.8626076905015623, "grad_norm": 5.596992277602498, "learning_rate": 3.169316483002467e-06, "loss": 0.799, "step": 11940 }, { "epoch": 0.8626799357017718, "grad_norm": 7.517325924582257, "learning_rate": 3.1690346538548954e-06, "loss": 0.8547, "step": 11941 }, { "epoch": 0.8627521809019814, "grad_norm": 5.833470079544877, "learning_rate": 3.1687528155487584e-06, "loss": 0.8128, "step": 11942 }, { "epoch": 0.8628244261021908, "grad_norm": 7.440984342534248, "learning_rate": 3.1684709680879148e-06, "loss": 0.7763, "step": 11943 }, { "epoch": 0.8628966713024003, "grad_norm": 5.332324568397387, "learning_rate": 3.1681891114762227e-06, "loss": 0.7569, "step": 11944 }, { "epoch": 0.8629689165026099, "grad_norm": 4.974511411051997, "learning_rate": 3.1679072457175408e-06, "loss": 0.839, "step": 11945 }, { "epoch": 0.8630411617028194, "grad_norm": 5.71421125227233, "learning_rate": 3.1676253708157273e-06, "loss": 0.8337, "step": 11946 }, { "epoch": 0.8631134069030288, "grad_norm": 4.70060221283721, "learning_rate": 3.1673434867746406e-06, "loss": 0.8034, "step": 11947 }, { "epoch": 0.8631856521032384, "grad_norm": 6.11238792644332, "learning_rate": 3.167061593598139e-06, "loss": 0.8386, "step": 11948 }, { "epoch": 0.8632578973034479, "grad_norm": 6.334489951010509, "learning_rate": 3.166779691290083e-06, "loss": 0.805, "step": 11949 }, { "epoch": 0.8633301425036574, "grad_norm": 7.7674968466799905, "learning_rate": 3.1664977798543307e-06, "loss": 0.8571, "step": 11950 }, { "epoch": 0.8634023877038669, "grad_norm": 6.042333666935162, "learning_rate": 3.166215859294741e-06, "loss": 0.8206, "step": 11951 }, { "epoch": 0.8634746329040764, "grad_norm": 6.9534220771332205, "learning_rate": 3.1659339296151735e-06, "loss": 0.7475, "step": 11952 }, { "epoch": 0.863546878104286, "grad_norm": 6.344236157960903, "learning_rate": 3.1656519908194884e-06, "loss": 0.7532, "step": 11953 }, { "epoch": 0.8636191233044954, "grad_norm": 6.089001492729888, "learning_rate": 3.165370042911543e-06, "loss": 0.8321, "step": 11954 }, { "epoch": 0.863691368504705, "grad_norm": 5.540053952777231, "learning_rate": 3.1650880858951993e-06, "loss": 0.8332, "step": 11955 }, { "epoch": 0.8637636137049145, "grad_norm": 6.188934468151543, "learning_rate": 3.1648061197743156e-06, "loss": 0.7817, "step": 11956 }, { "epoch": 0.863835858905124, "grad_norm": 6.495724959465528, "learning_rate": 3.1645241445527524e-06, "loss": 0.8652, "step": 11957 }, { "epoch": 0.8639081041053335, "grad_norm": 6.784611163428308, "learning_rate": 3.16424216023437e-06, "loss": 0.766, "step": 11958 }, { "epoch": 0.863980349305543, "grad_norm": 6.326317196926111, "learning_rate": 3.163960166823028e-06, "loss": 0.8517, "step": 11959 }, { "epoch": 0.8640525945057526, "grad_norm": 6.84520577053865, "learning_rate": 3.163678164322587e-06, "loss": 0.7331, "step": 11960 }, { "epoch": 0.864124839705962, "grad_norm": 7.161400686321012, "learning_rate": 3.1633961527369073e-06, "loss": 0.7634, "step": 11961 }, { "epoch": 0.8641970849061715, "grad_norm": 6.391555531848192, "learning_rate": 3.1631141320698487e-06, "loss": 0.7968, "step": 11962 }, { "epoch": 0.8642693301063811, "grad_norm": 5.5832051884004095, "learning_rate": 3.1628321023252727e-06, "loss": 0.778, "step": 11963 }, { "epoch": 0.8643415753065906, "grad_norm": 5.756360392386931, "learning_rate": 3.1625500635070405e-06, "loss": 0.8263, "step": 11964 }, { "epoch": 0.8644138205068, "grad_norm": 6.352910986061915, "learning_rate": 3.1622680156190116e-06, "loss": 0.8965, "step": 11965 }, { "epoch": 0.8644860657070096, "grad_norm": 5.925396599352582, "learning_rate": 3.161985958665048e-06, "loss": 0.8057, "step": 11966 }, { "epoch": 0.8645583109072191, "grad_norm": 7.076964024198843, "learning_rate": 3.1617038926490105e-06, "loss": 0.8171, "step": 11967 }, { "epoch": 0.8646305561074286, "grad_norm": 5.355412299973734, "learning_rate": 3.161421817574761e-06, "loss": 0.7898, "step": 11968 }, { "epoch": 0.8647028013076381, "grad_norm": 5.08265948389219, "learning_rate": 3.1611397334461608e-06, "loss": 0.708, "step": 11969 }, { "epoch": 0.8647750465078476, "grad_norm": 5.551225186769021, "learning_rate": 3.16085764026707e-06, "loss": 0.7743, "step": 11970 }, { "epoch": 0.8648472917080572, "grad_norm": 6.474592583405828, "learning_rate": 3.1605755380413516e-06, "loss": 0.8378, "step": 11971 }, { "epoch": 0.8649195369082666, "grad_norm": 6.951713504467324, "learning_rate": 3.1602934267728664e-06, "loss": 0.8824, "step": 11972 }, { "epoch": 0.8649917821084762, "grad_norm": 6.572557571524639, "learning_rate": 3.160011306465478e-06, "loss": 0.7805, "step": 11973 }, { "epoch": 0.8650640273086857, "grad_norm": 5.625260749707276, "learning_rate": 3.1597291771230466e-06, "loss": 0.7983, "step": 11974 }, { "epoch": 0.8651362725088952, "grad_norm": 7.328633044755857, "learning_rate": 3.159447038749435e-06, "loss": 0.8139, "step": 11975 }, { "epoch": 0.8652085177091047, "grad_norm": 6.847371297283456, "learning_rate": 3.1591648913485053e-06, "loss": 0.7243, "step": 11976 }, { "epoch": 0.8652807629093142, "grad_norm": 5.303664635540616, "learning_rate": 3.1588827349241203e-06, "loss": 0.793, "step": 11977 }, { "epoch": 0.8653530081095238, "grad_norm": 6.868622752461006, "learning_rate": 3.1586005694801423e-06, "loss": 0.8087, "step": 11978 }, { "epoch": 0.8654252533097332, "grad_norm": 6.679116386403594, "learning_rate": 3.1583183950204345e-06, "loss": 0.8416, "step": 11979 }, { "epoch": 0.8654974985099427, "grad_norm": 6.223863196078147, "learning_rate": 3.1580362115488577e-06, "loss": 0.8566, "step": 11980 }, { "epoch": 0.8655697437101523, "grad_norm": 5.59519828929095, "learning_rate": 3.157754019069277e-06, "loss": 0.7249, "step": 11981 }, { "epoch": 0.8656419889103618, "grad_norm": 5.827039029395432, "learning_rate": 3.157471817585554e-06, "loss": 0.8061, "step": 11982 }, { "epoch": 0.8657142341105712, "grad_norm": 6.393836224255174, "learning_rate": 3.157189607101553e-06, "loss": 0.7693, "step": 11983 }, { "epoch": 0.8657864793107808, "grad_norm": 4.803734860144488, "learning_rate": 3.156907387621136e-06, "loss": 0.8584, "step": 11984 }, { "epoch": 0.8658587245109903, "grad_norm": 6.265730935435501, "learning_rate": 3.1566251591481667e-06, "loss": 0.8592, "step": 11985 }, { "epoch": 0.8659309697111998, "grad_norm": 4.786564485066732, "learning_rate": 3.1563429216865095e-06, "loss": 0.7639, "step": 11986 }, { "epoch": 0.8660032149114093, "grad_norm": 6.517637532037466, "learning_rate": 3.156060675240027e-06, "loss": 0.768, "step": 11987 }, { "epoch": 0.8660754601116188, "grad_norm": 8.007823932918667, "learning_rate": 3.155778419812583e-06, "loss": 0.886, "step": 11988 }, { "epoch": 0.8661477053118284, "grad_norm": 5.472409838425596, "learning_rate": 3.1554961554080423e-06, "loss": 0.693, "step": 11989 }, { "epoch": 0.8662199505120378, "grad_norm": 7.771685869701902, "learning_rate": 3.155213882030267e-06, "loss": 0.8646, "step": 11990 }, { "epoch": 0.8662921957122474, "grad_norm": 6.143427008116295, "learning_rate": 3.154931599683123e-06, "loss": 0.841, "step": 11991 }, { "epoch": 0.8663644409124569, "grad_norm": 7.467817048735773, "learning_rate": 3.1546493083704744e-06, "loss": 0.7645, "step": 11992 }, { "epoch": 0.8664366861126664, "grad_norm": 6.37373608794324, "learning_rate": 3.154367008096185e-06, "loss": 0.7968, "step": 11993 }, { "epoch": 0.8665089313128759, "grad_norm": 5.7429902845510075, "learning_rate": 3.1540846988641182e-06, "loss": 0.8267, "step": 11994 }, { "epoch": 0.8665811765130854, "grad_norm": 5.991851996189348, "learning_rate": 3.153802380678141e-06, "loss": 0.8058, "step": 11995 }, { "epoch": 0.866653421713295, "grad_norm": 6.191025471334999, "learning_rate": 3.153520053542116e-06, "loss": 0.7829, "step": 11996 }, { "epoch": 0.8667256669135044, "grad_norm": 6.473642756370587, "learning_rate": 3.1532377174599093e-06, "loss": 0.8414, "step": 11997 }, { "epoch": 0.8667979121137139, "grad_norm": 8.430817394879382, "learning_rate": 3.152955372435386e-06, "loss": 0.8077, "step": 11998 }, { "epoch": 0.8668701573139235, "grad_norm": 5.307772878207558, "learning_rate": 3.1526730184724102e-06, "loss": 0.7253, "step": 11999 }, { "epoch": 0.866942402514133, "grad_norm": 5.890833519601794, "learning_rate": 3.1523906555748476e-06, "loss": 0.8284, "step": 12000 }, { "epoch": 0.8670146477143424, "grad_norm": 7.385817789036079, "learning_rate": 3.152108283746563e-06, "loss": 0.7869, "step": 12001 }, { "epoch": 0.867086892914552, "grad_norm": 6.343418958322912, "learning_rate": 3.1518259029914226e-06, "loss": 0.866, "step": 12002 }, { "epoch": 0.8671591381147615, "grad_norm": 6.459017743998321, "learning_rate": 3.151543513313292e-06, "loss": 0.8249, "step": 12003 }, { "epoch": 0.867231383314971, "grad_norm": 6.82761938112943, "learning_rate": 3.1512611147160367e-06, "loss": 0.8134, "step": 12004 }, { "epoch": 0.8673036285151805, "grad_norm": 6.403075981045592, "learning_rate": 3.150978707203521e-06, "loss": 0.8118, "step": 12005 }, { "epoch": 0.86737587371539, "grad_norm": 7.044510607537406, "learning_rate": 3.1506962907796134e-06, "loss": 0.8954, "step": 12006 }, { "epoch": 0.8674481189155996, "grad_norm": 7.063727078134814, "learning_rate": 3.1504138654481797e-06, "loss": 0.835, "step": 12007 }, { "epoch": 0.867520364115809, "grad_norm": 7.645722543466374, "learning_rate": 3.1501314312130837e-06, "loss": 0.8532, "step": 12008 }, { "epoch": 0.8675926093160186, "grad_norm": 6.347517426366575, "learning_rate": 3.1498489880781936e-06, "loss": 0.8495, "step": 12009 }, { "epoch": 0.8676648545162281, "grad_norm": 6.351784715131867, "learning_rate": 3.149566536047376e-06, "loss": 0.8718, "step": 12010 }, { "epoch": 0.8677370997164376, "grad_norm": 5.556273844166797, "learning_rate": 3.1492840751244965e-06, "loss": 0.7649, "step": 12011 }, { "epoch": 0.8678093449166471, "grad_norm": 7.131421056863997, "learning_rate": 3.149001605313422e-06, "loss": 0.8053, "step": 12012 }, { "epoch": 0.8678815901168566, "grad_norm": 6.807991425774416, "learning_rate": 3.1487191266180195e-06, "loss": 0.8049, "step": 12013 }, { "epoch": 0.8679538353170662, "grad_norm": 5.873117023400491, "learning_rate": 3.1484366390421554e-06, "loss": 0.8482, "step": 12014 }, { "epoch": 0.8680260805172756, "grad_norm": 5.873124168097971, "learning_rate": 3.1481541425896976e-06, "loss": 0.7926, "step": 12015 }, { "epoch": 0.8680983257174851, "grad_norm": 6.60522417171761, "learning_rate": 3.1478716372645135e-06, "loss": 0.8951, "step": 12016 }, { "epoch": 0.8681705709176947, "grad_norm": 7.696832466085405, "learning_rate": 3.1475891230704687e-06, "loss": 0.8801, "step": 12017 }, { "epoch": 0.8682428161179042, "grad_norm": 5.764117945928941, "learning_rate": 3.1473066000114325e-06, "loss": 0.826, "step": 12018 }, { "epoch": 0.8683150613181136, "grad_norm": 5.270055158062252, "learning_rate": 3.147024068091271e-06, "loss": 0.782, "step": 12019 }, { "epoch": 0.8683873065183232, "grad_norm": 6.022002250779921, "learning_rate": 3.1467415273138522e-06, "loss": 0.8506, "step": 12020 }, { "epoch": 0.8684595517185327, "grad_norm": 6.926668526603637, "learning_rate": 3.1464589776830444e-06, "loss": 0.7298, "step": 12021 }, { "epoch": 0.8685317969187422, "grad_norm": 4.835766914082901, "learning_rate": 3.146176419202715e-06, "loss": 0.7291, "step": 12022 }, { "epoch": 0.8686040421189517, "grad_norm": 6.9243479287425505, "learning_rate": 3.1458938518767325e-06, "loss": 0.85, "step": 12023 }, { "epoch": 0.8686762873191612, "grad_norm": 5.895800838672968, "learning_rate": 3.145611275708964e-06, "loss": 0.8492, "step": 12024 }, { "epoch": 0.8687485325193708, "grad_norm": 5.293914777945283, "learning_rate": 3.1453286907032795e-06, "loss": 0.8038, "step": 12025 }, { "epoch": 0.8688207777195802, "grad_norm": 6.150000173677271, "learning_rate": 3.145046096863545e-06, "loss": 0.7541, "step": 12026 }, { "epoch": 0.8688930229197898, "grad_norm": 5.748767886630619, "learning_rate": 3.144763494193631e-06, "loss": 0.7476, "step": 12027 }, { "epoch": 0.8689652681199993, "grad_norm": 5.811436053288795, "learning_rate": 3.1444808826974055e-06, "loss": 0.7905, "step": 12028 }, { "epoch": 0.8690375133202088, "grad_norm": 5.079725746801665, "learning_rate": 3.1441982623787365e-06, "loss": 0.8704, "step": 12029 }, { "epoch": 0.8691097585204183, "grad_norm": 4.782737749793603, "learning_rate": 3.1439156332414945e-06, "loss": 0.7236, "step": 12030 }, { "epoch": 0.8691820037206278, "grad_norm": 5.204833703806236, "learning_rate": 3.1436329952895466e-06, "loss": 0.78, "step": 12031 }, { "epoch": 0.8692542489208374, "grad_norm": 5.494205196474446, "learning_rate": 3.1433503485267636e-06, "loss": 0.7941, "step": 12032 }, { "epoch": 0.8693264941210468, "grad_norm": 6.436277699398877, "learning_rate": 3.143067692957012e-06, "loss": 0.7918, "step": 12033 }, { "epoch": 0.8693987393212563, "grad_norm": 6.864608401849459, "learning_rate": 3.142785028584165e-06, "loss": 0.7386, "step": 12034 }, { "epoch": 0.8694709845214659, "grad_norm": 6.35931531423542, "learning_rate": 3.1425023554120893e-06, "loss": 0.8894, "step": 12035 }, { "epoch": 0.8695432297216754, "grad_norm": 5.965876821455443, "learning_rate": 3.1422196734446553e-06, "loss": 0.8252, "step": 12036 }, { "epoch": 0.8696154749218848, "grad_norm": 5.490527231717056, "learning_rate": 3.141936982685732e-06, "loss": 0.7817, "step": 12037 }, { "epoch": 0.8696877201220944, "grad_norm": 5.376813183510841, "learning_rate": 3.1416542831391906e-06, "loss": 0.8127, "step": 12038 }, { "epoch": 0.8697599653223039, "grad_norm": 5.541578746279863, "learning_rate": 3.1413715748089e-06, "loss": 0.7278, "step": 12039 }, { "epoch": 0.8698322105225134, "grad_norm": 6.589680794610124, "learning_rate": 3.14108885769873e-06, "loss": 0.8536, "step": 12040 }, { "epoch": 0.8699044557227229, "grad_norm": 6.566109963199529, "learning_rate": 3.1408061318125527e-06, "loss": 0.8281, "step": 12041 }, { "epoch": 0.8699767009229324, "grad_norm": 6.849739136566711, "learning_rate": 3.140523397154236e-06, "loss": 0.8343, "step": 12042 }, { "epoch": 0.870048946123142, "grad_norm": 6.669979925168324, "learning_rate": 3.1402406537276513e-06, "loss": 0.8204, "step": 12043 }, { "epoch": 0.8701211913233514, "grad_norm": 5.511194627310196, "learning_rate": 3.13995790153667e-06, "loss": 0.786, "step": 12044 }, { "epoch": 0.870193436523561, "grad_norm": 5.322545505829057, "learning_rate": 3.139675140585161e-06, "loss": 0.8092, "step": 12045 }, { "epoch": 0.8702656817237705, "grad_norm": 7.14036509502039, "learning_rate": 3.1393923708769968e-06, "loss": 0.9214, "step": 12046 }, { "epoch": 0.87033792692398, "grad_norm": 8.452116788014834, "learning_rate": 3.1391095924160463e-06, "loss": 0.7815, "step": 12047 }, { "epoch": 0.8704101721241895, "grad_norm": 8.938931863967436, "learning_rate": 3.1388268052061827e-06, "loss": 0.7579, "step": 12048 }, { "epoch": 0.870482417324399, "grad_norm": 5.498853737419928, "learning_rate": 3.1385440092512753e-06, "loss": 0.7773, "step": 12049 }, { "epoch": 0.8705546625246086, "grad_norm": 7.320611351206941, "learning_rate": 3.1382612045551975e-06, "loss": 0.8084, "step": 12050 }, { "epoch": 0.870626907724818, "grad_norm": 7.908046834865841, "learning_rate": 3.1379783911218185e-06, "loss": 0.79, "step": 12051 }, { "epoch": 0.8706991529250275, "grad_norm": 6.926948840525518, "learning_rate": 3.1376955689550102e-06, "loss": 0.7945, "step": 12052 }, { "epoch": 0.8707713981252371, "grad_norm": 7.970171154678128, "learning_rate": 3.137412738058646e-06, "loss": 0.7397, "step": 12053 }, { "epoch": 0.8708436433254466, "grad_norm": 8.627632955512135, "learning_rate": 3.1371298984365958e-06, "loss": 0.7913, "step": 12054 }, { "epoch": 0.870915888525656, "grad_norm": 8.966763053669062, "learning_rate": 3.136847050092732e-06, "loss": 0.8595, "step": 12055 }, { "epoch": 0.8709881337258656, "grad_norm": 6.400842384689891, "learning_rate": 3.1365641930309266e-06, "loss": 0.7675, "step": 12056 }, { "epoch": 0.8710603789260751, "grad_norm": 6.548722109006178, "learning_rate": 3.1362813272550506e-06, "loss": 0.8527, "step": 12057 }, { "epoch": 0.8711326241262846, "grad_norm": 6.280886511462617, "learning_rate": 3.1359984527689785e-06, "loss": 0.7801, "step": 12058 }, { "epoch": 0.8712048693264941, "grad_norm": 7.394937752957572, "learning_rate": 3.135715569576581e-06, "loss": 0.7768, "step": 12059 }, { "epoch": 0.8712771145267036, "grad_norm": 6.635406957246952, "learning_rate": 3.135432677681732e-06, "loss": 0.8127, "step": 12060 }, { "epoch": 0.8713493597269132, "grad_norm": 6.042166046901564, "learning_rate": 3.135149777088301e-06, "loss": 0.8084, "step": 12061 }, { "epoch": 0.8714216049271226, "grad_norm": 5.780265435588523, "learning_rate": 3.134866867800164e-06, "loss": 0.852, "step": 12062 }, { "epoch": 0.8714938501273322, "grad_norm": 6.946489438208102, "learning_rate": 3.1345839498211922e-06, "loss": 0.8052, "step": 12063 }, { "epoch": 0.8715660953275417, "grad_norm": 6.263018972247211, "learning_rate": 3.1343010231552597e-06, "loss": 0.8817, "step": 12064 }, { "epoch": 0.8716383405277511, "grad_norm": 5.982055533286222, "learning_rate": 3.1340180878062378e-06, "loss": 0.754, "step": 12065 }, { "epoch": 0.8717105857279607, "grad_norm": 5.780515221992712, "learning_rate": 3.133735143778e-06, "loss": 0.8626, "step": 12066 }, { "epoch": 0.8717828309281702, "grad_norm": 6.166284738346522, "learning_rate": 3.133452191074421e-06, "loss": 0.8408, "step": 12067 }, { "epoch": 0.8718550761283798, "grad_norm": 6.703185608062974, "learning_rate": 3.133169229699373e-06, "loss": 0.784, "step": 12068 }, { "epoch": 0.8719273213285892, "grad_norm": 8.807292725427445, "learning_rate": 3.1328862596567304e-06, "loss": 0.8029, "step": 12069 }, { "epoch": 0.8719995665287987, "grad_norm": 6.009657399192076, "learning_rate": 3.132603280950366e-06, "loss": 0.8746, "step": 12070 }, { "epoch": 0.8720718117290083, "grad_norm": 7.2924930067947535, "learning_rate": 3.1323202935841536e-06, "loss": 0.7225, "step": 12071 }, { "epoch": 0.8721440569292178, "grad_norm": 4.971515965280811, "learning_rate": 3.1320372975619673e-06, "loss": 0.7223, "step": 12072 }, { "epoch": 0.8722163021294272, "grad_norm": 6.8142255645044, "learning_rate": 3.131754292887682e-06, "loss": 0.8472, "step": 12073 }, { "epoch": 0.8722885473296368, "grad_norm": 6.217791459476984, "learning_rate": 3.13147127956517e-06, "loss": 0.7235, "step": 12074 }, { "epoch": 0.8723607925298463, "grad_norm": 5.8096562473945585, "learning_rate": 3.131188257598307e-06, "loss": 0.7647, "step": 12075 }, { "epoch": 0.8724330377300558, "grad_norm": 7.121737352206859, "learning_rate": 3.1309052269909668e-06, "loss": 0.8374, "step": 12076 }, { "epoch": 0.8725052829302653, "grad_norm": 6.052128365415331, "learning_rate": 3.130622187747024e-06, "loss": 0.7682, "step": 12077 }, { "epoch": 0.8725775281304748, "grad_norm": 5.919791063797241, "learning_rate": 3.130339139870353e-06, "loss": 0.8519, "step": 12078 }, { "epoch": 0.8726497733306844, "grad_norm": 6.077507713372605, "learning_rate": 3.1300560833648285e-06, "loss": 0.7757, "step": 12079 }, { "epoch": 0.8727220185308938, "grad_norm": 5.780784794416366, "learning_rate": 3.129773018234325e-06, "loss": 0.8415, "step": 12080 }, { "epoch": 0.8727942637311034, "grad_norm": 7.821543593783335, "learning_rate": 3.129489944482718e-06, "loss": 0.7518, "step": 12081 }, { "epoch": 0.8728665089313129, "grad_norm": 6.431680066972137, "learning_rate": 3.1292068621138833e-06, "loss": 0.8497, "step": 12082 }, { "epoch": 0.8729387541315223, "grad_norm": 5.96625918199243, "learning_rate": 3.1289237711316943e-06, "loss": 0.8606, "step": 12083 }, { "epoch": 0.8730109993317319, "grad_norm": 6.5258097646741735, "learning_rate": 3.1286406715400282e-06, "loss": 0.8004, "step": 12084 }, { "epoch": 0.8730832445319414, "grad_norm": 6.284610892961292, "learning_rate": 3.1283575633427585e-06, "loss": 0.7706, "step": 12085 }, { "epoch": 0.873155489732151, "grad_norm": 6.525007705391613, "learning_rate": 3.1280744465437617e-06, "loss": 0.7754, "step": 12086 }, { "epoch": 0.8732277349323604, "grad_norm": 5.7597702611035935, "learning_rate": 3.127791321146914e-06, "loss": 0.7471, "step": 12087 }, { "epoch": 0.8732999801325699, "grad_norm": 9.5674995866132, "learning_rate": 3.12750818715609e-06, "loss": 0.8168, "step": 12088 }, { "epoch": 0.8733722253327795, "grad_norm": 7.071175706974495, "learning_rate": 3.127225044575166e-06, "loss": 0.8085, "step": 12089 }, { "epoch": 0.873444470532989, "grad_norm": 7.394120855374015, "learning_rate": 3.1269418934080186e-06, "loss": 0.8242, "step": 12090 }, { "epoch": 0.8735167157331984, "grad_norm": 6.008991815229743, "learning_rate": 3.1266587336585234e-06, "loss": 0.7883, "step": 12091 }, { "epoch": 0.873588960933408, "grad_norm": 6.6171183216572835, "learning_rate": 3.1263755653305568e-06, "loss": 0.7643, "step": 12092 }, { "epoch": 0.8736612061336175, "grad_norm": 6.134342153483604, "learning_rate": 3.1260923884279947e-06, "loss": 0.7704, "step": 12093 }, { "epoch": 0.873733451333827, "grad_norm": 5.489664597123946, "learning_rate": 3.1258092029547145e-06, "loss": 0.6943, "step": 12094 }, { "epoch": 0.8738056965340365, "grad_norm": 7.026411095350127, "learning_rate": 3.1255260089145913e-06, "loss": 0.7421, "step": 12095 }, { "epoch": 0.873877941734246, "grad_norm": 5.5553245983860045, "learning_rate": 3.125242806311504e-06, "loss": 0.7245, "step": 12096 }, { "epoch": 0.8739501869344556, "grad_norm": 7.425353831071053, "learning_rate": 3.124959595149327e-06, "loss": 0.8148, "step": 12097 }, { "epoch": 0.874022432134665, "grad_norm": 5.471860513281259, "learning_rate": 3.1246763754319392e-06, "loss": 0.8119, "step": 12098 }, { "epoch": 0.8740946773348746, "grad_norm": 6.131608473071402, "learning_rate": 3.124393147163216e-06, "loss": 0.7927, "step": 12099 }, { "epoch": 0.8741669225350841, "grad_norm": 6.249048084246793, "learning_rate": 3.1241099103470358e-06, "loss": 0.7281, "step": 12100 }, { "epoch": 0.8742391677352935, "grad_norm": 5.816171122577087, "learning_rate": 3.123826664987276e-06, "loss": 0.8328, "step": 12101 }, { "epoch": 0.8743114129355031, "grad_norm": 6.310202605415037, "learning_rate": 3.123543411087813e-06, "loss": 0.788, "step": 12102 }, { "epoch": 0.8743836581357126, "grad_norm": 6.007160682191706, "learning_rate": 3.1232601486525255e-06, "loss": 0.8152, "step": 12103 }, { "epoch": 0.8744559033359222, "grad_norm": 6.471595912130357, "learning_rate": 3.1229768776852893e-06, "loss": 0.8975, "step": 12104 }, { "epoch": 0.8745281485361316, "grad_norm": 8.859006843789025, "learning_rate": 3.122693598189984e-06, "loss": 0.8683, "step": 12105 }, { "epoch": 0.8746003937363411, "grad_norm": 5.402361579473923, "learning_rate": 3.122410310170487e-06, "loss": 0.7754, "step": 12106 }, { "epoch": 0.8746726389365507, "grad_norm": 6.467724999936584, "learning_rate": 3.1221270136306764e-06, "loss": 0.8217, "step": 12107 }, { "epoch": 0.8747448841367602, "grad_norm": 6.05089094679453, "learning_rate": 3.121843708574429e-06, "loss": 0.8196, "step": 12108 }, { "epoch": 0.8748171293369696, "grad_norm": 6.658849647310584, "learning_rate": 3.121560395005625e-06, "loss": 0.7885, "step": 12109 }, { "epoch": 0.8748893745371792, "grad_norm": 6.870033828674274, "learning_rate": 3.1212770729281418e-06, "loss": 0.7689, "step": 12110 }, { "epoch": 0.8749616197373887, "grad_norm": 5.09138836555331, "learning_rate": 3.120993742345857e-06, "loss": 0.8195, "step": 12111 }, { "epoch": 0.8750338649375982, "grad_norm": 6.932255317560209, "learning_rate": 3.120710403262651e-06, "loss": 0.7713, "step": 12112 }, { "epoch": 0.8751061101378077, "grad_norm": 6.092027000001724, "learning_rate": 3.1204270556824013e-06, "loss": 0.7956, "step": 12113 }, { "epoch": 0.8751783553380172, "grad_norm": 6.840042294377074, "learning_rate": 3.1201436996089864e-06, "loss": 0.8749, "step": 12114 }, { "epoch": 0.8752506005382268, "grad_norm": 6.188183062833599, "learning_rate": 3.119860335046286e-06, "loss": 0.8332, "step": 12115 }, { "epoch": 0.8753228457384362, "grad_norm": 7.067246993845728, "learning_rate": 3.119576961998179e-06, "loss": 0.9409, "step": 12116 }, { "epoch": 0.8753950909386458, "grad_norm": 7.634741392021532, "learning_rate": 3.1192935804685443e-06, "loss": 0.835, "step": 12117 }, { "epoch": 0.8754673361388553, "grad_norm": 5.777376764188971, "learning_rate": 3.119010190461261e-06, "loss": 0.7747, "step": 12118 }, { "epoch": 0.8755395813390647, "grad_norm": 6.44738573426443, "learning_rate": 3.11872679198021e-06, "loss": 0.8257, "step": 12119 }, { "epoch": 0.8756118265392743, "grad_norm": 6.654439903896519, "learning_rate": 3.118443385029269e-06, "loss": 0.7707, "step": 12120 }, { "epoch": 0.8756840717394838, "grad_norm": 5.397269491103323, "learning_rate": 3.118159969612319e-06, "loss": 0.8519, "step": 12121 }, { "epoch": 0.8757563169396934, "grad_norm": 6.666971231497203, "learning_rate": 3.1178765457332376e-06, "loss": 0.8382, "step": 12122 }, { "epoch": 0.8758285621399028, "grad_norm": 6.3367788412381625, "learning_rate": 3.1175931133959065e-06, "loss": 0.8114, "step": 12123 }, { "epoch": 0.8759008073401123, "grad_norm": 7.267236411947713, "learning_rate": 3.1173096726042053e-06, "loss": 0.8688, "step": 12124 }, { "epoch": 0.8759730525403219, "grad_norm": 8.700873092162531, "learning_rate": 3.117026223362014e-06, "loss": 0.8398, "step": 12125 }, { "epoch": 0.8760452977405314, "grad_norm": 7.359511841880632, "learning_rate": 3.1167427656732135e-06, "loss": 0.933, "step": 12126 }, { "epoch": 0.8761175429407408, "grad_norm": 6.335122541704162, "learning_rate": 3.1164592995416826e-06, "loss": 0.9002, "step": 12127 }, { "epoch": 0.8761897881409504, "grad_norm": 5.77422305739083, "learning_rate": 3.1161758249713027e-06, "loss": 0.7862, "step": 12128 }, { "epoch": 0.8762620333411599, "grad_norm": 5.481871639481622, "learning_rate": 3.1158923419659536e-06, "loss": 0.788, "step": 12129 }, { "epoch": 0.8763342785413694, "grad_norm": 5.290227358859244, "learning_rate": 3.115608850529517e-06, "loss": 0.855, "step": 12130 }, { "epoch": 0.8764065237415789, "grad_norm": 6.397084895474822, "learning_rate": 3.1153253506658737e-06, "loss": 0.7937, "step": 12131 }, { "epoch": 0.8764787689417884, "grad_norm": 6.7698222965600765, "learning_rate": 3.1150418423789034e-06, "loss": 0.8083, "step": 12132 }, { "epoch": 0.876551014141998, "grad_norm": 6.276112747317484, "learning_rate": 3.1147583256724884e-06, "loss": 0.7898, "step": 12133 }, { "epoch": 0.8766232593422074, "grad_norm": 6.511288157816478, "learning_rate": 3.1144748005505092e-06, "loss": 0.7281, "step": 12134 }, { "epoch": 0.876695504542417, "grad_norm": 6.068536329190997, "learning_rate": 3.1141912670168474e-06, "loss": 0.7945, "step": 12135 }, { "epoch": 0.8767677497426265, "grad_norm": 7.29634932068249, "learning_rate": 3.1139077250753837e-06, "loss": 0.829, "step": 12136 }, { "epoch": 0.8768399949428359, "grad_norm": 6.528260637465664, "learning_rate": 3.1136241747299988e-06, "loss": 0.7724, "step": 12137 }, { "epoch": 0.8769122401430455, "grad_norm": 6.235592830713007, "learning_rate": 3.1133406159845762e-06, "loss": 0.6996, "step": 12138 }, { "epoch": 0.876984485343255, "grad_norm": 6.170225130545975, "learning_rate": 3.113057048842998e-06, "loss": 0.8499, "step": 12139 }, { "epoch": 0.8770567305434646, "grad_norm": 6.725977784727253, "learning_rate": 3.112773473309143e-06, "loss": 0.7746, "step": 12140 }, { "epoch": 0.877128975743674, "grad_norm": 6.192238582657389, "learning_rate": 3.1124898893868966e-06, "loss": 0.7692, "step": 12141 }, { "epoch": 0.8772012209438835, "grad_norm": 9.059347090952828, "learning_rate": 3.112206297080138e-06, "loss": 0.7712, "step": 12142 }, { "epoch": 0.8772734661440931, "grad_norm": 6.858964286447603, "learning_rate": 3.1119226963927505e-06, "loss": 0.783, "step": 12143 }, { "epoch": 0.8773457113443026, "grad_norm": 7.300509542161787, "learning_rate": 3.1116390873286174e-06, "loss": 0.8156, "step": 12144 }, { "epoch": 0.877417956544512, "grad_norm": 6.272751862147287, "learning_rate": 3.1113554698916188e-06, "loss": 0.8794, "step": 12145 }, { "epoch": 0.8774902017447216, "grad_norm": 6.4982752345657016, "learning_rate": 3.111071844085639e-06, "loss": 0.7666, "step": 12146 }, { "epoch": 0.8775624469449311, "grad_norm": 6.483020613449726, "learning_rate": 3.1107882099145604e-06, "loss": 0.8677, "step": 12147 }, { "epoch": 0.8776346921451406, "grad_norm": 7.6601323463421025, "learning_rate": 3.1105045673822654e-06, "loss": 0.8479, "step": 12148 }, { "epoch": 0.8777069373453501, "grad_norm": 6.915121465072684, "learning_rate": 3.110220916492637e-06, "loss": 0.9173, "step": 12149 }, { "epoch": 0.8777791825455596, "grad_norm": 6.1441255254837035, "learning_rate": 3.109937257249558e-06, "loss": 0.8478, "step": 12150 }, { "epoch": 0.8778514277457692, "grad_norm": 7.837493954888892, "learning_rate": 3.1096535896569115e-06, "loss": 0.7265, "step": 12151 }, { "epoch": 0.8779236729459786, "grad_norm": 7.039792716269217, "learning_rate": 3.1093699137185802e-06, "loss": 0.8454, "step": 12152 }, { "epoch": 0.8779959181461882, "grad_norm": 6.419395692138481, "learning_rate": 3.1090862294384487e-06, "loss": 0.8578, "step": 12153 }, { "epoch": 0.8780681633463977, "grad_norm": 6.2513359167964175, "learning_rate": 3.1088025368203994e-06, "loss": 0.7931, "step": 12154 }, { "epoch": 0.8781404085466071, "grad_norm": 6.186017099568376, "learning_rate": 3.108518835868316e-06, "loss": 0.7899, "step": 12155 }, { "epoch": 0.8782126537468167, "grad_norm": 5.9486400300225855, "learning_rate": 3.1082351265860815e-06, "loss": 0.8051, "step": 12156 }, { "epoch": 0.8782848989470262, "grad_norm": 6.924712622012623, "learning_rate": 3.1079514089775815e-06, "loss": 0.7883, "step": 12157 }, { "epoch": 0.8783571441472358, "grad_norm": 6.42693521173353, "learning_rate": 3.1076676830466983e-06, "loss": 0.8462, "step": 12158 }, { "epoch": 0.8784293893474452, "grad_norm": 5.375831539682209, "learning_rate": 3.1073839487973167e-06, "loss": 0.7919, "step": 12159 }, { "epoch": 0.8785016345476547, "grad_norm": 5.962905978730642, "learning_rate": 3.10710020623332e-06, "loss": 0.7934, "step": 12160 }, { "epoch": 0.8785738797478643, "grad_norm": 5.144715325401294, "learning_rate": 3.106816455358593e-06, "loss": 0.8412, "step": 12161 }, { "epoch": 0.8786461249480738, "grad_norm": 6.59762849683328, "learning_rate": 3.1065326961770204e-06, "loss": 0.8408, "step": 12162 }, { "epoch": 0.8787183701482832, "grad_norm": 6.10906123065345, "learning_rate": 3.106248928692485e-06, "loss": 0.8162, "step": 12163 }, { "epoch": 0.8787906153484928, "grad_norm": 6.6868816339403025, "learning_rate": 3.1059651529088738e-06, "loss": 0.9187, "step": 12164 }, { "epoch": 0.8788628605487023, "grad_norm": 6.333132422757295, "learning_rate": 3.105681368830069e-06, "loss": 0.8097, "step": 12165 }, { "epoch": 0.8789351057489118, "grad_norm": 5.839343689396985, "learning_rate": 3.105397576459957e-06, "loss": 0.8276, "step": 12166 }, { "epoch": 0.8790073509491213, "grad_norm": 7.106487383605884, "learning_rate": 3.1051137758024225e-06, "loss": 0.8634, "step": 12167 }, { "epoch": 0.8790795961493308, "grad_norm": 7.366405846076588, "learning_rate": 3.1048299668613495e-06, "loss": 0.7889, "step": 12168 }, { "epoch": 0.8791518413495404, "grad_norm": 5.590279125689968, "learning_rate": 3.1045461496406247e-06, "loss": 0.7896, "step": 12169 }, { "epoch": 0.8792240865497498, "grad_norm": 5.547146986619528, "learning_rate": 3.1042623241441318e-06, "loss": 0.7707, "step": 12170 }, { "epoch": 0.8792963317499594, "grad_norm": 6.178509895402604, "learning_rate": 3.1039784903757573e-06, "loss": 0.7232, "step": 12171 }, { "epoch": 0.8793685769501689, "grad_norm": 5.5161490475086925, "learning_rate": 3.1036946483393863e-06, "loss": 0.8192, "step": 12172 }, { "epoch": 0.8794408221503783, "grad_norm": 6.471398442588308, "learning_rate": 3.1034107980389044e-06, "loss": 0.8416, "step": 12173 }, { "epoch": 0.8795130673505879, "grad_norm": 5.5801743448017636, "learning_rate": 3.103126939478197e-06, "loss": 0.783, "step": 12174 }, { "epoch": 0.8795853125507974, "grad_norm": 6.711145542104171, "learning_rate": 3.1028430726611496e-06, "loss": 0.8357, "step": 12175 }, { "epoch": 0.879657557751007, "grad_norm": 6.342030860712712, "learning_rate": 3.10255919759165e-06, "loss": 0.763, "step": 12176 }, { "epoch": 0.8797298029512164, "grad_norm": 5.480500420324565, "learning_rate": 3.102275314273581e-06, "loss": 0.8179, "step": 12177 }, { "epoch": 0.8798020481514259, "grad_norm": 6.2991354697304, "learning_rate": 3.1019914227108323e-06, "loss": 0.8371, "step": 12178 }, { "epoch": 0.8798742933516355, "grad_norm": 6.689416913679363, "learning_rate": 3.1017075229072873e-06, "loss": 0.7862, "step": 12179 }, { "epoch": 0.879946538551845, "grad_norm": 8.251372714425356, "learning_rate": 3.1014236148668336e-06, "loss": 0.8173, "step": 12180 }, { "epoch": 0.8800187837520544, "grad_norm": 6.873820810627892, "learning_rate": 3.1011396985933576e-06, "loss": 0.825, "step": 12181 }, { "epoch": 0.880091028952264, "grad_norm": 5.354707425884377, "learning_rate": 3.100855774090746e-06, "loss": 0.8725, "step": 12182 }, { "epoch": 0.8801632741524735, "grad_norm": 6.08555733077284, "learning_rate": 3.100571841362886e-06, "loss": 0.8819, "step": 12183 }, { "epoch": 0.880235519352683, "grad_norm": 5.509698812626637, "learning_rate": 3.1002879004136633e-06, "loss": 0.7214, "step": 12184 }, { "epoch": 0.8803077645528925, "grad_norm": 5.7424635217460445, "learning_rate": 3.1000039512469656e-06, "loss": 0.7438, "step": 12185 }, { "epoch": 0.880380009753102, "grad_norm": 6.111953230354358, "learning_rate": 3.0997199938666788e-06, "loss": 0.8164, "step": 12186 }, { "epoch": 0.8804522549533116, "grad_norm": 8.224096147680246, "learning_rate": 3.0994360282766924e-06, "loss": 0.8786, "step": 12187 }, { "epoch": 0.880524500153521, "grad_norm": 5.6867853071948264, "learning_rate": 3.0991520544808914e-06, "loss": 0.8595, "step": 12188 }, { "epoch": 0.8805967453537306, "grad_norm": 7.073601563617402, "learning_rate": 3.098868072483163e-06, "loss": 0.8092, "step": 12189 }, { "epoch": 0.8806689905539401, "grad_norm": 6.763030296870384, "learning_rate": 3.098584082287397e-06, "loss": 0.7241, "step": 12190 }, { "epoch": 0.8807412357541495, "grad_norm": 6.170208128845532, "learning_rate": 3.0983000838974797e-06, "loss": 0.7118, "step": 12191 }, { "epoch": 0.8808134809543591, "grad_norm": 6.258223349886956, "learning_rate": 3.0980160773172985e-06, "loss": 0.8681, "step": 12192 }, { "epoch": 0.8808857261545686, "grad_norm": 8.158512430170232, "learning_rate": 3.0977320625507413e-06, "loss": 0.7476, "step": 12193 }, { "epoch": 0.8809579713547782, "grad_norm": 9.606817336196972, "learning_rate": 3.0974480396016963e-06, "loss": 0.8359, "step": 12194 }, { "epoch": 0.8810302165549876, "grad_norm": 9.065863044503773, "learning_rate": 3.0971640084740514e-06, "loss": 0.8979, "step": 12195 }, { "epoch": 0.8811024617551971, "grad_norm": 7.046963777849871, "learning_rate": 3.0968799691716957e-06, "loss": 0.801, "step": 12196 }, { "epoch": 0.8811747069554067, "grad_norm": 8.233386796579513, "learning_rate": 3.096595921698516e-06, "loss": 0.8493, "step": 12197 }, { "epoch": 0.8812469521556162, "grad_norm": 7.177701158511577, "learning_rate": 3.096311866058401e-06, "loss": 0.937, "step": 12198 }, { "epoch": 0.8813191973558256, "grad_norm": 6.596979444174829, "learning_rate": 3.0960278022552398e-06, "loss": 0.7736, "step": 12199 }, { "epoch": 0.8813914425560352, "grad_norm": 10.59341727215317, "learning_rate": 3.0957437302929217e-06, "loss": 0.7976, "step": 12200 }, { "epoch": 0.8814636877562447, "grad_norm": 6.567134419675296, "learning_rate": 3.0954596501753335e-06, "loss": 0.818, "step": 12201 }, { "epoch": 0.8815359329564542, "grad_norm": 5.481197121168287, "learning_rate": 3.0951755619063657e-06, "loss": 0.8326, "step": 12202 }, { "epoch": 0.8816081781566637, "grad_norm": 8.793484130831175, "learning_rate": 3.094891465489906e-06, "loss": 0.7881, "step": 12203 }, { "epoch": 0.8816804233568732, "grad_norm": 7.022892711247368, "learning_rate": 3.094607360929844e-06, "loss": 0.8238, "step": 12204 }, { "epoch": 0.8817526685570828, "grad_norm": 5.9827524879057945, "learning_rate": 3.0943232482300696e-06, "loss": 0.7896, "step": 12205 }, { "epoch": 0.8818249137572922, "grad_norm": 6.284010550972709, "learning_rate": 3.09403912739447e-06, "loss": 0.9508, "step": 12206 }, { "epoch": 0.8818971589575018, "grad_norm": 7.806091121281509, "learning_rate": 3.0937549984269376e-06, "loss": 0.7749, "step": 12207 }, { "epoch": 0.8819694041577113, "grad_norm": 6.518144957277495, "learning_rate": 3.0934708613313595e-06, "loss": 0.868, "step": 12208 }, { "epoch": 0.8820416493579207, "grad_norm": 5.767164071448111, "learning_rate": 3.093186716111626e-06, "loss": 0.861, "step": 12209 }, { "epoch": 0.8821138945581303, "grad_norm": 5.574108740209565, "learning_rate": 3.0929025627716282e-06, "loss": 0.7889, "step": 12210 }, { "epoch": 0.8821861397583398, "grad_norm": 7.555951017738979, "learning_rate": 3.0926184013152534e-06, "loss": 0.7985, "step": 12211 }, { "epoch": 0.8822583849585494, "grad_norm": 5.792522925149188, "learning_rate": 3.0923342317463934e-06, "loss": 0.8116, "step": 12212 }, { "epoch": 0.8823306301587588, "grad_norm": 5.559060051044875, "learning_rate": 3.0920500540689365e-06, "loss": 0.857, "step": 12213 }, { "epoch": 0.8824028753589683, "grad_norm": 5.783263149806699, "learning_rate": 3.0917658682867753e-06, "loss": 0.849, "step": 12214 }, { "epoch": 0.8824751205591779, "grad_norm": 6.121522188559745, "learning_rate": 3.0914816744037986e-06, "loss": 0.8283, "step": 12215 }, { "epoch": 0.8825473657593874, "grad_norm": 8.02048111839966, "learning_rate": 3.091197472423897e-06, "loss": 0.8523, "step": 12216 }, { "epoch": 0.8826196109595968, "grad_norm": 8.043482389348155, "learning_rate": 3.090913262350961e-06, "loss": 0.8072, "step": 12217 }, { "epoch": 0.8826918561598064, "grad_norm": 6.062338600763136, "learning_rate": 3.0906290441888807e-06, "loss": 0.7467, "step": 12218 }, { "epoch": 0.8827641013600159, "grad_norm": 5.3312528048974155, "learning_rate": 3.090344817941548e-06, "loss": 0.8168, "step": 12219 }, { "epoch": 0.8828363465602254, "grad_norm": 6.048755915071891, "learning_rate": 3.0900605836128526e-06, "loss": 0.838, "step": 12220 }, { "epoch": 0.8829085917604349, "grad_norm": 5.270313021142132, "learning_rate": 3.089776341206687e-06, "loss": 0.8194, "step": 12221 }, { "epoch": 0.8829808369606444, "grad_norm": 5.6369901236217, "learning_rate": 3.0894920907269403e-06, "loss": 0.8038, "step": 12222 }, { "epoch": 0.883053082160854, "grad_norm": 10.221219478942752, "learning_rate": 3.0892078321775047e-06, "loss": 0.9285, "step": 12223 }, { "epoch": 0.8831253273610634, "grad_norm": 7.529725992959686, "learning_rate": 3.088923565562271e-06, "loss": 0.8364, "step": 12224 }, { "epoch": 0.883197572561273, "grad_norm": 6.007530255378421, "learning_rate": 3.088639290885132e-06, "loss": 0.8, "step": 12225 }, { "epoch": 0.8832698177614825, "grad_norm": 6.478845062942112, "learning_rate": 3.088355008149978e-06, "loss": 0.7643, "step": 12226 }, { "epoch": 0.8833420629616919, "grad_norm": 6.162890265992416, "learning_rate": 3.0880707173606998e-06, "loss": 0.8571, "step": 12227 }, { "epoch": 0.8834143081619015, "grad_norm": 5.9519980658046086, "learning_rate": 3.0877864185211916e-06, "loss": 0.7356, "step": 12228 }, { "epoch": 0.883486553362111, "grad_norm": 6.60006859483619, "learning_rate": 3.0875021116353423e-06, "loss": 0.8571, "step": 12229 }, { "epoch": 0.8835587985623206, "grad_norm": 7.494128917439382, "learning_rate": 3.087217796707046e-06, "loss": 0.8115, "step": 12230 }, { "epoch": 0.88363104376253, "grad_norm": 5.526487034699543, "learning_rate": 3.0869334737401935e-06, "loss": 0.8495, "step": 12231 }, { "epoch": 0.8837032889627395, "grad_norm": 6.199417505281053, "learning_rate": 3.0866491427386775e-06, "loss": 0.828, "step": 12232 }, { "epoch": 0.8837755341629491, "grad_norm": 5.7665052284766745, "learning_rate": 3.086364803706391e-06, "loss": 0.777, "step": 12233 }, { "epoch": 0.8838477793631586, "grad_norm": 5.970573106921785, "learning_rate": 3.0860804566472245e-06, "loss": 0.8673, "step": 12234 }, { "epoch": 0.883920024563368, "grad_norm": 5.8896428511258945, "learning_rate": 3.085796101565073e-06, "loss": 0.7449, "step": 12235 }, { "epoch": 0.8839922697635776, "grad_norm": 7.927186528326605, "learning_rate": 3.0855117384638267e-06, "loss": 0.9266, "step": 12236 }, { "epoch": 0.8840645149637871, "grad_norm": 6.39695519498156, "learning_rate": 3.085227367347379e-06, "loss": 0.8471, "step": 12237 }, { "epoch": 0.8841367601639966, "grad_norm": 6.352891470936987, "learning_rate": 3.0849429882196238e-06, "loss": 0.7075, "step": 12238 }, { "epoch": 0.8842090053642061, "grad_norm": 6.557708153338728, "learning_rate": 3.0846586010844538e-06, "loss": 0.877, "step": 12239 }, { "epoch": 0.8842812505644156, "grad_norm": 6.67432281830957, "learning_rate": 3.0843742059457606e-06, "loss": 0.7369, "step": 12240 }, { "epoch": 0.8843534957646252, "grad_norm": 5.317890618687471, "learning_rate": 3.084089802807438e-06, "loss": 0.8354, "step": 12241 }, { "epoch": 0.8844257409648346, "grad_norm": 5.850420566667388, "learning_rate": 3.08380539167338e-06, "loss": 0.8039, "step": 12242 }, { "epoch": 0.8844979861650442, "grad_norm": 5.582345258290969, "learning_rate": 3.083520972547479e-06, "loss": 0.7629, "step": 12243 }, { "epoch": 0.8845702313652537, "grad_norm": 5.810399486050851, "learning_rate": 3.08323654543363e-06, "loss": 0.8144, "step": 12244 }, { "epoch": 0.8846424765654631, "grad_norm": 5.415753605499286, "learning_rate": 3.0829521103357246e-06, "loss": 0.7579, "step": 12245 }, { "epoch": 0.8847147217656727, "grad_norm": 5.60230100224679, "learning_rate": 3.082667667257658e-06, "loss": 0.7792, "step": 12246 }, { "epoch": 0.8847869669658822, "grad_norm": 5.475213465600815, "learning_rate": 3.082383216203323e-06, "loss": 0.7406, "step": 12247 }, { "epoch": 0.8848592121660918, "grad_norm": 6.608311200034689, "learning_rate": 3.082098757176614e-06, "loss": 0.908, "step": 12248 }, { "epoch": 0.8849314573663012, "grad_norm": 7.5968317756380355, "learning_rate": 3.0818142901814254e-06, "loss": 0.7737, "step": 12249 }, { "epoch": 0.8850037025665107, "grad_norm": 5.283421177339377, "learning_rate": 3.081529815221651e-06, "loss": 0.7891, "step": 12250 }, { "epoch": 0.8850759477667203, "grad_norm": 6.215431439745574, "learning_rate": 3.081245332301184e-06, "loss": 0.778, "step": 12251 }, { "epoch": 0.8851481929669298, "grad_norm": 5.424013447162534, "learning_rate": 3.0809608414239205e-06, "loss": 0.8207, "step": 12252 }, { "epoch": 0.8852204381671392, "grad_norm": 6.9588111515417825, "learning_rate": 3.080676342593755e-06, "loss": 0.8477, "step": 12253 }, { "epoch": 0.8852926833673488, "grad_norm": 7.20972595286601, "learning_rate": 3.0803918358145796e-06, "loss": 0.8153, "step": 12254 }, { "epoch": 0.8853649285675583, "grad_norm": 5.732340688309245, "learning_rate": 3.080107321090291e-06, "loss": 0.7879, "step": 12255 }, { "epoch": 0.8854371737677678, "grad_norm": 5.880061140715688, "learning_rate": 3.0798227984247837e-06, "loss": 0.7966, "step": 12256 }, { "epoch": 0.8855094189679773, "grad_norm": 7.741292676593974, "learning_rate": 3.079538267821953e-06, "loss": 0.7884, "step": 12257 }, { "epoch": 0.8855816641681868, "grad_norm": 6.863423536515983, "learning_rate": 3.0792537292856933e-06, "loss": 0.8172, "step": 12258 }, { "epoch": 0.8856539093683964, "grad_norm": 6.2380754774883735, "learning_rate": 3.0789691828199e-06, "loss": 0.8132, "step": 12259 }, { "epoch": 0.8857261545686058, "grad_norm": 6.402533816390159, "learning_rate": 3.078684628428467e-06, "loss": 0.8502, "step": 12260 }, { "epoch": 0.8857983997688154, "grad_norm": 6.433591674730236, "learning_rate": 3.0784000661152914e-06, "loss": 0.8411, "step": 12261 }, { "epoch": 0.8858706449690249, "grad_norm": 5.399183296326768, "learning_rate": 3.0781154958842683e-06, "loss": 0.7745, "step": 12262 }, { "epoch": 0.8859428901692343, "grad_norm": 6.353727865441353, "learning_rate": 3.0778309177392923e-06, "loss": 0.8849, "step": 12263 }, { "epoch": 0.8860151353694439, "grad_norm": 5.188275750307656, "learning_rate": 3.077546331684261e-06, "loss": 0.8247, "step": 12264 }, { "epoch": 0.8860873805696534, "grad_norm": 5.749790685409808, "learning_rate": 3.077261737723067e-06, "loss": 0.773, "step": 12265 }, { "epoch": 0.886159625769863, "grad_norm": 5.218285088760303, "learning_rate": 3.076977135859609e-06, "loss": 0.7856, "step": 12266 }, { "epoch": 0.8862318709700724, "grad_norm": 6.353490708047514, "learning_rate": 3.0766925260977827e-06, "loss": 0.7909, "step": 12267 }, { "epoch": 0.8863041161702819, "grad_norm": 7.692592019915217, "learning_rate": 3.0764079084414822e-06, "loss": 0.8202, "step": 12268 }, { "epoch": 0.8863763613704915, "grad_norm": 6.561128018786575, "learning_rate": 3.0761232828946053e-06, "loss": 0.8616, "step": 12269 }, { "epoch": 0.886448606570701, "grad_norm": 5.576374708986075, "learning_rate": 3.0758386494610483e-06, "loss": 0.7168, "step": 12270 }, { "epoch": 0.8865208517709104, "grad_norm": 7.3572243303716185, "learning_rate": 3.075554008144708e-06, "loss": 0.8684, "step": 12271 }, { "epoch": 0.88659309697112, "grad_norm": 7.5698922338839925, "learning_rate": 3.0752693589494787e-06, "loss": 0.8297, "step": 12272 }, { "epoch": 0.8866653421713295, "grad_norm": 6.795738949598767, "learning_rate": 3.0749847018792597e-06, "loss": 0.8358, "step": 12273 }, { "epoch": 0.886737587371539, "grad_norm": 6.052225431874019, "learning_rate": 3.074700036937946e-06, "loss": 0.8288, "step": 12274 }, { "epoch": 0.8868098325717485, "grad_norm": 5.75141060153687, "learning_rate": 3.074415364129435e-06, "loss": 0.8175, "step": 12275 }, { "epoch": 0.886882077771958, "grad_norm": 7.240292330999494, "learning_rate": 3.074130683457624e-06, "loss": 0.7703, "step": 12276 }, { "epoch": 0.8869543229721676, "grad_norm": 6.700983732512595, "learning_rate": 3.073845994926409e-06, "loss": 0.8419, "step": 12277 }, { "epoch": 0.887026568172377, "grad_norm": 6.280744997287126, "learning_rate": 3.0735612985396897e-06, "loss": 0.7895, "step": 12278 }, { "epoch": 0.8870988133725866, "grad_norm": 6.546567340623061, "learning_rate": 3.0732765943013594e-06, "loss": 0.7191, "step": 12279 }, { "epoch": 0.8871710585727961, "grad_norm": 5.501434832594598, "learning_rate": 3.0729918822153188e-06, "loss": 0.7695, "step": 12280 }, { "epoch": 0.8872433037730055, "grad_norm": 6.485389721997546, "learning_rate": 3.072707162285464e-06, "loss": 0.8447, "step": 12281 }, { "epoch": 0.8873155489732151, "grad_norm": 5.746265691470649, "learning_rate": 3.0724224345156926e-06, "loss": 0.775, "step": 12282 }, { "epoch": 0.8873877941734246, "grad_norm": 6.3855452739797345, "learning_rate": 3.0721376989099026e-06, "loss": 0.8573, "step": 12283 }, { "epoch": 0.8874600393736342, "grad_norm": 6.7984883816111905, "learning_rate": 3.071852955471992e-06, "loss": 0.8556, "step": 12284 }, { "epoch": 0.8875322845738436, "grad_norm": 6.090491729577887, "learning_rate": 3.0715682042058585e-06, "loss": 0.7352, "step": 12285 }, { "epoch": 0.8876045297740531, "grad_norm": 6.998073857835923, "learning_rate": 3.0712834451154e-06, "loss": 0.836, "step": 12286 }, { "epoch": 0.8876767749742627, "grad_norm": 6.084432981554752, "learning_rate": 3.070998678204515e-06, "loss": 0.7856, "step": 12287 }, { "epoch": 0.8877490201744721, "grad_norm": 6.89053420787691, "learning_rate": 3.0707139034771013e-06, "loss": 0.8702, "step": 12288 }, { "epoch": 0.8878212653746816, "grad_norm": 6.551173144397199, "learning_rate": 3.070429120937057e-06, "loss": 0.8719, "step": 12289 }, { "epoch": 0.8878935105748912, "grad_norm": 6.659208544651692, "learning_rate": 3.0701443305882806e-06, "loss": 0.7901, "step": 12290 }, { "epoch": 0.8879657557751007, "grad_norm": 7.574106436925157, "learning_rate": 3.069859532434672e-06, "loss": 0.8258, "step": 12291 }, { "epoch": 0.8880380009753102, "grad_norm": 6.56695754025022, "learning_rate": 3.0695747264801286e-06, "loss": 0.7954, "step": 12292 }, { "epoch": 0.8881102461755197, "grad_norm": 5.59523698020236, "learning_rate": 3.069289912728549e-06, "loss": 0.8767, "step": 12293 }, { "epoch": 0.8881824913757292, "grad_norm": 5.910950097462124, "learning_rate": 3.0690050911838325e-06, "loss": 0.7875, "step": 12294 }, { "epoch": 0.8882547365759388, "grad_norm": 7.101359180859406, "learning_rate": 3.0687202618498786e-06, "loss": 0.8294, "step": 12295 }, { "epoch": 0.8883269817761482, "grad_norm": 6.9159246146228694, "learning_rate": 3.0684354247305857e-06, "loss": 0.8581, "step": 12296 }, { "epoch": 0.8883992269763578, "grad_norm": 7.669084527041683, "learning_rate": 3.0681505798298527e-06, "loss": 0.7467, "step": 12297 }, { "epoch": 0.8884714721765673, "grad_norm": 5.506386949825715, "learning_rate": 3.06786572715158e-06, "loss": 0.7912, "step": 12298 }, { "epoch": 0.8885437173767767, "grad_norm": 5.966627772255971, "learning_rate": 3.0675808666996665e-06, "loss": 0.817, "step": 12299 }, { "epoch": 0.8886159625769863, "grad_norm": 5.035075374217138, "learning_rate": 3.0672959984780115e-06, "loss": 0.7807, "step": 12300 }, { "epoch": 0.8886882077771958, "grad_norm": 6.787758231124561, "learning_rate": 3.0670111224905146e-06, "loss": 0.8065, "step": 12301 }, { "epoch": 0.8887604529774054, "grad_norm": 5.9638236122169745, "learning_rate": 3.066726238741076e-06, "loss": 0.7278, "step": 12302 }, { "epoch": 0.8888326981776148, "grad_norm": 9.382396171029885, "learning_rate": 3.0664413472335945e-06, "loss": 0.9062, "step": 12303 }, { "epoch": 0.8889049433778243, "grad_norm": 6.463224105554676, "learning_rate": 3.066156447971971e-06, "loss": 0.823, "step": 12304 }, { "epoch": 0.8889771885780339, "grad_norm": 6.537647089570791, "learning_rate": 3.0658715409601057e-06, "loss": 0.8055, "step": 12305 }, { "epoch": 0.8890494337782433, "grad_norm": 6.765826004249251, "learning_rate": 3.0655866262018987e-06, "loss": 0.7913, "step": 12306 }, { "epoch": 0.8891216789784528, "grad_norm": 7.9830789428615185, "learning_rate": 3.0653017037012493e-06, "loss": 0.86, "step": 12307 }, { "epoch": 0.8891939241786624, "grad_norm": 7.035575383995734, "learning_rate": 3.065016773462059e-06, "loss": 0.8494, "step": 12308 }, { "epoch": 0.8892661693788719, "grad_norm": 5.798483078000605, "learning_rate": 3.0647318354882273e-06, "loss": 0.7853, "step": 12309 }, { "epoch": 0.8893384145790814, "grad_norm": 6.108388055172032, "learning_rate": 3.0644468897836554e-06, "loss": 0.813, "step": 12310 }, { "epoch": 0.8894106597792909, "grad_norm": 11.249924723055441, "learning_rate": 3.064161936352244e-06, "loss": 0.7566, "step": 12311 }, { "epoch": 0.8894829049795004, "grad_norm": 5.856753319736302, "learning_rate": 3.063876975197893e-06, "loss": 0.731, "step": 12312 }, { "epoch": 0.88955515017971, "grad_norm": 8.135964844454241, "learning_rate": 3.063592006324505e-06, "loss": 0.801, "step": 12313 }, { "epoch": 0.8896273953799194, "grad_norm": 6.497530835009888, "learning_rate": 3.0633070297359797e-06, "loss": 0.7664, "step": 12314 }, { "epoch": 0.889699640580129, "grad_norm": 5.705435506929376, "learning_rate": 3.0630220454362185e-06, "loss": 0.8277, "step": 12315 }, { "epoch": 0.8897718857803385, "grad_norm": 7.213010149124771, "learning_rate": 3.062737053429123e-06, "loss": 0.7488, "step": 12316 }, { "epoch": 0.8898441309805479, "grad_norm": 7.58398638697501, "learning_rate": 3.0624520537185935e-06, "loss": 0.8676, "step": 12317 }, { "epoch": 0.8899163761807575, "grad_norm": 5.954100521388502, "learning_rate": 3.0621670463085324e-06, "loss": 0.7988, "step": 12318 }, { "epoch": 0.889988621380967, "grad_norm": 7.186911915479372, "learning_rate": 3.0618820312028415e-06, "loss": 0.7808, "step": 12319 }, { "epoch": 0.8900608665811766, "grad_norm": 5.578857715685643, "learning_rate": 3.0615970084054207e-06, "loss": 0.8116, "step": 12320 }, { "epoch": 0.890133111781386, "grad_norm": 5.688074774571207, "learning_rate": 3.0613119779201738e-06, "loss": 0.7689, "step": 12321 }, { "epoch": 0.8902053569815955, "grad_norm": 7.11784110224351, "learning_rate": 3.061026939751001e-06, "loss": 0.799, "step": 12322 }, { "epoch": 0.8902776021818051, "grad_norm": 5.496529958369367, "learning_rate": 3.060741893901806e-06, "loss": 0.8212, "step": 12323 }, { "epoch": 0.8903498473820145, "grad_norm": 6.189181320948998, "learning_rate": 3.0604568403764895e-06, "loss": 0.8607, "step": 12324 }, { "epoch": 0.890422092582224, "grad_norm": 6.535207033387564, "learning_rate": 3.0601717791789537e-06, "loss": 0.8358, "step": 12325 }, { "epoch": 0.8904943377824336, "grad_norm": 6.259610835644713, "learning_rate": 3.0598867103131015e-06, "loss": 0.7749, "step": 12326 }, { "epoch": 0.8905665829826431, "grad_norm": 5.807082738382494, "learning_rate": 3.0596016337828344e-06, "loss": 0.7582, "step": 12327 }, { "epoch": 0.8906388281828526, "grad_norm": 5.007653291882258, "learning_rate": 3.0593165495920564e-06, "loss": 0.7427, "step": 12328 }, { "epoch": 0.8907110733830621, "grad_norm": 8.125617840090877, "learning_rate": 3.0590314577446685e-06, "loss": 0.8657, "step": 12329 }, { "epoch": 0.8907833185832716, "grad_norm": 6.499422194401898, "learning_rate": 3.0587463582445743e-06, "loss": 0.7843, "step": 12330 }, { "epoch": 0.8908555637834812, "grad_norm": 5.454518662011206, "learning_rate": 3.0584612510956755e-06, "loss": 0.8661, "step": 12331 }, { "epoch": 0.8909278089836906, "grad_norm": 6.3459781435672165, "learning_rate": 3.0581761363018762e-06, "loss": 0.785, "step": 12332 }, { "epoch": 0.8910000541839002, "grad_norm": 6.4119588582699505, "learning_rate": 3.0578910138670796e-06, "loss": 0.7747, "step": 12333 }, { "epoch": 0.8910722993841097, "grad_norm": 6.757487912183105, "learning_rate": 3.0576058837951878e-06, "loss": 0.8813, "step": 12334 }, { "epoch": 0.8911445445843191, "grad_norm": 5.1685494509701675, "learning_rate": 3.057320746090105e-06, "loss": 0.7849, "step": 12335 }, { "epoch": 0.8912167897845287, "grad_norm": 5.445353656979954, "learning_rate": 3.057035600755732e-06, "loss": 0.8016, "step": 12336 }, { "epoch": 0.8912890349847382, "grad_norm": 5.771359122477604, "learning_rate": 3.0567504477959764e-06, "loss": 0.7967, "step": 12337 }, { "epoch": 0.8913612801849478, "grad_norm": 6.156908029156534, "learning_rate": 3.0564652872147384e-06, "loss": 0.7928, "step": 12338 }, { "epoch": 0.8914335253851572, "grad_norm": 6.924545151959328, "learning_rate": 3.056180119015923e-06, "loss": 0.8455, "step": 12339 }, { "epoch": 0.8915057705853667, "grad_norm": 7.323726025093033, "learning_rate": 3.055894943203433e-06, "loss": 0.7925, "step": 12340 }, { "epoch": 0.8915780157855763, "grad_norm": 5.186054258075227, "learning_rate": 3.055609759781173e-06, "loss": 0.816, "step": 12341 }, { "epoch": 0.8916502609857857, "grad_norm": 8.001791753391764, "learning_rate": 3.0553245687530474e-06, "loss": 0.8441, "step": 12342 }, { "epoch": 0.8917225061859952, "grad_norm": 7.292416633911479, "learning_rate": 3.055039370122959e-06, "loss": 0.8824, "step": 12343 }, { "epoch": 0.8917947513862048, "grad_norm": 7.769469877051468, "learning_rate": 3.054754163894813e-06, "loss": 0.8206, "step": 12344 }, { "epoch": 0.8918669965864143, "grad_norm": 8.535483403341075, "learning_rate": 3.054468950072513e-06, "loss": 0.8466, "step": 12345 }, { "epoch": 0.8919392417866238, "grad_norm": 6.5939617394172405, "learning_rate": 3.0541837286599634e-06, "loss": 0.8802, "step": 12346 }, { "epoch": 0.8920114869868333, "grad_norm": 5.167173524831324, "learning_rate": 3.0538984996610693e-06, "loss": 0.8031, "step": 12347 }, { "epoch": 0.8920837321870428, "grad_norm": 5.502846761381607, "learning_rate": 3.053613263079735e-06, "loss": 0.8696, "step": 12348 }, { "epoch": 0.8921559773872524, "grad_norm": 5.9513202644980385, "learning_rate": 3.053328018919865e-06, "loss": 0.8053, "step": 12349 }, { "epoch": 0.8922282225874618, "grad_norm": 9.712624536057078, "learning_rate": 3.053042767185362e-06, "loss": 0.7897, "step": 12350 }, { "epoch": 0.8923004677876714, "grad_norm": 6.976198603419036, "learning_rate": 3.0527575078801358e-06, "loss": 0.7761, "step": 12351 }, { "epoch": 0.8923727129878809, "grad_norm": 8.696972848696094, "learning_rate": 3.052472241008087e-06, "loss": 0.801, "step": 12352 }, { "epoch": 0.8924449581880903, "grad_norm": 5.023330616147615, "learning_rate": 3.052186966573123e-06, "loss": 0.7928, "step": 12353 }, { "epoch": 0.8925172033882999, "grad_norm": 7.099634628565815, "learning_rate": 3.051901684579147e-06, "loss": 0.9359, "step": 12354 }, { "epoch": 0.8925894485885094, "grad_norm": 5.802772503871701, "learning_rate": 3.0516163950300663e-06, "loss": 0.7898, "step": 12355 }, { "epoch": 0.892661693788719, "grad_norm": 5.91422472067395, "learning_rate": 3.051331097929785e-06, "loss": 0.8172, "step": 12356 }, { "epoch": 0.8927339389889284, "grad_norm": 5.0503392096177215, "learning_rate": 3.0510457932822094e-06, "loss": 0.805, "step": 12357 }, { "epoch": 0.8928061841891379, "grad_norm": 8.101361440519822, "learning_rate": 3.0507604810912455e-06, "loss": 0.8539, "step": 12358 }, { "epoch": 0.8928784293893475, "grad_norm": 7.72128192595492, "learning_rate": 3.050475161360797e-06, "loss": 0.7608, "step": 12359 }, { "epoch": 0.8929506745895569, "grad_norm": 6.946169273644647, "learning_rate": 3.050189834094771e-06, "loss": 0.7629, "step": 12360 }, { "epoch": 0.8930229197897664, "grad_norm": 5.941767393794787, "learning_rate": 3.049904499297074e-06, "loss": 0.7637, "step": 12361 }, { "epoch": 0.893095164989976, "grad_norm": 5.374830021499263, "learning_rate": 3.0496191569716116e-06, "loss": 0.7519, "step": 12362 }, { "epoch": 0.8931674101901855, "grad_norm": 7.663451750798488, "learning_rate": 3.049333807122289e-06, "loss": 0.7921, "step": 12363 }, { "epoch": 0.893239655390395, "grad_norm": 5.752540690490764, "learning_rate": 3.0490484497530125e-06, "loss": 0.829, "step": 12364 }, { "epoch": 0.8933119005906045, "grad_norm": 5.920105199136891, "learning_rate": 3.0487630848676907e-06, "loss": 0.8054, "step": 12365 }, { "epoch": 0.893384145790814, "grad_norm": 7.742544956982083, "learning_rate": 3.048477712470227e-06, "loss": 0.8465, "step": 12366 }, { "epoch": 0.8934563909910236, "grad_norm": 8.560986016072144, "learning_rate": 3.04819233256453e-06, "loss": 0.8314, "step": 12367 }, { "epoch": 0.893528636191233, "grad_norm": 7.205166541880919, "learning_rate": 3.0479069451545058e-06, "loss": 0.8124, "step": 12368 }, { "epoch": 0.8936008813914426, "grad_norm": 6.138239351012818, "learning_rate": 3.0476215502440606e-06, "loss": 0.7837, "step": 12369 }, { "epoch": 0.8936731265916521, "grad_norm": 5.07474440508855, "learning_rate": 3.047336147837101e-06, "loss": 0.8359, "step": 12370 }, { "epoch": 0.8937453717918615, "grad_norm": 6.233997763786612, "learning_rate": 3.0470507379375354e-06, "loss": 0.8447, "step": 12371 }, { "epoch": 0.8938176169920711, "grad_norm": 6.559857226888266, "learning_rate": 3.0467653205492693e-06, "loss": 0.7428, "step": 12372 }, { "epoch": 0.8938898621922806, "grad_norm": 7.329526722449251, "learning_rate": 3.046479895676211e-06, "loss": 0.7836, "step": 12373 }, { "epoch": 0.8939621073924902, "grad_norm": 6.364599682609508, "learning_rate": 3.0461944633222673e-06, "loss": 0.8351, "step": 12374 }, { "epoch": 0.8940343525926996, "grad_norm": 5.972305916290932, "learning_rate": 3.0459090234913445e-06, "loss": 0.9225, "step": 12375 }, { "epoch": 0.8941065977929091, "grad_norm": 5.309871247593073, "learning_rate": 3.0456235761873528e-06, "loss": 0.7915, "step": 12376 }, { "epoch": 0.8941788429931187, "grad_norm": 7.796774701102122, "learning_rate": 3.0453381214141966e-06, "loss": 0.8494, "step": 12377 }, { "epoch": 0.8942510881933281, "grad_norm": 5.045521085083237, "learning_rate": 3.045052659175785e-06, "loss": 0.7808, "step": 12378 }, { "epoch": 0.8943233333935376, "grad_norm": 5.56104490986199, "learning_rate": 3.0447671894760257e-06, "loss": 0.7466, "step": 12379 }, { "epoch": 0.8943955785937472, "grad_norm": 5.681355605282235, "learning_rate": 3.0444817123188274e-06, "loss": 0.8995, "step": 12380 }, { "epoch": 0.8944678237939567, "grad_norm": 5.219846690181058, "learning_rate": 3.044196227708097e-06, "loss": 0.775, "step": 12381 }, { "epoch": 0.8945400689941662, "grad_norm": 8.09329234121752, "learning_rate": 3.043910735647742e-06, "loss": 0.8415, "step": 12382 }, { "epoch": 0.8946123141943757, "grad_norm": 7.3602506922444935, "learning_rate": 3.043625236141672e-06, "loss": 0.787, "step": 12383 }, { "epoch": 0.8946845593945852, "grad_norm": 5.908181172479877, "learning_rate": 3.0433397291937937e-06, "loss": 0.7857, "step": 12384 }, { "epoch": 0.8947568045947948, "grad_norm": 6.794295319093559, "learning_rate": 3.0430542148080176e-06, "loss": 0.7527, "step": 12385 }, { "epoch": 0.8948290497950042, "grad_norm": 6.8352293159956306, "learning_rate": 3.0427686929882505e-06, "loss": 0.8478, "step": 12386 }, { "epoch": 0.8949012949952138, "grad_norm": 6.749078475778512, "learning_rate": 3.0424831637384015e-06, "loss": 0.8365, "step": 12387 }, { "epoch": 0.8949735401954233, "grad_norm": 6.807239426225364, "learning_rate": 3.042197627062379e-06, "loss": 0.8563, "step": 12388 }, { "epoch": 0.8950457853956327, "grad_norm": 5.716114541723386, "learning_rate": 3.041912082964092e-06, "loss": 0.7682, "step": 12389 }, { "epoch": 0.8951180305958423, "grad_norm": 6.1594815365477205, "learning_rate": 3.04162653144745e-06, "loss": 0.8225, "step": 12390 }, { "epoch": 0.8951902757960518, "grad_norm": 6.125633985444511, "learning_rate": 3.0413409725163606e-06, "loss": 0.8141, "step": 12391 }, { "epoch": 0.8952625209962614, "grad_norm": 8.061026031842047, "learning_rate": 3.041055406174734e-06, "loss": 0.764, "step": 12392 }, { "epoch": 0.8953347661964708, "grad_norm": 6.252758790540043, "learning_rate": 3.0407698324264777e-06, "loss": 0.7617, "step": 12393 }, { "epoch": 0.8954070113966803, "grad_norm": 7.159850168631733, "learning_rate": 3.040484251275504e-06, "loss": 0.7401, "step": 12394 }, { "epoch": 0.8954792565968899, "grad_norm": 7.172080575161669, "learning_rate": 3.0401986627257198e-06, "loss": 0.8304, "step": 12395 }, { "epoch": 0.8955515017970993, "grad_norm": 7.151867719941758, "learning_rate": 3.0399130667810356e-06, "loss": 0.7574, "step": 12396 }, { "epoch": 0.8956237469973088, "grad_norm": 7.377593796643421, "learning_rate": 3.0396274634453605e-06, "loss": 0.8624, "step": 12397 }, { "epoch": 0.8956959921975184, "grad_norm": 6.20266652755103, "learning_rate": 3.0393418527226043e-06, "loss": 0.7989, "step": 12398 }, { "epoch": 0.8957682373977279, "grad_norm": 6.387177449124266, "learning_rate": 3.0390562346166774e-06, "loss": 0.848, "step": 12399 }, { "epoch": 0.8958404825979374, "grad_norm": 5.396318431046572, "learning_rate": 3.0387706091314888e-06, "loss": 0.8074, "step": 12400 }, { "epoch": 0.8959127277981469, "grad_norm": 7.298880389329869, "learning_rate": 3.0384849762709497e-06, "loss": 0.7726, "step": 12401 }, { "epoch": 0.8959849729983564, "grad_norm": 5.216760130381171, "learning_rate": 3.0381993360389683e-06, "loss": 0.8195, "step": 12402 }, { "epoch": 0.896057218198566, "grad_norm": 7.541244017277587, "learning_rate": 3.0379136884394554e-06, "loss": 0.8667, "step": 12403 }, { "epoch": 0.8961294633987754, "grad_norm": 5.581779244716076, "learning_rate": 3.037628033476323e-06, "loss": 0.7717, "step": 12404 }, { "epoch": 0.896201708598985, "grad_norm": 6.168633259110189, "learning_rate": 3.03734237115348e-06, "loss": 0.8147, "step": 12405 }, { "epoch": 0.8962739537991945, "grad_norm": 6.917482657347055, "learning_rate": 3.0370567014748374e-06, "loss": 0.8138, "step": 12406 }, { "epoch": 0.8963461989994039, "grad_norm": 6.111413327845657, "learning_rate": 3.0367710244443046e-06, "loss": 0.8366, "step": 12407 }, { "epoch": 0.8964184441996135, "grad_norm": 8.958334800069526, "learning_rate": 3.036485340065794e-06, "loss": 0.7896, "step": 12408 }, { "epoch": 0.896490689399823, "grad_norm": 6.916713055681066, "learning_rate": 3.0361996483432155e-06, "loss": 0.8319, "step": 12409 }, { "epoch": 0.8965629346000326, "grad_norm": 6.378702621807288, "learning_rate": 3.0359139492804803e-06, "loss": 0.8442, "step": 12410 }, { "epoch": 0.896635179800242, "grad_norm": 7.422362237296198, "learning_rate": 3.0356282428814987e-06, "loss": 0.7937, "step": 12411 }, { "epoch": 0.8967074250004515, "grad_norm": 8.40729411281159, "learning_rate": 3.0353425291501827e-06, "loss": 0.8268, "step": 12412 }, { "epoch": 0.8967796702006611, "grad_norm": 8.451092657287031, "learning_rate": 3.035056808090443e-06, "loss": 0.8795, "step": 12413 }, { "epoch": 0.8968519154008705, "grad_norm": 6.548529295299555, "learning_rate": 3.034771079706192e-06, "loss": 0.8218, "step": 12414 }, { "epoch": 0.89692416060108, "grad_norm": 7.445455703879229, "learning_rate": 3.0344853440013393e-06, "loss": 0.8926, "step": 12415 }, { "epoch": 0.8969964058012896, "grad_norm": 6.318151380367075, "learning_rate": 3.034199600979797e-06, "loss": 0.8194, "step": 12416 }, { "epoch": 0.8970686510014991, "grad_norm": 6.385321844080393, "learning_rate": 3.0339138506454776e-06, "loss": 0.8162, "step": 12417 }, { "epoch": 0.8971408962017086, "grad_norm": 7.160302759514444, "learning_rate": 3.033628093002291e-06, "loss": 0.7718, "step": 12418 }, { "epoch": 0.8972131414019181, "grad_norm": 9.59526760759833, "learning_rate": 3.0333423280541513e-06, "loss": 0.8739, "step": 12419 }, { "epoch": 0.8972853866021276, "grad_norm": 6.371883752656905, "learning_rate": 3.033056555804969e-06, "loss": 0.8845, "step": 12420 }, { "epoch": 0.8973576318023372, "grad_norm": 5.312968334423141, "learning_rate": 3.0327707762586558e-06, "loss": 0.7405, "step": 12421 }, { "epoch": 0.8974298770025466, "grad_norm": 5.4927764620198, "learning_rate": 3.0324849894191244e-06, "loss": 0.7683, "step": 12422 }, { "epoch": 0.8975021222027562, "grad_norm": 6.05356308863757, "learning_rate": 3.032199195290288e-06, "loss": 0.8312, "step": 12423 }, { "epoch": 0.8975743674029657, "grad_norm": 6.610869398782861, "learning_rate": 3.0319133938760577e-06, "loss": 0.8961, "step": 12424 }, { "epoch": 0.8976466126031751, "grad_norm": 6.983753970397509, "learning_rate": 3.031627585180345e-06, "loss": 0.8124, "step": 12425 }, { "epoch": 0.8977188578033847, "grad_norm": 5.735082099413375, "learning_rate": 3.0313417692070634e-06, "loss": 0.8343, "step": 12426 }, { "epoch": 0.8977911030035942, "grad_norm": 6.957942228309332, "learning_rate": 3.0310559459601265e-06, "loss": 0.8243, "step": 12427 }, { "epoch": 0.8978633482038038, "grad_norm": 6.2577265472795585, "learning_rate": 3.0307701154434454e-06, "loss": 0.7563, "step": 12428 }, { "epoch": 0.8979355934040132, "grad_norm": 7.63431392962232, "learning_rate": 3.0304842776609335e-06, "loss": 0.7546, "step": 12429 }, { "epoch": 0.8980078386042227, "grad_norm": 6.942562421298984, "learning_rate": 3.0301984326165045e-06, "loss": 0.8913, "step": 12430 }, { "epoch": 0.8980800838044323, "grad_norm": 7.520941679817807, "learning_rate": 3.02991258031407e-06, "loss": 0.7637, "step": 12431 }, { "epoch": 0.8981523290046417, "grad_norm": 6.629167217811675, "learning_rate": 3.0296267207575436e-06, "loss": 0.7686, "step": 12432 }, { "epoch": 0.8982245742048512, "grad_norm": 5.865211936458078, "learning_rate": 3.029340853950839e-06, "loss": 0.8087, "step": 12433 }, { "epoch": 0.8982968194050608, "grad_norm": 5.770190408563154, "learning_rate": 3.0290549798978687e-06, "loss": 0.8496, "step": 12434 }, { "epoch": 0.8983690646052703, "grad_norm": 4.470377378982949, "learning_rate": 3.0287690986025462e-06, "loss": 0.7326, "step": 12435 }, { "epoch": 0.8984413098054798, "grad_norm": 7.639952709990336, "learning_rate": 3.028483210068786e-06, "loss": 0.8741, "step": 12436 }, { "epoch": 0.8985135550056893, "grad_norm": 5.483750701758169, "learning_rate": 3.0281973143005017e-06, "loss": 0.7917, "step": 12437 }, { "epoch": 0.8985858002058988, "grad_norm": 5.341231438595377, "learning_rate": 3.027911411301605e-06, "loss": 0.8106, "step": 12438 }, { "epoch": 0.8986580454061084, "grad_norm": 6.523245693287966, "learning_rate": 3.027625501076012e-06, "loss": 0.7643, "step": 12439 }, { "epoch": 0.8987302906063178, "grad_norm": 5.892368048645903, "learning_rate": 3.0273395836276344e-06, "loss": 0.7834, "step": 12440 }, { "epoch": 0.8988025358065274, "grad_norm": 7.138034075047763, "learning_rate": 3.0270536589603876e-06, "loss": 0.7364, "step": 12441 }, { "epoch": 0.8988747810067369, "grad_norm": 6.26435667980491, "learning_rate": 3.0267677270781865e-06, "loss": 0.7761, "step": 12442 }, { "epoch": 0.8989470262069463, "grad_norm": 6.612676134269566, "learning_rate": 3.0264817879849433e-06, "loss": 0.8514, "step": 12443 }, { "epoch": 0.8990192714071559, "grad_norm": 7.493800080221916, "learning_rate": 3.0261958416845737e-06, "loss": 0.8682, "step": 12444 }, { "epoch": 0.8990915166073654, "grad_norm": 8.368503613082089, "learning_rate": 3.0259098881809906e-06, "loss": 0.8762, "step": 12445 }, { "epoch": 0.899163761807575, "grad_norm": 5.642002772241646, "learning_rate": 3.0256239274781107e-06, "loss": 0.8136, "step": 12446 }, { "epoch": 0.8992360070077844, "grad_norm": 6.032688423941794, "learning_rate": 3.0253379595798472e-06, "loss": 0.8305, "step": 12447 }, { "epoch": 0.8993082522079939, "grad_norm": 6.854832424616964, "learning_rate": 3.025051984490114e-06, "loss": 0.8575, "step": 12448 }, { "epoch": 0.8993804974082035, "grad_norm": 6.544713503378784, "learning_rate": 3.024766002212827e-06, "loss": 0.7601, "step": 12449 }, { "epoch": 0.8994527426084129, "grad_norm": 6.656400150521841, "learning_rate": 3.0244800127519013e-06, "loss": 0.8136, "step": 12450 }, { "epoch": 0.8995249878086224, "grad_norm": 5.311245938553383, "learning_rate": 3.024194016111252e-06, "loss": 0.747, "step": 12451 }, { "epoch": 0.899597233008832, "grad_norm": 6.684938029171057, "learning_rate": 3.0239080122947927e-06, "loss": 0.8441, "step": 12452 }, { "epoch": 0.8996694782090415, "grad_norm": 6.3218103621417505, "learning_rate": 3.02362200130644e-06, "loss": 0.812, "step": 12453 }, { "epoch": 0.899741723409251, "grad_norm": 5.785706158452322, "learning_rate": 3.0233359831501087e-06, "loss": 0.7764, "step": 12454 }, { "epoch": 0.8998139686094605, "grad_norm": 6.117391414824457, "learning_rate": 3.0230499578297136e-06, "loss": 0.7579, "step": 12455 }, { "epoch": 0.89988621380967, "grad_norm": 5.847147723901539, "learning_rate": 3.0227639253491715e-06, "loss": 0.7326, "step": 12456 }, { "epoch": 0.8999584590098796, "grad_norm": 4.908921213139058, "learning_rate": 3.0224778857123966e-06, "loss": 0.7748, "step": 12457 }, { "epoch": 0.900030704210089, "grad_norm": 6.299136680911495, "learning_rate": 3.022191838923305e-06, "loss": 0.8085, "step": 12458 }, { "epoch": 0.9001029494102986, "grad_norm": 4.918837418318638, "learning_rate": 3.0219057849858126e-06, "loss": 0.7898, "step": 12459 }, { "epoch": 0.9001751946105081, "grad_norm": 5.398261546291359, "learning_rate": 3.021619723903836e-06, "loss": 0.7898, "step": 12460 }, { "epoch": 0.9002474398107175, "grad_norm": 5.437357758163315, "learning_rate": 3.0213336556812893e-06, "loss": 0.847, "step": 12461 }, { "epoch": 0.9003196850109271, "grad_norm": 5.169511791029476, "learning_rate": 3.021047580322091e-06, "loss": 0.754, "step": 12462 }, { "epoch": 0.9003919302111366, "grad_norm": 5.375970974537287, "learning_rate": 3.020761497830155e-06, "loss": 0.7292, "step": 12463 }, { "epoch": 0.9004641754113462, "grad_norm": 6.49534234651926, "learning_rate": 3.020475408209398e-06, "loss": 0.8386, "step": 12464 }, { "epoch": 0.9005364206115556, "grad_norm": 6.377881165721301, "learning_rate": 3.0201893114637376e-06, "loss": 0.8903, "step": 12465 }, { "epoch": 0.9006086658117651, "grad_norm": 6.216696548730998, "learning_rate": 3.0199032075970887e-06, "loss": 0.6787, "step": 12466 }, { "epoch": 0.9006809110119747, "grad_norm": 5.68142644190655, "learning_rate": 3.019617096613369e-06, "loss": 0.7551, "step": 12467 }, { "epoch": 0.9007531562121841, "grad_norm": 6.609901641714101, "learning_rate": 3.019330978516495e-06, "loss": 0.7773, "step": 12468 }, { "epoch": 0.9008254014123936, "grad_norm": 6.220520994194054, "learning_rate": 3.0190448533103827e-06, "loss": 0.8138, "step": 12469 }, { "epoch": 0.9008976466126032, "grad_norm": 8.396658605208495, "learning_rate": 3.0187587209989496e-06, "loss": 0.7507, "step": 12470 }, { "epoch": 0.9009698918128127, "grad_norm": 5.434695869085328, "learning_rate": 3.018472581586112e-06, "loss": 0.8066, "step": 12471 }, { "epoch": 0.9010421370130222, "grad_norm": 7.044487322420432, "learning_rate": 3.0181864350757885e-06, "loss": 0.8508, "step": 12472 }, { "epoch": 0.9011143822132317, "grad_norm": 8.365986998815828, "learning_rate": 3.0179002814718935e-06, "loss": 0.8399, "step": 12473 }, { "epoch": 0.9011866274134412, "grad_norm": 6.838620009388773, "learning_rate": 3.0176141207783467e-06, "loss": 0.8381, "step": 12474 }, { "epoch": 0.9012588726136508, "grad_norm": 7.970426973897963, "learning_rate": 3.017327952999064e-06, "loss": 0.8446, "step": 12475 }, { "epoch": 0.9013311178138602, "grad_norm": 5.815406318828992, "learning_rate": 3.0170417781379637e-06, "loss": 0.7614, "step": 12476 }, { "epoch": 0.9014033630140698, "grad_norm": 5.601244120680948, "learning_rate": 3.0167555961989625e-06, "loss": 0.7948, "step": 12477 }, { "epoch": 0.9014756082142793, "grad_norm": 5.820864002289892, "learning_rate": 3.0164694071859783e-06, "loss": 0.8149, "step": 12478 }, { "epoch": 0.9015478534144887, "grad_norm": 8.680857362374896, "learning_rate": 3.0161832111029295e-06, "loss": 0.8591, "step": 12479 }, { "epoch": 0.9016200986146983, "grad_norm": 8.911942653697507, "learning_rate": 3.0158970079537337e-06, "loss": 0.8812, "step": 12480 }, { "epoch": 0.9016923438149078, "grad_norm": 10.036658424450916, "learning_rate": 3.0156107977423076e-06, "loss": 0.8819, "step": 12481 }, { "epoch": 0.9017645890151174, "grad_norm": 5.189747518818428, "learning_rate": 3.0153245804725707e-06, "loss": 0.8642, "step": 12482 }, { "epoch": 0.9018368342153268, "grad_norm": 7.355678788324822, "learning_rate": 3.0150383561484397e-06, "loss": 0.876, "step": 12483 }, { "epoch": 0.9019090794155363, "grad_norm": 6.029710958711493, "learning_rate": 3.014752124773833e-06, "loss": 0.7998, "step": 12484 }, { "epoch": 0.9019813246157459, "grad_norm": 6.497243736999446, "learning_rate": 3.014465886352671e-06, "loss": 0.8473, "step": 12485 }, { "epoch": 0.9020535698159553, "grad_norm": 7.058250836907752, "learning_rate": 3.0141796408888697e-06, "loss": 0.7495, "step": 12486 }, { "epoch": 0.9021258150161648, "grad_norm": 8.598636306164112, "learning_rate": 3.0138933883863485e-06, "loss": 0.8095, "step": 12487 }, { "epoch": 0.9021980602163744, "grad_norm": 6.38595955445635, "learning_rate": 3.013607128849025e-06, "loss": 0.8614, "step": 12488 }, { "epoch": 0.9022703054165839, "grad_norm": 6.645138918513133, "learning_rate": 3.0133208622808195e-06, "loss": 0.7728, "step": 12489 }, { "epoch": 0.9023425506167934, "grad_norm": 5.555532313404209, "learning_rate": 3.0130345886856506e-06, "loss": 0.7845, "step": 12490 }, { "epoch": 0.9024147958170029, "grad_norm": 5.616992591585445, "learning_rate": 3.012748308067435e-06, "loss": 0.8654, "step": 12491 }, { "epoch": 0.9024870410172124, "grad_norm": 5.407130528004202, "learning_rate": 3.0124620204300944e-06, "loss": 0.7953, "step": 12492 }, { "epoch": 0.902559286217422, "grad_norm": 6.278929931930112, "learning_rate": 3.012175725777546e-06, "loss": 0.8247, "step": 12493 }, { "epoch": 0.9026315314176314, "grad_norm": 5.920944422575221, "learning_rate": 3.0118894241137096e-06, "loss": 0.8173, "step": 12494 }, { "epoch": 0.902703776617841, "grad_norm": 5.5884979995296655, "learning_rate": 3.0116031154425045e-06, "loss": 0.8046, "step": 12495 }, { "epoch": 0.9027760218180505, "grad_norm": 6.14838600379483, "learning_rate": 3.0113167997678505e-06, "loss": 0.7925, "step": 12496 }, { "epoch": 0.9028482670182599, "grad_norm": 7.012267126884789, "learning_rate": 3.011030477093666e-06, "loss": 0.7872, "step": 12497 }, { "epoch": 0.9029205122184695, "grad_norm": 5.345441227143162, "learning_rate": 3.010744147423871e-06, "loss": 0.7421, "step": 12498 }, { "epoch": 0.902992757418679, "grad_norm": 5.42770081544366, "learning_rate": 3.0104578107623854e-06, "loss": 0.819, "step": 12499 }, { "epoch": 0.9030650026188886, "grad_norm": 5.825434250259938, "learning_rate": 3.010171467113129e-06, "loss": 0.8146, "step": 12500 }, { "epoch": 0.903137247819098, "grad_norm": 5.561998323255805, "learning_rate": 3.0098851164800208e-06, "loss": 0.8074, "step": 12501 }, { "epoch": 0.9032094930193075, "grad_norm": 5.561330951042339, "learning_rate": 3.0095987588669813e-06, "loss": 0.8335, "step": 12502 }, { "epoch": 0.9032817382195171, "grad_norm": 6.876873801374378, "learning_rate": 3.0093123942779306e-06, "loss": 0.879, "step": 12503 }, { "epoch": 0.9033539834197265, "grad_norm": 9.311086252928423, "learning_rate": 3.009026022716788e-06, "loss": 0.8622, "step": 12504 }, { "epoch": 0.903426228619936, "grad_norm": 6.828930839980467, "learning_rate": 3.0087396441874763e-06, "loss": 0.9034, "step": 12505 }, { "epoch": 0.9034984738201456, "grad_norm": 6.89643846451725, "learning_rate": 3.008453258693912e-06, "loss": 0.9339, "step": 12506 }, { "epoch": 0.9035707190203551, "grad_norm": 5.782643742999154, "learning_rate": 3.0081668662400184e-06, "loss": 0.7321, "step": 12507 }, { "epoch": 0.9036429642205646, "grad_norm": 6.141319177425455, "learning_rate": 3.0078804668297146e-06, "loss": 0.711, "step": 12508 }, { "epoch": 0.9037152094207741, "grad_norm": 5.582439730669523, "learning_rate": 3.0075940604669213e-06, "loss": 0.7957, "step": 12509 }, { "epoch": 0.9037874546209836, "grad_norm": 6.257196784238105, "learning_rate": 3.00730764715556e-06, "loss": 0.8626, "step": 12510 }, { "epoch": 0.9038596998211931, "grad_norm": 6.124911249257355, "learning_rate": 3.007021226899551e-06, "loss": 0.7918, "step": 12511 }, { "epoch": 0.9039319450214026, "grad_norm": 6.78049897284859, "learning_rate": 3.006734799702814e-06, "loss": 0.8645, "step": 12512 }, { "epoch": 0.9040041902216122, "grad_norm": 5.694018977121204, "learning_rate": 3.0064483655692723e-06, "loss": 0.7723, "step": 12513 }, { "epoch": 0.9040764354218217, "grad_norm": 7.583959476765078, "learning_rate": 3.006161924502845e-06, "loss": 0.8152, "step": 12514 }, { "epoch": 0.9041486806220311, "grad_norm": 6.574818992298886, "learning_rate": 3.0058754765074543e-06, "loss": 0.8481, "step": 12515 }, { "epoch": 0.9042209258222407, "grad_norm": 7.316318395344789, "learning_rate": 3.0055890215870205e-06, "loss": 0.7803, "step": 12516 }, { "epoch": 0.9042931710224502, "grad_norm": 6.641239631482472, "learning_rate": 3.005302559745466e-06, "loss": 0.7834, "step": 12517 }, { "epoch": 0.9043654162226598, "grad_norm": 6.202526303857875, "learning_rate": 3.0050160909867114e-06, "loss": 0.9191, "step": 12518 }, { "epoch": 0.9044376614228692, "grad_norm": 6.693946939486607, "learning_rate": 3.0047296153146797e-06, "loss": 0.7401, "step": 12519 }, { "epoch": 0.9045099066230787, "grad_norm": 6.675209516458869, "learning_rate": 3.0044431327332906e-06, "loss": 0.868, "step": 12520 }, { "epoch": 0.9045821518232883, "grad_norm": 6.686517946383936, "learning_rate": 3.004156643246466e-06, "loss": 0.8033, "step": 12521 }, { "epoch": 0.9046543970234977, "grad_norm": 6.055027397213481, "learning_rate": 3.00387014685813e-06, "loss": 0.8383, "step": 12522 }, { "epoch": 0.9047266422237072, "grad_norm": 7.182515414493961, "learning_rate": 3.003583643572202e-06, "loss": 0.7498, "step": 12523 }, { "epoch": 0.9047988874239168, "grad_norm": 7.9850500609011625, "learning_rate": 3.003297133392606e-06, "loss": 0.8047, "step": 12524 }, { "epoch": 0.9048711326241263, "grad_norm": 6.970851974629591, "learning_rate": 3.003010616323262e-06, "loss": 0.8371, "step": 12525 }, { "epoch": 0.9049433778243358, "grad_norm": 5.161209957589243, "learning_rate": 3.0027240923680928e-06, "loss": 0.7604, "step": 12526 }, { "epoch": 0.9050156230245453, "grad_norm": 6.8143677557170514, "learning_rate": 3.0024375615310215e-06, "loss": 0.8193, "step": 12527 }, { "epoch": 0.9050878682247548, "grad_norm": 6.018733977457905, "learning_rate": 3.0021510238159707e-06, "loss": 0.7889, "step": 12528 }, { "epoch": 0.9051601134249643, "grad_norm": 6.061645132926317, "learning_rate": 3.0018644792268613e-06, "loss": 0.7846, "step": 12529 }, { "epoch": 0.9052323586251738, "grad_norm": 6.906135385520238, "learning_rate": 3.0015779277676175e-06, "loss": 0.8441, "step": 12530 }, { "epoch": 0.9053046038253834, "grad_norm": 6.490846791562192, "learning_rate": 3.001291369442162e-06, "loss": 0.8196, "step": 12531 }, { "epoch": 0.9053768490255929, "grad_norm": 6.657403263960667, "learning_rate": 3.0010048042544155e-06, "loss": 0.8874, "step": 12532 }, { "epoch": 0.9054490942258023, "grad_norm": 6.42778808422942, "learning_rate": 3.000718232208303e-06, "loss": 0.8466, "step": 12533 }, { "epoch": 0.9055213394260119, "grad_norm": 5.818226936013675, "learning_rate": 3.0004316533077466e-06, "loss": 0.7966, "step": 12534 }, { "epoch": 0.9055935846262214, "grad_norm": 6.89332607207417, "learning_rate": 3.000145067556669e-06, "loss": 0.8117, "step": 12535 }, { "epoch": 0.905665829826431, "grad_norm": 7.324001298855344, "learning_rate": 2.9998584749589944e-06, "loss": 0.8193, "step": 12536 }, { "epoch": 0.9057380750266404, "grad_norm": 6.3899035804411, "learning_rate": 2.9995718755186452e-06, "loss": 0.782, "step": 12537 }, { "epoch": 0.9058103202268499, "grad_norm": 6.41239106381336, "learning_rate": 2.999285269239545e-06, "loss": 0.6683, "step": 12538 }, { "epoch": 0.9058825654270595, "grad_norm": 5.530922228051035, "learning_rate": 2.998998656125617e-06, "loss": 0.7883, "step": 12539 }, { "epoch": 0.9059548106272689, "grad_norm": 6.651515369279671, "learning_rate": 2.9987120361807846e-06, "loss": 0.8248, "step": 12540 }, { "epoch": 0.9060270558274784, "grad_norm": 6.141166061365033, "learning_rate": 2.9984254094089713e-06, "loss": 0.7796, "step": 12541 }, { "epoch": 0.906099301027688, "grad_norm": 5.948715379024662, "learning_rate": 2.9981387758141023e-06, "loss": 0.8034, "step": 12542 }, { "epoch": 0.9061715462278975, "grad_norm": 5.733990149615595, "learning_rate": 2.9978521354001e-06, "loss": 0.8564, "step": 12543 }, { "epoch": 0.906243791428107, "grad_norm": 6.0152340452302, "learning_rate": 2.9975654881708877e-06, "loss": 0.7318, "step": 12544 }, { "epoch": 0.9063160366283165, "grad_norm": 5.31340252558808, "learning_rate": 2.9972788341303903e-06, "loss": 0.8217, "step": 12545 }, { "epoch": 0.906388281828526, "grad_norm": 5.241511612596633, "learning_rate": 2.9969921732825326e-06, "loss": 0.8327, "step": 12546 }, { "epoch": 0.9064605270287355, "grad_norm": 6.0564437976926175, "learning_rate": 2.9967055056312377e-06, "loss": 0.8262, "step": 12547 }, { "epoch": 0.906532772228945, "grad_norm": 6.164330460945315, "learning_rate": 2.99641883118043e-06, "loss": 0.7676, "step": 12548 }, { "epoch": 0.9066050174291546, "grad_norm": 9.214893840606758, "learning_rate": 2.9961321499340334e-06, "loss": 0.8007, "step": 12549 }, { "epoch": 0.9066772626293641, "grad_norm": 6.761603378113798, "learning_rate": 2.995845461895973e-06, "loss": 0.779, "step": 12550 }, { "epoch": 0.9067495078295735, "grad_norm": 6.130777553511833, "learning_rate": 2.995558767070174e-06, "loss": 0.8747, "step": 12551 }, { "epoch": 0.9068217530297831, "grad_norm": 6.676236658873992, "learning_rate": 2.99527206546056e-06, "loss": 0.7583, "step": 12552 }, { "epoch": 0.9068939982299926, "grad_norm": 7.270569396670844, "learning_rate": 2.9949853570710557e-06, "loss": 0.7565, "step": 12553 }, { "epoch": 0.9069662434302022, "grad_norm": 6.55487460391105, "learning_rate": 2.9946986419055857e-06, "loss": 0.8264, "step": 12554 }, { "epoch": 0.9070384886304116, "grad_norm": 8.130101876271581, "learning_rate": 2.9944119199680767e-06, "loss": 0.7701, "step": 12555 }, { "epoch": 0.9071107338306211, "grad_norm": 7.1488177088949305, "learning_rate": 2.9941251912624517e-06, "loss": 0.8296, "step": 12556 }, { "epoch": 0.9071829790308307, "grad_norm": 6.067673510886754, "learning_rate": 2.9938384557926364e-06, "loss": 0.851, "step": 12557 }, { "epoch": 0.9072552242310401, "grad_norm": 7.273893512071178, "learning_rate": 2.9935517135625557e-06, "loss": 0.7674, "step": 12558 }, { "epoch": 0.9073274694312496, "grad_norm": 8.52200162630529, "learning_rate": 2.993264964576136e-06, "loss": 0.7661, "step": 12559 }, { "epoch": 0.9073997146314592, "grad_norm": 8.224413874650459, "learning_rate": 2.9929782088373015e-06, "loss": 0.7799, "step": 12560 }, { "epoch": 0.9074719598316687, "grad_norm": 7.151233764053901, "learning_rate": 2.9926914463499786e-06, "loss": 0.8287, "step": 12561 }, { "epoch": 0.9075442050318782, "grad_norm": 7.5164606656804045, "learning_rate": 2.992404677118092e-06, "loss": 0.866, "step": 12562 }, { "epoch": 0.9076164502320877, "grad_norm": 5.561678707912641, "learning_rate": 2.9921179011455674e-06, "loss": 0.8108, "step": 12563 }, { "epoch": 0.9076886954322972, "grad_norm": 5.862113943178902, "learning_rate": 2.9918311184363307e-06, "loss": 0.7906, "step": 12564 }, { "epoch": 0.9077609406325067, "grad_norm": 6.6691134095495475, "learning_rate": 2.991544328994309e-06, "loss": 0.8674, "step": 12565 }, { "epoch": 0.9078331858327162, "grad_norm": 5.767554645122373, "learning_rate": 2.9912575328234263e-06, "loss": 0.8066, "step": 12566 }, { "epoch": 0.9079054310329258, "grad_norm": 6.657247692431078, "learning_rate": 2.9909707299276098e-06, "loss": 0.8372, "step": 12567 }, { "epoch": 0.9079776762331353, "grad_norm": 8.91944395171105, "learning_rate": 2.9906839203107846e-06, "loss": 0.8351, "step": 12568 }, { "epoch": 0.9080499214333447, "grad_norm": 8.427106318935612, "learning_rate": 2.9903971039768776e-06, "loss": 0.7826, "step": 12569 }, { "epoch": 0.9081221666335543, "grad_norm": 5.807884106260046, "learning_rate": 2.9901102809298147e-06, "loss": 0.7836, "step": 12570 }, { "epoch": 0.9081944118337638, "grad_norm": 5.891794751333901, "learning_rate": 2.9898234511735242e-06, "loss": 0.8569, "step": 12571 }, { "epoch": 0.9082666570339734, "grad_norm": 6.839014932294023, "learning_rate": 2.98953661471193e-06, "loss": 0.8637, "step": 12572 }, { "epoch": 0.9083389022341828, "grad_norm": 4.845492338787569, "learning_rate": 2.9892497715489586e-06, "loss": 0.7208, "step": 12573 }, { "epoch": 0.9084111474343923, "grad_norm": 5.8791657957657195, "learning_rate": 2.9889629216885397e-06, "loss": 0.8728, "step": 12574 }, { "epoch": 0.9084833926346019, "grad_norm": 5.824164714600947, "learning_rate": 2.9886760651345963e-06, "loss": 0.7077, "step": 12575 }, { "epoch": 0.9085556378348113, "grad_norm": 5.718994135413888, "learning_rate": 2.988389201891058e-06, "loss": 0.8083, "step": 12576 }, { "epoch": 0.9086278830350208, "grad_norm": 7.722290462616235, "learning_rate": 2.9881023319618507e-06, "loss": 0.88, "step": 12577 }, { "epoch": 0.9087001282352304, "grad_norm": 7.013237833791898, "learning_rate": 2.987815455350901e-06, "loss": 0.8169, "step": 12578 }, { "epoch": 0.9087723734354399, "grad_norm": 6.154447494983271, "learning_rate": 2.987528572062137e-06, "loss": 0.8517, "step": 12579 }, { "epoch": 0.9088446186356494, "grad_norm": 6.93727950012056, "learning_rate": 2.987241682099485e-06, "loss": 0.7929, "step": 12580 }, { "epoch": 0.9089168638358589, "grad_norm": 6.487780896759343, "learning_rate": 2.986954785466873e-06, "loss": 0.8532, "step": 12581 }, { "epoch": 0.9089891090360684, "grad_norm": 5.0558865523829155, "learning_rate": 2.986667882168228e-06, "loss": 0.7937, "step": 12582 }, { "epoch": 0.9090613542362779, "grad_norm": 6.2203486702391295, "learning_rate": 2.9863809722074772e-06, "loss": 0.7854, "step": 12583 }, { "epoch": 0.9091335994364874, "grad_norm": 7.371210450106268, "learning_rate": 2.986094055588549e-06, "loss": 0.8597, "step": 12584 }, { "epoch": 0.909205844636697, "grad_norm": 6.999681465530877, "learning_rate": 2.98580713231537e-06, "loss": 0.7874, "step": 12585 }, { "epoch": 0.9092780898369065, "grad_norm": 5.855044298012565, "learning_rate": 2.985520202391869e-06, "loss": 0.7759, "step": 12586 }, { "epoch": 0.9093503350371159, "grad_norm": 6.041393545707902, "learning_rate": 2.9852332658219727e-06, "loss": 0.7054, "step": 12587 }, { "epoch": 0.9094225802373255, "grad_norm": 4.987310328498132, "learning_rate": 2.984946322609611e-06, "loss": 0.7023, "step": 12588 }, { "epoch": 0.909494825437535, "grad_norm": 6.051141224564909, "learning_rate": 2.9846593727587103e-06, "loss": 0.8253, "step": 12589 }, { "epoch": 0.9095670706377446, "grad_norm": 7.031172417106697, "learning_rate": 2.984372416273199e-06, "loss": 0.8472, "step": 12590 }, { "epoch": 0.909639315837954, "grad_norm": 6.372564411518732, "learning_rate": 2.984085453157005e-06, "loss": 0.8005, "step": 12591 }, { "epoch": 0.9097115610381635, "grad_norm": 7.3093160887028485, "learning_rate": 2.9837984834140575e-06, "loss": 0.8091, "step": 12592 }, { "epoch": 0.9097838062383731, "grad_norm": 7.019501088989878, "learning_rate": 2.983511507048284e-06, "loss": 0.8357, "step": 12593 }, { "epoch": 0.9098560514385825, "grad_norm": 7.2257688014306485, "learning_rate": 2.9832245240636136e-06, "loss": 0.7594, "step": 12594 }, { "epoch": 0.909928296638792, "grad_norm": 6.0478054385503, "learning_rate": 2.982937534463975e-06, "loss": 0.7958, "step": 12595 }, { "epoch": 0.9100005418390016, "grad_norm": 6.74903551911178, "learning_rate": 2.9826505382532965e-06, "loss": 0.7902, "step": 12596 }, { "epoch": 0.9100727870392111, "grad_norm": 5.314702553522809, "learning_rate": 2.9823635354355062e-06, "loss": 0.8194, "step": 12597 }, { "epoch": 0.9101450322394206, "grad_norm": 7.795294775438984, "learning_rate": 2.9820765260145342e-06, "loss": 0.8315, "step": 12598 }, { "epoch": 0.9102172774396301, "grad_norm": 6.981973597935302, "learning_rate": 2.981789509994309e-06, "loss": 0.7937, "step": 12599 }, { "epoch": 0.9102895226398396, "grad_norm": 5.829773861407189, "learning_rate": 2.9815024873787592e-06, "loss": 0.8415, "step": 12600 }, { "epoch": 0.9103617678400491, "grad_norm": 5.974889663252126, "learning_rate": 2.9812154581718133e-06, "loss": 0.8251, "step": 12601 }, { "epoch": 0.9104340130402586, "grad_norm": 7.943283737268971, "learning_rate": 2.980928422377403e-06, "loss": 0.8594, "step": 12602 }, { "epoch": 0.9105062582404682, "grad_norm": 5.779779360562746, "learning_rate": 2.980641379999455e-06, "loss": 0.8492, "step": 12603 }, { "epoch": 0.9105785034406777, "grad_norm": 7.105802137072947, "learning_rate": 2.9803543310419005e-06, "loss": 0.8703, "step": 12604 }, { "epoch": 0.9106507486408871, "grad_norm": 7.89790983918409, "learning_rate": 2.9800672755086675e-06, "loss": 0.8115, "step": 12605 }, { "epoch": 0.9107229938410967, "grad_norm": 5.78648082088996, "learning_rate": 2.979780213403686e-06, "loss": 0.767, "step": 12606 }, { "epoch": 0.9107952390413062, "grad_norm": 5.2270151806135265, "learning_rate": 2.9794931447308867e-06, "loss": 0.8361, "step": 12607 }, { "epoch": 0.9108674842415158, "grad_norm": 9.084875646652337, "learning_rate": 2.979206069494198e-06, "loss": 0.8597, "step": 12608 }, { "epoch": 0.9109397294417252, "grad_norm": 9.039824220416852, "learning_rate": 2.978918987697551e-06, "loss": 0.9859, "step": 12609 }, { "epoch": 0.9110119746419347, "grad_norm": 6.804564727831892, "learning_rate": 2.978631899344875e-06, "loss": 0.8263, "step": 12610 }, { "epoch": 0.9110842198421443, "grad_norm": 6.311759074323471, "learning_rate": 2.9783448044400982e-06, "loss": 0.7841, "step": 12611 }, { "epoch": 0.9111564650423537, "grad_norm": 7.286295309326736, "learning_rate": 2.978057702987154e-06, "loss": 0.8115, "step": 12612 }, { "epoch": 0.9112287102425632, "grad_norm": 5.7472627178125935, "learning_rate": 2.977770594989971e-06, "loss": 0.8057, "step": 12613 }, { "epoch": 0.9113009554427728, "grad_norm": 7.456332430259992, "learning_rate": 2.9774834804524787e-06, "loss": 0.7904, "step": 12614 }, { "epoch": 0.9113732006429823, "grad_norm": 5.3839154490708845, "learning_rate": 2.9771963593786086e-06, "loss": 0.8646, "step": 12615 }, { "epoch": 0.9114454458431918, "grad_norm": 6.582513428184296, "learning_rate": 2.9769092317722907e-06, "loss": 0.8077, "step": 12616 }, { "epoch": 0.9115176910434013, "grad_norm": 5.9004568925234375, "learning_rate": 2.9766220976374567e-06, "loss": 0.8172, "step": 12617 }, { "epoch": 0.9115899362436108, "grad_norm": 7.210872959827641, "learning_rate": 2.9763349569780354e-06, "loss": 0.7779, "step": 12618 }, { "epoch": 0.9116621814438203, "grad_norm": 6.289589882293692, "learning_rate": 2.976047809797959e-06, "loss": 0.7772, "step": 12619 }, { "epoch": 0.9117344266440298, "grad_norm": 7.287675502751096, "learning_rate": 2.975760656101157e-06, "loss": 0.7784, "step": 12620 }, { "epoch": 0.9118066718442394, "grad_norm": 5.868301793737505, "learning_rate": 2.975473495891561e-06, "loss": 0.8477, "step": 12621 }, { "epoch": 0.9118789170444489, "grad_norm": 7.062391432206177, "learning_rate": 2.9751863291731024e-06, "loss": 0.7931, "step": 12622 }, { "epoch": 0.9119511622446583, "grad_norm": 5.509674753050257, "learning_rate": 2.974899155949712e-06, "loss": 0.7789, "step": 12623 }, { "epoch": 0.9120234074448679, "grad_norm": 5.854230488964363, "learning_rate": 2.974611976225321e-06, "loss": 0.7747, "step": 12624 }, { "epoch": 0.9120956526450774, "grad_norm": 8.150532205441163, "learning_rate": 2.9743247900038595e-06, "loss": 0.8934, "step": 12625 }, { "epoch": 0.912167897845287, "grad_norm": 6.6391407463506535, "learning_rate": 2.974037597289261e-06, "loss": 0.775, "step": 12626 }, { "epoch": 0.9122401430454964, "grad_norm": 5.477207641042178, "learning_rate": 2.9737503980854555e-06, "loss": 0.7984, "step": 12627 }, { "epoch": 0.9123123882457059, "grad_norm": 7.204485121469716, "learning_rate": 2.9734631923963753e-06, "loss": 0.8135, "step": 12628 }, { "epoch": 0.9123846334459155, "grad_norm": 5.6341468193578415, "learning_rate": 2.9731759802259506e-06, "loss": 0.7977, "step": 12629 }, { "epoch": 0.9124568786461249, "grad_norm": 6.005889227568084, "learning_rate": 2.972888761578115e-06, "loss": 0.7874, "step": 12630 }, { "epoch": 0.9125291238463344, "grad_norm": 5.4621727328174785, "learning_rate": 2.9726015364567996e-06, "loss": 0.8033, "step": 12631 }, { "epoch": 0.912601369046544, "grad_norm": 6.52751556663454, "learning_rate": 2.9723143048659355e-06, "loss": 0.8613, "step": 12632 }, { "epoch": 0.9126736142467535, "grad_norm": 5.859846986198543, "learning_rate": 2.972027066809456e-06, "loss": 0.767, "step": 12633 }, { "epoch": 0.912745859446963, "grad_norm": 6.775760520675514, "learning_rate": 2.9717398222912918e-06, "loss": 0.8129, "step": 12634 }, { "epoch": 0.9128181046471725, "grad_norm": 5.648042066141877, "learning_rate": 2.9714525713153756e-06, "loss": 0.8403, "step": 12635 }, { "epoch": 0.912890349847382, "grad_norm": 5.650391300120941, "learning_rate": 2.97116531388564e-06, "loss": 0.7624, "step": 12636 }, { "epoch": 0.9129625950475915, "grad_norm": 6.814234801425255, "learning_rate": 2.9708780500060173e-06, "loss": 0.7627, "step": 12637 }, { "epoch": 0.913034840247801, "grad_norm": 6.765100049098787, "learning_rate": 2.97059077968044e-06, "loss": 0.7584, "step": 12638 }, { "epoch": 0.9131070854480106, "grad_norm": 6.680828438743932, "learning_rate": 2.970303502912839e-06, "loss": 0.7704, "step": 12639 }, { "epoch": 0.9131793306482201, "grad_norm": 7.764690626948801, "learning_rate": 2.9700162197071497e-06, "loss": 0.8567, "step": 12640 }, { "epoch": 0.9132515758484295, "grad_norm": 6.548816183723829, "learning_rate": 2.9697289300673025e-06, "loss": 0.8683, "step": 12641 }, { "epoch": 0.9133238210486391, "grad_norm": 6.554574303905626, "learning_rate": 2.9694416339972314e-06, "loss": 0.7438, "step": 12642 }, { "epoch": 0.9133960662488486, "grad_norm": 6.596552104266792, "learning_rate": 2.9691543315008687e-06, "loss": 0.8977, "step": 12643 }, { "epoch": 0.9134683114490582, "grad_norm": 8.91352798504255, "learning_rate": 2.968867022582147e-06, "loss": 0.8299, "step": 12644 }, { "epoch": 0.9135405566492676, "grad_norm": 6.977121294460717, "learning_rate": 2.9685797072450005e-06, "loss": 0.7945, "step": 12645 }, { "epoch": 0.9136128018494771, "grad_norm": 6.780210635083804, "learning_rate": 2.968292385493361e-06, "loss": 0.8369, "step": 12646 }, { "epoch": 0.9136850470496867, "grad_norm": 6.236412843438814, "learning_rate": 2.968005057331163e-06, "loss": 0.8559, "step": 12647 }, { "epoch": 0.9137572922498961, "grad_norm": 5.690016975578636, "learning_rate": 2.9677177227623386e-06, "loss": 0.792, "step": 12648 }, { "epoch": 0.9138295374501056, "grad_norm": 7.162145859516426, "learning_rate": 2.967430381790822e-06, "loss": 0.8276, "step": 12649 }, { "epoch": 0.9139017826503152, "grad_norm": 5.428074702862737, "learning_rate": 2.967143034420546e-06, "loss": 0.6904, "step": 12650 }, { "epoch": 0.9139740278505247, "grad_norm": 5.872794528686652, "learning_rate": 2.9668556806554456e-06, "loss": 0.8545, "step": 12651 }, { "epoch": 0.9140462730507342, "grad_norm": 5.772948867706208, "learning_rate": 2.966568320499452e-06, "loss": 0.7346, "step": 12652 }, { "epoch": 0.9141185182509437, "grad_norm": 5.333693829909608, "learning_rate": 2.966280953956501e-06, "loss": 0.7516, "step": 12653 }, { "epoch": 0.9141907634511532, "grad_norm": 6.100491125628899, "learning_rate": 2.965993581030526e-06, "loss": 0.8162, "step": 12654 }, { "epoch": 0.9142630086513627, "grad_norm": 7.156131926649433, "learning_rate": 2.9657062017254606e-06, "loss": 0.8945, "step": 12655 }, { "epoch": 0.9143352538515722, "grad_norm": 6.001802809397408, "learning_rate": 2.9654188160452397e-06, "loss": 0.7904, "step": 12656 }, { "epoch": 0.9144074990517818, "grad_norm": 6.606518861032764, "learning_rate": 2.9651314239937958e-06, "loss": 0.7414, "step": 12657 }, { "epoch": 0.9144797442519913, "grad_norm": 6.454653505795862, "learning_rate": 2.9648440255750633e-06, "loss": 0.7465, "step": 12658 }, { "epoch": 0.9145519894522007, "grad_norm": 5.297272703831179, "learning_rate": 2.964556620792977e-06, "loss": 0.7441, "step": 12659 }, { "epoch": 0.9146242346524103, "grad_norm": 4.854746880734461, "learning_rate": 2.9642692096514727e-06, "loss": 0.8405, "step": 12660 }, { "epoch": 0.9146964798526198, "grad_norm": 8.701513171968056, "learning_rate": 2.9639817921544822e-06, "loss": 0.8311, "step": 12661 }, { "epoch": 0.9147687250528294, "grad_norm": 6.4253989247962995, "learning_rate": 2.9636943683059415e-06, "loss": 0.7824, "step": 12662 }, { "epoch": 0.9148409702530388, "grad_norm": 7.07387362838435, "learning_rate": 2.963406938109785e-06, "loss": 0.8026, "step": 12663 }, { "epoch": 0.9149132154532483, "grad_norm": 7.075808788211014, "learning_rate": 2.9631195015699473e-06, "loss": 0.7834, "step": 12664 }, { "epoch": 0.9149854606534579, "grad_norm": 7.557374336391678, "learning_rate": 2.9628320586903635e-06, "loss": 0.8948, "step": 12665 }, { "epoch": 0.9150577058536673, "grad_norm": 7.220658747665904, "learning_rate": 2.9625446094749677e-06, "loss": 0.8591, "step": 12666 }, { "epoch": 0.9151299510538768, "grad_norm": 5.310873163994607, "learning_rate": 2.962257153927695e-06, "loss": 0.7664, "step": 12667 }, { "epoch": 0.9152021962540864, "grad_norm": 6.572157955563081, "learning_rate": 2.9619696920524817e-06, "loss": 0.8589, "step": 12668 }, { "epoch": 0.9152744414542959, "grad_norm": 6.526644456388552, "learning_rate": 2.9616822238532615e-06, "loss": 0.88, "step": 12669 }, { "epoch": 0.9153466866545054, "grad_norm": 5.406252998836325, "learning_rate": 2.9613947493339707e-06, "loss": 0.7808, "step": 12670 }, { "epoch": 0.9154189318547149, "grad_norm": 7.357288882939915, "learning_rate": 2.9611072684985433e-06, "loss": 0.7831, "step": 12671 }, { "epoch": 0.9154911770549244, "grad_norm": 7.7743352477045375, "learning_rate": 2.9608197813509153e-06, "loss": 0.7564, "step": 12672 }, { "epoch": 0.9155634222551339, "grad_norm": 7.323164507459894, "learning_rate": 2.9605322878950226e-06, "loss": 0.8221, "step": 12673 }, { "epoch": 0.9156356674553434, "grad_norm": 6.454139611236953, "learning_rate": 2.9602447881348014e-06, "loss": 0.8495, "step": 12674 }, { "epoch": 0.915707912655553, "grad_norm": 6.343003703600268, "learning_rate": 2.959957282074185e-06, "loss": 0.8639, "step": 12675 }, { "epoch": 0.9157801578557625, "grad_norm": 6.370387783733163, "learning_rate": 2.959669769717112e-06, "loss": 0.826, "step": 12676 }, { "epoch": 0.9158524030559719, "grad_norm": 6.240696505775398, "learning_rate": 2.9593822510675154e-06, "loss": 0.8185, "step": 12677 }, { "epoch": 0.9159246482561815, "grad_norm": 5.747260726587493, "learning_rate": 2.9590947261293325e-06, "loss": 0.8238, "step": 12678 }, { "epoch": 0.915996893456391, "grad_norm": 5.7207134607494545, "learning_rate": 2.9588071949065e-06, "loss": 0.8417, "step": 12679 }, { "epoch": 0.9160691386566006, "grad_norm": 7.181318726638197, "learning_rate": 2.958519657402953e-06, "loss": 0.8024, "step": 12680 }, { "epoch": 0.91614138385681, "grad_norm": 5.089486426504278, "learning_rate": 2.9582321136226283e-06, "loss": 0.7685, "step": 12681 }, { "epoch": 0.9162136290570195, "grad_norm": 6.189723376377511, "learning_rate": 2.9579445635694603e-06, "loss": 0.8191, "step": 12682 }, { "epoch": 0.9162858742572291, "grad_norm": 5.62170347468411, "learning_rate": 2.9576570072473883e-06, "loss": 0.7554, "step": 12683 }, { "epoch": 0.9163581194574385, "grad_norm": 6.22775643842529, "learning_rate": 2.9573694446603464e-06, "loss": 0.82, "step": 12684 }, { "epoch": 0.916430364657648, "grad_norm": 5.387803552842609, "learning_rate": 2.957081875812273e-06, "loss": 0.7622, "step": 12685 }, { "epoch": 0.9165026098578576, "grad_norm": 7.924522544365202, "learning_rate": 2.956794300707102e-06, "loss": 0.7249, "step": 12686 }, { "epoch": 0.9165748550580671, "grad_norm": 5.264916618534625, "learning_rate": 2.956506719348773e-06, "loss": 0.7819, "step": 12687 }, { "epoch": 0.9166471002582766, "grad_norm": 5.830936520768213, "learning_rate": 2.9562191317412214e-06, "loss": 0.8035, "step": 12688 }, { "epoch": 0.9167193454584861, "grad_norm": 6.839130671620908, "learning_rate": 2.9559315378883833e-06, "loss": 0.8258, "step": 12689 }, { "epoch": 0.9167915906586956, "grad_norm": 7.434732900104645, "learning_rate": 2.955643937794197e-06, "loss": 0.7062, "step": 12690 }, { "epoch": 0.9168638358589051, "grad_norm": 7.1724180460596, "learning_rate": 2.9553563314625988e-06, "loss": 0.8361, "step": 12691 }, { "epoch": 0.9169360810591146, "grad_norm": 7.487746337506617, "learning_rate": 2.955068718897526e-06, "loss": 0.9254, "step": 12692 }, { "epoch": 0.9170083262593242, "grad_norm": 6.846996077525131, "learning_rate": 2.954781100102916e-06, "loss": 0.772, "step": 12693 }, { "epoch": 0.9170805714595337, "grad_norm": 5.46595684110685, "learning_rate": 2.954493475082706e-06, "loss": 0.81, "step": 12694 }, { "epoch": 0.9171528166597431, "grad_norm": 5.804205770513727, "learning_rate": 2.9542058438408332e-06, "loss": 0.7956, "step": 12695 }, { "epoch": 0.9172250618599527, "grad_norm": 7.928231181695584, "learning_rate": 2.953918206381234e-06, "loss": 0.7971, "step": 12696 }, { "epoch": 0.9172973070601622, "grad_norm": 6.512408809792224, "learning_rate": 2.953630562707848e-06, "loss": 0.9343, "step": 12697 }, { "epoch": 0.9173695522603718, "grad_norm": 5.827326415629802, "learning_rate": 2.9533429128246115e-06, "loss": 0.8068, "step": 12698 }, { "epoch": 0.9174417974605812, "grad_norm": 5.207699342923808, "learning_rate": 2.953055256735463e-06, "loss": 0.713, "step": 12699 }, { "epoch": 0.9175140426607907, "grad_norm": 7.4423773665058475, "learning_rate": 2.952767594444339e-06, "loss": 0.8187, "step": 12700 }, { "epoch": 0.9175862878610003, "grad_norm": 6.525034598189373, "learning_rate": 2.9524799259551783e-06, "loss": 0.7674, "step": 12701 }, { "epoch": 0.9176585330612097, "grad_norm": 5.232316708477936, "learning_rate": 2.9521922512719194e-06, "loss": 0.7973, "step": 12702 }, { "epoch": 0.9177307782614192, "grad_norm": 5.01111778170491, "learning_rate": 2.9519045703984993e-06, "loss": 0.7752, "step": 12703 }, { "epoch": 0.9178030234616288, "grad_norm": 7.018418195699552, "learning_rate": 2.951616883338857e-06, "loss": 0.7746, "step": 12704 }, { "epoch": 0.9178752686618383, "grad_norm": 6.7220992855604, "learning_rate": 2.95132919009693e-06, "loss": 0.8162, "step": 12705 }, { "epoch": 0.9179475138620478, "grad_norm": 6.3536540173791805, "learning_rate": 2.951041490676656e-06, "loss": 0.7646, "step": 12706 }, { "epoch": 0.9180197590622573, "grad_norm": 5.645877833267269, "learning_rate": 2.9507537850819747e-06, "loss": 0.8087, "step": 12707 }, { "epoch": 0.9180920042624668, "grad_norm": 6.74121709141125, "learning_rate": 2.9504660733168247e-06, "loss": 0.8, "step": 12708 }, { "epoch": 0.9181642494626763, "grad_norm": 7.436800627323327, "learning_rate": 2.950178355385143e-06, "loss": 0.7941, "step": 12709 }, { "epoch": 0.9182364946628858, "grad_norm": 5.815149831243771, "learning_rate": 2.9498906312908693e-06, "loss": 0.7612, "step": 12710 }, { "epoch": 0.9183087398630954, "grad_norm": 7.277096328993132, "learning_rate": 2.9496029010379428e-06, "loss": 0.8501, "step": 12711 }, { "epoch": 0.9183809850633049, "grad_norm": 6.264971691898772, "learning_rate": 2.9493151646303016e-06, "loss": 0.7972, "step": 12712 }, { "epoch": 0.9184532302635143, "grad_norm": 6.765582712230718, "learning_rate": 2.9490274220718845e-06, "loss": 0.8042, "step": 12713 }, { "epoch": 0.9185254754637239, "grad_norm": 5.367876166926453, "learning_rate": 2.9487396733666302e-06, "loss": 0.785, "step": 12714 }, { "epoch": 0.9185977206639334, "grad_norm": 6.2783582113347105, "learning_rate": 2.9484519185184783e-06, "loss": 0.8063, "step": 12715 }, { "epoch": 0.9186699658641428, "grad_norm": 5.909802856771806, "learning_rate": 2.948164157531368e-06, "loss": 0.783, "step": 12716 }, { "epoch": 0.9187422110643524, "grad_norm": 5.115562868294422, "learning_rate": 2.9478763904092388e-06, "loss": 0.7372, "step": 12717 }, { "epoch": 0.9188144562645619, "grad_norm": 6.090039811027704, "learning_rate": 2.9475886171560286e-06, "loss": 0.8503, "step": 12718 }, { "epoch": 0.9188867014647715, "grad_norm": 7.050173645045416, "learning_rate": 2.947300837775679e-06, "loss": 0.8301, "step": 12719 }, { "epoch": 0.9189589466649809, "grad_norm": 5.127635557260389, "learning_rate": 2.9470130522721274e-06, "loss": 0.7378, "step": 12720 }, { "epoch": 0.9190311918651904, "grad_norm": 6.990453477374407, "learning_rate": 2.946725260649314e-06, "loss": 0.7943, "step": 12721 }, { "epoch": 0.9191034370654, "grad_norm": 4.78086622106606, "learning_rate": 2.9464374629111796e-06, "loss": 0.7019, "step": 12722 }, { "epoch": 0.9191756822656095, "grad_norm": 5.2087857266091815, "learning_rate": 2.946149659061662e-06, "loss": 0.774, "step": 12723 }, { "epoch": 0.919247927465819, "grad_norm": 6.238290422465079, "learning_rate": 2.9458618491047026e-06, "loss": 0.8067, "step": 12724 }, { "epoch": 0.9193201726660285, "grad_norm": 5.086302572762884, "learning_rate": 2.94557403304424e-06, "loss": 0.8073, "step": 12725 }, { "epoch": 0.919392417866238, "grad_norm": 7.768193946389386, "learning_rate": 2.9452862108842157e-06, "loss": 0.8601, "step": 12726 }, { "epoch": 0.9194646630664475, "grad_norm": 7.281060343224721, "learning_rate": 2.9449983826285684e-06, "loss": 0.7961, "step": 12727 }, { "epoch": 0.919536908266657, "grad_norm": 5.95607382162548, "learning_rate": 2.9447105482812387e-06, "loss": 0.764, "step": 12728 }, { "epoch": 0.9196091534668666, "grad_norm": 6.448003403712432, "learning_rate": 2.9444227078461673e-06, "loss": 0.7933, "step": 12729 }, { "epoch": 0.9196813986670761, "grad_norm": 5.6659833084652345, "learning_rate": 2.9441348613272936e-06, "loss": 0.74, "step": 12730 }, { "epoch": 0.9197536438672855, "grad_norm": 7.319054950514605, "learning_rate": 2.9438470087285592e-06, "loss": 0.8964, "step": 12731 }, { "epoch": 0.9198258890674951, "grad_norm": 6.018518163349543, "learning_rate": 2.943559150053903e-06, "loss": 0.7838, "step": 12732 }, { "epoch": 0.9198981342677046, "grad_norm": 5.684361524602637, "learning_rate": 2.9432712853072677e-06, "loss": 0.7626, "step": 12733 }, { "epoch": 0.919970379467914, "grad_norm": 5.0585277656169465, "learning_rate": 2.9429834144925905e-06, "loss": 0.7621, "step": 12734 }, { "epoch": 0.9200426246681236, "grad_norm": 5.887224505998833, "learning_rate": 2.942695537613816e-06, "loss": 0.7829, "step": 12735 }, { "epoch": 0.9201148698683331, "grad_norm": 6.989642245806665, "learning_rate": 2.9424076546748837e-06, "loss": 0.8871, "step": 12736 }, { "epoch": 0.9201871150685427, "grad_norm": 6.058885263578362, "learning_rate": 2.9421197656797327e-06, "loss": 0.8074, "step": 12737 }, { "epoch": 0.9202593602687521, "grad_norm": 7.823369391929422, "learning_rate": 2.9418318706323064e-06, "loss": 0.7978, "step": 12738 }, { "epoch": 0.9203316054689616, "grad_norm": 6.137160393978311, "learning_rate": 2.9415439695365444e-06, "loss": 0.818, "step": 12739 }, { "epoch": 0.9204038506691712, "grad_norm": 6.962778312558427, "learning_rate": 2.9412560623963887e-06, "loss": 0.7176, "step": 12740 }, { "epoch": 0.9204760958693807, "grad_norm": 5.875857026917967, "learning_rate": 2.9409681492157794e-06, "loss": 0.7386, "step": 12741 }, { "epoch": 0.9205483410695902, "grad_norm": 6.343539530452542, "learning_rate": 2.9406802299986597e-06, "loss": 0.7538, "step": 12742 }, { "epoch": 0.9206205862697997, "grad_norm": 6.059761293953522, "learning_rate": 2.9403923047489693e-06, "loss": 0.8086, "step": 12743 }, { "epoch": 0.9206928314700092, "grad_norm": 6.051721803915989, "learning_rate": 2.9401043734706493e-06, "loss": 0.8853, "step": 12744 }, { "epoch": 0.9207650766702187, "grad_norm": 9.072298212064725, "learning_rate": 2.9398164361676428e-06, "loss": 0.8462, "step": 12745 }, { "epoch": 0.9208373218704282, "grad_norm": 5.463215496071755, "learning_rate": 2.939528492843891e-06, "loss": 0.8488, "step": 12746 }, { "epoch": 0.9209095670706378, "grad_norm": 6.053727872541444, "learning_rate": 2.9392405435033357e-06, "loss": 0.7921, "step": 12747 }, { "epoch": 0.9209818122708473, "grad_norm": 6.145261921154366, "learning_rate": 2.938952588149917e-06, "loss": 0.7572, "step": 12748 }, { "epoch": 0.9210540574710567, "grad_norm": 6.51821284508627, "learning_rate": 2.9386646267875794e-06, "loss": 0.7906, "step": 12749 }, { "epoch": 0.9211263026712663, "grad_norm": 5.32705525597354, "learning_rate": 2.9383766594202633e-06, "loss": 0.7565, "step": 12750 }, { "epoch": 0.9211985478714758, "grad_norm": 6.826962299998841, "learning_rate": 2.9380886860519114e-06, "loss": 0.7925, "step": 12751 }, { "epoch": 0.9212707930716852, "grad_norm": 8.748734736924552, "learning_rate": 2.9378007066864654e-06, "loss": 0.809, "step": 12752 }, { "epoch": 0.9213430382718948, "grad_norm": 6.876917623630319, "learning_rate": 2.9375127213278675e-06, "loss": 0.8053, "step": 12753 }, { "epoch": 0.9214152834721043, "grad_norm": 5.3765492535278, "learning_rate": 2.9372247299800606e-06, "loss": 0.7433, "step": 12754 }, { "epoch": 0.9214875286723139, "grad_norm": 5.833448499723451, "learning_rate": 2.9369367326469863e-06, "loss": 0.718, "step": 12755 }, { "epoch": 0.9215597738725233, "grad_norm": 5.716568994753526, "learning_rate": 2.9366487293325875e-06, "loss": 0.8311, "step": 12756 }, { "epoch": 0.9216320190727328, "grad_norm": 5.893325469988472, "learning_rate": 2.936360720040806e-06, "loss": 0.782, "step": 12757 }, { "epoch": 0.9217042642729424, "grad_norm": 6.134567573311377, "learning_rate": 2.9360727047755855e-06, "loss": 0.8102, "step": 12758 }, { "epoch": 0.9217765094731519, "grad_norm": 6.377850063739887, "learning_rate": 2.935784683540868e-06, "loss": 0.8794, "step": 12759 }, { "epoch": 0.9218487546733614, "grad_norm": 6.601326733955645, "learning_rate": 2.9354966563405973e-06, "loss": 0.778, "step": 12760 }, { "epoch": 0.9219209998735709, "grad_norm": 6.598981616895667, "learning_rate": 2.9352086231787154e-06, "loss": 0.7163, "step": 12761 }, { "epoch": 0.9219932450737804, "grad_norm": 6.999756945068811, "learning_rate": 2.934920584059164e-06, "loss": 0.7861, "step": 12762 }, { "epoch": 0.9220654902739899, "grad_norm": 5.83472124573067, "learning_rate": 2.934632538985889e-06, "loss": 0.8171, "step": 12763 }, { "epoch": 0.9221377354741994, "grad_norm": 5.9941874005384115, "learning_rate": 2.9343444879628314e-06, "loss": 0.8267, "step": 12764 }, { "epoch": 0.922209980674409, "grad_norm": 8.440433698602686, "learning_rate": 2.9340564309939357e-06, "loss": 0.8065, "step": 12765 }, { "epoch": 0.9222822258746185, "grad_norm": 6.135499013489551, "learning_rate": 2.9337683680831437e-06, "loss": 0.7642, "step": 12766 }, { "epoch": 0.9223544710748279, "grad_norm": 6.546832464621668, "learning_rate": 2.9334802992343996e-06, "loss": 0.8722, "step": 12767 }, { "epoch": 0.9224267162750375, "grad_norm": 5.926918639815151, "learning_rate": 2.933192224451647e-06, "loss": 0.8155, "step": 12768 }, { "epoch": 0.922498961475247, "grad_norm": 6.231050374497967, "learning_rate": 2.9329041437388294e-06, "loss": 0.8027, "step": 12769 }, { "epoch": 0.9225712066754564, "grad_norm": 9.469498104342412, "learning_rate": 2.93261605709989e-06, "loss": 0.7583, "step": 12770 }, { "epoch": 0.922643451875666, "grad_norm": 6.068195302738512, "learning_rate": 2.9323279645387727e-06, "loss": 0.7729, "step": 12771 }, { "epoch": 0.9227156970758755, "grad_norm": 7.6877811039338235, "learning_rate": 2.932039866059421e-06, "loss": 0.8462, "step": 12772 }, { "epoch": 0.9227879422760851, "grad_norm": 5.747474115619556, "learning_rate": 2.931751761665779e-06, "loss": 0.7653, "step": 12773 }, { "epoch": 0.9228601874762945, "grad_norm": 4.901499787289082, "learning_rate": 2.9314636513617913e-06, "loss": 0.837, "step": 12774 }, { "epoch": 0.922932432676504, "grad_norm": 5.667326645658758, "learning_rate": 2.9311755351514002e-06, "loss": 0.8073, "step": 12775 }, { "epoch": 0.9230046778767136, "grad_norm": 5.71216775608287, "learning_rate": 2.9308874130385516e-06, "loss": 0.7042, "step": 12776 }, { "epoch": 0.9230769230769231, "grad_norm": 7.220564444925363, "learning_rate": 2.9305992850271887e-06, "loss": 0.7665, "step": 12777 }, { "epoch": 0.9231491682771326, "grad_norm": 6.038126607724303, "learning_rate": 2.9303111511212558e-06, "loss": 0.7648, "step": 12778 }, { "epoch": 0.9232214134773421, "grad_norm": 7.152022933223405, "learning_rate": 2.9300230113246974e-06, "loss": 0.8526, "step": 12779 }, { "epoch": 0.9232936586775516, "grad_norm": 8.902362175688479, "learning_rate": 2.9297348656414583e-06, "loss": 0.7806, "step": 12780 }, { "epoch": 0.9233659038777611, "grad_norm": 6.654468280005519, "learning_rate": 2.9294467140754813e-06, "loss": 0.7869, "step": 12781 }, { "epoch": 0.9234381490779706, "grad_norm": 6.745774536002235, "learning_rate": 2.929158556630713e-06, "loss": 0.7507, "step": 12782 }, { "epoch": 0.9235103942781802, "grad_norm": 5.8668702061341875, "learning_rate": 2.928870393311098e-06, "loss": 0.802, "step": 12783 }, { "epoch": 0.9235826394783897, "grad_norm": 5.91749849874641, "learning_rate": 2.928582224120579e-06, "loss": 0.7292, "step": 12784 }, { "epoch": 0.9236548846785991, "grad_norm": 4.850444671066775, "learning_rate": 2.9282940490631034e-06, "loss": 0.7547, "step": 12785 }, { "epoch": 0.9237271298788087, "grad_norm": 5.438561872956092, "learning_rate": 2.928005868142614e-06, "loss": 0.7855, "step": 12786 }, { "epoch": 0.9237993750790182, "grad_norm": 7.765163476995374, "learning_rate": 2.9277176813630566e-06, "loss": 0.7886, "step": 12787 }, { "epoch": 0.9238716202792276, "grad_norm": 6.911881442271813, "learning_rate": 2.9274294887283767e-06, "loss": 0.8623, "step": 12788 }, { "epoch": 0.9239438654794372, "grad_norm": 5.463806359273782, "learning_rate": 2.927141290242518e-06, "loss": 0.7879, "step": 12789 }, { "epoch": 0.9240161106796467, "grad_norm": 6.236602155935235, "learning_rate": 2.9268530859094278e-06, "loss": 0.7914, "step": 12790 }, { "epoch": 0.9240883558798563, "grad_norm": 7.595677009629829, "learning_rate": 2.9265648757330497e-06, "loss": 0.8522, "step": 12791 }, { "epoch": 0.9241606010800657, "grad_norm": 8.256226010977167, "learning_rate": 2.9262766597173297e-06, "loss": 0.8845, "step": 12792 }, { "epoch": 0.9242328462802752, "grad_norm": 6.32903458451387, "learning_rate": 2.9259884378662135e-06, "loss": 0.7705, "step": 12793 }, { "epoch": 0.9243050914804848, "grad_norm": 7.935370933165244, "learning_rate": 2.9257002101836464e-06, "loss": 0.7683, "step": 12794 }, { "epoch": 0.9243773366806943, "grad_norm": 5.671033224057953, "learning_rate": 2.9254119766735735e-06, "loss": 0.7574, "step": 12795 }, { "epoch": 0.9244495818809038, "grad_norm": 6.170548463059093, "learning_rate": 2.925123737339941e-06, "loss": 0.8522, "step": 12796 }, { "epoch": 0.9245218270811133, "grad_norm": 6.112856290141768, "learning_rate": 2.924835492186695e-06, "loss": 0.7851, "step": 12797 }, { "epoch": 0.9245940722813228, "grad_norm": 6.5340952539241055, "learning_rate": 2.9245472412177805e-06, "loss": 0.794, "step": 12798 }, { "epoch": 0.9246663174815323, "grad_norm": 4.804863555908071, "learning_rate": 2.9242589844371448e-06, "loss": 0.7619, "step": 12799 }, { "epoch": 0.9247385626817418, "grad_norm": 6.6403891846916565, "learning_rate": 2.9239707218487324e-06, "loss": 0.7885, "step": 12800 }, { "epoch": 0.9248108078819514, "grad_norm": 6.160305178625393, "learning_rate": 2.9236824534564896e-06, "loss": 0.8476, "step": 12801 }, { "epoch": 0.9248830530821609, "grad_norm": 7.6045738912435334, "learning_rate": 2.923394179264364e-06, "loss": 0.8429, "step": 12802 }, { "epoch": 0.9249552982823703, "grad_norm": 6.663997815300078, "learning_rate": 2.9231058992762995e-06, "loss": 0.7724, "step": 12803 }, { "epoch": 0.9250275434825799, "grad_norm": 6.200118943580904, "learning_rate": 2.922817613496245e-06, "loss": 0.7864, "step": 12804 }, { "epoch": 0.9250997886827894, "grad_norm": 5.894374311922576, "learning_rate": 2.9225293219281447e-06, "loss": 0.7858, "step": 12805 }, { "epoch": 0.9251720338829988, "grad_norm": 6.098915141208164, "learning_rate": 2.922241024575947e-06, "loss": 0.8214, "step": 12806 }, { "epoch": 0.9252442790832084, "grad_norm": 6.838670770530984, "learning_rate": 2.9219527214435967e-06, "loss": 0.8645, "step": 12807 }, { "epoch": 0.9253165242834179, "grad_norm": 6.891676219240123, "learning_rate": 2.921664412535042e-06, "loss": 0.7272, "step": 12808 }, { "epoch": 0.9253887694836275, "grad_norm": 6.6778489108889065, "learning_rate": 2.921376097854228e-06, "loss": 0.824, "step": 12809 }, { "epoch": 0.9254610146838369, "grad_norm": 6.414543031634249, "learning_rate": 2.9210877774051027e-06, "loss": 0.7617, "step": 12810 }, { "epoch": 0.9255332598840464, "grad_norm": 6.349773084535986, "learning_rate": 2.920799451191613e-06, "loss": 0.7721, "step": 12811 }, { "epoch": 0.925605505084256, "grad_norm": 6.273196699145649, "learning_rate": 2.9205111192177053e-06, "loss": 0.7969, "step": 12812 }, { "epoch": 0.9256777502844655, "grad_norm": 5.82138367083098, "learning_rate": 2.9202227814873272e-06, "loss": 0.8454, "step": 12813 }, { "epoch": 0.925749995484675, "grad_norm": 6.530078837691383, "learning_rate": 2.9199344380044253e-06, "loss": 0.8098, "step": 12814 }, { "epoch": 0.9258222406848845, "grad_norm": 5.514752283836074, "learning_rate": 2.919646088772947e-06, "loss": 0.778, "step": 12815 }, { "epoch": 0.925894485885094, "grad_norm": 6.713963990779398, "learning_rate": 2.9193577337968393e-06, "loss": 0.7832, "step": 12816 }, { "epoch": 0.9259667310853035, "grad_norm": 5.904323687604518, "learning_rate": 2.919069373080051e-06, "loss": 0.8045, "step": 12817 }, { "epoch": 0.926038976285513, "grad_norm": 5.854307378885619, "learning_rate": 2.9187810066265277e-06, "loss": 0.8048, "step": 12818 }, { "epoch": 0.9261112214857226, "grad_norm": 8.327490118565418, "learning_rate": 2.9184926344402164e-06, "loss": 0.803, "step": 12819 }, { "epoch": 0.9261834666859321, "grad_norm": 6.627715220142356, "learning_rate": 2.9182042565250673e-06, "loss": 0.7559, "step": 12820 }, { "epoch": 0.9262557118861415, "grad_norm": 5.568405274275929, "learning_rate": 2.917915872885026e-06, "loss": 0.8096, "step": 12821 }, { "epoch": 0.9263279570863511, "grad_norm": 5.825486964225508, "learning_rate": 2.9176274835240416e-06, "loss": 0.7297, "step": 12822 }, { "epoch": 0.9264002022865606, "grad_norm": 8.380886002273595, "learning_rate": 2.91733908844606e-06, "loss": 0.8694, "step": 12823 }, { "epoch": 0.92647244748677, "grad_norm": 6.896783338801016, "learning_rate": 2.917050687655031e-06, "loss": 0.8161, "step": 12824 }, { "epoch": 0.9265446926869796, "grad_norm": 5.95526260885937, "learning_rate": 2.9167622811549016e-06, "loss": 0.8086, "step": 12825 }, { "epoch": 0.9266169378871891, "grad_norm": 5.235539457695447, "learning_rate": 2.916473868949621e-06, "loss": 0.881, "step": 12826 }, { "epoch": 0.9266891830873987, "grad_norm": 5.783413538894243, "learning_rate": 2.9161854510431364e-06, "loss": 0.7755, "step": 12827 }, { "epoch": 0.9267614282876081, "grad_norm": 6.556915231998897, "learning_rate": 2.9158970274393956e-06, "loss": 0.79, "step": 12828 }, { "epoch": 0.9268336734878176, "grad_norm": 5.899799356443324, "learning_rate": 2.915608598142347e-06, "loss": 0.744, "step": 12829 }, { "epoch": 0.9269059186880272, "grad_norm": 5.672208918050337, "learning_rate": 2.91532016315594e-06, "loss": 0.7522, "step": 12830 }, { "epoch": 0.9269781638882367, "grad_norm": 5.689251640463412, "learning_rate": 2.9150317224841222e-06, "loss": 0.7986, "step": 12831 }, { "epoch": 0.9270504090884462, "grad_norm": 5.717346686278829, "learning_rate": 2.9147432761308425e-06, "loss": 0.8424, "step": 12832 }, { "epoch": 0.9271226542886557, "grad_norm": 6.005711380290399, "learning_rate": 2.9144548241000492e-06, "loss": 0.7457, "step": 12833 }, { "epoch": 0.9271948994888652, "grad_norm": 5.606472103690777, "learning_rate": 2.9141663663956916e-06, "loss": 0.7727, "step": 12834 }, { "epoch": 0.9272671446890747, "grad_norm": 5.206590203095376, "learning_rate": 2.913877903021718e-06, "loss": 0.7239, "step": 12835 }, { "epoch": 0.9273393898892842, "grad_norm": 5.982341530435017, "learning_rate": 2.9135894339820776e-06, "loss": 0.8151, "step": 12836 }, { "epoch": 0.9274116350894938, "grad_norm": 5.9467139779049205, "learning_rate": 2.913300959280718e-06, "loss": 0.8422, "step": 12837 }, { "epoch": 0.9274838802897033, "grad_norm": 8.057775724962136, "learning_rate": 2.9130124789215897e-06, "loss": 0.9009, "step": 12838 }, { "epoch": 0.9275561254899127, "grad_norm": 6.446564154035066, "learning_rate": 2.9127239929086405e-06, "loss": 0.8287, "step": 12839 }, { "epoch": 0.9276283706901223, "grad_norm": 6.10648052986906, "learning_rate": 2.9124355012458214e-06, "loss": 0.7207, "step": 12840 }, { "epoch": 0.9277006158903318, "grad_norm": 5.166318553559254, "learning_rate": 2.91214700393708e-06, "loss": 0.791, "step": 12841 }, { "epoch": 0.9277728610905412, "grad_norm": 6.046254207474761, "learning_rate": 2.9118585009863663e-06, "loss": 0.7974, "step": 12842 }, { "epoch": 0.9278451062907508, "grad_norm": 6.529684508853108, "learning_rate": 2.9115699923976292e-06, "loss": 0.7568, "step": 12843 }, { "epoch": 0.9279173514909603, "grad_norm": 5.543859559431251, "learning_rate": 2.911281478174819e-06, "loss": 0.8268, "step": 12844 }, { "epoch": 0.9279895966911699, "grad_norm": 6.266302590582287, "learning_rate": 2.910992958321884e-06, "loss": 0.8338, "step": 12845 }, { "epoch": 0.9280618418913793, "grad_norm": 5.971920110271637, "learning_rate": 2.9107044328427753e-06, "loss": 0.8151, "step": 12846 }, { "epoch": 0.9281340870915888, "grad_norm": 5.661452456286719, "learning_rate": 2.9104159017414415e-06, "loss": 0.8413, "step": 12847 }, { "epoch": 0.9282063322917984, "grad_norm": 6.05893437247677, "learning_rate": 2.9101273650218322e-06, "loss": 0.7215, "step": 12848 }, { "epoch": 0.9282785774920079, "grad_norm": 7.397267233953197, "learning_rate": 2.909838822687898e-06, "loss": 0.8247, "step": 12849 }, { "epoch": 0.9283508226922174, "grad_norm": 5.634713496292667, "learning_rate": 2.909550274743589e-06, "loss": 0.8002, "step": 12850 }, { "epoch": 0.9284230678924269, "grad_norm": 6.584843554886781, "learning_rate": 2.9092617211928544e-06, "loss": 0.8127, "step": 12851 }, { "epoch": 0.9284953130926364, "grad_norm": 6.049705926983436, "learning_rate": 2.9089731620396444e-06, "loss": 0.7129, "step": 12852 }, { "epoch": 0.9285675582928459, "grad_norm": 6.862783779815309, "learning_rate": 2.9086845972879096e-06, "loss": 0.7635, "step": 12853 }, { "epoch": 0.9286398034930554, "grad_norm": 6.290448641401297, "learning_rate": 2.9083960269416e-06, "loss": 0.8223, "step": 12854 }, { "epoch": 0.928712048693265, "grad_norm": 6.410483547783759, "learning_rate": 2.9081074510046656e-06, "loss": 0.8208, "step": 12855 }, { "epoch": 0.9287842938934745, "grad_norm": 5.751142637083528, "learning_rate": 2.9078188694810576e-06, "loss": 0.8102, "step": 12856 }, { "epoch": 0.9288565390936839, "grad_norm": 6.03877508311036, "learning_rate": 2.9075302823747253e-06, "loss": 0.7265, "step": 12857 }, { "epoch": 0.9289287842938935, "grad_norm": 5.8564947347891705, "learning_rate": 2.90724168968962e-06, "loss": 0.8189, "step": 12858 }, { "epoch": 0.929001029494103, "grad_norm": 6.123197640687733, "learning_rate": 2.9069530914296923e-06, "loss": 0.8613, "step": 12859 }, { "epoch": 0.9290732746943124, "grad_norm": 5.974600436206128, "learning_rate": 2.906664487598893e-06, "loss": 0.7896, "step": 12860 }, { "epoch": 0.929145519894522, "grad_norm": 6.810705386024608, "learning_rate": 2.9063758782011726e-06, "loss": 0.9054, "step": 12861 }, { "epoch": 0.9292177650947315, "grad_norm": 5.776542767962309, "learning_rate": 2.906087263240481e-06, "loss": 0.789, "step": 12862 }, { "epoch": 0.9292900102949411, "grad_norm": 6.137711705054681, "learning_rate": 2.905798642720772e-06, "loss": 0.7597, "step": 12863 }, { "epoch": 0.9293622554951505, "grad_norm": 6.418462064500927, "learning_rate": 2.9055100166459925e-06, "loss": 0.7337, "step": 12864 }, { "epoch": 0.92943450069536, "grad_norm": 5.43626407301312, "learning_rate": 2.9052213850200972e-06, "loss": 0.7798, "step": 12865 }, { "epoch": 0.9295067458955696, "grad_norm": 6.308720429745906, "learning_rate": 2.9049327478470357e-06, "loss": 0.8246, "step": 12866 }, { "epoch": 0.9295789910957791, "grad_norm": 8.570665016678985, "learning_rate": 2.9046441051307585e-06, "loss": 0.8935, "step": 12867 }, { "epoch": 0.9296512362959886, "grad_norm": 6.382579112834544, "learning_rate": 2.9043554568752187e-06, "loss": 0.7924, "step": 12868 }, { "epoch": 0.9297234814961981, "grad_norm": 6.239420366998306, "learning_rate": 2.9040668030843656e-06, "loss": 0.7498, "step": 12869 }, { "epoch": 0.9297957266964076, "grad_norm": 5.2998871917235295, "learning_rate": 2.9037781437621527e-06, "loss": 0.7605, "step": 12870 }, { "epoch": 0.9298679718966171, "grad_norm": 5.507953614936494, "learning_rate": 2.9034894789125293e-06, "loss": 0.9292, "step": 12871 }, { "epoch": 0.9299402170968266, "grad_norm": 5.318910568090768, "learning_rate": 2.903200808539449e-06, "loss": 0.7705, "step": 12872 }, { "epoch": 0.9300124622970362, "grad_norm": 8.21280121272673, "learning_rate": 2.9029121326468623e-06, "loss": 0.8258, "step": 12873 }, { "epoch": 0.9300847074972457, "grad_norm": 6.496910241175013, "learning_rate": 2.902623451238722e-06, "loss": 0.7616, "step": 12874 }, { "epoch": 0.9301569526974551, "grad_norm": 6.742788913464737, "learning_rate": 2.9023347643189787e-06, "loss": 0.7614, "step": 12875 }, { "epoch": 0.9302291978976647, "grad_norm": 6.661074899987403, "learning_rate": 2.902046071891585e-06, "loss": 0.8119, "step": 12876 }, { "epoch": 0.9303014430978742, "grad_norm": 5.887380986721728, "learning_rate": 2.901757373960493e-06, "loss": 0.8143, "step": 12877 }, { "epoch": 0.9303736882980836, "grad_norm": 6.5085475181409285, "learning_rate": 2.901468670529654e-06, "loss": 0.8334, "step": 12878 }, { "epoch": 0.9304459334982932, "grad_norm": 7.138759511260982, "learning_rate": 2.9011799616030217e-06, "loss": 0.8445, "step": 12879 }, { "epoch": 0.9305181786985027, "grad_norm": 6.416833685480647, "learning_rate": 2.900891247184546e-06, "loss": 0.8232, "step": 12880 }, { "epoch": 0.9305904238987123, "grad_norm": 5.558284748252051, "learning_rate": 2.9006025272781806e-06, "loss": 0.7876, "step": 12881 }, { "epoch": 0.9306626690989217, "grad_norm": 5.72271324363684, "learning_rate": 2.900313801887878e-06, "loss": 0.7956, "step": 12882 }, { "epoch": 0.9307349142991312, "grad_norm": 7.180758557216031, "learning_rate": 2.9000250710175905e-06, "loss": 0.7781, "step": 12883 }, { "epoch": 0.9308071594993408, "grad_norm": 5.73083253627585, "learning_rate": 2.8997363346712705e-06, "loss": 0.8189, "step": 12884 }, { "epoch": 0.9308794046995503, "grad_norm": 5.7481271762144885, "learning_rate": 2.89944759285287e-06, "loss": 0.7804, "step": 12885 }, { "epoch": 0.9309516498997598, "grad_norm": 6.16613317020522, "learning_rate": 2.8991588455663417e-06, "loss": 0.716, "step": 12886 }, { "epoch": 0.9310238950999693, "grad_norm": 6.852238933791036, "learning_rate": 2.8988700928156388e-06, "loss": 0.8556, "step": 12887 }, { "epoch": 0.9310961403001788, "grad_norm": 6.303152585288392, "learning_rate": 2.898581334604715e-06, "loss": 0.8416, "step": 12888 }, { "epoch": 0.9311683855003883, "grad_norm": 5.5275982370191725, "learning_rate": 2.8982925709375217e-06, "loss": 0.8686, "step": 12889 }, { "epoch": 0.9312406307005978, "grad_norm": 6.59390880520036, "learning_rate": 2.8980038018180116e-06, "loss": 0.7938, "step": 12890 }, { "epoch": 0.9313128759008074, "grad_norm": 5.671561576653597, "learning_rate": 2.897715027250139e-06, "loss": 0.8667, "step": 12891 }, { "epoch": 0.9313851211010169, "grad_norm": 6.405642596967, "learning_rate": 2.8974262472378566e-06, "loss": 0.7699, "step": 12892 }, { "epoch": 0.9314573663012263, "grad_norm": 6.9108918069686895, "learning_rate": 2.8971374617851182e-06, "loss": 0.8183, "step": 12893 }, { "epoch": 0.9315296115014359, "grad_norm": 8.565803725818045, "learning_rate": 2.8968486708958755e-06, "loss": 0.8084, "step": 12894 }, { "epoch": 0.9316018567016454, "grad_norm": 7.56129862172655, "learning_rate": 2.8965598745740824e-06, "loss": 0.8195, "step": 12895 }, { "epoch": 0.9316741019018548, "grad_norm": 6.298219750174201, "learning_rate": 2.8962710728236927e-06, "loss": 0.8088, "step": 12896 }, { "epoch": 0.9317463471020644, "grad_norm": 6.152615011381053, "learning_rate": 2.89598226564866e-06, "loss": 0.8217, "step": 12897 }, { "epoch": 0.9318185923022739, "grad_norm": 6.722656817438965, "learning_rate": 2.8956934530529374e-06, "loss": 0.8186, "step": 12898 }, { "epoch": 0.9318908375024835, "grad_norm": 7.257954673072638, "learning_rate": 2.895404635040479e-06, "loss": 0.787, "step": 12899 }, { "epoch": 0.9319630827026929, "grad_norm": 7.00683750272195, "learning_rate": 2.8951158116152368e-06, "loss": 0.8539, "step": 12900 }, { "epoch": 0.9320353279029024, "grad_norm": 5.488757520760423, "learning_rate": 2.8948269827811677e-06, "loss": 0.7864, "step": 12901 }, { "epoch": 0.932107573103112, "grad_norm": 6.083922301047701, "learning_rate": 2.894538148542223e-06, "loss": 0.7629, "step": 12902 }, { "epoch": 0.9321798183033215, "grad_norm": 6.392631234502878, "learning_rate": 2.8942493089023572e-06, "loss": 0.8265, "step": 12903 }, { "epoch": 0.932252063503531, "grad_norm": 7.751002339179294, "learning_rate": 2.8939604638655245e-06, "loss": 0.8079, "step": 12904 }, { "epoch": 0.9323243087037405, "grad_norm": 6.027029191200423, "learning_rate": 2.893671613435679e-06, "loss": 0.7937, "step": 12905 }, { "epoch": 0.93239655390395, "grad_norm": 8.16590654637445, "learning_rate": 2.893382757616775e-06, "loss": 0.7981, "step": 12906 }, { "epoch": 0.9324687991041595, "grad_norm": 8.003749922216636, "learning_rate": 2.8930938964127665e-06, "loss": 0.8135, "step": 12907 }, { "epoch": 0.932541044304369, "grad_norm": 6.516542072844938, "learning_rate": 2.892805029827608e-06, "loss": 0.7944, "step": 12908 }, { "epoch": 0.9326132895045786, "grad_norm": 6.926357910108731, "learning_rate": 2.892516157865253e-06, "loss": 0.8406, "step": 12909 }, { "epoch": 0.9326855347047881, "grad_norm": 6.35966472293812, "learning_rate": 2.8922272805296564e-06, "loss": 0.8919, "step": 12910 }, { "epoch": 0.9327577799049975, "grad_norm": 6.044571312027959, "learning_rate": 2.891938397824774e-06, "loss": 0.7468, "step": 12911 }, { "epoch": 0.9328300251052071, "grad_norm": 5.5094931777508895, "learning_rate": 2.8916495097545584e-06, "loss": 0.7857, "step": 12912 }, { "epoch": 0.9329022703054166, "grad_norm": 5.8237360160022735, "learning_rate": 2.891360616322966e-06, "loss": 0.7533, "step": 12913 }, { "epoch": 0.932974515505626, "grad_norm": 5.005476432021575, "learning_rate": 2.8910717175339488e-06, "loss": 0.6873, "step": 12914 }, { "epoch": 0.9330467607058356, "grad_norm": 5.7911384762475, "learning_rate": 2.8907828133914645e-06, "loss": 0.8396, "step": 12915 }, { "epoch": 0.9331190059060451, "grad_norm": 7.452900561266268, "learning_rate": 2.8904939038994674e-06, "loss": 0.8193, "step": 12916 }, { "epoch": 0.9331912511062547, "grad_norm": 5.915416567556727, "learning_rate": 2.8902049890619114e-06, "loss": 0.739, "step": 12917 }, { "epoch": 0.9332634963064641, "grad_norm": 6.3362770601057115, "learning_rate": 2.8899160688827522e-06, "loss": 0.8778, "step": 12918 }, { "epoch": 0.9333357415066736, "grad_norm": 6.336297529459609, "learning_rate": 2.8896271433659446e-06, "loss": 0.8251, "step": 12919 }, { "epoch": 0.9334079867068832, "grad_norm": 6.055424287327895, "learning_rate": 2.889338212515444e-06, "loss": 0.847, "step": 12920 }, { "epoch": 0.9334802319070927, "grad_norm": 5.304120265201358, "learning_rate": 2.889049276335206e-06, "loss": 0.7744, "step": 12921 }, { "epoch": 0.9335524771073022, "grad_norm": 6.161066175563913, "learning_rate": 2.888760334829185e-06, "loss": 0.7789, "step": 12922 }, { "epoch": 0.9336247223075117, "grad_norm": 5.495275896138666, "learning_rate": 2.888471388001337e-06, "loss": 0.8368, "step": 12923 }, { "epoch": 0.9336969675077212, "grad_norm": 5.62118265109511, "learning_rate": 2.8881824358556173e-06, "loss": 0.8262, "step": 12924 }, { "epoch": 0.9337692127079307, "grad_norm": 6.969353482925852, "learning_rate": 2.8878934783959813e-06, "loss": 0.8231, "step": 12925 }, { "epoch": 0.9338414579081402, "grad_norm": 5.244152991139982, "learning_rate": 2.8876045156263856e-06, "loss": 0.7874, "step": 12926 }, { "epoch": 0.9339137031083498, "grad_norm": 7.045732968106393, "learning_rate": 2.887315547550784e-06, "loss": 0.8521, "step": 12927 }, { "epoch": 0.9339859483085593, "grad_norm": 7.5754543357038315, "learning_rate": 2.8870265741731336e-06, "loss": 0.7505, "step": 12928 }, { "epoch": 0.9340581935087687, "grad_norm": 6.701103563663769, "learning_rate": 2.8867375954973895e-06, "loss": 0.883, "step": 12929 }, { "epoch": 0.9341304387089783, "grad_norm": 7.865852735190825, "learning_rate": 2.8864486115275087e-06, "loss": 0.8281, "step": 12930 }, { "epoch": 0.9342026839091878, "grad_norm": 7.616823909591619, "learning_rate": 2.8861596222674467e-06, "loss": 0.8371, "step": 12931 }, { "epoch": 0.9342749291093972, "grad_norm": 6.116962063314861, "learning_rate": 2.8858706277211584e-06, "loss": 0.8505, "step": 12932 }, { "epoch": 0.9343471743096068, "grad_norm": 7.582136653617558, "learning_rate": 2.8855816278926015e-06, "loss": 0.7766, "step": 12933 }, { "epoch": 0.9344194195098163, "grad_norm": 6.108975995002725, "learning_rate": 2.8852926227857313e-06, "loss": 0.7637, "step": 12934 }, { "epoch": 0.9344916647100259, "grad_norm": 7.650172970879482, "learning_rate": 2.8850036124045043e-06, "loss": 0.7745, "step": 12935 }, { "epoch": 0.9345639099102353, "grad_norm": 8.025588121581222, "learning_rate": 2.884714596752877e-06, "loss": 0.798, "step": 12936 }, { "epoch": 0.9346361551104448, "grad_norm": 6.460070403102207, "learning_rate": 2.884425575834805e-06, "loss": 0.8786, "step": 12937 }, { "epoch": 0.9347084003106544, "grad_norm": 7.811178599187093, "learning_rate": 2.8841365496542457e-06, "loss": 0.7454, "step": 12938 }, { "epoch": 0.9347806455108638, "grad_norm": 7.137392744070167, "learning_rate": 2.883847518215155e-06, "loss": 0.7611, "step": 12939 }, { "epoch": 0.9348528907110734, "grad_norm": 6.059551347799695, "learning_rate": 2.88355848152149e-06, "loss": 0.8206, "step": 12940 }, { "epoch": 0.9349251359112829, "grad_norm": 7.178672610873236, "learning_rate": 2.883269439577207e-06, "loss": 0.7939, "step": 12941 }, { "epoch": 0.9349973811114924, "grad_norm": 6.5907490486636595, "learning_rate": 2.8829803923862633e-06, "loss": 0.8394, "step": 12942 }, { "epoch": 0.9350696263117019, "grad_norm": 6.327544461218064, "learning_rate": 2.8826913399526156e-06, "loss": 0.7796, "step": 12943 }, { "epoch": 0.9351418715119114, "grad_norm": 5.0274437195649995, "learning_rate": 2.88240228228022e-06, "loss": 0.7681, "step": 12944 }, { "epoch": 0.935214116712121, "grad_norm": 6.391084314082923, "learning_rate": 2.882113219373035e-06, "loss": 0.8211, "step": 12945 }, { "epoch": 0.9352863619123305, "grad_norm": 8.34275371875596, "learning_rate": 2.8818241512350158e-06, "loss": 0.8107, "step": 12946 }, { "epoch": 0.9353586071125399, "grad_norm": 10.389220270412427, "learning_rate": 2.8815350778701205e-06, "loss": 0.8015, "step": 12947 }, { "epoch": 0.9354308523127495, "grad_norm": 6.602888238275596, "learning_rate": 2.881245999282306e-06, "loss": 0.7827, "step": 12948 }, { "epoch": 0.935503097512959, "grad_norm": 6.400036287204867, "learning_rate": 2.880956915475531e-06, "loss": 0.8963, "step": 12949 }, { "epoch": 0.9355753427131684, "grad_norm": 6.504577492107392, "learning_rate": 2.8806678264537515e-06, "loss": 0.7873, "step": 12950 }, { "epoch": 0.935647587913378, "grad_norm": 6.923688181766281, "learning_rate": 2.8803787322209235e-06, "loss": 0.716, "step": 12951 }, { "epoch": 0.9357198331135875, "grad_norm": 7.3122975900138165, "learning_rate": 2.880089632781007e-06, "loss": 0.746, "step": 12952 }, { "epoch": 0.9357920783137971, "grad_norm": 5.95276582690687, "learning_rate": 2.879800528137958e-06, "loss": 0.7818, "step": 12953 }, { "epoch": 0.9358643235140065, "grad_norm": 7.29830755184638, "learning_rate": 2.8795114182957355e-06, "loss": 0.9018, "step": 12954 }, { "epoch": 0.935936568714216, "grad_norm": 6.461174551179572, "learning_rate": 2.879222303258296e-06, "loss": 0.8308, "step": 12955 }, { "epoch": 0.9360088139144256, "grad_norm": 6.187886235676431, "learning_rate": 2.8789331830295973e-06, "loss": 0.8807, "step": 12956 }, { "epoch": 0.936081059114635, "grad_norm": 5.465828076838107, "learning_rate": 2.8786440576135978e-06, "loss": 0.7712, "step": 12957 }, { "epoch": 0.9361533043148446, "grad_norm": 5.143513618683347, "learning_rate": 2.878354927014256e-06, "loss": 0.7513, "step": 12958 }, { "epoch": 0.9362255495150541, "grad_norm": 7.342307870368911, "learning_rate": 2.8780657912355282e-06, "loss": 0.7376, "step": 12959 }, { "epoch": 0.9362977947152636, "grad_norm": 5.822963689387878, "learning_rate": 2.877776650281373e-06, "loss": 0.7864, "step": 12960 }, { "epoch": 0.9363700399154731, "grad_norm": 5.338498038757199, "learning_rate": 2.8774875041557486e-06, "loss": 0.7687, "step": 12961 }, { "epoch": 0.9364422851156826, "grad_norm": 4.917113310790918, "learning_rate": 2.8771983528626142e-06, "loss": 0.7622, "step": 12962 }, { "epoch": 0.9365145303158922, "grad_norm": 7.973401196976684, "learning_rate": 2.876909196405927e-06, "loss": 0.8654, "step": 12963 }, { "epoch": 0.9365867755161017, "grad_norm": 5.1106980037058545, "learning_rate": 2.876620034789645e-06, "loss": 0.7471, "step": 12964 }, { "epoch": 0.9366590207163111, "grad_norm": 6.381975434230003, "learning_rate": 2.876330868017728e-06, "loss": 0.8797, "step": 12965 }, { "epoch": 0.9367312659165207, "grad_norm": 6.643209515438725, "learning_rate": 2.876041696094133e-06, "loss": 0.8048, "step": 12966 }, { "epoch": 0.9368035111167302, "grad_norm": 8.351790879127176, "learning_rate": 2.875752519022819e-06, "loss": 0.7794, "step": 12967 }, { "epoch": 0.9368757563169396, "grad_norm": 7.391202295189072, "learning_rate": 2.8754633368077457e-06, "loss": 0.75, "step": 12968 }, { "epoch": 0.9369480015171492, "grad_norm": 5.972006982675342, "learning_rate": 2.8751741494528697e-06, "loss": 0.7677, "step": 12969 }, { "epoch": 0.9370202467173587, "grad_norm": 6.846839799533688, "learning_rate": 2.8748849569621518e-06, "loss": 0.7873, "step": 12970 }, { "epoch": 0.9370924919175683, "grad_norm": 7.047620374416781, "learning_rate": 2.874595759339549e-06, "loss": 0.7004, "step": 12971 }, { "epoch": 0.9371647371177777, "grad_norm": 8.275536981314358, "learning_rate": 2.8743065565890223e-06, "loss": 0.8062, "step": 12972 }, { "epoch": 0.9372369823179872, "grad_norm": 7.514665633610117, "learning_rate": 2.8740173487145284e-06, "loss": 0.8384, "step": 12973 }, { "epoch": 0.9373092275181968, "grad_norm": 6.080302105977233, "learning_rate": 2.873728135720028e-06, "loss": 0.8316, "step": 12974 }, { "epoch": 0.9373814727184062, "grad_norm": 5.18656526899554, "learning_rate": 2.8734389176094795e-06, "loss": 0.7944, "step": 12975 }, { "epoch": 0.9374537179186158, "grad_norm": 6.446551431587855, "learning_rate": 2.873149694386842e-06, "loss": 0.7712, "step": 12976 }, { "epoch": 0.9375259631188253, "grad_norm": 5.91778858275666, "learning_rate": 2.872860466056075e-06, "loss": 0.8133, "step": 12977 }, { "epoch": 0.9375982083190348, "grad_norm": 6.492410777598574, "learning_rate": 2.8725712326211374e-06, "loss": 0.773, "step": 12978 }, { "epoch": 0.9376704535192443, "grad_norm": 8.441826509378947, "learning_rate": 2.8722819940859896e-06, "loss": 0.8764, "step": 12979 }, { "epoch": 0.9377426987194538, "grad_norm": 6.437488444790145, "learning_rate": 2.8719927504545902e-06, "loss": 0.7832, "step": 12980 }, { "epoch": 0.9378149439196634, "grad_norm": 6.1565452543609345, "learning_rate": 2.8717035017308987e-06, "loss": 0.8055, "step": 12981 }, { "epoch": 0.9378871891198729, "grad_norm": 6.843421126872623, "learning_rate": 2.8714142479188743e-06, "loss": 0.841, "step": 12982 }, { "epoch": 0.9379594343200823, "grad_norm": 5.353605406717055, "learning_rate": 2.8711249890224784e-06, "loss": 0.7505, "step": 12983 }, { "epoch": 0.9380316795202919, "grad_norm": 5.93948459585546, "learning_rate": 2.870835725045669e-06, "loss": 0.8551, "step": 12984 }, { "epoch": 0.9381039247205014, "grad_norm": 6.385458352314314, "learning_rate": 2.8705464559924058e-06, "loss": 0.9152, "step": 12985 }, { "epoch": 0.9381761699207108, "grad_norm": 6.995548467722728, "learning_rate": 2.87025718186665e-06, "loss": 0.8142, "step": 12986 }, { "epoch": 0.9382484151209204, "grad_norm": 5.553798770763378, "learning_rate": 2.869967902672361e-06, "loss": 0.8431, "step": 12987 }, { "epoch": 0.9383206603211299, "grad_norm": 6.710017148946801, "learning_rate": 2.869678618413499e-06, "loss": 0.8692, "step": 12988 }, { "epoch": 0.9383929055213395, "grad_norm": 7.056624137210879, "learning_rate": 2.869389329094023e-06, "loss": 0.8331, "step": 12989 }, { "epoch": 0.9384651507215489, "grad_norm": 6.2951618448287645, "learning_rate": 2.869100034717894e-06, "loss": 0.8231, "step": 12990 }, { "epoch": 0.9385373959217584, "grad_norm": 8.162485362765638, "learning_rate": 2.868810735289072e-06, "loss": 0.8058, "step": 12991 }, { "epoch": 0.938609641121968, "grad_norm": 5.442027169515521, "learning_rate": 2.868521430811518e-06, "loss": 0.8497, "step": 12992 }, { "epoch": 0.9386818863221774, "grad_norm": 6.555828663512496, "learning_rate": 2.8682321212891917e-06, "loss": 0.7501, "step": 12993 }, { "epoch": 0.938754131522387, "grad_norm": 7.975368249696168, "learning_rate": 2.8679428067260527e-06, "loss": 0.8482, "step": 12994 }, { "epoch": 0.9388263767225965, "grad_norm": 7.829835746669544, "learning_rate": 2.867653487126063e-06, "loss": 0.8604, "step": 12995 }, { "epoch": 0.938898621922806, "grad_norm": 8.913448810766146, "learning_rate": 2.8673641624931825e-06, "loss": 0.7822, "step": 12996 }, { "epoch": 0.9389708671230155, "grad_norm": 8.552167686823891, "learning_rate": 2.867074832831372e-06, "loss": 0.7987, "step": 12997 }, { "epoch": 0.939043112323225, "grad_norm": 5.61227843393133, "learning_rate": 2.866785498144592e-06, "loss": 0.7882, "step": 12998 }, { "epoch": 0.9391153575234346, "grad_norm": 7.169357090514365, "learning_rate": 2.866496158436803e-06, "loss": 0.8271, "step": 12999 }, { "epoch": 0.9391876027236441, "grad_norm": 6.943648903949447, "learning_rate": 2.8662068137119666e-06, "loss": 0.8356, "step": 13000 }, { "epoch": 0.9392598479238535, "grad_norm": 6.317632420506496, "learning_rate": 2.865917463974043e-06, "loss": 0.8414, "step": 13001 }, { "epoch": 0.9393320931240631, "grad_norm": 12.100723345865076, "learning_rate": 2.8656281092269933e-06, "loss": 0.8078, "step": 13002 }, { "epoch": 0.9394043383242726, "grad_norm": 9.240083895215724, "learning_rate": 2.8653387494747787e-06, "loss": 0.8306, "step": 13003 }, { "epoch": 0.939476583524482, "grad_norm": 6.477513076727803, "learning_rate": 2.86504938472136e-06, "loss": 0.8243, "step": 13004 }, { "epoch": 0.9395488287246916, "grad_norm": 6.607405709594473, "learning_rate": 2.8647600149706994e-06, "loss": 0.7617, "step": 13005 }, { "epoch": 0.9396210739249011, "grad_norm": 5.633772557909392, "learning_rate": 2.8644706402267576e-06, "loss": 0.8693, "step": 13006 }, { "epoch": 0.9396933191251107, "grad_norm": 5.764949438530107, "learning_rate": 2.8641812604934948e-06, "loss": 0.7904, "step": 13007 }, { "epoch": 0.9397655643253201, "grad_norm": 6.334224972785493, "learning_rate": 2.863891875774874e-06, "loss": 0.7848, "step": 13008 }, { "epoch": 0.9398378095255296, "grad_norm": 9.553619849129422, "learning_rate": 2.8636024860748556e-06, "loss": 0.8392, "step": 13009 }, { "epoch": 0.9399100547257392, "grad_norm": 6.576947684505828, "learning_rate": 2.863313091397401e-06, "loss": 0.784, "step": 13010 }, { "epoch": 0.9399822999259486, "grad_norm": 7.384797393560998, "learning_rate": 2.8630236917464736e-06, "loss": 0.8439, "step": 13011 }, { "epoch": 0.9400545451261582, "grad_norm": 5.6649645698698645, "learning_rate": 2.8627342871260327e-06, "loss": 0.8228, "step": 13012 }, { "epoch": 0.9401267903263677, "grad_norm": 5.96925104245185, "learning_rate": 2.862444877540041e-06, "loss": 0.8209, "step": 13013 }, { "epoch": 0.9401990355265772, "grad_norm": 6.899582349880549, "learning_rate": 2.8621554629924608e-06, "loss": 0.8583, "step": 13014 }, { "epoch": 0.9402712807267867, "grad_norm": 6.105193519368206, "learning_rate": 2.8618660434872534e-06, "loss": 0.7455, "step": 13015 }, { "epoch": 0.9403435259269962, "grad_norm": 5.986753145597232, "learning_rate": 2.861576619028381e-06, "loss": 0.7351, "step": 13016 }, { "epoch": 0.9404157711272058, "grad_norm": 6.738060175268332, "learning_rate": 2.8612871896198054e-06, "loss": 0.8136, "step": 13017 }, { "epoch": 0.9404880163274153, "grad_norm": 5.564806856297925, "learning_rate": 2.8609977552654883e-06, "loss": 0.7072, "step": 13018 }, { "epoch": 0.9405602615276247, "grad_norm": 7.32740321397065, "learning_rate": 2.8607083159693925e-06, "loss": 0.864, "step": 13019 }, { "epoch": 0.9406325067278343, "grad_norm": 7.528340771006869, "learning_rate": 2.86041887173548e-06, "loss": 0.773, "step": 13020 }, { "epoch": 0.9407047519280438, "grad_norm": 7.806263624168706, "learning_rate": 2.860129422567713e-06, "loss": 0.8584, "step": 13021 }, { "epoch": 0.9407769971282532, "grad_norm": 7.484527395206049, "learning_rate": 2.859839968470054e-06, "loss": 0.7732, "step": 13022 }, { "epoch": 0.9408492423284628, "grad_norm": 6.428590405148547, "learning_rate": 2.8595505094464643e-06, "loss": 0.7402, "step": 13023 }, { "epoch": 0.9409214875286723, "grad_norm": 6.226921808269221, "learning_rate": 2.8592610455009085e-06, "loss": 0.8031, "step": 13024 }, { "epoch": 0.9409937327288819, "grad_norm": 5.686950321673439, "learning_rate": 2.8589715766373473e-06, "loss": 0.8091, "step": 13025 }, { "epoch": 0.9410659779290913, "grad_norm": 7.948922656494135, "learning_rate": 2.858682102859744e-06, "loss": 0.9392, "step": 13026 }, { "epoch": 0.9411382231293008, "grad_norm": 5.486716267920466, "learning_rate": 2.858392624172061e-06, "loss": 0.8047, "step": 13027 }, { "epoch": 0.9412104683295104, "grad_norm": 6.655569982044263, "learning_rate": 2.858103140578261e-06, "loss": 0.7916, "step": 13028 }, { "epoch": 0.9412827135297198, "grad_norm": 7.121812341606084, "learning_rate": 2.857813652082308e-06, "loss": 0.8135, "step": 13029 }, { "epoch": 0.9413549587299294, "grad_norm": 7.407555831199248, "learning_rate": 2.8575241586881634e-06, "loss": 0.7019, "step": 13030 }, { "epoch": 0.9414272039301389, "grad_norm": 5.262563658848008, "learning_rate": 2.8572346603997914e-06, "loss": 0.8039, "step": 13031 }, { "epoch": 0.9414994491303484, "grad_norm": 5.9572912340808255, "learning_rate": 2.856945157221153e-06, "loss": 0.7979, "step": 13032 }, { "epoch": 0.9415716943305579, "grad_norm": 8.093403576834627, "learning_rate": 2.8566556491562133e-06, "loss": 0.8269, "step": 13033 }, { "epoch": 0.9416439395307674, "grad_norm": 5.48744189811656, "learning_rate": 2.8563661362089345e-06, "loss": 0.7581, "step": 13034 }, { "epoch": 0.941716184730977, "grad_norm": 6.031664582071456, "learning_rate": 2.85607661838328e-06, "loss": 0.7998, "step": 13035 }, { "epoch": 0.9417884299311865, "grad_norm": 7.124691337458257, "learning_rate": 2.8557870956832135e-06, "loss": 0.8242, "step": 13036 }, { "epoch": 0.9418606751313959, "grad_norm": 6.43236625926722, "learning_rate": 2.8554975681126966e-06, "loss": 0.8676, "step": 13037 }, { "epoch": 0.9419329203316055, "grad_norm": 8.766032761934031, "learning_rate": 2.8552080356756958e-06, "loss": 0.8231, "step": 13038 }, { "epoch": 0.942005165531815, "grad_norm": 5.149161350148515, "learning_rate": 2.854918498376171e-06, "loss": 0.7074, "step": 13039 }, { "epoch": 0.9420774107320244, "grad_norm": 4.942595930466131, "learning_rate": 2.8546289562180896e-06, "loss": 0.7888, "step": 13040 }, { "epoch": 0.942149655932234, "grad_norm": 6.025629300378191, "learning_rate": 2.8543394092054115e-06, "loss": 0.7701, "step": 13041 }, { "epoch": 0.9422219011324435, "grad_norm": 7.672122120033481, "learning_rate": 2.8540498573421022e-06, "loss": 0.7849, "step": 13042 }, { "epoch": 0.9422941463326531, "grad_norm": 6.689737391314108, "learning_rate": 2.8537603006321264e-06, "loss": 0.7776, "step": 13043 }, { "epoch": 0.9423663915328625, "grad_norm": 6.383511116334277, "learning_rate": 2.8534707390794455e-06, "loss": 0.844, "step": 13044 }, { "epoch": 0.942438636733072, "grad_norm": 6.4317785225689255, "learning_rate": 2.8531811726880253e-06, "loss": 0.7622, "step": 13045 }, { "epoch": 0.9425108819332816, "grad_norm": 6.1319686795086135, "learning_rate": 2.8528916014618284e-06, "loss": 0.7697, "step": 13046 }, { "epoch": 0.942583127133491, "grad_norm": 5.760767267835089, "learning_rate": 2.8526020254048204e-06, "loss": 0.8318, "step": 13047 }, { "epoch": 0.9426553723337006, "grad_norm": 6.763806951167091, "learning_rate": 2.8523124445209634e-06, "loss": 0.8234, "step": 13048 }, { "epoch": 0.9427276175339101, "grad_norm": 5.187103624022529, "learning_rate": 2.8520228588142236e-06, "loss": 0.7558, "step": 13049 }, { "epoch": 0.9427998627341196, "grad_norm": 5.94626556657474, "learning_rate": 2.851733268288564e-06, "loss": 0.7779, "step": 13050 }, { "epoch": 0.9428721079343291, "grad_norm": 7.300786736304119, "learning_rate": 2.8514436729479484e-06, "loss": 0.7884, "step": 13051 }, { "epoch": 0.9429443531345386, "grad_norm": 6.6512352047221395, "learning_rate": 2.851154072796342e-06, "loss": 0.8347, "step": 13052 }, { "epoch": 0.9430165983347482, "grad_norm": 4.8318473022282875, "learning_rate": 2.8508644678377097e-06, "loss": 0.7437, "step": 13053 }, { "epoch": 0.9430888435349577, "grad_norm": 6.165704428716554, "learning_rate": 2.850574858076015e-06, "loss": 0.7292, "step": 13054 }, { "epoch": 0.9431610887351671, "grad_norm": 4.748764479721258, "learning_rate": 2.850285243515222e-06, "loss": 0.7021, "step": 13055 }, { "epoch": 0.9432333339353767, "grad_norm": 7.018714139715966, "learning_rate": 2.8499956241592964e-06, "loss": 0.8089, "step": 13056 }, { "epoch": 0.9433055791355862, "grad_norm": 8.050651893520458, "learning_rate": 2.849706000012203e-06, "loss": 0.8292, "step": 13057 }, { "epoch": 0.9433778243357956, "grad_norm": 6.341469340652646, "learning_rate": 2.849416371077905e-06, "loss": 0.7755, "step": 13058 }, { "epoch": 0.9434500695360052, "grad_norm": 5.561582682547699, "learning_rate": 2.849126737360369e-06, "loss": 0.8708, "step": 13059 }, { "epoch": 0.9435223147362147, "grad_norm": 6.517683184368267, "learning_rate": 2.848837098863559e-06, "loss": 0.7816, "step": 13060 }, { "epoch": 0.9435945599364243, "grad_norm": 6.3163506877580975, "learning_rate": 2.8485474555914393e-06, "loss": 0.7894, "step": 13061 }, { "epoch": 0.9436668051366337, "grad_norm": 6.12926381455777, "learning_rate": 2.8482578075479755e-06, "loss": 0.8349, "step": 13062 }, { "epoch": 0.9437390503368432, "grad_norm": 6.881923172095926, "learning_rate": 2.8479681547371342e-06, "loss": 0.7443, "step": 13063 }, { "epoch": 0.9438112955370528, "grad_norm": 5.507884356523734, "learning_rate": 2.8476784971628773e-06, "loss": 0.8928, "step": 13064 }, { "epoch": 0.9438835407372622, "grad_norm": 7.008404182623297, "learning_rate": 2.847388834829173e-06, "loss": 0.7889, "step": 13065 }, { "epoch": 0.9439557859374718, "grad_norm": 7.6476828462927555, "learning_rate": 2.8470991677399844e-06, "loss": 0.8028, "step": 13066 }, { "epoch": 0.9440280311376813, "grad_norm": 6.35866113353533, "learning_rate": 2.846809495899278e-06, "loss": 0.8187, "step": 13067 }, { "epoch": 0.9441002763378908, "grad_norm": 5.967469082941224, "learning_rate": 2.8465198193110193e-06, "loss": 0.814, "step": 13068 }, { "epoch": 0.9441725215381003, "grad_norm": 5.143828440331148, "learning_rate": 2.846230137979173e-06, "loss": 0.8277, "step": 13069 }, { "epoch": 0.9442447667383098, "grad_norm": 6.140276794655941, "learning_rate": 2.8459404519077042e-06, "loss": 0.8139, "step": 13070 }, { "epoch": 0.9443170119385194, "grad_norm": 6.214684465765997, "learning_rate": 2.8456507611005802e-06, "loss": 0.8123, "step": 13071 }, { "epoch": 0.9443892571387289, "grad_norm": 10.118787578586026, "learning_rate": 2.8453610655617654e-06, "loss": 0.8508, "step": 13072 }, { "epoch": 0.9444615023389383, "grad_norm": 5.527190534537246, "learning_rate": 2.8450713652952256e-06, "loss": 0.7434, "step": 13073 }, { "epoch": 0.9445337475391479, "grad_norm": 5.37622996607021, "learning_rate": 2.844781660304927e-06, "loss": 0.8412, "step": 13074 }, { "epoch": 0.9446059927393574, "grad_norm": 7.239119158530701, "learning_rate": 2.8444919505948355e-06, "loss": 0.7552, "step": 13075 }, { "epoch": 0.9446782379395668, "grad_norm": 7.945603446922215, "learning_rate": 2.844202236168916e-06, "loss": 0.8528, "step": 13076 }, { "epoch": 0.9447504831397764, "grad_norm": 5.321924623536902, "learning_rate": 2.8439125170311356e-06, "loss": 0.7721, "step": 13077 }, { "epoch": 0.9448227283399859, "grad_norm": 7.275174598302849, "learning_rate": 2.8436227931854593e-06, "loss": 0.7616, "step": 13078 }, { "epoch": 0.9448949735401955, "grad_norm": 6.322478914478358, "learning_rate": 2.843333064635855e-06, "loss": 0.8587, "step": 13079 }, { "epoch": 0.9449672187404049, "grad_norm": 6.842517480517781, "learning_rate": 2.8430433313862854e-06, "loss": 0.8254, "step": 13080 }, { "epoch": 0.9450394639406144, "grad_norm": 5.891867266328494, "learning_rate": 2.842753593440721e-06, "loss": 0.7817, "step": 13081 }, { "epoch": 0.945111709140824, "grad_norm": 7.426299565189489, "learning_rate": 2.8424638508031256e-06, "loss": 0.7752, "step": 13082 }, { "epoch": 0.9451839543410334, "grad_norm": 7.1850916269044, "learning_rate": 2.842174103477465e-06, "loss": 0.8686, "step": 13083 }, { "epoch": 0.945256199541243, "grad_norm": 6.9457104185960805, "learning_rate": 2.8418843514677074e-06, "loss": 0.8105, "step": 13084 }, { "epoch": 0.9453284447414525, "grad_norm": 5.9505356491427515, "learning_rate": 2.8415945947778186e-06, "loss": 0.6346, "step": 13085 }, { "epoch": 0.945400689941662, "grad_norm": 6.740011949387097, "learning_rate": 2.841304833411765e-06, "loss": 0.8332, "step": 13086 }, { "epoch": 0.9454729351418715, "grad_norm": 6.011656881687008, "learning_rate": 2.8410150673735124e-06, "loss": 0.7682, "step": 13087 }, { "epoch": 0.945545180342081, "grad_norm": 7.233242003864837, "learning_rate": 2.8407252966670296e-06, "loss": 0.846, "step": 13088 }, { "epoch": 0.9456174255422906, "grad_norm": 7.527356166067212, "learning_rate": 2.8404355212962815e-06, "loss": 0.7525, "step": 13089 }, { "epoch": 0.9456896707425001, "grad_norm": 5.4295863299927145, "learning_rate": 2.840145741265235e-06, "loss": 0.7682, "step": 13090 }, { "epoch": 0.9457619159427095, "grad_norm": 6.0794506833736435, "learning_rate": 2.839855956577858e-06, "loss": 0.8446, "step": 13091 }, { "epoch": 0.9458341611429191, "grad_norm": 7.623775446383817, "learning_rate": 2.8395661672381163e-06, "loss": 0.8754, "step": 13092 }, { "epoch": 0.9459064063431286, "grad_norm": 5.762355301171853, "learning_rate": 2.8392763732499783e-06, "loss": 0.7966, "step": 13093 }, { "epoch": 0.945978651543338, "grad_norm": 5.770093225423496, "learning_rate": 2.8389865746174087e-06, "loss": 0.7967, "step": 13094 }, { "epoch": 0.9460508967435476, "grad_norm": 7.022943770013921, "learning_rate": 2.8386967713443776e-06, "loss": 0.7386, "step": 13095 }, { "epoch": 0.9461231419437571, "grad_norm": 7.627815102139135, "learning_rate": 2.83840696343485e-06, "loss": 0.7485, "step": 13096 }, { "epoch": 0.9461953871439667, "grad_norm": 6.2348329763059125, "learning_rate": 2.838117150892794e-06, "loss": 0.7619, "step": 13097 }, { "epoch": 0.9462676323441761, "grad_norm": 5.495201792183469, "learning_rate": 2.8378273337221766e-06, "loss": 0.7366, "step": 13098 }, { "epoch": 0.9463398775443856, "grad_norm": 7.211875383380536, "learning_rate": 2.8375375119269655e-06, "loss": 0.785, "step": 13099 }, { "epoch": 0.9464121227445952, "grad_norm": 5.242111455647212, "learning_rate": 2.8372476855111283e-06, "loss": 0.762, "step": 13100 }, { "epoch": 0.9464843679448046, "grad_norm": 7.79492407722031, "learning_rate": 2.8369578544786315e-06, "loss": 0.8852, "step": 13101 }, { "epoch": 0.9465566131450142, "grad_norm": 5.337840063621221, "learning_rate": 2.8366680188334433e-06, "loss": 0.7808, "step": 13102 }, { "epoch": 0.9466288583452237, "grad_norm": 6.060352062913907, "learning_rate": 2.8363781785795314e-06, "loss": 0.8157, "step": 13103 }, { "epoch": 0.9467011035454332, "grad_norm": 6.476495942623929, "learning_rate": 2.8360883337208633e-06, "loss": 0.805, "step": 13104 }, { "epoch": 0.9467733487456427, "grad_norm": 5.5839268193409275, "learning_rate": 2.835798484261407e-06, "loss": 0.6774, "step": 13105 }, { "epoch": 0.9468455939458522, "grad_norm": 6.30631519658212, "learning_rate": 2.8355086302051306e-06, "loss": 0.8341, "step": 13106 }, { "epoch": 0.9469178391460618, "grad_norm": 6.670046839513842, "learning_rate": 2.8352187715560013e-06, "loss": 0.7484, "step": 13107 }, { "epoch": 0.9469900843462713, "grad_norm": 6.357528080910107, "learning_rate": 2.8349289083179863e-06, "loss": 0.89, "step": 13108 }, { "epoch": 0.9470623295464807, "grad_norm": 5.467771431700606, "learning_rate": 2.8346390404950565e-06, "loss": 0.9268, "step": 13109 }, { "epoch": 0.9471345747466903, "grad_norm": 6.315926707600497, "learning_rate": 2.8343491680911765e-06, "loss": 0.7485, "step": 13110 }, { "epoch": 0.9472068199468998, "grad_norm": 6.33981637249557, "learning_rate": 2.834059291110317e-06, "loss": 0.752, "step": 13111 }, { "epoch": 0.9472790651471092, "grad_norm": 6.443435747122409, "learning_rate": 2.833769409556445e-06, "loss": 0.8099, "step": 13112 }, { "epoch": 0.9473513103473188, "grad_norm": 8.20912773303029, "learning_rate": 2.8334795234335284e-06, "loss": 0.7528, "step": 13113 }, { "epoch": 0.9474235555475283, "grad_norm": 5.84740085082296, "learning_rate": 2.833189632745536e-06, "loss": 0.7577, "step": 13114 }, { "epoch": 0.9474958007477379, "grad_norm": 6.472193589190378, "learning_rate": 2.832899737496437e-06, "loss": 0.8507, "step": 13115 }, { "epoch": 0.9475680459479473, "grad_norm": 6.60023418357835, "learning_rate": 2.832609837690199e-06, "loss": 0.7546, "step": 13116 }, { "epoch": 0.9476402911481568, "grad_norm": 6.320475761275962, "learning_rate": 2.83231993333079e-06, "loss": 0.7365, "step": 13117 }, { "epoch": 0.9477125363483664, "grad_norm": 6.685645015530038, "learning_rate": 2.8320300244221792e-06, "loss": 0.7843, "step": 13118 }, { "epoch": 0.9477847815485758, "grad_norm": 9.007963260338563, "learning_rate": 2.831740110968335e-06, "loss": 0.7544, "step": 13119 }, { "epoch": 0.9478570267487854, "grad_norm": 6.246608271588802, "learning_rate": 2.831450192973227e-06, "loss": 0.7867, "step": 13120 }, { "epoch": 0.9479292719489949, "grad_norm": 6.297638271823365, "learning_rate": 2.8311602704408224e-06, "loss": 0.7696, "step": 13121 }, { "epoch": 0.9480015171492044, "grad_norm": 5.305900513695151, "learning_rate": 2.830870343375091e-06, "loss": 0.715, "step": 13122 }, { "epoch": 0.9480737623494139, "grad_norm": 5.380563030632699, "learning_rate": 2.8305804117800024e-06, "loss": 0.7999, "step": 13123 }, { "epoch": 0.9481460075496234, "grad_norm": 6.402570756538382, "learning_rate": 2.830290475659524e-06, "loss": 0.8131, "step": 13124 }, { "epoch": 0.948218252749833, "grad_norm": 6.805886794698826, "learning_rate": 2.830000535017626e-06, "loss": 0.7824, "step": 13125 }, { "epoch": 0.9482904979500425, "grad_norm": 5.686276146976836, "learning_rate": 2.8297105898582756e-06, "loss": 0.7144, "step": 13126 }, { "epoch": 0.9483627431502519, "grad_norm": 6.98862404832791, "learning_rate": 2.829420640185444e-06, "loss": 0.7512, "step": 13127 }, { "epoch": 0.9484349883504615, "grad_norm": 6.102834962046296, "learning_rate": 2.829130686003099e-06, "loss": 0.7706, "step": 13128 }, { "epoch": 0.948507233550671, "grad_norm": 5.106221477003989, "learning_rate": 2.828840727315212e-06, "loss": 0.8326, "step": 13129 }, { "epoch": 0.9485794787508804, "grad_norm": 7.338796170335863, "learning_rate": 2.8285507641257493e-06, "loss": 0.8174, "step": 13130 }, { "epoch": 0.94865172395109, "grad_norm": 8.641112825995389, "learning_rate": 2.8282607964386828e-06, "loss": 0.7724, "step": 13131 }, { "epoch": 0.9487239691512995, "grad_norm": 7.301240256521303, "learning_rate": 2.82797082425798e-06, "loss": 0.8508, "step": 13132 }, { "epoch": 0.9487962143515091, "grad_norm": 8.056767754678797, "learning_rate": 2.8276808475876115e-06, "loss": 0.7158, "step": 13133 }, { "epoch": 0.9488684595517185, "grad_norm": 5.602558382641648, "learning_rate": 2.827390866431547e-06, "loss": 0.8275, "step": 13134 }, { "epoch": 0.948940704751928, "grad_norm": 5.812698853075306, "learning_rate": 2.827100880793755e-06, "loss": 0.7722, "step": 13135 }, { "epoch": 0.9490129499521376, "grad_norm": 6.858604161912589, "learning_rate": 2.8268108906782067e-06, "loss": 0.8133, "step": 13136 }, { "epoch": 0.949085195152347, "grad_norm": 7.026660014420219, "learning_rate": 2.8265208960888702e-06, "loss": 0.8719, "step": 13137 }, { "epoch": 0.9491574403525566, "grad_norm": 6.6548901812107015, "learning_rate": 2.8262308970297168e-06, "loss": 0.8171, "step": 13138 }, { "epoch": 0.9492296855527661, "grad_norm": 6.0429678031067455, "learning_rate": 2.8259408935047155e-06, "loss": 0.8011, "step": 13139 }, { "epoch": 0.9493019307529756, "grad_norm": 9.058405510253094, "learning_rate": 2.8256508855178373e-06, "loss": 0.768, "step": 13140 }, { "epoch": 0.9493741759531851, "grad_norm": 7.4071580032464945, "learning_rate": 2.82536087307305e-06, "loss": 0.8271, "step": 13141 }, { "epoch": 0.9494464211533946, "grad_norm": 7.63072115028701, "learning_rate": 2.8250708561743257e-06, "loss": 0.8518, "step": 13142 }, { "epoch": 0.9495186663536042, "grad_norm": 6.556128615136595, "learning_rate": 2.824780834825634e-06, "loss": 0.7915, "step": 13143 }, { "epoch": 0.9495909115538136, "grad_norm": 5.930166433742376, "learning_rate": 2.824490809030944e-06, "loss": 0.7709, "step": 13144 }, { "epoch": 0.9496631567540231, "grad_norm": 6.137886659830547, "learning_rate": 2.8242007787942277e-06, "loss": 0.8405, "step": 13145 }, { "epoch": 0.9497354019542327, "grad_norm": 6.248090223356972, "learning_rate": 2.8239107441194543e-06, "loss": 0.6966, "step": 13146 }, { "epoch": 0.9498076471544422, "grad_norm": 5.8391787351288755, "learning_rate": 2.8236207050105934e-06, "loss": 0.7839, "step": 13147 }, { "epoch": 0.9498798923546516, "grad_norm": 6.182846322978152, "learning_rate": 2.8233306614716177e-06, "loss": 0.8597, "step": 13148 }, { "epoch": 0.9499521375548612, "grad_norm": 5.47721739157889, "learning_rate": 2.8230406135064954e-06, "loss": 0.7181, "step": 13149 }, { "epoch": 0.9500243827550707, "grad_norm": 7.0522408028793855, "learning_rate": 2.822750561119198e-06, "loss": 0.8089, "step": 13150 }, { "epoch": 0.9500966279552803, "grad_norm": 8.03051849477491, "learning_rate": 2.8224605043136956e-06, "loss": 0.8621, "step": 13151 }, { "epoch": 0.9501688731554897, "grad_norm": 7.416140105453827, "learning_rate": 2.82217044309396e-06, "loss": 0.8068, "step": 13152 }, { "epoch": 0.9502411183556992, "grad_norm": 6.16559244370602, "learning_rate": 2.821880377463961e-06, "loss": 0.7043, "step": 13153 }, { "epoch": 0.9503133635559088, "grad_norm": 6.052063443473484, "learning_rate": 2.82159030742767e-06, "loss": 0.8991, "step": 13154 }, { "epoch": 0.9503856087561182, "grad_norm": 5.912188738317666, "learning_rate": 2.8213002329890566e-06, "loss": 0.7246, "step": 13155 }, { "epoch": 0.9504578539563278, "grad_norm": 6.614628861238084, "learning_rate": 2.8210101541520928e-06, "loss": 0.8151, "step": 13156 }, { "epoch": 0.9505300991565373, "grad_norm": 6.026540548192131, "learning_rate": 2.8207200709207495e-06, "loss": 0.7241, "step": 13157 }, { "epoch": 0.9506023443567468, "grad_norm": 5.870656132330718, "learning_rate": 2.820429983298997e-06, "loss": 0.7578, "step": 13158 }, { "epoch": 0.9506745895569563, "grad_norm": 5.393127744989849, "learning_rate": 2.8201398912908075e-06, "loss": 0.7156, "step": 13159 }, { "epoch": 0.9507468347571658, "grad_norm": 6.546750306464674, "learning_rate": 2.819849794900151e-06, "loss": 0.809, "step": 13160 }, { "epoch": 0.9508190799573754, "grad_norm": 7.0563051855127235, "learning_rate": 2.819559694130998e-06, "loss": 0.8104, "step": 13161 }, { "epoch": 0.9508913251575848, "grad_norm": 6.648912280620106, "learning_rate": 2.8192695889873224e-06, "loss": 0.7676, "step": 13162 }, { "epoch": 0.9509635703577943, "grad_norm": 10.400047859668648, "learning_rate": 2.818979479473094e-06, "loss": 0.8878, "step": 13163 }, { "epoch": 0.9510358155580039, "grad_norm": 4.795144828383278, "learning_rate": 2.8186893655922836e-06, "loss": 0.7755, "step": 13164 }, { "epoch": 0.9511080607582134, "grad_norm": 6.660678017501824, "learning_rate": 2.818399247348863e-06, "loss": 0.8107, "step": 13165 }, { "epoch": 0.9511803059584228, "grad_norm": 6.806750189666964, "learning_rate": 2.818109124746805e-06, "loss": 0.7661, "step": 13166 }, { "epoch": 0.9512525511586324, "grad_norm": 6.3881780514587, "learning_rate": 2.8178189977900794e-06, "loss": 0.8776, "step": 13167 }, { "epoch": 0.9513247963588419, "grad_norm": 7.0568127984635245, "learning_rate": 2.817528866482659e-06, "loss": 0.8104, "step": 13168 }, { "epoch": 0.9513970415590515, "grad_norm": 6.097640609965528, "learning_rate": 2.8172387308285143e-06, "loss": 0.7895, "step": 13169 }, { "epoch": 0.9514692867592609, "grad_norm": 7.00645476151779, "learning_rate": 2.816948590831618e-06, "loss": 0.7774, "step": 13170 }, { "epoch": 0.9515415319594704, "grad_norm": 6.722155466440078, "learning_rate": 2.816658446495941e-06, "loss": 0.7613, "step": 13171 }, { "epoch": 0.95161377715968, "grad_norm": 7.255060205821031, "learning_rate": 2.816368297825457e-06, "loss": 0.7601, "step": 13172 }, { "epoch": 0.9516860223598894, "grad_norm": 7.5814415664559425, "learning_rate": 2.816078144824136e-06, "loss": 0.7553, "step": 13173 }, { "epoch": 0.951758267560099, "grad_norm": 7.2981300989975875, "learning_rate": 2.8157879874959504e-06, "loss": 0.8868, "step": 13174 }, { "epoch": 0.9518305127603085, "grad_norm": 6.345290424502968, "learning_rate": 2.815497825844872e-06, "loss": 0.8529, "step": 13175 }, { "epoch": 0.951902757960518, "grad_norm": 6.203275049350192, "learning_rate": 2.815207659874874e-06, "loss": 0.7118, "step": 13176 }, { "epoch": 0.9519750031607275, "grad_norm": 7.034606132624776, "learning_rate": 2.8149174895899285e-06, "loss": 0.7268, "step": 13177 }, { "epoch": 0.952047248360937, "grad_norm": 5.703828951096723, "learning_rate": 2.8146273149940058e-06, "loss": 0.7021, "step": 13178 }, { "epoch": 0.9521194935611466, "grad_norm": 5.90114312170297, "learning_rate": 2.8143371360910797e-06, "loss": 0.7332, "step": 13179 }, { "epoch": 0.952191738761356, "grad_norm": 6.553775473833825, "learning_rate": 2.814046952885123e-06, "loss": 0.7977, "step": 13180 }, { "epoch": 0.9522639839615655, "grad_norm": 8.138543681120826, "learning_rate": 2.8137567653801067e-06, "loss": 0.732, "step": 13181 }, { "epoch": 0.9523362291617751, "grad_norm": 6.415219758953239, "learning_rate": 2.813466573580005e-06, "loss": 0.7419, "step": 13182 }, { "epoch": 0.9524084743619846, "grad_norm": 7.29731935377482, "learning_rate": 2.8131763774887878e-06, "loss": 0.8813, "step": 13183 }, { "epoch": 0.952480719562194, "grad_norm": 5.659263187611984, "learning_rate": 2.8128861771104297e-06, "loss": 0.6738, "step": 13184 }, { "epoch": 0.9525529647624036, "grad_norm": 7.466567738195542, "learning_rate": 2.8125959724489027e-06, "loss": 0.8873, "step": 13185 }, { "epoch": 0.9526252099626131, "grad_norm": 7.179378531929928, "learning_rate": 2.8123057635081804e-06, "loss": 0.7885, "step": 13186 }, { "epoch": 0.9526974551628227, "grad_norm": 7.436423343979208, "learning_rate": 2.8120155502922338e-06, "loss": 0.8655, "step": 13187 }, { "epoch": 0.9527697003630321, "grad_norm": 6.132316734539634, "learning_rate": 2.811725332805037e-06, "loss": 0.8082, "step": 13188 }, { "epoch": 0.9528419455632416, "grad_norm": 6.6964775505091225, "learning_rate": 2.8114351110505622e-06, "loss": 0.8824, "step": 13189 }, { "epoch": 0.9529141907634512, "grad_norm": 6.734038519363654, "learning_rate": 2.811144885032782e-06, "loss": 0.7509, "step": 13190 }, { "epoch": 0.9529864359636606, "grad_norm": 6.028415152061566, "learning_rate": 2.810854654755671e-06, "loss": 0.6967, "step": 13191 }, { "epoch": 0.9530586811638702, "grad_norm": 5.654868278554571, "learning_rate": 2.8105644202232003e-06, "loss": 0.8312, "step": 13192 }, { "epoch": 0.9531309263640797, "grad_norm": 6.8533066168436285, "learning_rate": 2.810274181439344e-06, "loss": 0.7322, "step": 13193 }, { "epoch": 0.9532031715642892, "grad_norm": 6.107410319884336, "learning_rate": 2.8099839384080752e-06, "loss": 0.8294, "step": 13194 }, { "epoch": 0.9532754167644987, "grad_norm": 5.474262010140185, "learning_rate": 2.809693691133367e-06, "loss": 0.7682, "step": 13195 }, { "epoch": 0.9533476619647082, "grad_norm": 5.8402487317770495, "learning_rate": 2.8094034396191926e-06, "loss": 0.7949, "step": 13196 }, { "epoch": 0.9534199071649178, "grad_norm": 7.282395170474156, "learning_rate": 2.809113183869526e-06, "loss": 0.8328, "step": 13197 }, { "epoch": 0.9534921523651272, "grad_norm": 5.94463489896825, "learning_rate": 2.8088229238883393e-06, "loss": 0.7906, "step": 13198 }, { "epoch": 0.9535643975653367, "grad_norm": 5.467702187566004, "learning_rate": 2.8085326596796057e-06, "loss": 0.8073, "step": 13199 }, { "epoch": 0.9536366427655463, "grad_norm": 6.069506813890828, "learning_rate": 2.8082423912473012e-06, "loss": 0.8158, "step": 13200 }, { "epoch": 0.9537088879657558, "grad_norm": 5.930602554663139, "learning_rate": 2.8079521185953966e-06, "loss": 0.7408, "step": 13201 }, { "epoch": 0.9537811331659652, "grad_norm": 6.492184268294172, "learning_rate": 2.8076618417278675e-06, "loss": 0.7065, "step": 13202 }, { "epoch": 0.9538533783661748, "grad_norm": 6.316815402545335, "learning_rate": 2.807371560648685e-06, "loss": 0.815, "step": 13203 }, { "epoch": 0.9539256235663843, "grad_norm": 6.701675080385249, "learning_rate": 2.807081275361826e-06, "loss": 0.8522, "step": 13204 }, { "epoch": 0.9539978687665939, "grad_norm": 6.650562417476435, "learning_rate": 2.806790985871262e-06, "loss": 0.7508, "step": 13205 }, { "epoch": 0.9540701139668033, "grad_norm": 7.5144280572988595, "learning_rate": 2.8065006921809683e-06, "loss": 0.8043, "step": 13206 }, { "epoch": 0.9541423591670128, "grad_norm": 5.573971866521611, "learning_rate": 2.8062103942949175e-06, "loss": 0.7893, "step": 13207 }, { "epoch": 0.9542146043672224, "grad_norm": 6.987479593045866, "learning_rate": 2.805920092217084e-06, "loss": 0.8465, "step": 13208 }, { "epoch": 0.9542868495674318, "grad_norm": 5.340176642421636, "learning_rate": 2.805629785951443e-06, "loss": 0.7367, "step": 13209 }, { "epoch": 0.9543590947676414, "grad_norm": 5.503259040027601, "learning_rate": 2.8053394755019668e-06, "loss": 0.7808, "step": 13210 }, { "epoch": 0.9544313399678509, "grad_norm": 5.399612674244362, "learning_rate": 2.805049160872631e-06, "loss": 0.7964, "step": 13211 }, { "epoch": 0.9545035851680604, "grad_norm": 7.340203390603675, "learning_rate": 2.8047588420674084e-06, "loss": 0.7893, "step": 13212 }, { "epoch": 0.9545758303682699, "grad_norm": 6.190172947453469, "learning_rate": 2.8044685190902736e-06, "loss": 0.7806, "step": 13213 }, { "epoch": 0.9546480755684794, "grad_norm": 6.829350062621774, "learning_rate": 2.8041781919452015e-06, "loss": 0.7326, "step": 13214 }, { "epoch": 0.954720320768689, "grad_norm": 6.53004407929152, "learning_rate": 2.803887860636166e-06, "loss": 0.805, "step": 13215 }, { "epoch": 0.9547925659688984, "grad_norm": 7.138871994031548, "learning_rate": 2.803597525167142e-06, "loss": 0.7302, "step": 13216 }, { "epoch": 0.9548648111691079, "grad_norm": 5.141861508890659, "learning_rate": 2.803307185542103e-06, "loss": 0.7951, "step": 13217 }, { "epoch": 0.9549370563693175, "grad_norm": 6.313341141359898, "learning_rate": 2.803016841765025e-06, "loss": 0.8347, "step": 13218 }, { "epoch": 0.955009301569527, "grad_norm": 7.954887749379284, "learning_rate": 2.8027264938398807e-06, "loss": 0.8378, "step": 13219 }, { "epoch": 0.9550815467697364, "grad_norm": 6.038263699899292, "learning_rate": 2.802436141770647e-06, "loss": 0.785, "step": 13220 }, { "epoch": 0.955153791969946, "grad_norm": 5.234670511131281, "learning_rate": 2.802145785561296e-06, "loss": 0.7168, "step": 13221 }, { "epoch": 0.9552260371701555, "grad_norm": 7.235711849601183, "learning_rate": 2.8018554252158043e-06, "loss": 0.8132, "step": 13222 }, { "epoch": 0.9552982823703651, "grad_norm": 6.501560904079352, "learning_rate": 2.8015650607381467e-06, "loss": 0.7859, "step": 13223 }, { "epoch": 0.9553705275705745, "grad_norm": 7.855707909477872, "learning_rate": 2.801274692132297e-06, "loss": 0.7903, "step": 13224 }, { "epoch": 0.955442772770784, "grad_norm": 6.703441692420516, "learning_rate": 2.8009843194022313e-06, "loss": 0.8027, "step": 13225 }, { "epoch": 0.9555150179709936, "grad_norm": 6.730574453700564, "learning_rate": 2.8006939425519233e-06, "loss": 0.7839, "step": 13226 }, { "epoch": 0.955587263171203, "grad_norm": 6.358105581995071, "learning_rate": 2.8004035615853485e-06, "loss": 0.8064, "step": 13227 }, { "epoch": 0.9556595083714126, "grad_norm": 6.080309634606246, "learning_rate": 2.8001131765064825e-06, "loss": 0.8816, "step": 13228 }, { "epoch": 0.9557317535716221, "grad_norm": 5.372214726772994, "learning_rate": 2.7998227873193005e-06, "loss": 0.7823, "step": 13229 }, { "epoch": 0.9558039987718316, "grad_norm": 7.646367635603119, "learning_rate": 2.7995323940277776e-06, "loss": 0.8352, "step": 13230 }, { "epoch": 0.9558762439720411, "grad_norm": 6.258177938280258, "learning_rate": 2.799241996635888e-06, "loss": 0.7525, "step": 13231 }, { "epoch": 0.9559484891722506, "grad_norm": 5.6121912610068865, "learning_rate": 2.7989515951476083e-06, "loss": 0.8401, "step": 13232 }, { "epoch": 0.9560207343724602, "grad_norm": 6.242459745999162, "learning_rate": 2.798661189566913e-06, "loss": 0.8227, "step": 13233 }, { "epoch": 0.9560929795726696, "grad_norm": 5.465660225093305, "learning_rate": 2.7983707798977784e-06, "loss": 0.7548, "step": 13234 }, { "epoch": 0.9561652247728791, "grad_norm": 7.630925861944804, "learning_rate": 2.798080366144179e-06, "loss": 0.7763, "step": 13235 }, { "epoch": 0.9562374699730887, "grad_norm": 7.6857109158954735, "learning_rate": 2.797789948310091e-06, "loss": 0.8011, "step": 13236 }, { "epoch": 0.9563097151732982, "grad_norm": 6.50328391823634, "learning_rate": 2.7974995263994896e-06, "loss": 0.8059, "step": 13237 }, { "epoch": 0.9563819603735076, "grad_norm": 6.65113799031129, "learning_rate": 2.7972091004163515e-06, "loss": 0.7637, "step": 13238 }, { "epoch": 0.9564542055737172, "grad_norm": 6.95655556390666, "learning_rate": 2.796918670364651e-06, "loss": 0.8758, "step": 13239 }, { "epoch": 0.9565264507739267, "grad_norm": 7.567630005177557, "learning_rate": 2.7966282362483644e-06, "loss": 0.8196, "step": 13240 }, { "epoch": 0.9565986959741363, "grad_norm": 6.156330553486311, "learning_rate": 2.7963377980714675e-06, "loss": 0.8717, "step": 13241 }, { "epoch": 0.9566709411743457, "grad_norm": 5.905436464825263, "learning_rate": 2.7960473558379366e-06, "loss": 0.736, "step": 13242 }, { "epoch": 0.9567431863745552, "grad_norm": 6.3257328739698035, "learning_rate": 2.7957569095517465e-06, "loss": 0.7803, "step": 13243 }, { "epoch": 0.9568154315747648, "grad_norm": 4.912920068539027, "learning_rate": 2.7954664592168745e-06, "loss": 0.8124, "step": 13244 }, { "epoch": 0.9568876767749742, "grad_norm": 6.119528370056293, "learning_rate": 2.795176004837296e-06, "loss": 0.7546, "step": 13245 }, { "epoch": 0.9569599219751838, "grad_norm": 5.749323597804823, "learning_rate": 2.794885546416987e-06, "loss": 0.757, "step": 13246 }, { "epoch": 0.9570321671753933, "grad_norm": 5.860039675841914, "learning_rate": 2.7945950839599246e-06, "loss": 0.7191, "step": 13247 }, { "epoch": 0.9571044123756028, "grad_norm": 7.8927598539149955, "learning_rate": 2.794304617470084e-06, "loss": 0.8169, "step": 13248 }, { "epoch": 0.9571766575758123, "grad_norm": 5.802773818655912, "learning_rate": 2.7940141469514413e-06, "loss": 0.7726, "step": 13249 }, { "epoch": 0.9572489027760218, "grad_norm": 6.225585248161726, "learning_rate": 2.793723672407973e-06, "loss": 0.8912, "step": 13250 }, { "epoch": 0.9573211479762314, "grad_norm": 6.279381924968344, "learning_rate": 2.793433193843656e-06, "loss": 0.8217, "step": 13251 }, { "epoch": 0.9573933931764408, "grad_norm": 6.392802793084543, "learning_rate": 2.793142711262467e-06, "loss": 0.7656, "step": 13252 }, { "epoch": 0.9574656383766503, "grad_norm": 6.2893369626640006, "learning_rate": 2.792852224668381e-06, "loss": 0.8165, "step": 13253 }, { "epoch": 0.9575378835768599, "grad_norm": 6.504725865680005, "learning_rate": 2.792561734065376e-06, "loss": 0.8168, "step": 13254 }, { "epoch": 0.9576101287770694, "grad_norm": 6.230013514422948, "learning_rate": 2.7922712394574273e-06, "loss": 0.7993, "step": 13255 }, { "epoch": 0.9576823739772788, "grad_norm": 7.793484671205333, "learning_rate": 2.791980740848513e-06, "loss": 0.822, "step": 13256 }, { "epoch": 0.9577546191774884, "grad_norm": 5.949821777239063, "learning_rate": 2.791690238242609e-06, "loss": 0.7396, "step": 13257 }, { "epoch": 0.9578268643776979, "grad_norm": 8.089588170455782, "learning_rate": 2.7913997316436917e-06, "loss": 0.8474, "step": 13258 }, { "epoch": 0.9578991095779075, "grad_norm": 6.712139053159232, "learning_rate": 2.7911092210557387e-06, "loss": 0.872, "step": 13259 }, { "epoch": 0.9579713547781169, "grad_norm": 5.439012514414543, "learning_rate": 2.7908187064827257e-06, "loss": 0.7815, "step": 13260 }, { "epoch": 0.9580435999783264, "grad_norm": 6.982759497956294, "learning_rate": 2.7905281879286315e-06, "loss": 0.8903, "step": 13261 }, { "epoch": 0.958115845178536, "grad_norm": 5.523353068827377, "learning_rate": 2.790237665397432e-06, "loss": 0.7885, "step": 13262 }, { "epoch": 0.9581880903787454, "grad_norm": 5.945779909866515, "learning_rate": 2.7899471388931038e-06, "loss": 0.8589, "step": 13263 }, { "epoch": 0.958260335578955, "grad_norm": 7.9073808788549735, "learning_rate": 2.7896566084196242e-06, "loss": 0.798, "step": 13264 }, { "epoch": 0.9583325807791645, "grad_norm": 8.596265168159286, "learning_rate": 2.7893660739809704e-06, "loss": 0.8045, "step": 13265 }, { "epoch": 0.958404825979374, "grad_norm": 6.045060075156893, "learning_rate": 2.7890755355811205e-06, "loss": 0.8549, "step": 13266 }, { "epoch": 0.9584770711795835, "grad_norm": 7.131292408876419, "learning_rate": 2.7887849932240503e-06, "loss": 0.7951, "step": 13267 }, { "epoch": 0.958549316379793, "grad_norm": 5.812190878246841, "learning_rate": 2.788494446913738e-06, "loss": 0.72, "step": 13268 }, { "epoch": 0.9586215615800026, "grad_norm": 8.079143998532484, "learning_rate": 2.788203896654161e-06, "loss": 0.861, "step": 13269 }, { "epoch": 0.958693806780212, "grad_norm": 8.083763766544688, "learning_rate": 2.7879133424492954e-06, "loss": 0.9154, "step": 13270 }, { "epoch": 0.9587660519804215, "grad_norm": 9.174296538698783, "learning_rate": 2.7876227843031208e-06, "loss": 0.8401, "step": 13271 }, { "epoch": 0.9588382971806311, "grad_norm": 6.997488933852953, "learning_rate": 2.787332222219613e-06, "loss": 0.8061, "step": 13272 }, { "epoch": 0.9589105423808406, "grad_norm": 6.112134539516485, "learning_rate": 2.7870416562027514e-06, "loss": 0.7419, "step": 13273 }, { "epoch": 0.95898278758105, "grad_norm": 6.558214804884173, "learning_rate": 2.7867510862565108e-06, "loss": 0.8237, "step": 13274 }, { "epoch": 0.9590550327812596, "grad_norm": 7.11346921300604, "learning_rate": 2.786460512384871e-06, "loss": 0.743, "step": 13275 }, { "epoch": 0.9591272779814691, "grad_norm": 6.928046581631496, "learning_rate": 2.7861699345918093e-06, "loss": 0.7771, "step": 13276 }, { "epoch": 0.9591995231816787, "grad_norm": 5.890263958701158, "learning_rate": 2.7858793528813034e-06, "loss": 0.8166, "step": 13277 }, { "epoch": 0.9592717683818881, "grad_norm": 8.255719138787072, "learning_rate": 2.7855887672573312e-06, "loss": 0.8115, "step": 13278 }, { "epoch": 0.9593440135820976, "grad_norm": 6.282585647308876, "learning_rate": 2.7852981777238704e-06, "loss": 0.7487, "step": 13279 }, { "epoch": 0.9594162587823072, "grad_norm": 5.78357414748299, "learning_rate": 2.785007584284899e-06, "loss": 0.7512, "step": 13280 }, { "epoch": 0.9594885039825166, "grad_norm": 7.293251722717239, "learning_rate": 2.7847169869443954e-06, "loss": 0.8847, "step": 13281 }, { "epoch": 0.9595607491827262, "grad_norm": 6.2232324748898336, "learning_rate": 2.784426385706337e-06, "loss": 0.8926, "step": 13282 }, { "epoch": 0.9596329943829357, "grad_norm": 6.483662540867442, "learning_rate": 2.784135780574702e-06, "loss": 0.8044, "step": 13283 }, { "epoch": 0.9597052395831452, "grad_norm": 7.037144068225611, "learning_rate": 2.783845171553469e-06, "loss": 0.7872, "step": 13284 }, { "epoch": 0.9597774847833547, "grad_norm": 5.665990377722964, "learning_rate": 2.783554558646616e-06, "loss": 0.7509, "step": 13285 }, { "epoch": 0.9598497299835642, "grad_norm": 5.800193967535495, "learning_rate": 2.783263941858122e-06, "loss": 0.7257, "step": 13286 }, { "epoch": 0.9599219751837738, "grad_norm": 7.732109832441434, "learning_rate": 2.7829733211919635e-06, "loss": 0.8612, "step": 13287 }, { "epoch": 0.9599942203839832, "grad_norm": 8.294206035526585, "learning_rate": 2.7826826966521205e-06, "loss": 0.7651, "step": 13288 }, { "epoch": 0.9600664655841927, "grad_norm": 7.064741740976752, "learning_rate": 2.782392068242571e-06, "loss": 0.7422, "step": 13289 }, { "epoch": 0.9601387107844023, "grad_norm": 6.463369297021309, "learning_rate": 2.782101435967293e-06, "loss": 0.7906, "step": 13290 }, { "epoch": 0.9602109559846118, "grad_norm": 5.769548440648053, "learning_rate": 2.7818107998302656e-06, "loss": 0.8109, "step": 13291 }, { "epoch": 0.9602832011848212, "grad_norm": 6.579689111756957, "learning_rate": 2.781520159835467e-06, "loss": 0.7774, "step": 13292 }, { "epoch": 0.9603554463850308, "grad_norm": 9.394149956529422, "learning_rate": 2.7812295159868757e-06, "loss": 0.8098, "step": 13293 }, { "epoch": 0.9604276915852403, "grad_norm": 5.8523923237121185, "learning_rate": 2.780938868288471e-06, "loss": 0.679, "step": 13294 }, { "epoch": 0.9604999367854499, "grad_norm": 6.983491231488261, "learning_rate": 2.780648216744231e-06, "loss": 0.7427, "step": 13295 }, { "epoch": 0.9605721819856593, "grad_norm": 6.922542359018278, "learning_rate": 2.780357561358135e-06, "loss": 0.7494, "step": 13296 }, { "epoch": 0.9606444271858688, "grad_norm": 5.907618449892623, "learning_rate": 2.7800669021341627e-06, "loss": 0.7979, "step": 13297 }, { "epoch": 0.9607166723860784, "grad_norm": 6.371234754245143, "learning_rate": 2.7797762390762905e-06, "loss": 0.7844, "step": 13298 }, { "epoch": 0.9607889175862878, "grad_norm": 6.886979087889787, "learning_rate": 2.7794855721884996e-06, "loss": 0.8873, "step": 13299 }, { "epoch": 0.9608611627864974, "grad_norm": 5.437925826312866, "learning_rate": 2.7791949014747684e-06, "loss": 0.8919, "step": 13300 }, { "epoch": 0.9609334079867069, "grad_norm": 6.548263074899763, "learning_rate": 2.7789042269390754e-06, "loss": 0.7616, "step": 13301 }, { "epoch": 0.9610056531869164, "grad_norm": 5.7899434478580645, "learning_rate": 2.7786135485853993e-06, "loss": 0.8549, "step": 13302 }, { "epoch": 0.9610778983871259, "grad_norm": 5.592501708820408, "learning_rate": 2.778322866417721e-06, "loss": 0.809, "step": 13303 }, { "epoch": 0.9611501435873354, "grad_norm": 5.8702305040478855, "learning_rate": 2.778032180440019e-06, "loss": 0.793, "step": 13304 }, { "epoch": 0.961222388787545, "grad_norm": 7.595964273763471, "learning_rate": 2.7777414906562723e-06, "loss": 0.788, "step": 13305 }, { "epoch": 0.9612946339877544, "grad_norm": 6.760986711619633, "learning_rate": 2.7774507970704594e-06, "loss": 0.8015, "step": 13306 }, { "epoch": 0.9613668791879639, "grad_norm": 6.717755195247982, "learning_rate": 2.777160099686561e-06, "loss": 0.7877, "step": 13307 }, { "epoch": 0.9614391243881735, "grad_norm": 5.762536356184459, "learning_rate": 2.776869398508556e-06, "loss": 0.7639, "step": 13308 }, { "epoch": 0.961511369588383, "grad_norm": 6.1812182048600866, "learning_rate": 2.7765786935404243e-06, "loss": 0.8387, "step": 13309 }, { "epoch": 0.9615836147885924, "grad_norm": 6.520423786923974, "learning_rate": 2.7762879847861444e-06, "loss": 0.7908, "step": 13310 }, { "epoch": 0.961655859988802, "grad_norm": 9.291641121274676, "learning_rate": 2.775997272249697e-06, "loss": 0.8055, "step": 13311 }, { "epoch": 0.9617281051890115, "grad_norm": 4.82501950432981, "learning_rate": 2.7757065559350605e-06, "loss": 0.6834, "step": 13312 }, { "epoch": 0.9618003503892211, "grad_norm": 6.851740105131084, "learning_rate": 2.7754158358462165e-06, "loss": 0.8155, "step": 13313 }, { "epoch": 0.9618725955894305, "grad_norm": 6.071317572340372, "learning_rate": 2.7751251119871433e-06, "loss": 0.8239, "step": 13314 }, { "epoch": 0.96194484078964, "grad_norm": 6.783059718106888, "learning_rate": 2.77483438436182e-06, "loss": 0.8092, "step": 13315 }, { "epoch": 0.9620170859898496, "grad_norm": 6.482108214014431, "learning_rate": 2.7745436529742275e-06, "loss": 0.7991, "step": 13316 }, { "epoch": 0.962089331190059, "grad_norm": 5.985885867635744, "learning_rate": 2.774252917828346e-06, "loss": 0.7647, "step": 13317 }, { "epoch": 0.9621615763902686, "grad_norm": 5.362644455037781, "learning_rate": 2.7739621789281557e-06, "loss": 0.7789, "step": 13318 }, { "epoch": 0.9622338215904781, "grad_norm": 7.853543554492118, "learning_rate": 2.7736714362776345e-06, "loss": 0.808, "step": 13319 }, { "epoch": 0.9623060667906876, "grad_norm": 6.968823898676284, "learning_rate": 2.7733806898807653e-06, "loss": 0.7064, "step": 13320 }, { "epoch": 0.9623783119908971, "grad_norm": 5.803554090536859, "learning_rate": 2.7730899397415255e-06, "loss": 0.8376, "step": 13321 }, { "epoch": 0.9624505571911066, "grad_norm": 6.554731730333486, "learning_rate": 2.772799185863897e-06, "loss": 0.7503, "step": 13322 }, { "epoch": 0.9625228023913162, "grad_norm": 6.599022370960824, "learning_rate": 2.7725084282518595e-06, "loss": 0.7478, "step": 13323 }, { "epoch": 0.9625950475915256, "grad_norm": 6.7114561722040635, "learning_rate": 2.7722176669093926e-06, "loss": 0.8939, "step": 13324 }, { "epoch": 0.9626672927917351, "grad_norm": 6.613646080918042, "learning_rate": 2.7719269018404783e-06, "loss": 0.8027, "step": 13325 }, { "epoch": 0.9627395379919447, "grad_norm": 5.500108371013749, "learning_rate": 2.7716361330490944e-06, "loss": 0.7293, "step": 13326 }, { "epoch": 0.9628117831921542, "grad_norm": 7.029229310334499, "learning_rate": 2.7713453605392236e-06, "loss": 0.8108, "step": 13327 }, { "epoch": 0.9628840283923636, "grad_norm": 5.7384055626000245, "learning_rate": 2.7710545843148456e-06, "loss": 0.7888, "step": 13328 }, { "epoch": 0.9629562735925732, "grad_norm": 6.493154295398918, "learning_rate": 2.770763804379941e-06, "loss": 0.7663, "step": 13329 }, { "epoch": 0.9630285187927827, "grad_norm": 5.427981936206151, "learning_rate": 2.7704730207384894e-06, "loss": 0.7683, "step": 13330 }, { "epoch": 0.9631007639929923, "grad_norm": 5.9915645429726805, "learning_rate": 2.770182233394473e-06, "loss": 0.7735, "step": 13331 }, { "epoch": 0.9631730091932017, "grad_norm": 8.518408196267785, "learning_rate": 2.7698914423518715e-06, "loss": 0.8597, "step": 13332 }, { "epoch": 0.9632452543934112, "grad_norm": 9.639958239579663, "learning_rate": 2.7696006476146647e-06, "loss": 0.838, "step": 13333 }, { "epoch": 0.9633174995936208, "grad_norm": 8.102215084915146, "learning_rate": 2.7693098491868355e-06, "loss": 0.8002, "step": 13334 }, { "epoch": 0.9633897447938302, "grad_norm": 5.994561273526241, "learning_rate": 2.769019047072363e-06, "loss": 0.7696, "step": 13335 }, { "epoch": 0.9634619899940398, "grad_norm": 5.908843586154127, "learning_rate": 2.7687282412752287e-06, "loss": 0.8168, "step": 13336 }, { "epoch": 0.9635342351942493, "grad_norm": 5.953922728908766, "learning_rate": 2.7684374317994136e-06, "loss": 0.8587, "step": 13337 }, { "epoch": 0.9636064803944588, "grad_norm": 6.063915598661917, "learning_rate": 2.768146618648898e-06, "loss": 0.7174, "step": 13338 }, { "epoch": 0.9636787255946683, "grad_norm": 6.429286717175583, "learning_rate": 2.7678558018276646e-06, "loss": 0.878, "step": 13339 }, { "epoch": 0.9637509707948778, "grad_norm": 5.848388462946471, "learning_rate": 2.7675649813396923e-06, "loss": 0.8713, "step": 13340 }, { "epoch": 0.9638232159950874, "grad_norm": 6.329005352054767, "learning_rate": 2.767274157188963e-06, "loss": 0.7295, "step": 13341 }, { "epoch": 0.9638954611952968, "grad_norm": 6.61560082807346, "learning_rate": 2.766983329379458e-06, "loss": 0.7994, "step": 13342 }, { "epoch": 0.9639677063955063, "grad_norm": 6.523261190076255, "learning_rate": 2.766692497915159e-06, "loss": 0.7376, "step": 13343 }, { "epoch": 0.9640399515957159, "grad_norm": 8.06540314207779, "learning_rate": 2.766401662800046e-06, "loss": 0.7172, "step": 13344 }, { "epoch": 0.9641121967959254, "grad_norm": 7.068276262809242, "learning_rate": 2.7661108240381016e-06, "loss": 0.7964, "step": 13345 }, { "epoch": 0.9641844419961348, "grad_norm": 5.869477943717577, "learning_rate": 2.7658199816333077e-06, "loss": 0.7786, "step": 13346 }, { "epoch": 0.9642566871963444, "grad_norm": 6.894481730080661, "learning_rate": 2.765529135589643e-06, "loss": 0.7619, "step": 13347 }, { "epoch": 0.9643289323965539, "grad_norm": 7.938179527691925, "learning_rate": 2.7652382859110916e-06, "loss": 0.7764, "step": 13348 }, { "epoch": 0.9644011775967635, "grad_norm": 5.720866827740547, "learning_rate": 2.7649474326016336e-06, "loss": 0.8587, "step": 13349 }, { "epoch": 0.9644734227969729, "grad_norm": 8.911192262216078, "learning_rate": 2.764656575665251e-06, "loss": 0.7988, "step": 13350 }, { "epoch": 0.9645456679971824, "grad_norm": 5.75435506672005, "learning_rate": 2.764365715105925e-06, "loss": 0.7623, "step": 13351 }, { "epoch": 0.964617913197392, "grad_norm": 6.008276317535781, "learning_rate": 2.7640748509276383e-06, "loss": 0.7739, "step": 13352 }, { "epoch": 0.9646901583976014, "grad_norm": 7.496858829100812, "learning_rate": 2.7637839831343718e-06, "loss": 0.8689, "step": 13353 }, { "epoch": 0.964762403597811, "grad_norm": 6.996280772032615, "learning_rate": 2.7634931117301072e-06, "loss": 0.7963, "step": 13354 }, { "epoch": 0.9648346487980205, "grad_norm": 6.207440237371368, "learning_rate": 2.7632022367188266e-06, "loss": 0.8179, "step": 13355 }, { "epoch": 0.96490689399823, "grad_norm": 5.897411373389381, "learning_rate": 2.7629113581045113e-06, "loss": 0.8234, "step": 13356 }, { "epoch": 0.9649791391984395, "grad_norm": 7.513783060653825, "learning_rate": 2.7626204758911446e-06, "loss": 0.7954, "step": 13357 }, { "epoch": 0.965051384398649, "grad_norm": 6.4049991037694225, "learning_rate": 2.762329590082707e-06, "loss": 0.7353, "step": 13358 }, { "epoch": 0.9651236295988586, "grad_norm": 7.28950759166592, "learning_rate": 2.7620387006831805e-06, "loss": 0.8024, "step": 13359 }, { "epoch": 0.965195874799068, "grad_norm": 6.142515711626477, "learning_rate": 2.7617478076965474e-06, "loss": 0.8376, "step": 13360 }, { "epoch": 0.9652681199992775, "grad_norm": 6.375219303921686, "learning_rate": 2.7614569111267913e-06, "loss": 0.8013, "step": 13361 }, { "epoch": 0.9653403651994871, "grad_norm": 7.010186005000654, "learning_rate": 2.7611660109778924e-06, "loss": 0.8312, "step": 13362 }, { "epoch": 0.9654126103996966, "grad_norm": 6.358050084169102, "learning_rate": 2.7608751072538343e-06, "loss": 0.8133, "step": 13363 }, { "epoch": 0.965484855599906, "grad_norm": 6.781080393274277, "learning_rate": 2.7605841999585976e-06, "loss": 0.852, "step": 13364 }, { "epoch": 0.9655571008001156, "grad_norm": 6.480321881341313, "learning_rate": 2.7602932890961655e-06, "loss": 0.8489, "step": 13365 }, { "epoch": 0.9656293460003251, "grad_norm": 6.0803902530909, "learning_rate": 2.760002374670521e-06, "loss": 0.7653, "step": 13366 }, { "epoch": 0.9657015912005346, "grad_norm": 6.67695856399328, "learning_rate": 2.7597114566856457e-06, "loss": 0.7633, "step": 13367 }, { "epoch": 0.9657738364007441, "grad_norm": 7.297141615511921, "learning_rate": 2.7594205351455222e-06, "loss": 0.7527, "step": 13368 }, { "epoch": 0.9658460816009536, "grad_norm": 6.245025180738397, "learning_rate": 2.7591296100541326e-06, "loss": 0.6989, "step": 13369 }, { "epoch": 0.9659183268011632, "grad_norm": 6.530650718244771, "learning_rate": 2.7588386814154605e-06, "loss": 0.8557, "step": 13370 }, { "epoch": 0.9659905720013726, "grad_norm": 6.1855663013785565, "learning_rate": 2.758547749233488e-06, "loss": 0.8486, "step": 13371 }, { "epoch": 0.9660628172015822, "grad_norm": 7.403536400046037, "learning_rate": 2.758256813512197e-06, "loss": 0.8046, "step": 13372 }, { "epoch": 0.9661350624017917, "grad_norm": 7.0062339816561146, "learning_rate": 2.7579658742555704e-06, "loss": 0.9052, "step": 13373 }, { "epoch": 0.9662073076020012, "grad_norm": 6.66525561976483, "learning_rate": 2.757674931467591e-06, "loss": 0.7931, "step": 13374 }, { "epoch": 0.9662795528022107, "grad_norm": 6.0284306553008875, "learning_rate": 2.7573839851522435e-06, "loss": 0.8067, "step": 13375 }, { "epoch": 0.9663517980024202, "grad_norm": 6.8804496666981585, "learning_rate": 2.7570930353135077e-06, "loss": 0.7662, "step": 13376 }, { "epoch": 0.9664240432026298, "grad_norm": 8.140490042067492, "learning_rate": 2.7568020819553687e-06, "loss": 0.7919, "step": 13377 }, { "epoch": 0.9664962884028392, "grad_norm": 7.166736365873901, "learning_rate": 2.756511125081808e-06, "loss": 0.76, "step": 13378 }, { "epoch": 0.9665685336030487, "grad_norm": 6.457814579300416, "learning_rate": 2.756220164696809e-06, "loss": 0.795, "step": 13379 }, { "epoch": 0.9666407788032583, "grad_norm": 7.617784682119518, "learning_rate": 2.755929200804356e-06, "loss": 0.8446, "step": 13380 }, { "epoch": 0.9667130240034678, "grad_norm": 5.758924110385523, "learning_rate": 2.7556382334084296e-06, "loss": 0.7612, "step": 13381 }, { "epoch": 0.9667852692036772, "grad_norm": 7.063439230791884, "learning_rate": 2.755347262513015e-06, "loss": 0.8122, "step": 13382 }, { "epoch": 0.9668575144038868, "grad_norm": 5.8781236501526895, "learning_rate": 2.755056288122094e-06, "loss": 0.8205, "step": 13383 }, { "epoch": 0.9669297596040963, "grad_norm": 8.167213525433233, "learning_rate": 2.7547653102396515e-06, "loss": 0.8413, "step": 13384 }, { "epoch": 0.9670020048043058, "grad_norm": 5.232717860935959, "learning_rate": 2.754474328869669e-06, "loss": 0.7452, "step": 13385 }, { "epoch": 0.9670742500045153, "grad_norm": 6.2581645280783444, "learning_rate": 2.754183344016131e-06, "loss": 0.8443, "step": 13386 }, { "epoch": 0.9671464952047248, "grad_norm": 5.4273483397152935, "learning_rate": 2.7538923556830198e-06, "loss": 0.7989, "step": 13387 }, { "epoch": 0.9672187404049344, "grad_norm": 6.851014623191772, "learning_rate": 2.75360136387432e-06, "loss": 0.7449, "step": 13388 }, { "epoch": 0.9672909856051438, "grad_norm": 6.165575738585361, "learning_rate": 2.753310368594014e-06, "loss": 0.7812, "step": 13389 }, { "epoch": 0.9673632308053534, "grad_norm": 7.023035566224418, "learning_rate": 2.7530193698460855e-06, "loss": 0.7593, "step": 13390 }, { "epoch": 0.9674354760055629, "grad_norm": 6.2196722065698715, "learning_rate": 2.752728367634519e-06, "loss": 0.8579, "step": 13391 }, { "epoch": 0.9675077212057724, "grad_norm": 6.2171144107760865, "learning_rate": 2.752437361963297e-06, "loss": 0.8214, "step": 13392 }, { "epoch": 0.9675799664059819, "grad_norm": 6.947308728693244, "learning_rate": 2.7521463528364034e-06, "loss": 0.8299, "step": 13393 }, { "epoch": 0.9676522116061914, "grad_norm": 5.35756908719317, "learning_rate": 2.751855340257822e-06, "loss": 0.7788, "step": 13394 }, { "epoch": 0.967724456806401, "grad_norm": 6.246384757624462, "learning_rate": 2.751564324231537e-06, "loss": 0.829, "step": 13395 }, { "epoch": 0.9677967020066104, "grad_norm": 6.130612351665736, "learning_rate": 2.7512733047615313e-06, "loss": 0.7899, "step": 13396 }, { "epoch": 0.9678689472068199, "grad_norm": 7.24898627198559, "learning_rate": 2.750982281851789e-06, "loss": 0.7844, "step": 13397 }, { "epoch": 0.9679411924070295, "grad_norm": 5.334779225494306, "learning_rate": 2.750691255506295e-06, "loss": 0.7318, "step": 13398 }, { "epoch": 0.968013437607239, "grad_norm": 6.8199274013380435, "learning_rate": 2.7504002257290313e-06, "loss": 0.8309, "step": 13399 }, { "epoch": 0.9680856828074484, "grad_norm": 6.998151807666402, "learning_rate": 2.7501091925239835e-06, "loss": 0.7615, "step": 13400 }, { "epoch": 0.968157928007658, "grad_norm": 6.276160156482183, "learning_rate": 2.749818155895135e-06, "loss": 0.6709, "step": 13401 }, { "epoch": 0.9682301732078675, "grad_norm": 7.231418343235497, "learning_rate": 2.74952711584647e-06, "loss": 0.777, "step": 13402 }, { "epoch": 0.968302418408077, "grad_norm": 5.709353873177763, "learning_rate": 2.7492360723819715e-06, "loss": 0.8381, "step": 13403 }, { "epoch": 0.9683746636082865, "grad_norm": 6.978693005634689, "learning_rate": 2.748945025505626e-06, "loss": 0.7795, "step": 13404 }, { "epoch": 0.968446908808496, "grad_norm": 6.785504250395022, "learning_rate": 2.7486539752214166e-06, "loss": 0.7503, "step": 13405 }, { "epoch": 0.9685191540087056, "grad_norm": 5.135615264374538, "learning_rate": 2.748362921533327e-06, "loss": 0.8172, "step": 13406 }, { "epoch": 0.968591399208915, "grad_norm": 5.984372450227917, "learning_rate": 2.7480718644453407e-06, "loss": 0.7827, "step": 13407 }, { "epoch": 0.9686636444091246, "grad_norm": 6.152789232383535, "learning_rate": 2.747780803961444e-06, "loss": 0.774, "step": 13408 }, { "epoch": 0.9687358896093341, "grad_norm": 6.346656471746468, "learning_rate": 2.747489740085621e-06, "loss": 0.8385, "step": 13409 }, { "epoch": 0.9688081348095436, "grad_norm": 5.7755818391790354, "learning_rate": 2.7471986728218553e-06, "loss": 0.7914, "step": 13410 }, { "epoch": 0.9688803800097531, "grad_norm": 5.714270332860681, "learning_rate": 2.746907602174131e-06, "loss": 0.8257, "step": 13411 }, { "epoch": 0.9689526252099626, "grad_norm": 5.740410146913298, "learning_rate": 2.7466165281464345e-06, "loss": 0.7466, "step": 13412 }, { "epoch": 0.9690248704101722, "grad_norm": 6.570212927028631, "learning_rate": 2.7463254507427484e-06, "loss": 0.7412, "step": 13413 }, { "epoch": 0.9690971156103816, "grad_norm": 6.120395720410083, "learning_rate": 2.746034369967059e-06, "loss": 0.8272, "step": 13414 }, { "epoch": 0.9691693608105911, "grad_norm": 6.743431568847605, "learning_rate": 2.745743285823349e-06, "loss": 0.7606, "step": 13415 }, { "epoch": 0.9692416060108007, "grad_norm": 6.299703185340455, "learning_rate": 2.7454521983156047e-06, "loss": 0.8146, "step": 13416 }, { "epoch": 0.9693138512110102, "grad_norm": 5.8759329846366875, "learning_rate": 2.7451611074478103e-06, "loss": 0.7557, "step": 13417 }, { "epoch": 0.9693860964112196, "grad_norm": 7.414472048093049, "learning_rate": 2.744870013223951e-06, "loss": 0.8045, "step": 13418 }, { "epoch": 0.9694583416114292, "grad_norm": 6.480038436022137, "learning_rate": 2.744578915648011e-06, "loss": 0.7877, "step": 13419 }, { "epoch": 0.9695305868116387, "grad_norm": 7.223887807386749, "learning_rate": 2.744287814723976e-06, "loss": 0.8416, "step": 13420 }, { "epoch": 0.9696028320118482, "grad_norm": 6.419229004100109, "learning_rate": 2.7439967104558295e-06, "loss": 0.8463, "step": 13421 }, { "epoch": 0.9696750772120577, "grad_norm": 6.372834080783165, "learning_rate": 2.7437056028475584e-06, "loss": 0.778, "step": 13422 }, { "epoch": 0.9697473224122672, "grad_norm": 5.666979313621643, "learning_rate": 2.7434144919031468e-06, "loss": 0.7995, "step": 13423 }, { "epoch": 0.9698195676124768, "grad_norm": 6.39901339555743, "learning_rate": 2.743123377626579e-06, "loss": 0.8123, "step": 13424 }, { "epoch": 0.9698918128126862, "grad_norm": 7.714513936779278, "learning_rate": 2.7428322600218415e-06, "loss": 0.7817, "step": 13425 }, { "epoch": 0.9699640580128958, "grad_norm": 5.77367271594601, "learning_rate": 2.742541139092919e-06, "loss": 0.775, "step": 13426 }, { "epoch": 0.9700363032131053, "grad_norm": 5.937510279596364, "learning_rate": 2.742250014843797e-06, "loss": 0.724, "step": 13427 }, { "epoch": 0.9701085484133148, "grad_norm": 9.459235015327366, "learning_rate": 2.74195888727846e-06, "loss": 0.8878, "step": 13428 }, { "epoch": 0.9701807936135243, "grad_norm": 7.043867801740634, "learning_rate": 2.7416677564008938e-06, "loss": 0.8259, "step": 13429 }, { "epoch": 0.9702530388137338, "grad_norm": 5.691931035989195, "learning_rate": 2.7413766222150827e-06, "loss": 0.7478, "step": 13430 }, { "epoch": 0.9703252840139434, "grad_norm": 5.3855492620420655, "learning_rate": 2.7410854847250142e-06, "loss": 0.7667, "step": 13431 }, { "epoch": 0.9703975292141528, "grad_norm": 6.922005335698004, "learning_rate": 2.7407943439346725e-06, "loss": 0.7681, "step": 13432 }, { "epoch": 0.9704697744143623, "grad_norm": 9.639405011061895, "learning_rate": 2.740503199848043e-06, "loss": 0.7923, "step": 13433 }, { "epoch": 0.9705420196145719, "grad_norm": 7.1447779225459485, "learning_rate": 2.7402120524691115e-06, "loss": 0.7366, "step": 13434 }, { "epoch": 0.9706142648147814, "grad_norm": 7.996216117544994, "learning_rate": 2.7399209018018634e-06, "loss": 0.8315, "step": 13435 }, { "epoch": 0.9706865100149908, "grad_norm": 7.183076243509348, "learning_rate": 2.7396297478502843e-06, "loss": 0.8158, "step": 13436 }, { "epoch": 0.9707587552152004, "grad_norm": 5.983841755268902, "learning_rate": 2.73933859061836e-06, "loss": 0.7812, "step": 13437 }, { "epoch": 0.9708310004154099, "grad_norm": 5.302903607163731, "learning_rate": 2.739047430110077e-06, "loss": 0.8077, "step": 13438 }, { "epoch": 0.9709032456156194, "grad_norm": 6.429235987146371, "learning_rate": 2.738756266329419e-06, "loss": 0.7704, "step": 13439 }, { "epoch": 0.9709754908158289, "grad_norm": 6.390737220133283, "learning_rate": 2.7384650992803737e-06, "loss": 0.7908, "step": 13440 }, { "epoch": 0.9710477360160384, "grad_norm": 6.146068227988194, "learning_rate": 2.738173928966927e-06, "loss": 0.8595, "step": 13441 }, { "epoch": 0.971119981216248, "grad_norm": 7.196812443330896, "learning_rate": 2.737882755393063e-06, "loss": 0.7831, "step": 13442 }, { "epoch": 0.9711922264164574, "grad_norm": 8.096747001834672, "learning_rate": 2.73759157856277e-06, "loss": 0.8513, "step": 13443 }, { "epoch": 0.971264471616667, "grad_norm": 7.2503777767281505, "learning_rate": 2.7373003984800318e-06, "loss": 0.8131, "step": 13444 }, { "epoch": 0.9713367168168765, "grad_norm": 6.305943171406309, "learning_rate": 2.7370092151488354e-06, "loss": 0.788, "step": 13445 }, { "epoch": 0.971408962017086, "grad_norm": 6.816794905198558, "learning_rate": 2.7367180285731675e-06, "loss": 0.795, "step": 13446 }, { "epoch": 0.9714812072172955, "grad_norm": 5.8022554418855625, "learning_rate": 2.736426838757013e-06, "loss": 0.7591, "step": 13447 }, { "epoch": 0.971553452417505, "grad_norm": 6.30088175098087, "learning_rate": 2.73613564570436e-06, "loss": 0.7805, "step": 13448 }, { "epoch": 0.9716256976177146, "grad_norm": 6.4779491527666435, "learning_rate": 2.7358444494191914e-06, "loss": 0.7962, "step": 13449 }, { "epoch": 0.971697942817924, "grad_norm": 6.953576234041513, "learning_rate": 2.735553249905496e-06, "loss": 0.8839, "step": 13450 }, { "epoch": 0.9717701880181335, "grad_norm": 5.362744931760693, "learning_rate": 2.7352620471672597e-06, "loss": 0.7648, "step": 13451 }, { "epoch": 0.9718424332183431, "grad_norm": 5.854309333700962, "learning_rate": 2.7349708412084693e-06, "loss": 0.8536, "step": 13452 }, { "epoch": 0.9719146784185526, "grad_norm": 6.887824841158525, "learning_rate": 2.7346796320331106e-06, "loss": 0.8237, "step": 13453 }, { "epoch": 0.971986923618762, "grad_norm": 6.9289013611297445, "learning_rate": 2.734388419645168e-06, "loss": 0.7507, "step": 13454 }, { "epoch": 0.9720591688189716, "grad_norm": 7.486278952763766, "learning_rate": 2.7340972040486318e-06, "loss": 0.815, "step": 13455 }, { "epoch": 0.9721314140191811, "grad_norm": 6.999417144487734, "learning_rate": 2.7338059852474857e-06, "loss": 0.8305, "step": 13456 }, { "epoch": 0.9722036592193906, "grad_norm": 6.31449329895587, "learning_rate": 2.7335147632457184e-06, "loss": 0.7868, "step": 13457 }, { "epoch": 0.9722759044196001, "grad_norm": 7.126018785955659, "learning_rate": 2.7332235380473144e-06, "loss": 0.8315, "step": 13458 }, { "epoch": 0.9723481496198096, "grad_norm": 6.12378945357599, "learning_rate": 2.732932309656261e-06, "loss": 0.7459, "step": 13459 }, { "epoch": 0.9724203948200192, "grad_norm": 5.365622568546515, "learning_rate": 2.732641078076545e-06, "loss": 0.766, "step": 13460 }, { "epoch": 0.9724926400202286, "grad_norm": 6.347636117756536, "learning_rate": 2.7323498433121542e-06, "loss": 0.818, "step": 13461 }, { "epoch": 0.9725648852204382, "grad_norm": 6.586901538743654, "learning_rate": 2.7320586053670746e-06, "loss": 0.8401, "step": 13462 }, { "epoch": 0.9726371304206477, "grad_norm": 5.333348731177673, "learning_rate": 2.731767364245292e-06, "loss": 0.7844, "step": 13463 }, { "epoch": 0.9727093756208572, "grad_norm": 5.475312224939441, "learning_rate": 2.7314761199507944e-06, "loss": 0.7625, "step": 13464 }, { "epoch": 0.9727816208210667, "grad_norm": 5.649368230556311, "learning_rate": 2.731184872487568e-06, "loss": 0.7527, "step": 13465 }, { "epoch": 0.9728538660212762, "grad_norm": 6.05229507925038, "learning_rate": 2.7308936218596012e-06, "loss": 0.827, "step": 13466 }, { "epoch": 0.9729261112214858, "grad_norm": 6.242857246610922, "learning_rate": 2.730602368070879e-06, "loss": 0.7948, "step": 13467 }, { "epoch": 0.9729983564216952, "grad_norm": 6.496937543853294, "learning_rate": 2.7303111111253898e-06, "loss": 0.7377, "step": 13468 }, { "epoch": 0.9730706016219047, "grad_norm": 6.392030594008925, "learning_rate": 2.73001985102712e-06, "loss": 0.771, "step": 13469 }, { "epoch": 0.9731428468221143, "grad_norm": 7.726909078109253, "learning_rate": 2.7297285877800584e-06, "loss": 0.8523, "step": 13470 }, { "epoch": 0.9732150920223238, "grad_norm": 8.026662265780606, "learning_rate": 2.7294373213881895e-06, "loss": 0.7516, "step": 13471 }, { "epoch": 0.9732873372225332, "grad_norm": 7.033348211239571, "learning_rate": 2.7291460518555026e-06, "loss": 0.8001, "step": 13472 }, { "epoch": 0.9733595824227428, "grad_norm": 6.432683531733361, "learning_rate": 2.7288547791859832e-06, "loss": 0.7329, "step": 13473 }, { "epoch": 0.9734318276229523, "grad_norm": 5.2794592716305395, "learning_rate": 2.72856350338362e-06, "loss": 0.8125, "step": 13474 }, { "epoch": 0.9735040728231618, "grad_norm": 7.221294004343541, "learning_rate": 2.7282722244524005e-06, "loss": 0.8327, "step": 13475 }, { "epoch": 0.9735763180233713, "grad_norm": 5.286870910881058, "learning_rate": 2.727980942396311e-06, "loss": 0.6935, "step": 13476 }, { "epoch": 0.9736485632235808, "grad_norm": 5.886043800992078, "learning_rate": 2.7276896572193394e-06, "loss": 0.7721, "step": 13477 }, { "epoch": 0.9737208084237904, "grad_norm": 6.557892553437548, "learning_rate": 2.727398368925473e-06, "loss": 0.8441, "step": 13478 }, { "epoch": 0.9737930536239998, "grad_norm": 5.56246245296536, "learning_rate": 2.7271070775187e-06, "loss": 0.7868, "step": 13479 }, { "epoch": 0.9738652988242094, "grad_norm": 7.291580664717897, "learning_rate": 2.7268157830030075e-06, "loss": 0.8189, "step": 13480 }, { "epoch": 0.9739375440244189, "grad_norm": 5.73181827103221, "learning_rate": 2.7265244853823823e-06, "loss": 0.8097, "step": 13481 }, { "epoch": 0.9740097892246284, "grad_norm": 7.587792088324163, "learning_rate": 2.7262331846608127e-06, "loss": 0.8012, "step": 13482 }, { "epoch": 0.9740820344248379, "grad_norm": 7.295501775812865, "learning_rate": 2.7259418808422865e-06, "loss": 0.8067, "step": 13483 }, { "epoch": 0.9741542796250474, "grad_norm": 7.092315125393867, "learning_rate": 2.7256505739307924e-06, "loss": 0.8525, "step": 13484 }, { "epoch": 0.974226524825257, "grad_norm": 5.725895321064052, "learning_rate": 2.7253592639303165e-06, "loss": 0.8248, "step": 13485 }, { "epoch": 0.9742987700254664, "grad_norm": 7.024214144698746, "learning_rate": 2.7250679508448476e-06, "loss": 0.7918, "step": 13486 }, { "epoch": 0.9743710152256759, "grad_norm": 5.96358758056379, "learning_rate": 2.724776634678373e-06, "loss": 0.7329, "step": 13487 }, { "epoch": 0.9744432604258855, "grad_norm": 6.641416830088767, "learning_rate": 2.7244853154348804e-06, "loss": 0.8327, "step": 13488 }, { "epoch": 0.974515505626095, "grad_norm": 6.600898808631072, "learning_rate": 2.7241939931183587e-06, "loss": 0.752, "step": 13489 }, { "epoch": 0.9745877508263044, "grad_norm": 5.81469389827317, "learning_rate": 2.7239026677327952e-06, "loss": 0.8366, "step": 13490 }, { "epoch": 0.974659996026514, "grad_norm": 5.84724069060336, "learning_rate": 2.723611339282178e-06, "loss": 0.8035, "step": 13491 }, { "epoch": 0.9747322412267235, "grad_norm": 6.984914327872935, "learning_rate": 2.7233200077704943e-06, "loss": 0.7286, "step": 13492 }, { "epoch": 0.974804486426933, "grad_norm": 6.187045302204696, "learning_rate": 2.723028673201734e-06, "loss": 0.888, "step": 13493 }, { "epoch": 0.9748767316271425, "grad_norm": 5.440083208008733, "learning_rate": 2.722737335579885e-06, "loss": 0.7603, "step": 13494 }, { "epoch": 0.974948976827352, "grad_norm": 5.685333845523814, "learning_rate": 2.722445994908934e-06, "loss": 0.796, "step": 13495 }, { "epoch": 0.9750212220275616, "grad_norm": 9.704558679686597, "learning_rate": 2.7221546511928697e-06, "loss": 0.9522, "step": 13496 }, { "epoch": 0.975093467227771, "grad_norm": 6.735687623580177, "learning_rate": 2.7218633044356807e-06, "loss": 0.8148, "step": 13497 }, { "epoch": 0.9751657124279806, "grad_norm": 6.9215652706521835, "learning_rate": 2.721571954641356e-06, "loss": 0.8162, "step": 13498 }, { "epoch": 0.9752379576281901, "grad_norm": 5.710694690140083, "learning_rate": 2.7212806018138825e-06, "loss": 0.7545, "step": 13499 }, { "epoch": 0.9753102028283996, "grad_norm": 5.5143607530493055, "learning_rate": 2.7209892459572507e-06, "loss": 0.736, "step": 13500 }, { "epoch": 0.9753824480286091, "grad_norm": 5.86568833054267, "learning_rate": 2.720697887075447e-06, "loss": 0.8289, "step": 13501 }, { "epoch": 0.9754546932288186, "grad_norm": 6.039611080860807, "learning_rate": 2.72040652517246e-06, "loss": 0.8454, "step": 13502 }, { "epoch": 0.9755269384290282, "grad_norm": 8.230847408381386, "learning_rate": 2.72011516025228e-06, "loss": 0.732, "step": 13503 }, { "epoch": 0.9755991836292376, "grad_norm": 5.504264738646784, "learning_rate": 2.719823792318893e-06, "loss": 0.7268, "step": 13504 }, { "epoch": 0.9756714288294471, "grad_norm": 6.2740230453306305, "learning_rate": 2.71953242137629e-06, "loss": 0.8824, "step": 13505 }, { "epoch": 0.9757436740296567, "grad_norm": 7.035467756238548, "learning_rate": 2.7192410474284577e-06, "loss": 0.7756, "step": 13506 }, { "epoch": 0.9758159192298662, "grad_norm": 6.317434667167187, "learning_rate": 2.7189496704793862e-06, "loss": 0.7432, "step": 13507 }, { "epoch": 0.9758881644300756, "grad_norm": 5.030626909992824, "learning_rate": 2.7186582905330637e-06, "loss": 0.7589, "step": 13508 }, { "epoch": 0.9759604096302852, "grad_norm": 5.371840857130444, "learning_rate": 2.718366907593479e-06, "loss": 0.769, "step": 13509 }, { "epoch": 0.9760326548304947, "grad_norm": 7.240071041959944, "learning_rate": 2.718075521664621e-06, "loss": 0.8794, "step": 13510 }, { "epoch": 0.9761049000307042, "grad_norm": 6.149401578243839, "learning_rate": 2.7177841327504785e-06, "loss": 0.8366, "step": 13511 }, { "epoch": 0.9761771452309137, "grad_norm": 6.400981541706368, "learning_rate": 2.71749274085504e-06, "loss": 0.8387, "step": 13512 }, { "epoch": 0.9762493904311232, "grad_norm": 5.054429202430075, "learning_rate": 2.717201345982295e-06, "loss": 0.7438, "step": 13513 }, { "epoch": 0.9763216356313328, "grad_norm": 5.572967453009196, "learning_rate": 2.716909948136232e-06, "loss": 0.7045, "step": 13514 }, { "epoch": 0.9763938808315422, "grad_norm": 6.4414796842387805, "learning_rate": 2.7166185473208404e-06, "loss": 0.7685, "step": 13515 }, { "epoch": 0.9764661260317518, "grad_norm": 6.243496983072605, "learning_rate": 2.716327143540109e-06, "loss": 0.7764, "step": 13516 }, { "epoch": 0.9765383712319613, "grad_norm": 6.647026441218201, "learning_rate": 2.7160357367980265e-06, "loss": 0.7961, "step": 13517 }, { "epoch": 0.9766106164321708, "grad_norm": 5.705355273459026, "learning_rate": 2.7157443270985833e-06, "loss": 0.7273, "step": 13518 }, { "epoch": 0.9766828616323803, "grad_norm": 6.754931591034845, "learning_rate": 2.715452914445768e-06, "loss": 0.8892, "step": 13519 }, { "epoch": 0.9767551068325898, "grad_norm": 6.908701741971605, "learning_rate": 2.7151614988435683e-06, "loss": 0.843, "step": 13520 }, { "epoch": 0.9768273520327994, "grad_norm": 8.304507895076897, "learning_rate": 2.714870080295976e-06, "loss": 0.8833, "step": 13521 }, { "epoch": 0.9768995972330088, "grad_norm": 6.346130826325892, "learning_rate": 2.7145786588069786e-06, "loss": 0.8356, "step": 13522 }, { "epoch": 0.9769718424332183, "grad_norm": 6.553535369263039, "learning_rate": 2.714287234380566e-06, "loss": 0.8028, "step": 13523 }, { "epoch": 0.9770440876334279, "grad_norm": 6.059255459311103, "learning_rate": 2.7139958070207272e-06, "loss": 0.7238, "step": 13524 }, { "epoch": 0.9771163328336374, "grad_norm": 5.242318300983186, "learning_rate": 2.7137043767314525e-06, "loss": 0.7633, "step": 13525 }, { "epoch": 0.9771885780338468, "grad_norm": 5.840486808762612, "learning_rate": 2.7134129435167305e-06, "loss": 0.7698, "step": 13526 }, { "epoch": 0.9772608232340564, "grad_norm": 5.959658104223621, "learning_rate": 2.7131215073805513e-06, "loss": 0.7678, "step": 13527 }, { "epoch": 0.9773330684342659, "grad_norm": 5.963239592740651, "learning_rate": 2.7128300683269042e-06, "loss": 0.8431, "step": 13528 }, { "epoch": 0.9774053136344754, "grad_norm": 7.076213115305577, "learning_rate": 2.7125386263597785e-06, "loss": 0.8343, "step": 13529 }, { "epoch": 0.9774775588346849, "grad_norm": 6.318971847729846, "learning_rate": 2.7122471814831637e-06, "loss": 0.7884, "step": 13530 }, { "epoch": 0.9775498040348944, "grad_norm": 6.053825858548854, "learning_rate": 2.71195573370105e-06, "loss": 0.8187, "step": 13531 }, { "epoch": 0.977622049235104, "grad_norm": 8.1014786865843, "learning_rate": 2.7116642830174272e-06, "loss": 0.766, "step": 13532 }, { "epoch": 0.9776942944353134, "grad_norm": 5.10017242888133, "learning_rate": 2.7113728294362847e-06, "loss": 0.7437, "step": 13533 }, { "epoch": 0.977766539635523, "grad_norm": 7.146015961948166, "learning_rate": 2.7110813729616115e-06, "loss": 0.794, "step": 13534 }, { "epoch": 0.9778387848357325, "grad_norm": 6.143434148917838, "learning_rate": 2.7107899135973985e-06, "loss": 0.7607, "step": 13535 }, { "epoch": 0.977911030035942, "grad_norm": 5.064424878801871, "learning_rate": 2.7104984513476364e-06, "loss": 0.8198, "step": 13536 }, { "epoch": 0.9779832752361515, "grad_norm": 5.665740928579014, "learning_rate": 2.710206986216313e-06, "loss": 0.6682, "step": 13537 }, { "epoch": 0.978055520436361, "grad_norm": 6.106115071456576, "learning_rate": 2.7099155182074196e-06, "loss": 0.8096, "step": 13538 }, { "epoch": 0.9781277656365706, "grad_norm": 7.140548605478926, "learning_rate": 2.7096240473249447e-06, "loss": 0.7774, "step": 13539 }, { "epoch": 0.97820001083678, "grad_norm": 5.8167079337477, "learning_rate": 2.7093325735728797e-06, "loss": 0.8187, "step": 13540 }, { "epoch": 0.9782722560369895, "grad_norm": 6.449626278954742, "learning_rate": 2.7090410969552154e-06, "loss": 0.843, "step": 13541 }, { "epoch": 0.9783445012371991, "grad_norm": 4.941951628129861, "learning_rate": 2.7087496174759397e-06, "loss": 0.7328, "step": 13542 }, { "epoch": 0.9784167464374086, "grad_norm": 7.060015486141003, "learning_rate": 2.708458135139045e-06, "loss": 0.8508, "step": 13543 }, { "epoch": 0.978488991637618, "grad_norm": 5.527909299136738, "learning_rate": 2.7081666499485187e-06, "loss": 0.7958, "step": 13544 }, { "epoch": 0.9785612368378276, "grad_norm": 6.584939720439829, "learning_rate": 2.707875161908353e-06, "loss": 0.8067, "step": 13545 }, { "epoch": 0.9786334820380371, "grad_norm": 5.740648377380782, "learning_rate": 2.707583671022539e-06, "loss": 0.8593, "step": 13546 }, { "epoch": 0.9787057272382466, "grad_norm": 5.228411839124484, "learning_rate": 2.7072921772950646e-06, "loss": 0.7332, "step": 13547 }, { "epoch": 0.9787779724384561, "grad_norm": 6.594584769248826, "learning_rate": 2.7070006807299213e-06, "loss": 0.7226, "step": 13548 }, { "epoch": 0.9788502176386656, "grad_norm": 6.116222398026346, "learning_rate": 2.7067091813310993e-06, "loss": 0.7849, "step": 13549 }, { "epoch": 0.9789224628388752, "grad_norm": 6.4512239108179905, "learning_rate": 2.70641767910259e-06, "loss": 0.7848, "step": 13550 }, { "epoch": 0.9789947080390846, "grad_norm": 6.6653008333438795, "learning_rate": 2.7061261740483818e-06, "loss": 0.7627, "step": 13551 }, { "epoch": 0.9790669532392942, "grad_norm": 7.524586115509397, "learning_rate": 2.705834666172467e-06, "loss": 0.7716, "step": 13552 }, { "epoch": 0.9791391984395037, "grad_norm": 7.780172372982511, "learning_rate": 2.7055431554788355e-06, "loss": 0.8266, "step": 13553 }, { "epoch": 0.9792114436397132, "grad_norm": 6.659155842662434, "learning_rate": 2.705251641971477e-06, "loss": 0.8202, "step": 13554 }, { "epoch": 0.9792836888399227, "grad_norm": 5.905110890173371, "learning_rate": 2.704960125654384e-06, "loss": 0.8039, "step": 13555 }, { "epoch": 0.9793559340401322, "grad_norm": 6.0317341506413955, "learning_rate": 2.7046686065315453e-06, "loss": 0.7588, "step": 13556 }, { "epoch": 0.9794281792403418, "grad_norm": 8.153392305740512, "learning_rate": 2.7043770846069527e-06, "loss": 0.9228, "step": 13557 }, { "epoch": 0.9795004244405512, "grad_norm": 6.32056991373593, "learning_rate": 2.704085559884596e-06, "loss": 0.8493, "step": 13558 }, { "epoch": 0.9795726696407607, "grad_norm": 6.696599456059944, "learning_rate": 2.7037940323684663e-06, "loss": 0.8136, "step": 13559 }, { "epoch": 0.9796449148409703, "grad_norm": 6.386291974942031, "learning_rate": 2.7035025020625555e-06, "loss": 0.8203, "step": 13560 }, { "epoch": 0.9797171600411798, "grad_norm": 6.016578024143414, "learning_rate": 2.703210968970853e-06, "loss": 0.7819, "step": 13561 }, { "epoch": 0.9797894052413892, "grad_norm": 8.33417318562485, "learning_rate": 2.70291943309735e-06, "loss": 0.7877, "step": 13562 }, { "epoch": 0.9798616504415988, "grad_norm": 8.504184366145788, "learning_rate": 2.7026278944460373e-06, "loss": 0.8104, "step": 13563 }, { "epoch": 0.9799338956418083, "grad_norm": 5.683529316645077, "learning_rate": 2.702336353020906e-06, "loss": 0.8405, "step": 13564 }, { "epoch": 0.9800061408420178, "grad_norm": 5.354209078411738, "learning_rate": 2.702044808825948e-06, "loss": 0.7392, "step": 13565 }, { "epoch": 0.9800783860422273, "grad_norm": 5.954153697927601, "learning_rate": 2.7017532618651526e-06, "loss": 0.817, "step": 13566 }, { "epoch": 0.9801506312424368, "grad_norm": 6.188040353319186, "learning_rate": 2.701461712142512e-06, "loss": 0.7218, "step": 13567 }, { "epoch": 0.9802228764426464, "grad_norm": 6.603517358029941, "learning_rate": 2.7011701596620167e-06, "loss": 0.7675, "step": 13568 }, { "epoch": 0.9802951216428558, "grad_norm": 6.558435252979883, "learning_rate": 2.7008786044276586e-06, "loss": 0.8234, "step": 13569 }, { "epoch": 0.9803673668430654, "grad_norm": 6.013110142955024, "learning_rate": 2.700587046443428e-06, "loss": 0.7991, "step": 13570 }, { "epoch": 0.9804396120432749, "grad_norm": 6.145551806537468, "learning_rate": 2.700295485713317e-06, "loss": 0.8178, "step": 13571 }, { "epoch": 0.9805118572434844, "grad_norm": 5.534041912409561, "learning_rate": 2.700003922241316e-06, "loss": 0.7848, "step": 13572 }, { "epoch": 0.9805841024436939, "grad_norm": 7.204543629791843, "learning_rate": 2.6997123560314166e-06, "loss": 0.8665, "step": 13573 }, { "epoch": 0.9806563476439034, "grad_norm": 6.623410304095352, "learning_rate": 2.6994207870876095e-06, "loss": 0.8698, "step": 13574 }, { "epoch": 0.980728592844113, "grad_norm": 6.6541893863310015, "learning_rate": 2.6991292154138877e-06, "loss": 0.7589, "step": 13575 }, { "epoch": 0.9808008380443224, "grad_norm": 7.019875782794121, "learning_rate": 2.6988376410142407e-06, "loss": 0.8077, "step": 13576 }, { "epoch": 0.9808730832445319, "grad_norm": 5.965497953441903, "learning_rate": 2.698546063892661e-06, "loss": 0.8608, "step": 13577 }, { "epoch": 0.9809453284447415, "grad_norm": 5.915611961096467, "learning_rate": 2.6982544840531404e-06, "loss": 0.7681, "step": 13578 }, { "epoch": 0.981017573644951, "grad_norm": 6.129247943988457, "learning_rate": 2.6979629014996696e-06, "loss": 0.802, "step": 13579 }, { "epoch": 0.9810898188451604, "grad_norm": 7.200426767205039, "learning_rate": 2.69767131623624e-06, "loss": 0.8176, "step": 13580 }, { "epoch": 0.98116206404537, "grad_norm": 8.966163607906672, "learning_rate": 2.697379728266844e-06, "loss": 0.8198, "step": 13581 }, { "epoch": 0.9812343092455795, "grad_norm": 6.50839249131803, "learning_rate": 2.6970881375954722e-06, "loss": 0.8213, "step": 13582 }, { "epoch": 0.981306554445789, "grad_norm": 5.211821382472996, "learning_rate": 2.6967965442261166e-06, "loss": 0.7405, "step": 13583 }, { "epoch": 0.9813787996459985, "grad_norm": 6.012373563265826, "learning_rate": 2.69650494816277e-06, "loss": 0.8446, "step": 13584 }, { "epoch": 0.981451044846208, "grad_norm": 6.334880472574572, "learning_rate": 2.696213349409424e-06, "loss": 0.8096, "step": 13585 }, { "epoch": 0.9815232900464176, "grad_norm": 6.7339742235914555, "learning_rate": 2.6959217479700684e-06, "loss": 0.868, "step": 13586 }, { "epoch": 0.981595535246627, "grad_norm": 6.156817879602083, "learning_rate": 2.695630143848696e-06, "loss": 0.7861, "step": 13587 }, { "epoch": 0.9816677804468366, "grad_norm": 5.557542284600018, "learning_rate": 2.695338537049299e-06, "loss": 0.7801, "step": 13588 }, { "epoch": 0.9817400256470461, "grad_norm": 5.714867446443524, "learning_rate": 2.6950469275758694e-06, "loss": 0.6739, "step": 13589 }, { "epoch": 0.9818122708472555, "grad_norm": 5.685674688854478, "learning_rate": 2.6947553154323987e-06, "loss": 0.6638, "step": 13590 }, { "epoch": 0.9818845160474651, "grad_norm": 6.242911629814712, "learning_rate": 2.6944637006228786e-06, "loss": 0.7486, "step": 13591 }, { "epoch": 0.9819567612476746, "grad_norm": 5.762578722898079, "learning_rate": 2.6941720831513017e-06, "loss": 0.7469, "step": 13592 }, { "epoch": 0.9820290064478842, "grad_norm": 6.710822107746294, "learning_rate": 2.6938804630216604e-06, "loss": 0.8617, "step": 13593 }, { "epoch": 0.9821012516480936, "grad_norm": 6.149839520105128, "learning_rate": 2.6935888402379456e-06, "loss": 0.8134, "step": 13594 }, { "epoch": 0.9821734968483031, "grad_norm": 5.525438607183851, "learning_rate": 2.6932972148041497e-06, "loss": 0.8196, "step": 13595 }, { "epoch": 0.9822457420485127, "grad_norm": 7.191138366649663, "learning_rate": 2.693005586724265e-06, "loss": 0.8284, "step": 13596 }, { "epoch": 0.9823179872487222, "grad_norm": 6.245311962487468, "learning_rate": 2.6927139560022835e-06, "loss": 0.7782, "step": 13597 }, { "epoch": 0.9823902324489316, "grad_norm": 6.291112643811041, "learning_rate": 2.692422322642198e-06, "loss": 0.8144, "step": 13598 }, { "epoch": 0.9824624776491412, "grad_norm": 7.552274237664352, "learning_rate": 2.6921306866480003e-06, "loss": 0.7705, "step": 13599 }, { "epoch": 0.9825347228493507, "grad_norm": 5.758905232024186, "learning_rate": 2.6918390480236826e-06, "loss": 0.8846, "step": 13600 }, { "epoch": 0.9826069680495602, "grad_norm": 8.108461890879173, "learning_rate": 2.6915474067732367e-06, "loss": 0.723, "step": 13601 }, { "epoch": 0.9826792132497697, "grad_norm": 5.927156775298325, "learning_rate": 2.6912557629006563e-06, "loss": 0.8301, "step": 13602 }, { "epoch": 0.9827514584499792, "grad_norm": 4.8126831019854945, "learning_rate": 2.690964116409933e-06, "loss": 0.6634, "step": 13603 }, { "epoch": 0.9828237036501888, "grad_norm": 6.122650007074621, "learning_rate": 2.690672467305059e-06, "loss": 0.8086, "step": 13604 }, { "epoch": 0.9828959488503982, "grad_norm": 7.190499650044158, "learning_rate": 2.6903808155900267e-06, "loss": 0.7681, "step": 13605 }, { "epoch": 0.9829681940506078, "grad_norm": 6.377845278806207, "learning_rate": 2.690089161268829e-06, "loss": 0.8277, "step": 13606 }, { "epoch": 0.9830404392508173, "grad_norm": 6.33311977361036, "learning_rate": 2.689797504345459e-06, "loss": 0.7936, "step": 13607 }, { "epoch": 0.9831126844510267, "grad_norm": 6.148782761609235, "learning_rate": 2.6895058448239075e-06, "loss": 0.8, "step": 13608 }, { "epoch": 0.9831849296512363, "grad_norm": 6.751123405502829, "learning_rate": 2.6892141827081687e-06, "loss": 0.8454, "step": 13609 }, { "epoch": 0.9832571748514458, "grad_norm": 5.676533281002766, "learning_rate": 2.6889225180022344e-06, "loss": 0.7609, "step": 13610 }, { "epoch": 0.9833294200516554, "grad_norm": 6.413620593219522, "learning_rate": 2.6886308507100972e-06, "loss": 0.8345, "step": 13611 }, { "epoch": 0.9834016652518648, "grad_norm": 6.281652333293921, "learning_rate": 2.6883391808357513e-06, "loss": 0.7837, "step": 13612 }, { "epoch": 0.9834739104520743, "grad_norm": 6.811886261058221, "learning_rate": 2.688047508383187e-06, "loss": 0.7956, "step": 13613 }, { "epoch": 0.9835461556522839, "grad_norm": 6.632499049388302, "learning_rate": 2.687755833356399e-06, "loss": 0.7429, "step": 13614 }, { "epoch": 0.9836184008524934, "grad_norm": 6.403352705638096, "learning_rate": 2.687464155759379e-06, "loss": 0.8397, "step": 13615 }, { "epoch": 0.9836906460527028, "grad_norm": 6.104839543913795, "learning_rate": 2.6871724755961203e-06, "loss": 0.8528, "step": 13616 }, { "epoch": 0.9837628912529124, "grad_norm": 6.777356132913759, "learning_rate": 2.686880792870616e-06, "loss": 0.845, "step": 13617 }, { "epoch": 0.9838351364531219, "grad_norm": 6.142284062416681, "learning_rate": 2.686589107586859e-06, "loss": 0.8678, "step": 13618 }, { "epoch": 0.9839073816533314, "grad_norm": 5.634689462765173, "learning_rate": 2.6862974197488416e-06, "loss": 0.758, "step": 13619 }, { "epoch": 0.9839796268535409, "grad_norm": 7.840333232415093, "learning_rate": 2.6860057293605566e-06, "loss": 0.9605, "step": 13620 }, { "epoch": 0.9840518720537504, "grad_norm": 5.284976344348871, "learning_rate": 2.685714036425999e-06, "loss": 0.7768, "step": 13621 }, { "epoch": 0.98412411725396, "grad_norm": 6.458386656838418, "learning_rate": 2.6854223409491596e-06, "loss": 0.8562, "step": 13622 }, { "epoch": 0.9841963624541694, "grad_norm": 7.388730731823469, "learning_rate": 2.685130642934033e-06, "loss": 0.7517, "step": 13623 }, { "epoch": 0.984268607654379, "grad_norm": 5.784585516348364, "learning_rate": 2.6848389423846107e-06, "loss": 0.7474, "step": 13624 }, { "epoch": 0.9843408528545885, "grad_norm": 5.103461433528779, "learning_rate": 2.6845472393048868e-06, "loss": 0.7448, "step": 13625 }, { "epoch": 0.9844130980547979, "grad_norm": 5.549727788899541, "learning_rate": 2.6842555336988554e-06, "loss": 0.7609, "step": 13626 }, { "epoch": 0.9844853432550075, "grad_norm": 5.46601843053151, "learning_rate": 2.683963825570508e-06, "loss": 0.7439, "step": 13627 }, { "epoch": 0.984557588455217, "grad_norm": 5.029943450549187, "learning_rate": 2.6836721149238394e-06, "loss": 0.7239, "step": 13628 }, { "epoch": 0.9846298336554266, "grad_norm": 6.951039342214351, "learning_rate": 2.6833804017628412e-06, "loss": 0.7724, "step": 13629 }, { "epoch": 0.984702078855636, "grad_norm": 6.071113995007906, "learning_rate": 2.683088686091508e-06, "loss": 0.8186, "step": 13630 }, { "epoch": 0.9847743240558455, "grad_norm": 6.046356415505152, "learning_rate": 2.6827969679138326e-06, "loss": 0.7276, "step": 13631 }, { "epoch": 0.9848465692560551, "grad_norm": 7.354537507848179, "learning_rate": 2.6825052472338097e-06, "loss": 0.8152, "step": 13632 }, { "epoch": 0.9849188144562646, "grad_norm": 5.987791356143518, "learning_rate": 2.682213524055431e-06, "loss": 0.7121, "step": 13633 }, { "epoch": 0.984991059656474, "grad_norm": 7.882413492983303, "learning_rate": 2.68192179838269e-06, "loss": 0.8537, "step": 13634 }, { "epoch": 0.9850633048566836, "grad_norm": 7.373901285345203, "learning_rate": 2.6816300702195818e-06, "loss": 0.8019, "step": 13635 }, { "epoch": 0.9851355500568931, "grad_norm": 7.023023344904684, "learning_rate": 2.681338339570099e-06, "loss": 0.8087, "step": 13636 }, { "epoch": 0.9852077952571026, "grad_norm": 5.118707841599417, "learning_rate": 2.6810466064382346e-06, "loss": 0.7688, "step": 13637 }, { "epoch": 0.9852800404573121, "grad_norm": 5.978100866656887, "learning_rate": 2.680754870827983e-06, "loss": 0.764, "step": 13638 }, { "epoch": 0.9853522856575216, "grad_norm": 5.834429646968067, "learning_rate": 2.680463132743337e-06, "loss": 0.7967, "step": 13639 }, { "epoch": 0.9854245308577312, "grad_norm": 6.729833642680501, "learning_rate": 2.680171392188291e-06, "loss": 0.8032, "step": 13640 }, { "epoch": 0.9854967760579406, "grad_norm": 6.810294818070155, "learning_rate": 2.6798796491668394e-06, "loss": 0.7652, "step": 13641 }, { "epoch": 0.9855690212581502, "grad_norm": 7.016933395619803, "learning_rate": 2.679587903682974e-06, "loss": 0.8299, "step": 13642 }, { "epoch": 0.9856412664583597, "grad_norm": 6.378613214717502, "learning_rate": 2.679296155740691e-06, "loss": 0.7418, "step": 13643 }, { "epoch": 0.9857135116585691, "grad_norm": 5.984714428710269, "learning_rate": 2.6790044053439817e-06, "loss": 0.9055, "step": 13644 }, { "epoch": 0.9857857568587787, "grad_norm": 6.313327848330689, "learning_rate": 2.678712652496841e-06, "loss": 0.7805, "step": 13645 }, { "epoch": 0.9858580020589882, "grad_norm": 5.9505814853195425, "learning_rate": 2.6784208972032637e-06, "loss": 0.8306, "step": 13646 }, { "epoch": 0.9859302472591978, "grad_norm": 6.813480971516597, "learning_rate": 2.678129139467242e-06, "loss": 0.8184, "step": 13647 }, { "epoch": 0.9860024924594072, "grad_norm": 6.229473433920079, "learning_rate": 2.6778373792927708e-06, "loss": 0.8634, "step": 13648 }, { "epoch": 0.9860747376596167, "grad_norm": 5.610384114488898, "learning_rate": 2.677545616683844e-06, "loss": 0.8127, "step": 13649 }, { "epoch": 0.9861469828598263, "grad_norm": 6.534983466783433, "learning_rate": 2.6772538516444563e-06, "loss": 0.8249, "step": 13650 }, { "epoch": 0.9862192280600358, "grad_norm": 5.739207546811742, "learning_rate": 2.6769620841786008e-06, "loss": 0.7399, "step": 13651 }, { "epoch": 0.9862914732602452, "grad_norm": 5.428293787517724, "learning_rate": 2.6766703142902717e-06, "loss": 0.773, "step": 13652 }, { "epoch": 0.9863637184604548, "grad_norm": 7.780899959182401, "learning_rate": 2.6763785419834627e-06, "loss": 0.8066, "step": 13653 }, { "epoch": 0.9864359636606643, "grad_norm": 7.447341690455362, "learning_rate": 2.676086767262168e-06, "loss": 0.7551, "step": 13654 }, { "epoch": 0.9865082088608738, "grad_norm": 6.015227703499774, "learning_rate": 2.675794990130383e-06, "loss": 0.8737, "step": 13655 }, { "epoch": 0.9865804540610833, "grad_norm": 7.156824613598176, "learning_rate": 2.6755032105921006e-06, "loss": 0.7935, "step": 13656 }, { "epoch": 0.9866526992612928, "grad_norm": 7.956069970622519, "learning_rate": 2.6752114286513164e-06, "loss": 0.786, "step": 13657 }, { "epoch": 0.9867249444615024, "grad_norm": 5.562128976170778, "learning_rate": 2.674919644312023e-06, "loss": 0.7364, "step": 13658 }, { "epoch": 0.9867971896617118, "grad_norm": 8.853571422240346, "learning_rate": 2.674627857578216e-06, "loss": 0.8545, "step": 13659 }, { "epoch": 0.9868694348619214, "grad_norm": 5.822312143390146, "learning_rate": 2.674336068453889e-06, "loss": 0.7902, "step": 13660 }, { "epoch": 0.9869416800621309, "grad_norm": 6.738430987919597, "learning_rate": 2.6740442769430363e-06, "loss": 0.7651, "step": 13661 }, { "epoch": 0.9870139252623403, "grad_norm": 6.628558085104004, "learning_rate": 2.6737524830496525e-06, "loss": 0.767, "step": 13662 }, { "epoch": 0.9870861704625499, "grad_norm": 5.907050830297531, "learning_rate": 2.6734606867777323e-06, "loss": 0.7851, "step": 13663 }, { "epoch": 0.9871584156627594, "grad_norm": 5.910843289234843, "learning_rate": 2.6731688881312705e-06, "loss": 0.8023, "step": 13664 }, { "epoch": 0.987230660862969, "grad_norm": 5.8858931176983456, "learning_rate": 2.6728770871142608e-06, "loss": 0.8404, "step": 13665 }, { "epoch": 0.9873029060631784, "grad_norm": 6.306853233196261, "learning_rate": 2.6725852837306978e-06, "loss": 0.787, "step": 13666 }, { "epoch": 0.9873751512633879, "grad_norm": 8.20775864294985, "learning_rate": 2.672293477984576e-06, "loss": 0.8007, "step": 13667 }, { "epoch": 0.9874473964635975, "grad_norm": 7.334691731034552, "learning_rate": 2.6720016698798906e-06, "loss": 0.8277, "step": 13668 }, { "epoch": 0.987519641663807, "grad_norm": 7.456947609945419, "learning_rate": 2.6717098594206358e-06, "loss": 0.7821, "step": 13669 }, { "epoch": 0.9875918868640164, "grad_norm": 6.755196725506443, "learning_rate": 2.6714180466108065e-06, "loss": 0.831, "step": 13670 }, { "epoch": 0.987664132064226, "grad_norm": 7.041695528892943, "learning_rate": 2.6711262314543977e-06, "loss": 0.8604, "step": 13671 }, { "epoch": 0.9877363772644355, "grad_norm": 6.566401602467124, "learning_rate": 2.670834413955402e-06, "loss": 0.7813, "step": 13672 }, { "epoch": 0.987808622464645, "grad_norm": 7.603845737600198, "learning_rate": 2.6705425941178174e-06, "loss": 0.7674, "step": 13673 }, { "epoch": 0.9878808676648545, "grad_norm": 7.626338153424033, "learning_rate": 2.670250771945637e-06, "loss": 0.8047, "step": 13674 }, { "epoch": 0.987953112865064, "grad_norm": 6.912734081864589, "learning_rate": 2.6699589474428556e-06, "loss": 0.7654, "step": 13675 }, { "epoch": 0.9880253580652736, "grad_norm": 5.549937088068527, "learning_rate": 2.6696671206134676e-06, "loss": 0.7593, "step": 13676 }, { "epoch": 0.988097603265483, "grad_norm": 6.116700446566543, "learning_rate": 2.6693752914614683e-06, "loss": 0.8228, "step": 13677 }, { "epoch": 0.9881698484656926, "grad_norm": 5.522518874723715, "learning_rate": 2.669083459990854e-06, "loss": 0.7731, "step": 13678 }, { "epoch": 0.9882420936659021, "grad_norm": 7.080763706030906, "learning_rate": 2.6687916262056173e-06, "loss": 0.7575, "step": 13679 }, { "epoch": 0.9883143388661115, "grad_norm": 7.690554624481183, "learning_rate": 2.6684997901097554e-06, "loss": 0.7866, "step": 13680 }, { "epoch": 0.9883865840663211, "grad_norm": 5.983627551539042, "learning_rate": 2.6682079517072614e-06, "loss": 0.7855, "step": 13681 }, { "epoch": 0.9884588292665306, "grad_norm": 6.9270570529831215, "learning_rate": 2.6679161110021307e-06, "loss": 0.7899, "step": 13682 }, { "epoch": 0.9885310744667402, "grad_norm": 6.089709071835029, "learning_rate": 2.6676242679983593e-06, "loss": 0.7943, "step": 13683 }, { "epoch": 0.9886033196669496, "grad_norm": 5.969849490212548, "learning_rate": 2.667332422699942e-06, "loss": 0.8016, "step": 13684 }, { "epoch": 0.9886755648671591, "grad_norm": 6.137143922254141, "learning_rate": 2.667040575110874e-06, "loss": 0.726, "step": 13685 }, { "epoch": 0.9887478100673687, "grad_norm": 7.062296906648449, "learning_rate": 2.6667487252351493e-06, "loss": 0.7821, "step": 13686 }, { "epoch": 0.9888200552675782, "grad_norm": 5.028145157872602, "learning_rate": 2.666456873076765e-06, "loss": 0.7751, "step": 13687 }, { "epoch": 0.9888923004677876, "grad_norm": 6.230882015095067, "learning_rate": 2.666165018639715e-06, "loss": 0.7507, "step": 13688 }, { "epoch": 0.9889645456679972, "grad_norm": 8.193898736344307, "learning_rate": 2.665873161927995e-06, "loss": 0.8292, "step": 13689 }, { "epoch": 0.9890367908682067, "grad_norm": 7.661818367291108, "learning_rate": 2.6655813029455997e-06, "loss": 0.7081, "step": 13690 }, { "epoch": 0.9891090360684162, "grad_norm": 6.575168262257777, "learning_rate": 2.665289441696525e-06, "loss": 0.8098, "step": 13691 }, { "epoch": 0.9891812812686257, "grad_norm": 4.872861735586248, "learning_rate": 2.664997578184767e-06, "loss": 0.7558, "step": 13692 }, { "epoch": 0.9892535264688352, "grad_norm": 5.7545201323068635, "learning_rate": 2.6647057124143194e-06, "loss": 0.8289, "step": 13693 }, { "epoch": 0.9893257716690448, "grad_norm": 6.623017788291334, "learning_rate": 2.664413844389179e-06, "loss": 0.8087, "step": 13694 }, { "epoch": 0.9893980168692542, "grad_norm": 6.237770322396513, "learning_rate": 2.6641219741133405e-06, "loss": 0.8594, "step": 13695 }, { "epoch": 0.9894702620694638, "grad_norm": 5.7137168873637165, "learning_rate": 2.6638301015907996e-06, "loss": 0.7317, "step": 13696 }, { "epoch": 0.9895425072696733, "grad_norm": 6.085750395771969, "learning_rate": 2.6635382268255517e-06, "loss": 0.8161, "step": 13697 }, { "epoch": 0.9896147524698827, "grad_norm": 5.821395466047565, "learning_rate": 2.6632463498215932e-06, "loss": 0.7185, "step": 13698 }, { "epoch": 0.9896869976700923, "grad_norm": 6.013640475214387, "learning_rate": 2.662954470582918e-06, "loss": 0.8165, "step": 13699 }, { "epoch": 0.9897592428703018, "grad_norm": 4.456998726327435, "learning_rate": 2.662662589113523e-06, "loss": 0.74, "step": 13700 }, { "epoch": 0.9898314880705114, "grad_norm": 5.693925853572082, "learning_rate": 2.6623707054174035e-06, "loss": 0.763, "step": 13701 }, { "epoch": 0.9899037332707208, "grad_norm": 5.488554576434581, "learning_rate": 2.662078819498555e-06, "loss": 0.7888, "step": 13702 }, { "epoch": 0.9899759784709303, "grad_norm": 8.066172628275483, "learning_rate": 2.6617869313609735e-06, "loss": 0.7666, "step": 13703 }, { "epoch": 0.9900482236711399, "grad_norm": 6.485361488402471, "learning_rate": 2.661495041008654e-06, "loss": 0.8049, "step": 13704 }, { "epoch": 0.9901204688713494, "grad_norm": 6.447881235283138, "learning_rate": 2.661203148445593e-06, "loss": 0.7897, "step": 13705 }, { "epoch": 0.9901927140715588, "grad_norm": 6.762918895785074, "learning_rate": 2.6609112536757865e-06, "loss": 0.7619, "step": 13706 }, { "epoch": 0.9902649592717684, "grad_norm": 6.62930906259928, "learning_rate": 2.6606193567032295e-06, "loss": 0.8383, "step": 13707 }, { "epoch": 0.9903372044719779, "grad_norm": 6.897962473912417, "learning_rate": 2.660327457531918e-06, "loss": 0.8101, "step": 13708 }, { "epoch": 0.9904094496721874, "grad_norm": 5.696125574249324, "learning_rate": 2.6600355561658492e-06, "loss": 0.7814, "step": 13709 }, { "epoch": 0.9904816948723969, "grad_norm": 6.024945853215586, "learning_rate": 2.659743652609017e-06, "loss": 0.7515, "step": 13710 }, { "epoch": 0.9905539400726064, "grad_norm": 6.215218773058005, "learning_rate": 2.659451746865418e-06, "loss": 0.7815, "step": 13711 }, { "epoch": 0.990626185272816, "grad_norm": 6.354759547188854, "learning_rate": 2.659159838939049e-06, "loss": 0.8228, "step": 13712 }, { "epoch": 0.9906984304730254, "grad_norm": 6.1593697480634635, "learning_rate": 2.6588679288339054e-06, "loss": 0.7878, "step": 13713 }, { "epoch": 0.990770675673235, "grad_norm": 6.24796719871439, "learning_rate": 2.6585760165539825e-06, "loss": 0.8003, "step": 13714 }, { "epoch": 0.9908429208734445, "grad_norm": 5.237993225326322, "learning_rate": 2.658284102103277e-06, "loss": 0.8329, "step": 13715 }, { "epoch": 0.9909151660736539, "grad_norm": 5.8170250127345415, "learning_rate": 2.657992185485786e-06, "loss": 0.7569, "step": 13716 }, { "epoch": 0.9909874112738635, "grad_norm": 6.361836324975773, "learning_rate": 2.6577002667055046e-06, "loss": 0.7883, "step": 13717 }, { "epoch": 0.991059656474073, "grad_norm": 4.950992161124536, "learning_rate": 2.6574083457664283e-06, "loss": 0.7089, "step": 13718 }, { "epoch": 0.9911319016742826, "grad_norm": 5.737828184228639, "learning_rate": 2.6571164226725543e-06, "loss": 0.738, "step": 13719 }, { "epoch": 0.991204146874492, "grad_norm": 6.2548159545851085, "learning_rate": 2.656824497427878e-06, "loss": 0.8226, "step": 13720 }, { "epoch": 0.9912763920747015, "grad_norm": 7.064768199101175, "learning_rate": 2.656532570036397e-06, "loss": 0.8277, "step": 13721 }, { "epoch": 0.9913486372749111, "grad_norm": 6.027594689060944, "learning_rate": 2.6562406405021056e-06, "loss": 0.8083, "step": 13722 }, { "epoch": 0.9914208824751206, "grad_norm": 7.257658760733785, "learning_rate": 2.6559487088290025e-06, "loss": 0.6747, "step": 13723 }, { "epoch": 0.99149312767533, "grad_norm": 5.987917177961545, "learning_rate": 2.6556567750210816e-06, "loss": 0.7573, "step": 13724 }, { "epoch": 0.9915653728755396, "grad_norm": 5.51425162445306, "learning_rate": 2.6553648390823406e-06, "loss": 0.7651, "step": 13725 }, { "epoch": 0.9916376180757491, "grad_norm": 5.490899793753656, "learning_rate": 2.655072901016776e-06, "loss": 0.7674, "step": 13726 }, { "epoch": 0.9917098632759586, "grad_norm": 6.306522069379625, "learning_rate": 2.6547809608283825e-06, "loss": 0.8074, "step": 13727 }, { "epoch": 0.9917821084761681, "grad_norm": 6.220382706131688, "learning_rate": 2.6544890185211585e-06, "loss": 0.757, "step": 13728 }, { "epoch": 0.9918543536763776, "grad_norm": 5.071928874615186, "learning_rate": 2.6541970740991e-06, "loss": 0.7851, "step": 13729 }, { "epoch": 0.9919265988765872, "grad_norm": 5.630550443528514, "learning_rate": 2.6539051275662036e-06, "loss": 0.7702, "step": 13730 }, { "epoch": 0.9919988440767966, "grad_norm": 8.129957828576579, "learning_rate": 2.653613178926465e-06, "loss": 0.8696, "step": 13731 }, { "epoch": 0.9920710892770062, "grad_norm": 5.067383282996461, "learning_rate": 2.6533212281838815e-06, "loss": 0.7505, "step": 13732 }, { "epoch": 0.9921433344772157, "grad_norm": 6.1703249759504475, "learning_rate": 2.6530292753424498e-06, "loss": 0.7974, "step": 13733 }, { "epoch": 0.9922155796774251, "grad_norm": 5.642518462766527, "learning_rate": 2.6527373204061653e-06, "loss": 0.7809, "step": 13734 }, { "epoch": 0.9922878248776347, "grad_norm": 6.5460836801748545, "learning_rate": 2.6524453633790258e-06, "loss": 0.6873, "step": 13735 }, { "epoch": 0.9923600700778442, "grad_norm": 5.952187772250053, "learning_rate": 2.6521534042650275e-06, "loss": 0.791, "step": 13736 }, { "epoch": 0.9924323152780538, "grad_norm": 6.394284867542668, "learning_rate": 2.6518614430681683e-06, "loss": 0.8012, "step": 13737 }, { "epoch": 0.9925045604782632, "grad_norm": 6.169546262605199, "learning_rate": 2.651569479792442e-06, "loss": 0.7903, "step": 13738 }, { "epoch": 0.9925768056784727, "grad_norm": 9.606599732624003, "learning_rate": 2.6512775144418483e-06, "loss": 0.8549, "step": 13739 }, { "epoch": 0.9926490508786823, "grad_norm": 6.563100151594025, "learning_rate": 2.650985547020383e-06, "loss": 0.8458, "step": 13740 }, { "epoch": 0.9927212960788918, "grad_norm": 6.4761448853221, "learning_rate": 2.650693577532043e-06, "loss": 0.7832, "step": 13741 }, { "epoch": 0.9927935412791012, "grad_norm": 8.061393247832003, "learning_rate": 2.6504016059808246e-06, "loss": 0.8191, "step": 13742 }, { "epoch": 0.9928657864793108, "grad_norm": 6.40053336782007, "learning_rate": 2.6501096323707243e-06, "loss": 0.8322, "step": 13743 }, { "epoch": 0.9929380316795203, "grad_norm": 6.509279523086097, "learning_rate": 2.6498176567057403e-06, "loss": 0.8281, "step": 13744 }, { "epoch": 0.9930102768797298, "grad_norm": 5.6792089373479415, "learning_rate": 2.649525678989869e-06, "loss": 0.7378, "step": 13745 }, { "epoch": 0.9930825220799393, "grad_norm": 5.551914730219734, "learning_rate": 2.6492336992271073e-06, "loss": 0.8139, "step": 13746 }, { "epoch": 0.9931547672801488, "grad_norm": 5.7054632540753, "learning_rate": 2.648941717421452e-06, "loss": 0.7903, "step": 13747 }, { "epoch": 0.9932270124803584, "grad_norm": 7.125284557348805, "learning_rate": 2.6486497335769e-06, "loss": 0.7724, "step": 13748 }, { "epoch": 0.9932992576805678, "grad_norm": 6.215023898925006, "learning_rate": 2.6483577476974487e-06, "loss": 0.8468, "step": 13749 }, { "epoch": 0.9933715028807774, "grad_norm": 6.356298803931678, "learning_rate": 2.6480657597870953e-06, "loss": 0.8055, "step": 13750 }, { "epoch": 0.9934437480809869, "grad_norm": 7.4831302380206965, "learning_rate": 2.647773769849837e-06, "loss": 0.7949, "step": 13751 }, { "epoch": 0.9935159932811963, "grad_norm": 5.81045627555418, "learning_rate": 2.6474817778896695e-06, "loss": 0.6953, "step": 13752 }, { "epoch": 0.9935882384814059, "grad_norm": 8.21601619144616, "learning_rate": 2.6471897839105915e-06, "loss": 0.8398, "step": 13753 }, { "epoch": 0.9936604836816154, "grad_norm": 5.771938103463588, "learning_rate": 2.646897787916599e-06, "loss": 0.756, "step": 13754 }, { "epoch": 0.993732728881825, "grad_norm": 6.491400972869891, "learning_rate": 2.646605789911691e-06, "loss": 0.8195, "step": 13755 }, { "epoch": 0.9938049740820344, "grad_norm": 6.843091959213749, "learning_rate": 2.6463137898998627e-06, "loss": 0.7507, "step": 13756 }, { "epoch": 0.9938772192822439, "grad_norm": 7.291763246895726, "learning_rate": 2.6460217878851123e-06, "loss": 0.7439, "step": 13757 }, { "epoch": 0.9939494644824535, "grad_norm": 6.2180098663477565, "learning_rate": 2.6457297838714373e-06, "loss": 0.8604, "step": 13758 }, { "epoch": 0.994021709682663, "grad_norm": 6.006678361334444, "learning_rate": 2.645437777862835e-06, "loss": 0.7097, "step": 13759 }, { "epoch": 0.9940939548828724, "grad_norm": 7.07731007312056, "learning_rate": 2.645145769863302e-06, "loss": 0.8738, "step": 13760 }, { "epoch": 0.994166200083082, "grad_norm": 5.541618672022058, "learning_rate": 2.6448537598768357e-06, "loss": 0.7878, "step": 13761 }, { "epoch": 0.9942384452832915, "grad_norm": 7.969384979679781, "learning_rate": 2.6445617479074348e-06, "loss": 0.8028, "step": 13762 }, { "epoch": 0.994310690483501, "grad_norm": 5.8405589810758505, "learning_rate": 2.644269733959095e-06, "loss": 0.8018, "step": 13763 }, { "epoch": 0.9943829356837105, "grad_norm": 7.334000759385826, "learning_rate": 2.643977718035815e-06, "loss": 0.8106, "step": 13764 }, { "epoch": 0.99445518088392, "grad_norm": 6.689356465998745, "learning_rate": 2.6436857001415917e-06, "loss": 0.8075, "step": 13765 }, { "epoch": 0.9945274260841296, "grad_norm": 6.352840731331615, "learning_rate": 2.6433936802804233e-06, "loss": 0.787, "step": 13766 }, { "epoch": 0.994599671284339, "grad_norm": 5.437830772424008, "learning_rate": 2.6431016584563064e-06, "loss": 0.7653, "step": 13767 }, { "epoch": 0.9946719164845486, "grad_norm": 5.4313405546270905, "learning_rate": 2.642809634673238e-06, "loss": 0.7768, "step": 13768 }, { "epoch": 0.9947441616847581, "grad_norm": 6.99780402479475, "learning_rate": 2.642517608935218e-06, "loss": 0.8486, "step": 13769 }, { "epoch": 0.9948164068849675, "grad_norm": 6.1297819193992975, "learning_rate": 2.6422255812462416e-06, "loss": 0.8228, "step": 13770 }, { "epoch": 0.9948886520851771, "grad_norm": 5.315118839804995, "learning_rate": 2.641933551610308e-06, "loss": 0.7139, "step": 13771 }, { "epoch": 0.9949608972853866, "grad_norm": 5.824368409063935, "learning_rate": 2.641641520031413e-06, "loss": 0.7538, "step": 13772 }, { "epoch": 0.9950331424855962, "grad_norm": 6.820336830121981, "learning_rate": 2.6413494865135575e-06, "loss": 0.7745, "step": 13773 }, { "epoch": 0.9951053876858056, "grad_norm": 7.535174221968588, "learning_rate": 2.641057451060736e-06, "loss": 0.7436, "step": 13774 }, { "epoch": 0.9951776328860151, "grad_norm": 5.955843247317805, "learning_rate": 2.6407654136769483e-06, "loss": 0.7937, "step": 13775 }, { "epoch": 0.9952498780862247, "grad_norm": 6.92754329980364, "learning_rate": 2.6404733743661907e-06, "loss": 0.7633, "step": 13776 }, { "epoch": 0.9953221232864342, "grad_norm": 7.860716688178416, "learning_rate": 2.640181333132462e-06, "loss": 0.834, "step": 13777 }, { "epoch": 0.9953943684866436, "grad_norm": 6.30694486723561, "learning_rate": 2.63988928997976e-06, "loss": 0.7234, "step": 13778 }, { "epoch": 0.9954666136868532, "grad_norm": 5.908219912166234, "learning_rate": 2.6395972449120815e-06, "loss": 0.7951, "step": 13779 }, { "epoch": 0.9955388588870627, "grad_norm": 6.568878271300517, "learning_rate": 2.639305197933426e-06, "loss": 0.7632, "step": 13780 }, { "epoch": 0.9956111040872722, "grad_norm": 5.970523271232113, "learning_rate": 2.6390131490477894e-06, "loss": 0.7796, "step": 13781 }, { "epoch": 0.9956833492874817, "grad_norm": 7.007571893514432, "learning_rate": 2.6387210982591717e-06, "loss": 0.824, "step": 13782 }, { "epoch": 0.9957555944876912, "grad_norm": 5.9338107242052605, "learning_rate": 2.63842904557157e-06, "loss": 0.8238, "step": 13783 }, { "epoch": 0.9958278396879008, "grad_norm": 5.41619170625237, "learning_rate": 2.6381369909889816e-06, "loss": 0.8342, "step": 13784 }, { "epoch": 0.9959000848881102, "grad_norm": 6.6809020962609695, "learning_rate": 2.6378449345154044e-06, "loss": 0.8234, "step": 13785 }, { "epoch": 0.9959723300883198, "grad_norm": 7.401490561401641, "learning_rate": 2.637552876154838e-06, "loss": 0.8604, "step": 13786 }, { "epoch": 0.9960445752885293, "grad_norm": 7.664740638400955, "learning_rate": 2.6372608159112795e-06, "loss": 0.7951, "step": 13787 }, { "epoch": 0.9961168204887387, "grad_norm": 6.519596490367851, "learning_rate": 2.6369687537887265e-06, "loss": 0.7986, "step": 13788 }, { "epoch": 0.9961890656889483, "grad_norm": 10.549275985389611, "learning_rate": 2.6366766897911785e-06, "loss": 0.7861, "step": 13789 }, { "epoch": 0.9962613108891578, "grad_norm": 5.837263617947211, "learning_rate": 2.636384623922632e-06, "loss": 0.8416, "step": 13790 }, { "epoch": 0.9963335560893674, "grad_norm": 6.97774674124541, "learning_rate": 2.636092556187086e-06, "loss": 0.7504, "step": 13791 }, { "epoch": 0.9964058012895768, "grad_norm": 6.17930754254247, "learning_rate": 2.635800486588539e-06, "loss": 0.7776, "step": 13792 }, { "epoch": 0.9964780464897863, "grad_norm": 6.951845201303095, "learning_rate": 2.635508415130988e-06, "loss": 0.8004, "step": 13793 }, { "epoch": 0.9965502916899959, "grad_norm": 6.039694137407746, "learning_rate": 2.635216341818433e-06, "loss": 0.7851, "step": 13794 }, { "epoch": 0.9966225368902053, "grad_norm": 8.357108063677787, "learning_rate": 2.6349242666548697e-06, "loss": 0.7712, "step": 13795 }, { "epoch": 0.9966947820904148, "grad_norm": 7.151757575140129, "learning_rate": 2.6346321896442993e-06, "loss": 0.758, "step": 13796 }, { "epoch": 0.9967670272906244, "grad_norm": 7.31097239652924, "learning_rate": 2.6343401107907183e-06, "loss": 0.817, "step": 13797 }, { "epoch": 0.9968392724908339, "grad_norm": 6.0862474472783274, "learning_rate": 2.6340480300981263e-06, "loss": 0.8363, "step": 13798 }, { "epoch": 0.9969115176910434, "grad_norm": 7.92791457628099, "learning_rate": 2.63375594757052e-06, "loss": 0.816, "step": 13799 }, { "epoch": 0.9969837628912529, "grad_norm": 7.517325924582257, "learning_rate": 2.6334638632118986e-06, "loss": 0.745, "step": 13800 }, { "epoch": 0.9970560080914624, "grad_norm": 5.513107979046215, "learning_rate": 2.633171777026261e-06, "loss": 0.7691, "step": 13801 }, { "epoch": 0.997128253291672, "grad_norm": 6.514087669450205, "learning_rate": 2.6328796890176045e-06, "loss": 0.7126, "step": 13802 }, { "epoch": 0.9972004984918814, "grad_norm": 7.255102795346522, "learning_rate": 2.632587599189929e-06, "loss": 0.8054, "step": 13803 }, { "epoch": 0.997272743692091, "grad_norm": 6.016443925082499, "learning_rate": 2.6322955075472317e-06, "loss": 0.8155, "step": 13804 }, { "epoch": 0.9973449888923005, "grad_norm": 5.735016581636917, "learning_rate": 2.6320034140935114e-06, "loss": 0.8293, "step": 13805 }, { "epoch": 0.9974172340925099, "grad_norm": 6.740489617740334, "learning_rate": 2.631711318832767e-06, "loss": 0.7515, "step": 13806 }, { "epoch": 0.9974894792927195, "grad_norm": 6.085365199944801, "learning_rate": 2.6314192217689976e-06, "loss": 0.776, "step": 13807 }, { "epoch": 0.997561724492929, "grad_norm": 7.186300691806365, "learning_rate": 2.631127122906201e-06, "loss": 0.7624, "step": 13808 }, { "epoch": 0.9976339696931386, "grad_norm": 6.853974808285147, "learning_rate": 2.6308350222483747e-06, "loss": 0.8056, "step": 13809 }, { "epoch": 0.997706214893348, "grad_norm": 6.771744918101969, "learning_rate": 2.63054291979952e-06, "loss": 0.8473, "step": 13810 }, { "epoch": 0.9977784600935575, "grad_norm": 5.614684076295952, "learning_rate": 2.630250815563633e-06, "loss": 0.7899, "step": 13811 }, { "epoch": 0.9978507052937671, "grad_norm": 5.920536906665103, "learning_rate": 2.6299587095447144e-06, "loss": 0.9055, "step": 13812 }, { "epoch": 0.9979229504939765, "grad_norm": 6.488970862847722, "learning_rate": 2.6296666017467615e-06, "loss": 0.8036, "step": 13813 }, { "epoch": 0.997995195694186, "grad_norm": 5.804803161700985, "learning_rate": 2.6293744921737734e-06, "loss": 0.8632, "step": 13814 }, { "epoch": 0.9980674408943956, "grad_norm": 6.621488684294509, "learning_rate": 2.629082380829749e-06, "loss": 0.8108, "step": 13815 }, { "epoch": 0.9981396860946051, "grad_norm": 8.39176606388062, "learning_rate": 2.6287902677186872e-06, "loss": 0.7507, "step": 13816 }, { "epoch": 0.9982119312948146, "grad_norm": 5.462827778285261, "learning_rate": 2.6284981528445867e-06, "loss": 0.7019, "step": 13817 }, { "epoch": 0.9982841764950241, "grad_norm": 6.28137844580657, "learning_rate": 2.628206036211446e-06, "loss": 0.7715, "step": 13818 }, { "epoch": 0.9983564216952336, "grad_norm": 5.482418048070449, "learning_rate": 2.627913917823264e-06, "loss": 0.8435, "step": 13819 }, { "epoch": 0.9984286668954432, "grad_norm": 5.782041093467165, "learning_rate": 2.6276217976840403e-06, "loss": 0.7382, "step": 13820 }, { "epoch": 0.9985009120956526, "grad_norm": 6.06584625705342, "learning_rate": 2.627329675797773e-06, "loss": 0.7585, "step": 13821 }, { "epoch": 0.9985731572958622, "grad_norm": 7.1606354581695415, "learning_rate": 2.627037552168461e-06, "loss": 0.7948, "step": 13822 }, { "epoch": 0.9986454024960717, "grad_norm": 6.759062723044, "learning_rate": 2.626745426800104e-06, "loss": 0.8038, "step": 13823 }, { "epoch": 0.9987176476962811, "grad_norm": 7.193098193483372, "learning_rate": 2.6264532996967006e-06, "loss": 0.8901, "step": 13824 }, { "epoch": 0.9987898928964907, "grad_norm": 5.827551928615728, "learning_rate": 2.62616117086225e-06, "loss": 0.7969, "step": 13825 }, { "epoch": 0.9988621380967002, "grad_norm": 6.636959293849322, "learning_rate": 2.62586904030075e-06, "loss": 0.8271, "step": 13826 }, { "epoch": 0.9989343832969098, "grad_norm": 7.853910757359693, "learning_rate": 2.625576908016201e-06, "loss": 0.7918, "step": 13827 }, { "epoch": 0.9990066284971192, "grad_norm": 6.483951416887892, "learning_rate": 2.625284774012602e-06, "loss": 0.7528, "step": 13828 }, { "epoch": 0.9990788736973287, "grad_norm": 6.49008831510717, "learning_rate": 2.624992638293951e-06, "loss": 0.7866, "step": 13829 }, { "epoch": 0.9991511188975383, "grad_norm": 7.134526359165611, "learning_rate": 2.6247005008642486e-06, "loss": 0.7511, "step": 13830 }, { "epoch": 0.9992233640977477, "grad_norm": 5.780413262928039, "learning_rate": 2.6244083617274924e-06, "loss": 0.7768, "step": 13831 }, { "epoch": 0.9992956092979572, "grad_norm": 6.135569580797739, "learning_rate": 2.624116220887683e-06, "loss": 0.7964, "step": 13832 }, { "epoch": 0.9993678544981668, "grad_norm": 5.287968983153823, "learning_rate": 2.6238240783488184e-06, "loss": 0.7712, "step": 13833 }, { "epoch": 0.9994400996983763, "grad_norm": 5.165107655912698, "learning_rate": 2.6235319341148983e-06, "loss": 0.8034, "step": 13834 }, { "epoch": 0.9995123448985858, "grad_norm": 6.439620178110668, "learning_rate": 2.623239788189923e-06, "loss": 0.7864, "step": 13835 }, { "epoch": 0.9995845900987953, "grad_norm": 5.068275453905416, "learning_rate": 2.622947640577889e-06, "loss": 0.7623, "step": 13836 }, { "epoch": 0.9996568352990048, "grad_norm": 6.601893887300061, "learning_rate": 2.622655491282799e-06, "loss": 0.7453, "step": 13837 }, { "epoch": 0.9997290804992144, "grad_norm": 6.435535612660859, "learning_rate": 2.622363340308649e-06, "loss": 0.7949, "step": 13838 }, { "epoch": 0.9998013256994238, "grad_norm": 6.020475101923507, "learning_rate": 2.6220711876594406e-06, "loss": 0.8014, "step": 13839 }, { "epoch": 0.9998735708996334, "grad_norm": 6.252861283521091, "learning_rate": 2.6217790333391724e-06, "loss": 0.6852, "step": 13840 }, { "epoch": 0.9999458160998429, "grad_norm": 6.146381970071619, "learning_rate": 2.6214868773518443e-06, "loss": 0.7262, "step": 13841 }, { "epoch": 1.0000180613000524, "grad_norm": 5.167364545060928, "learning_rate": 2.6211947197014542e-06, "loss": 0.7619, "step": 13842 }, { "epoch": 1.000090306500262, "grad_norm": 5.6034443140395345, "learning_rate": 2.6209025603920028e-06, "loss": 0.6801, "step": 13843 }, { "epoch": 1.0001625517004713, "grad_norm": 5.073089826423625, "learning_rate": 2.6206103994274896e-06, "loss": 0.6713, "step": 13844 }, { "epoch": 1.0002347969006808, "grad_norm": 5.897282326866625, "learning_rate": 2.6203182368119135e-06, "loss": 0.6659, "step": 13845 }, { "epoch": 1.0003070421008904, "grad_norm": 5.582303744606334, "learning_rate": 2.6200260725492742e-06, "loss": 0.6558, "step": 13846 }, { "epoch": 1.0003792873011, "grad_norm": 6.710523954534755, "learning_rate": 2.619733906643571e-06, "loss": 0.6382, "step": 13847 }, { "epoch": 1.0004515325013095, "grad_norm": 5.326781520217977, "learning_rate": 2.6194417390988036e-06, "loss": 0.6843, "step": 13848 }, { "epoch": 1.000523777701519, "grad_norm": 6.367263887099307, "learning_rate": 2.6191495699189716e-06, "loss": 0.7318, "step": 13849 }, { "epoch": 1.0005960229017286, "grad_norm": 6.157385087298118, "learning_rate": 2.6188573991080744e-06, "loss": 0.6286, "step": 13850 }, { "epoch": 1.0006682681019379, "grad_norm": 5.884036642223708, "learning_rate": 2.6185652266701124e-06, "loss": 0.6301, "step": 13851 }, { "epoch": 1.0007405133021474, "grad_norm": 5.545975217239029, "learning_rate": 2.6182730526090832e-06, "loss": 0.6632, "step": 13852 }, { "epoch": 1.000812758502357, "grad_norm": 5.635881876003406, "learning_rate": 2.6179808769289887e-06, "loss": 0.6333, "step": 13853 }, { "epoch": 1.0008850037025665, "grad_norm": 5.683978321396508, "learning_rate": 2.617688699633827e-06, "loss": 0.6202, "step": 13854 }, { "epoch": 1.000957248902776, "grad_norm": 8.589659449051725, "learning_rate": 2.6173965207275994e-06, "loss": 0.6568, "step": 13855 }, { "epoch": 1.0010294941029856, "grad_norm": 5.601869284769378, "learning_rate": 2.6171043402143035e-06, "loss": 0.6867, "step": 13856 }, { "epoch": 1.0011017393031951, "grad_norm": 5.885668219757754, "learning_rate": 2.61681215809794e-06, "loss": 0.6729, "step": 13857 }, { "epoch": 1.0011739845034044, "grad_norm": 5.644604914095447, "learning_rate": 2.61651997438251e-06, "loss": 0.7458, "step": 13858 }, { "epoch": 1.001246229703614, "grad_norm": 6.737513543223285, "learning_rate": 2.6162277890720113e-06, "loss": 0.7019, "step": 13859 }, { "epoch": 1.0013184749038235, "grad_norm": 6.766482257052757, "learning_rate": 2.6159356021704446e-06, "loss": 0.6685, "step": 13860 }, { "epoch": 1.001390720104033, "grad_norm": 6.349810932378469, "learning_rate": 2.615643413681809e-06, "loss": 0.7272, "step": 13861 }, { "epoch": 1.0014629653042426, "grad_norm": 6.355667420642494, "learning_rate": 2.615351223610105e-06, "loss": 0.759, "step": 13862 }, { "epoch": 1.0015352105044522, "grad_norm": 5.825325546663412, "learning_rate": 2.615059031959332e-06, "loss": 0.7097, "step": 13863 }, { "epoch": 1.0016074557046617, "grad_norm": 5.8075238328264875, "learning_rate": 2.6147668387334913e-06, "loss": 0.6466, "step": 13864 }, { "epoch": 1.0016797009048712, "grad_norm": 5.344359424484945, "learning_rate": 2.6144746439365814e-06, "loss": 0.7173, "step": 13865 }, { "epoch": 1.0017519461050806, "grad_norm": 6.194544625529449, "learning_rate": 2.6141824475726013e-06, "loss": 0.7129, "step": 13866 }, { "epoch": 1.00182419130529, "grad_norm": 5.850502070865471, "learning_rate": 2.6138902496455536e-06, "loss": 0.6939, "step": 13867 }, { "epoch": 1.0018964365054996, "grad_norm": 5.6900494908223616, "learning_rate": 2.613598050159436e-06, "loss": 0.6434, "step": 13868 }, { "epoch": 1.0019686817057092, "grad_norm": 6.27841684395089, "learning_rate": 2.61330584911825e-06, "loss": 0.6908, "step": 13869 }, { "epoch": 1.0020409269059187, "grad_norm": 6.400901385250453, "learning_rate": 2.6130136465259943e-06, "loss": 0.7206, "step": 13870 }, { "epoch": 1.0021131721061283, "grad_norm": 5.625428755632088, "learning_rate": 2.6127214423866693e-06, "loss": 0.6318, "step": 13871 }, { "epoch": 1.0021854173063378, "grad_norm": 5.7486593922713505, "learning_rate": 2.6124292367042752e-06, "loss": 0.6628, "step": 13872 }, { "epoch": 1.0022576625065471, "grad_norm": 5.934379962519318, "learning_rate": 2.612137029482813e-06, "loss": 0.7028, "step": 13873 }, { "epoch": 1.0023299077067567, "grad_norm": 5.56086861336495, "learning_rate": 2.611844820726282e-06, "loss": 0.6175, "step": 13874 }, { "epoch": 1.0024021529069662, "grad_norm": 6.2363792008752466, "learning_rate": 2.6115526104386817e-06, "loss": 0.6621, "step": 13875 }, { "epoch": 1.0024743981071758, "grad_norm": 7.171222864890596, "learning_rate": 2.6112603986240126e-06, "loss": 0.6592, "step": 13876 }, { "epoch": 1.0025466433073853, "grad_norm": 6.149190660271251, "learning_rate": 2.610968185286275e-06, "loss": 0.6979, "step": 13877 }, { "epoch": 1.0026188885075948, "grad_norm": 6.3193648374059075, "learning_rate": 2.61067597042947e-06, "loss": 0.6503, "step": 13878 }, { "epoch": 1.0026911337078044, "grad_norm": 6.81647424527333, "learning_rate": 2.610383754057596e-06, "loss": 0.6801, "step": 13879 }, { "epoch": 1.0027633789080137, "grad_norm": 6.362761474512696, "learning_rate": 2.6100915361746544e-06, "loss": 0.6224, "step": 13880 }, { "epoch": 1.0028356241082232, "grad_norm": 6.423157352768027, "learning_rate": 2.6097993167846448e-06, "loss": 0.7114, "step": 13881 }, { "epoch": 1.0029078693084328, "grad_norm": 5.657229849736875, "learning_rate": 2.609507095891568e-06, "loss": 0.6721, "step": 13882 }, { "epoch": 1.0029801145086423, "grad_norm": 6.345402244040316, "learning_rate": 2.609214873499425e-06, "loss": 0.7171, "step": 13883 }, { "epoch": 1.0030523597088519, "grad_norm": 6.0410642477028045, "learning_rate": 2.608922649612214e-06, "loss": 0.7278, "step": 13884 }, { "epoch": 1.0031246049090614, "grad_norm": 5.7336441946403, "learning_rate": 2.6086304242339367e-06, "loss": 0.6708, "step": 13885 }, { "epoch": 1.003196850109271, "grad_norm": 5.851350299160556, "learning_rate": 2.608338197368593e-06, "loss": 0.7443, "step": 13886 }, { "epoch": 1.0032690953094803, "grad_norm": 6.086443310776251, "learning_rate": 2.608045969020185e-06, "loss": 0.6344, "step": 13887 }, { "epoch": 1.0033413405096898, "grad_norm": 8.17443597463568, "learning_rate": 2.60775373919271e-06, "loss": 0.5964, "step": 13888 }, { "epoch": 1.0034135857098994, "grad_norm": 5.977562914125955, "learning_rate": 2.607461507890171e-06, "loss": 0.704, "step": 13889 }, { "epoch": 1.003485830910109, "grad_norm": 6.093457337222985, "learning_rate": 2.607169275116567e-06, "loss": 0.6939, "step": 13890 }, { "epoch": 1.0035580761103184, "grad_norm": 6.325125277639332, "learning_rate": 2.6068770408758983e-06, "loss": 0.6955, "step": 13891 }, { "epoch": 1.003630321310528, "grad_norm": 5.388138792044428, "learning_rate": 2.6065848051721666e-06, "loss": 0.6243, "step": 13892 }, { "epoch": 1.0037025665107375, "grad_norm": 7.962835293451983, "learning_rate": 2.606292568009371e-06, "loss": 0.6747, "step": 13893 }, { "epoch": 1.0037748117109468, "grad_norm": 5.576277226456629, "learning_rate": 2.606000329391513e-06, "loss": 0.6532, "step": 13894 }, { "epoch": 1.0038470569111564, "grad_norm": 6.633880421427627, "learning_rate": 2.605708089322593e-06, "loss": 0.7527, "step": 13895 }, { "epoch": 1.003919302111366, "grad_norm": 7.110568506016925, "learning_rate": 2.605415847806611e-06, "loss": 0.6748, "step": 13896 }, { "epoch": 1.0039915473115755, "grad_norm": 5.670694191501195, "learning_rate": 2.605123604847568e-06, "loss": 0.6796, "step": 13897 }, { "epoch": 1.004063792511785, "grad_norm": 7.823440337823407, "learning_rate": 2.604831360449465e-06, "loss": 0.618, "step": 13898 }, { "epoch": 1.0041360377119946, "grad_norm": 7.222348327839938, "learning_rate": 2.6045391146163016e-06, "loss": 0.7067, "step": 13899 }, { "epoch": 1.004208282912204, "grad_norm": 5.9657636431514165, "learning_rate": 2.604246867352079e-06, "loss": 0.6709, "step": 13900 }, { "epoch": 1.0042805281124136, "grad_norm": 5.977834768074834, "learning_rate": 2.603954618660798e-06, "loss": 0.7054, "step": 13901 }, { "epoch": 1.004352773312623, "grad_norm": 6.174891717247077, "learning_rate": 2.6036623685464587e-06, "loss": 0.6557, "step": 13902 }, { "epoch": 1.0044250185128325, "grad_norm": 7.724708635582229, "learning_rate": 2.603370117013062e-06, "loss": 0.6287, "step": 13903 }, { "epoch": 1.004497263713042, "grad_norm": 6.759441977498436, "learning_rate": 2.6030778640646077e-06, "loss": 0.7692, "step": 13904 }, { "epoch": 1.0045695089132516, "grad_norm": 6.442918589519184, "learning_rate": 2.602785609705099e-06, "loss": 0.647, "step": 13905 }, { "epoch": 1.0046417541134611, "grad_norm": 6.051066520571142, "learning_rate": 2.6024933539385345e-06, "loss": 0.6599, "step": 13906 }, { "epoch": 1.0047139993136707, "grad_norm": 7.258258719631954, "learning_rate": 2.6022010967689158e-06, "loss": 0.5928, "step": 13907 }, { "epoch": 1.0047862445138802, "grad_norm": 5.944881949957105, "learning_rate": 2.6019088382002434e-06, "loss": 0.6315, "step": 13908 }, { "epoch": 1.0048584897140895, "grad_norm": 6.148956781001872, "learning_rate": 2.601616578236518e-06, "loss": 0.67, "step": 13909 }, { "epoch": 1.004930734914299, "grad_norm": 5.4813467506560585, "learning_rate": 2.6013243168817402e-06, "loss": 0.6557, "step": 13910 }, { "epoch": 1.0050029801145086, "grad_norm": 5.267389652669694, "learning_rate": 2.6010320541399114e-06, "loss": 0.6514, "step": 13911 }, { "epoch": 1.0050752253147182, "grad_norm": 7.2473315885937835, "learning_rate": 2.6007397900150328e-06, "loss": 0.5826, "step": 13912 }, { "epoch": 1.0051474705149277, "grad_norm": 6.857357901652605, "learning_rate": 2.6004475245111036e-06, "loss": 0.58, "step": 13913 }, { "epoch": 1.0052197157151372, "grad_norm": 7.148058338219673, "learning_rate": 2.6001552576321266e-06, "loss": 0.6757, "step": 13914 }, { "epoch": 1.0052919609153468, "grad_norm": 6.268604207649275, "learning_rate": 2.599862989382102e-06, "loss": 0.6047, "step": 13915 }, { "epoch": 1.005364206115556, "grad_norm": 7.324958035605984, "learning_rate": 2.5995707197650307e-06, "loss": 0.6838, "step": 13916 }, { "epoch": 1.0054364513157656, "grad_norm": 7.02122739446287, "learning_rate": 2.5992784487849132e-06, "loss": 0.6435, "step": 13917 }, { "epoch": 1.0055086965159752, "grad_norm": 5.478314390243555, "learning_rate": 2.59898617644575e-06, "loss": 0.6437, "step": 13918 }, { "epoch": 1.0055809417161847, "grad_norm": 6.642508349659258, "learning_rate": 2.598693902751544e-06, "loss": 0.6593, "step": 13919 }, { "epoch": 1.0056531869163943, "grad_norm": 6.361837524219737, "learning_rate": 2.5984016277062947e-06, "loss": 0.6486, "step": 13920 }, { "epoch": 1.0057254321166038, "grad_norm": 5.854373190643236, "learning_rate": 2.598109351314004e-06, "loss": 0.7044, "step": 13921 }, { "epoch": 1.0057976773168134, "grad_norm": 6.763833740483017, "learning_rate": 2.5978170735786717e-06, "loss": 0.6848, "step": 13922 }, { "epoch": 1.0058699225170227, "grad_norm": 7.501790913697995, "learning_rate": 2.5975247945042998e-06, "loss": 0.6726, "step": 13923 }, { "epoch": 1.0059421677172322, "grad_norm": 6.005170501651577, "learning_rate": 2.59723251409489e-06, "loss": 0.6791, "step": 13924 }, { "epoch": 1.0060144129174418, "grad_norm": 7.305006573078141, "learning_rate": 2.5969402323544417e-06, "loss": 0.7246, "step": 13925 }, { "epoch": 1.0060866581176513, "grad_norm": 6.815336704234784, "learning_rate": 2.5966479492869574e-06, "loss": 0.6805, "step": 13926 }, { "epoch": 1.0061589033178608, "grad_norm": 7.378389775632102, "learning_rate": 2.5963556648964373e-06, "loss": 0.6593, "step": 13927 }, { "epoch": 1.0062311485180704, "grad_norm": 5.79500119753176, "learning_rate": 2.596063379186883e-06, "loss": 0.6836, "step": 13928 }, { "epoch": 1.00630339371828, "grad_norm": 5.369226770691689, "learning_rate": 2.5957710921622953e-06, "loss": 0.6012, "step": 13929 }, { "epoch": 1.0063756389184892, "grad_norm": 5.923624308746452, "learning_rate": 2.5954788038266765e-06, "loss": 0.6682, "step": 13930 }, { "epoch": 1.0064478841186988, "grad_norm": 6.425688639150257, "learning_rate": 2.595186514184027e-06, "loss": 0.6912, "step": 13931 }, { "epoch": 1.0065201293189083, "grad_norm": 7.802938484892947, "learning_rate": 2.5948942232383477e-06, "loss": 0.7839, "step": 13932 }, { "epoch": 1.0065923745191179, "grad_norm": 5.672466152690377, "learning_rate": 2.59460193099364e-06, "loss": 0.6844, "step": 13933 }, { "epoch": 1.0066646197193274, "grad_norm": 6.350585562072301, "learning_rate": 2.5943096374539055e-06, "loss": 0.6862, "step": 13934 }, { "epoch": 1.006736864919537, "grad_norm": 6.575309821542068, "learning_rate": 2.5940173426231457e-06, "loss": 0.7065, "step": 13935 }, { "epoch": 1.0068091101197465, "grad_norm": 7.457710311128798, "learning_rate": 2.5937250465053605e-06, "loss": 0.622, "step": 13936 }, { "epoch": 1.006881355319956, "grad_norm": 7.414331075548998, "learning_rate": 2.5934327491045524e-06, "loss": 0.7146, "step": 13937 }, { "epoch": 1.0069536005201654, "grad_norm": 6.9113573892480815, "learning_rate": 2.5931404504247233e-06, "loss": 0.6393, "step": 13938 }, { "epoch": 1.007025845720375, "grad_norm": 6.611765472055909, "learning_rate": 2.5928481504698733e-06, "loss": 0.6422, "step": 13939 }, { "epoch": 1.0070980909205844, "grad_norm": 5.7567251930876875, "learning_rate": 2.5925558492440046e-06, "loss": 0.6081, "step": 13940 }, { "epoch": 1.007170336120794, "grad_norm": 7.949570016808987, "learning_rate": 2.5922635467511177e-06, "loss": 0.6913, "step": 13941 }, { "epoch": 1.0072425813210035, "grad_norm": 6.641204018874326, "learning_rate": 2.591971242995214e-06, "loss": 0.7103, "step": 13942 }, { "epoch": 1.007314826521213, "grad_norm": 6.23852339855642, "learning_rate": 2.591678937980296e-06, "loss": 0.6664, "step": 13943 }, { "epoch": 1.0073870717214226, "grad_norm": 6.770984949223945, "learning_rate": 2.591386631710365e-06, "loss": 0.6882, "step": 13944 }, { "epoch": 1.007459316921632, "grad_norm": 5.8052848416150225, "learning_rate": 2.5910943241894214e-06, "loss": 0.6496, "step": 13945 }, { "epoch": 1.0075315621218415, "grad_norm": 6.0364502936093825, "learning_rate": 2.5908020154214675e-06, "loss": 0.7116, "step": 13946 }, { "epoch": 1.007603807322051, "grad_norm": 5.75369807201475, "learning_rate": 2.590509705410504e-06, "loss": 0.7014, "step": 13947 }, { "epoch": 1.0076760525222606, "grad_norm": 6.319720378436145, "learning_rate": 2.590217394160533e-06, "loss": 0.6425, "step": 13948 }, { "epoch": 1.00774829772247, "grad_norm": 5.522237903735595, "learning_rate": 2.589925081675557e-06, "loss": 0.6496, "step": 13949 }, { "epoch": 1.0078205429226796, "grad_norm": 6.271194532094743, "learning_rate": 2.589632767959575e-06, "loss": 0.6825, "step": 13950 }, { "epoch": 1.0078927881228892, "grad_norm": 5.782765452768968, "learning_rate": 2.5893404530165904e-06, "loss": 0.6372, "step": 13951 }, { "epoch": 1.0079650333230985, "grad_norm": 6.503575148760578, "learning_rate": 2.5890481368506043e-06, "loss": 0.7906, "step": 13952 }, { "epoch": 1.008037278523308, "grad_norm": 6.498628911884029, "learning_rate": 2.588755819465619e-06, "loss": 0.6875, "step": 13953 }, { "epoch": 1.0081095237235176, "grad_norm": 5.907420854720371, "learning_rate": 2.588463500865635e-06, "loss": 0.6442, "step": 13954 }, { "epoch": 1.0081817689237271, "grad_norm": 6.517017682208238, "learning_rate": 2.5881711810546552e-06, "loss": 0.7003, "step": 13955 }, { "epoch": 1.0082540141239367, "grad_norm": 5.483389480597093, "learning_rate": 2.587878860036679e-06, "loss": 0.5841, "step": 13956 }, { "epoch": 1.0083262593241462, "grad_norm": 8.229431869160845, "learning_rate": 2.58758653781571e-06, "loss": 0.7299, "step": 13957 }, { "epoch": 1.0083985045243558, "grad_norm": 6.664978608356782, "learning_rate": 2.5872942143957496e-06, "loss": 0.6554, "step": 13958 }, { "epoch": 1.008470749724565, "grad_norm": 6.6770408339395155, "learning_rate": 2.5870018897807987e-06, "loss": 0.6156, "step": 13959 }, { "epoch": 1.0085429949247746, "grad_norm": 6.898932675726658, "learning_rate": 2.58670956397486e-06, "loss": 0.6776, "step": 13960 }, { "epoch": 1.0086152401249842, "grad_norm": 4.872113277928659, "learning_rate": 2.586417236981934e-06, "loss": 0.6334, "step": 13961 }, { "epoch": 1.0086874853251937, "grad_norm": 6.749957331769801, "learning_rate": 2.5861249088060237e-06, "loss": 0.7138, "step": 13962 }, { "epoch": 1.0087597305254032, "grad_norm": 5.58984562669212, "learning_rate": 2.58583257945113e-06, "loss": 0.6758, "step": 13963 }, { "epoch": 1.0088319757256128, "grad_norm": 5.487435467803304, "learning_rate": 2.5855402489212554e-06, "loss": 0.66, "step": 13964 }, { "epoch": 1.0089042209258223, "grad_norm": 5.4788141560263215, "learning_rate": 2.585247917220401e-06, "loss": 0.6015, "step": 13965 }, { "epoch": 1.0089764661260316, "grad_norm": 7.034885941416551, "learning_rate": 2.584955584352568e-06, "loss": 0.6395, "step": 13966 }, { "epoch": 1.0090487113262412, "grad_norm": 6.358422960255468, "learning_rate": 2.58466325032176e-06, "loss": 0.6941, "step": 13967 }, { "epoch": 1.0091209565264507, "grad_norm": 8.322981342801219, "learning_rate": 2.5843709151319773e-06, "loss": 0.7364, "step": 13968 }, { "epoch": 1.0091932017266603, "grad_norm": 7.738676597359243, "learning_rate": 2.584078578787223e-06, "loss": 0.6818, "step": 13969 }, { "epoch": 1.0092654469268698, "grad_norm": 8.011129267379966, "learning_rate": 2.5837862412914976e-06, "loss": 0.6852, "step": 13970 }, { "epoch": 1.0093376921270794, "grad_norm": 6.849486015694843, "learning_rate": 2.5834939026488035e-06, "loss": 0.6755, "step": 13971 }, { "epoch": 1.009409937327289, "grad_norm": 6.067654650100058, "learning_rate": 2.583201562863143e-06, "loss": 0.6384, "step": 13972 }, { "epoch": 1.0094821825274984, "grad_norm": 6.813810729877787, "learning_rate": 2.5829092219385178e-06, "loss": 0.7113, "step": 13973 }, { "epoch": 1.0095544277277078, "grad_norm": 6.338629699781118, "learning_rate": 2.58261687987893e-06, "loss": 0.653, "step": 13974 }, { "epoch": 1.0096266729279173, "grad_norm": 6.758773470781937, "learning_rate": 2.58232453668838e-06, "loss": 0.7055, "step": 13975 }, { "epoch": 1.0096989181281268, "grad_norm": 7.229247548056712, "learning_rate": 2.5820321923708724e-06, "loss": 0.6839, "step": 13976 }, { "epoch": 1.0097711633283364, "grad_norm": 6.097436973055225, "learning_rate": 2.5817398469304074e-06, "loss": 0.6392, "step": 13977 }, { "epoch": 1.009843408528546, "grad_norm": 5.271802408547349, "learning_rate": 2.581447500370987e-06, "loss": 0.7137, "step": 13978 }, { "epoch": 1.0099156537287555, "grad_norm": 5.671047013643691, "learning_rate": 2.5811551526966138e-06, "loss": 0.636, "step": 13979 }, { "epoch": 1.009987898928965, "grad_norm": 6.961074776371557, "learning_rate": 2.5808628039112893e-06, "loss": 0.6431, "step": 13980 }, { "epoch": 1.0100601441291743, "grad_norm": 6.351244478604327, "learning_rate": 2.5805704540190164e-06, "loss": 0.6933, "step": 13981 }, { "epoch": 1.0101323893293839, "grad_norm": 6.218403408801157, "learning_rate": 2.580278103023796e-06, "loss": 0.7274, "step": 13982 }, { "epoch": 1.0102046345295934, "grad_norm": 6.596346231047421, "learning_rate": 2.579985750929631e-06, "loss": 0.6497, "step": 13983 }, { "epoch": 1.010276879729803, "grad_norm": 5.607335817107828, "learning_rate": 2.579693397740523e-06, "loss": 0.6982, "step": 13984 }, { "epoch": 1.0103491249300125, "grad_norm": 6.846020465629672, "learning_rate": 2.5794010434604745e-06, "loss": 0.6764, "step": 13985 }, { "epoch": 1.010421370130222, "grad_norm": 6.103202491967556, "learning_rate": 2.5791086880934868e-06, "loss": 0.7011, "step": 13986 }, { "epoch": 1.0104936153304316, "grad_norm": 7.369707697916816, "learning_rate": 2.578816331643563e-06, "loss": 0.7009, "step": 13987 }, { "epoch": 1.010565860530641, "grad_norm": 8.074920313769695, "learning_rate": 2.578523974114705e-06, "loss": 0.6933, "step": 13988 }, { "epoch": 1.0106381057308504, "grad_norm": 6.036842401994891, "learning_rate": 2.5782316155109143e-06, "loss": 0.6322, "step": 13989 }, { "epoch": 1.01071035093106, "grad_norm": 6.040929429246846, "learning_rate": 2.5779392558361944e-06, "loss": 0.6891, "step": 13990 }, { "epoch": 1.0107825961312695, "grad_norm": 5.391241154494305, "learning_rate": 2.5776468950945455e-06, "loss": 0.6201, "step": 13991 }, { "epoch": 1.010854841331479, "grad_norm": 6.582631938929003, "learning_rate": 2.577354533289972e-06, "loss": 0.6639, "step": 13992 }, { "epoch": 1.0109270865316886, "grad_norm": 5.969833834842055, "learning_rate": 2.5770621704264735e-06, "loss": 0.6801, "step": 13993 }, { "epoch": 1.0109993317318982, "grad_norm": 5.905771710441372, "learning_rate": 2.576769806508055e-06, "loss": 0.6722, "step": 13994 }, { "epoch": 1.0110715769321075, "grad_norm": 6.322364577731955, "learning_rate": 2.576477441538717e-06, "loss": 0.6357, "step": 13995 }, { "epoch": 1.011143822132317, "grad_norm": 6.083558936268695, "learning_rate": 2.576185075522462e-06, "loss": 0.6332, "step": 13996 }, { "epoch": 1.0112160673325266, "grad_norm": 6.9475085945529225, "learning_rate": 2.5758927084632936e-06, "loss": 0.6844, "step": 13997 }, { "epoch": 1.011288312532736, "grad_norm": 7.818334735237841, "learning_rate": 2.5756003403652117e-06, "loss": 0.6839, "step": 13998 }, { "epoch": 1.0113605577329456, "grad_norm": 5.643245352554756, "learning_rate": 2.57530797123222e-06, "loss": 0.5942, "step": 13999 }, { "epoch": 1.0114328029331552, "grad_norm": 6.182113306375202, "learning_rate": 2.575015601068321e-06, "loss": 0.623, "step": 14000 }, { "epoch": 1.0115050481333647, "grad_norm": 6.661266746678905, "learning_rate": 2.5747232298775164e-06, "loss": 0.6942, "step": 14001 }, { "epoch": 1.011577293333574, "grad_norm": 6.248524300882625, "learning_rate": 2.574430857663809e-06, "loss": 0.6471, "step": 14002 }, { "epoch": 1.0116495385337836, "grad_norm": 6.250494365213868, "learning_rate": 2.5741384844312007e-06, "loss": 0.6528, "step": 14003 }, { "epoch": 1.0117217837339931, "grad_norm": 6.035182481233448, "learning_rate": 2.5738461101836943e-06, "loss": 0.602, "step": 14004 }, { "epoch": 1.0117940289342027, "grad_norm": 5.4462219091868125, "learning_rate": 2.573553734925292e-06, "loss": 0.6744, "step": 14005 }, { "epoch": 1.0118662741344122, "grad_norm": 7.28064276655834, "learning_rate": 2.5732613586599964e-06, "loss": 0.6918, "step": 14006 }, { "epoch": 1.0119385193346218, "grad_norm": 5.39180647576891, "learning_rate": 2.5729689813918097e-06, "loss": 0.656, "step": 14007 }, { "epoch": 1.0120107645348313, "grad_norm": 6.524172804274063, "learning_rate": 2.5726766031247335e-06, "loss": 0.751, "step": 14008 }, { "epoch": 1.0120830097350408, "grad_norm": 6.883098756550834, "learning_rate": 2.572384223862772e-06, "loss": 0.6494, "step": 14009 }, { "epoch": 1.0121552549352502, "grad_norm": 7.173824402735217, "learning_rate": 2.572091843609926e-06, "loss": 0.675, "step": 14010 }, { "epoch": 1.0122275001354597, "grad_norm": 6.2661019997475496, "learning_rate": 2.5717994623701988e-06, "loss": 0.7045, "step": 14011 }, { "epoch": 1.0122997453356692, "grad_norm": 5.953592757623989, "learning_rate": 2.5715070801475935e-06, "loss": 0.6357, "step": 14012 }, { "epoch": 1.0123719905358788, "grad_norm": 7.457346362514471, "learning_rate": 2.5712146969461104e-06, "loss": 0.7232, "step": 14013 }, { "epoch": 1.0124442357360883, "grad_norm": 5.8089276893122195, "learning_rate": 2.570922312769754e-06, "loss": 0.6223, "step": 14014 }, { "epoch": 1.0125164809362979, "grad_norm": 6.424634503013728, "learning_rate": 2.5706299276225267e-06, "loss": 0.6882, "step": 14015 }, { "epoch": 1.0125887261365074, "grad_norm": 6.03533702196903, "learning_rate": 2.5703375415084297e-06, "loss": 0.6341, "step": 14016 }, { "epoch": 1.0126609713367167, "grad_norm": 5.516547712136488, "learning_rate": 2.570045154431467e-06, "loss": 0.6284, "step": 14017 }, { "epoch": 1.0127332165369263, "grad_norm": 7.081414744522139, "learning_rate": 2.56975276639564e-06, "loss": 0.7753, "step": 14018 }, { "epoch": 1.0128054617371358, "grad_norm": 6.5688007443516, "learning_rate": 2.5694603774049528e-06, "loss": 0.6552, "step": 14019 }, { "epoch": 1.0128777069373454, "grad_norm": 7.0042531852514776, "learning_rate": 2.5691679874634057e-06, "loss": 0.7464, "step": 14020 }, { "epoch": 1.012949952137555, "grad_norm": 8.707684019664937, "learning_rate": 2.5688755965750033e-06, "loss": 0.7698, "step": 14021 }, { "epoch": 1.0130221973377644, "grad_norm": 7.106384855817629, "learning_rate": 2.5685832047437474e-06, "loss": 0.6345, "step": 14022 }, { "epoch": 1.013094442537974, "grad_norm": 5.8550742680036, "learning_rate": 2.5682908119736405e-06, "loss": 0.6754, "step": 14023 }, { "epoch": 1.0131666877381833, "grad_norm": 6.6807976051235824, "learning_rate": 2.567998418268686e-06, "loss": 0.6658, "step": 14024 }, { "epoch": 1.0132389329383928, "grad_norm": 5.726621120095602, "learning_rate": 2.5677060236328847e-06, "loss": 0.6949, "step": 14025 }, { "epoch": 1.0133111781386024, "grad_norm": 5.743608986469494, "learning_rate": 2.5674136280702424e-06, "loss": 0.6404, "step": 14026 }, { "epoch": 1.013383423338812, "grad_norm": 5.990926557660936, "learning_rate": 2.567121231584758e-06, "loss": 0.6562, "step": 14027 }, { "epoch": 1.0134556685390215, "grad_norm": 6.351555593188896, "learning_rate": 2.5668288341804366e-06, "loss": 0.6591, "step": 14028 }, { "epoch": 1.013527913739231, "grad_norm": 6.694207366220656, "learning_rate": 2.5665364358612805e-06, "loss": 0.7031, "step": 14029 }, { "epoch": 1.0136001589394406, "grad_norm": 7.62933251537295, "learning_rate": 2.5662440366312926e-06, "loss": 0.6661, "step": 14030 }, { "epoch": 1.0136724041396499, "grad_norm": 5.815810378623723, "learning_rate": 2.5659516364944754e-06, "loss": 0.683, "step": 14031 }, { "epoch": 1.0137446493398594, "grad_norm": 9.013480369376783, "learning_rate": 2.56565923545483e-06, "loss": 0.7155, "step": 14032 }, { "epoch": 1.013816894540069, "grad_norm": 7.341118004078445, "learning_rate": 2.565366833516362e-06, "loss": 0.6627, "step": 14033 }, { "epoch": 1.0138891397402785, "grad_norm": 6.439984777207479, "learning_rate": 2.5650744306830728e-06, "loss": 0.6438, "step": 14034 }, { "epoch": 1.013961384940488, "grad_norm": 6.115388140333661, "learning_rate": 2.564782026958965e-06, "loss": 0.6232, "step": 14035 }, { "epoch": 1.0140336301406976, "grad_norm": 5.74901273793488, "learning_rate": 2.5644896223480416e-06, "loss": 0.6694, "step": 14036 }, { "epoch": 1.0141058753409071, "grad_norm": 6.594051697685426, "learning_rate": 2.5641972168543043e-06, "loss": 0.6655, "step": 14037 }, { "epoch": 1.0141781205411164, "grad_norm": 6.510364778439058, "learning_rate": 2.5639048104817576e-06, "loss": 0.6817, "step": 14038 }, { "epoch": 1.014250365741326, "grad_norm": 7.16351748923694, "learning_rate": 2.5636124032344046e-06, "loss": 0.7354, "step": 14039 }, { "epoch": 1.0143226109415355, "grad_norm": 6.084868703286084, "learning_rate": 2.5633199951162467e-06, "loss": 0.6647, "step": 14040 }, { "epoch": 1.014394856141745, "grad_norm": 6.31247461899291, "learning_rate": 2.563027586131287e-06, "loss": 0.7324, "step": 14041 }, { "epoch": 1.0144671013419546, "grad_norm": 5.899237774250049, "learning_rate": 2.5627351762835284e-06, "loss": 0.6415, "step": 14042 }, { "epoch": 1.0145393465421642, "grad_norm": 5.715044332283933, "learning_rate": 2.562442765576974e-06, "loss": 0.6213, "step": 14043 }, { "epoch": 1.0146115917423737, "grad_norm": 5.5116190857396035, "learning_rate": 2.562150354015627e-06, "loss": 0.6764, "step": 14044 }, { "epoch": 1.014683836942583, "grad_norm": 7.076569982159602, "learning_rate": 2.5618579416034896e-06, "loss": 0.6839, "step": 14045 }, { "epoch": 1.0147560821427926, "grad_norm": 6.5044895219879, "learning_rate": 2.5615655283445646e-06, "loss": 0.6557, "step": 14046 }, { "epoch": 1.014828327343002, "grad_norm": 6.584570981636316, "learning_rate": 2.5612731142428567e-06, "loss": 0.6394, "step": 14047 }, { "epoch": 1.0149005725432116, "grad_norm": 4.840539274201573, "learning_rate": 2.5609806993023663e-06, "loss": 0.6765, "step": 14048 }, { "epoch": 1.0149728177434212, "grad_norm": 8.373459403836344, "learning_rate": 2.560688283527098e-06, "loss": 0.6888, "step": 14049 }, { "epoch": 1.0150450629436307, "grad_norm": 7.8061040713473115, "learning_rate": 2.560395866921053e-06, "loss": 0.7696, "step": 14050 }, { "epoch": 1.0151173081438403, "grad_norm": 8.066080880241753, "learning_rate": 2.5601034494882365e-06, "loss": 0.702, "step": 14051 }, { "epoch": 1.0151895533440498, "grad_norm": 5.6459798570976965, "learning_rate": 2.5598110312326502e-06, "loss": 0.6219, "step": 14052 }, { "epoch": 1.0152617985442591, "grad_norm": 6.883537955702551, "learning_rate": 2.559518612158298e-06, "loss": 0.7319, "step": 14053 }, { "epoch": 1.0153340437444687, "grad_norm": 5.411609440720164, "learning_rate": 2.5592261922691813e-06, "loss": 0.6467, "step": 14054 }, { "epoch": 1.0154062889446782, "grad_norm": 8.306270703246161, "learning_rate": 2.5589337715693052e-06, "loss": 0.7241, "step": 14055 }, { "epoch": 1.0154785341448878, "grad_norm": 5.974850717379532, "learning_rate": 2.5586413500626705e-06, "loss": 0.6388, "step": 14056 }, { "epoch": 1.0155507793450973, "grad_norm": 6.169539461182328, "learning_rate": 2.5583489277532815e-06, "loss": 0.7233, "step": 14057 }, { "epoch": 1.0156230245453068, "grad_norm": 5.730802582194224, "learning_rate": 2.5580565046451413e-06, "loss": 0.5937, "step": 14058 }, { "epoch": 1.0156952697455164, "grad_norm": 5.931907188095882, "learning_rate": 2.5577640807422516e-06, "loss": 0.6492, "step": 14059 }, { "epoch": 1.0157675149457257, "grad_norm": 5.773736473673841, "learning_rate": 2.5574716560486173e-06, "loss": 0.7336, "step": 14060 }, { "epoch": 1.0158397601459352, "grad_norm": 5.613999014924673, "learning_rate": 2.55717923056824e-06, "loss": 0.6852, "step": 14061 }, { "epoch": 1.0159120053461448, "grad_norm": 5.720245332591869, "learning_rate": 2.556886804305124e-06, "loss": 0.6721, "step": 14062 }, { "epoch": 1.0159842505463543, "grad_norm": 7.675257665428106, "learning_rate": 2.5565943772632727e-06, "loss": 0.728, "step": 14063 }, { "epoch": 1.0160564957465639, "grad_norm": 7.181512079859911, "learning_rate": 2.5563019494466872e-06, "loss": 0.6377, "step": 14064 }, { "epoch": 1.0161287409467734, "grad_norm": 6.442316124053238, "learning_rate": 2.5560095208593717e-06, "loss": 0.6847, "step": 14065 }, { "epoch": 1.016200986146983, "grad_norm": 5.502359057731085, "learning_rate": 2.555717091505329e-06, "loss": 0.6491, "step": 14066 }, { "epoch": 1.0162732313471923, "grad_norm": 5.7052920888069805, "learning_rate": 2.555424661388564e-06, "loss": 0.6474, "step": 14067 }, { "epoch": 1.0163454765474018, "grad_norm": 5.1570200951913145, "learning_rate": 2.555132230513077e-06, "loss": 0.6446, "step": 14068 }, { "epoch": 1.0164177217476114, "grad_norm": 8.067485362141005, "learning_rate": 2.5548397988828737e-06, "loss": 0.7901, "step": 14069 }, { "epoch": 1.016489966947821, "grad_norm": 7.711258561736666, "learning_rate": 2.554547366501955e-06, "loss": 0.6804, "step": 14070 }, { "epoch": 1.0165622121480304, "grad_norm": 8.908286413979745, "learning_rate": 2.554254933374326e-06, "loss": 0.6982, "step": 14071 }, { "epoch": 1.01663445734824, "grad_norm": 6.796333379102466, "learning_rate": 2.5539624995039893e-06, "loss": 0.6559, "step": 14072 }, { "epoch": 1.0167067025484495, "grad_norm": 5.957016841683395, "learning_rate": 2.5536700648949475e-06, "loss": 0.6732, "step": 14073 }, { "epoch": 1.0167789477486588, "grad_norm": 8.560684791339982, "learning_rate": 2.5533776295512038e-06, "loss": 0.622, "step": 14074 }, { "epoch": 1.0168511929488684, "grad_norm": 7.206104382303406, "learning_rate": 2.553085193476762e-06, "loss": 0.6627, "step": 14075 }, { "epoch": 1.016923438149078, "grad_norm": 6.0009377064700145, "learning_rate": 2.5527927566756256e-06, "loss": 0.599, "step": 14076 }, { "epoch": 1.0169956833492875, "grad_norm": 8.769698721257253, "learning_rate": 2.5525003191517965e-06, "loss": 0.6317, "step": 14077 }, { "epoch": 1.017067928549497, "grad_norm": 6.914075189783553, "learning_rate": 2.55220788090928e-06, "loss": 0.7144, "step": 14078 }, { "epoch": 1.0171401737497066, "grad_norm": 6.861889910373806, "learning_rate": 2.551915441952077e-06, "loss": 0.6242, "step": 14079 }, { "epoch": 1.017212418949916, "grad_norm": 6.8091234711281565, "learning_rate": 2.5516230022841927e-06, "loss": 0.6008, "step": 14080 }, { "epoch": 1.0172846641501254, "grad_norm": 7.196597503334475, "learning_rate": 2.551330561909629e-06, "loss": 0.6907, "step": 14081 }, { "epoch": 1.017356909350335, "grad_norm": 7.097931154813126, "learning_rate": 2.55103812083239e-06, "loss": 0.6493, "step": 14082 }, { "epoch": 1.0174291545505445, "grad_norm": 6.201334889589644, "learning_rate": 2.5507456790564795e-06, "loss": 0.6713, "step": 14083 }, { "epoch": 1.017501399750754, "grad_norm": 7.003404879137222, "learning_rate": 2.550453236585898e-06, "loss": 0.7026, "step": 14084 }, { "epoch": 1.0175736449509636, "grad_norm": 7.644569673670682, "learning_rate": 2.5501607934246525e-06, "loss": 0.6598, "step": 14085 }, { "epoch": 1.0176458901511731, "grad_norm": 7.7681843705931275, "learning_rate": 2.5498683495767445e-06, "loss": 0.6799, "step": 14086 }, { "epoch": 1.0177181353513827, "grad_norm": 6.2105063234768, "learning_rate": 2.5495759050461775e-06, "loss": 0.6507, "step": 14087 }, { "epoch": 1.0177903805515922, "grad_norm": 5.192569323955341, "learning_rate": 2.5492834598369547e-06, "loss": 0.643, "step": 14088 }, { "epoch": 1.0178626257518015, "grad_norm": 7.085289299872703, "learning_rate": 2.5489910139530793e-06, "loss": 0.7004, "step": 14089 }, { "epoch": 1.017934870952011, "grad_norm": 6.7583507169162065, "learning_rate": 2.548698567398556e-06, "loss": 0.7094, "step": 14090 }, { "epoch": 1.0180071161522206, "grad_norm": 5.984864536113161, "learning_rate": 2.548406120177386e-06, "loss": 0.7383, "step": 14091 }, { "epoch": 1.0180793613524302, "grad_norm": 7.014980091112942, "learning_rate": 2.5481136722935747e-06, "loss": 0.7333, "step": 14092 }, { "epoch": 1.0181516065526397, "grad_norm": 6.514717752427869, "learning_rate": 2.5478212237511242e-06, "loss": 0.6567, "step": 14093 }, { "epoch": 1.0182238517528492, "grad_norm": 7.573280658046228, "learning_rate": 2.5475287745540376e-06, "loss": 0.6535, "step": 14094 }, { "epoch": 1.0182960969530588, "grad_norm": 7.550229545286914, "learning_rate": 2.547236324706319e-06, "loss": 0.7245, "step": 14095 }, { "epoch": 1.018368342153268, "grad_norm": 7.245079475425001, "learning_rate": 2.5469438742119728e-06, "loss": 0.6972, "step": 14096 }, { "epoch": 1.0184405873534776, "grad_norm": 6.489172500497343, "learning_rate": 2.5466514230750016e-06, "loss": 0.5927, "step": 14097 }, { "epoch": 1.0185128325536872, "grad_norm": 6.3587187257345965, "learning_rate": 2.5463589712994073e-06, "loss": 0.6784, "step": 14098 }, { "epoch": 1.0185850777538967, "grad_norm": 7.794205875722959, "learning_rate": 2.546066518889196e-06, "loss": 0.674, "step": 14099 }, { "epoch": 1.0186573229541063, "grad_norm": 6.3594154900238955, "learning_rate": 2.545774065848369e-06, "loss": 0.7155, "step": 14100 }, { "epoch": 1.0187295681543158, "grad_norm": 7.174805152719633, "learning_rate": 2.5454816121809307e-06, "loss": 0.6804, "step": 14101 }, { "epoch": 1.0188018133545254, "grad_norm": 8.295249535484574, "learning_rate": 2.5451891578908844e-06, "loss": 0.7042, "step": 14102 }, { "epoch": 1.0188740585547347, "grad_norm": 6.953927051704647, "learning_rate": 2.5448967029822335e-06, "loss": 0.6696, "step": 14103 }, { "epoch": 1.0189463037549442, "grad_norm": 6.168023793492519, "learning_rate": 2.544604247458982e-06, "loss": 0.704, "step": 14104 }, { "epoch": 1.0190185489551538, "grad_norm": 5.510384552927552, "learning_rate": 2.544311791325133e-06, "loss": 0.622, "step": 14105 }, { "epoch": 1.0190907941553633, "grad_norm": 6.74911097570653, "learning_rate": 2.5440193345846905e-06, "loss": 0.6627, "step": 14106 }, { "epoch": 1.0191630393555728, "grad_norm": 5.6602142099344395, "learning_rate": 2.5437268772416563e-06, "loss": 0.6635, "step": 14107 }, { "epoch": 1.0192352845557824, "grad_norm": 5.5822178120065855, "learning_rate": 2.5434344193000353e-06, "loss": 0.6777, "step": 14108 }, { "epoch": 1.019307529755992, "grad_norm": 6.780915845381687, "learning_rate": 2.5431419607638306e-06, "loss": 0.75, "step": 14109 }, { "epoch": 1.0193797749562012, "grad_norm": 5.936822471109171, "learning_rate": 2.542849501637047e-06, "loss": 0.6417, "step": 14110 }, { "epoch": 1.0194520201564108, "grad_norm": 7.282861621284273, "learning_rate": 2.542557041923687e-06, "loss": 0.6594, "step": 14111 }, { "epoch": 1.0195242653566203, "grad_norm": 6.198003312047545, "learning_rate": 2.542264581627753e-06, "loss": 0.6748, "step": 14112 }, { "epoch": 1.0195965105568299, "grad_norm": 6.946899370089817, "learning_rate": 2.5419721207532504e-06, "loss": 0.7079, "step": 14113 }, { "epoch": 1.0196687557570394, "grad_norm": 6.750152868729767, "learning_rate": 2.541679659304182e-06, "loss": 0.6614, "step": 14114 }, { "epoch": 1.019741000957249, "grad_norm": 6.325540198375909, "learning_rate": 2.541387197284552e-06, "loss": 0.6699, "step": 14115 }, { "epoch": 1.0198132461574585, "grad_norm": 10.24280127832927, "learning_rate": 2.541094734698362e-06, "loss": 0.638, "step": 14116 }, { "epoch": 1.0198854913576678, "grad_norm": 6.584779829951458, "learning_rate": 2.540802271549618e-06, "loss": 0.6992, "step": 14117 }, { "epoch": 1.0199577365578774, "grad_norm": 6.029044108626966, "learning_rate": 2.540509807842322e-06, "loss": 0.649, "step": 14118 }, { "epoch": 1.020029981758087, "grad_norm": 6.277930146265305, "learning_rate": 2.540217343580479e-06, "loss": 0.6894, "step": 14119 }, { "epoch": 1.0201022269582964, "grad_norm": 6.649500617299968, "learning_rate": 2.5399248787680906e-06, "loss": 0.7268, "step": 14120 }, { "epoch": 1.020174472158506, "grad_norm": 6.802961478767302, "learning_rate": 2.539632413409163e-06, "loss": 0.7076, "step": 14121 }, { "epoch": 1.0202467173587155, "grad_norm": 6.17687322335361, "learning_rate": 2.539339947507698e-06, "loss": 0.6243, "step": 14122 }, { "epoch": 1.020318962558925, "grad_norm": 5.355263247675533, "learning_rate": 2.539047481067699e-06, "loss": 0.6788, "step": 14123 }, { "epoch": 1.0203912077591346, "grad_norm": 6.316720590218676, "learning_rate": 2.538755014093171e-06, "loss": 0.7233, "step": 14124 }, { "epoch": 1.020463452959344, "grad_norm": 7.004812493656705, "learning_rate": 2.538462546588117e-06, "loss": 0.7047, "step": 14125 }, { "epoch": 1.0205356981595535, "grad_norm": 7.2491186198158015, "learning_rate": 2.5381700785565407e-06, "loss": 0.7397, "step": 14126 }, { "epoch": 1.020607943359763, "grad_norm": 5.969326770337209, "learning_rate": 2.5378776100024443e-06, "loss": 0.6612, "step": 14127 }, { "epoch": 1.0206801885599726, "grad_norm": 6.031412231160897, "learning_rate": 2.537585140929834e-06, "loss": 0.7638, "step": 14128 }, { "epoch": 1.020752433760182, "grad_norm": 6.660447780459695, "learning_rate": 2.5372926713427125e-06, "loss": 0.6862, "step": 14129 }, { "epoch": 1.0208246789603916, "grad_norm": 6.239199958278687, "learning_rate": 2.537000201245083e-06, "loss": 0.6196, "step": 14130 }, { "epoch": 1.0208969241606012, "grad_norm": 8.455716735310645, "learning_rate": 2.5367077306409486e-06, "loss": 0.6242, "step": 14131 }, { "epoch": 1.0209691693608105, "grad_norm": 6.0467526130217975, "learning_rate": 2.5364152595343143e-06, "loss": 0.677, "step": 14132 }, { "epoch": 1.02104141456102, "grad_norm": 5.711661192497403, "learning_rate": 2.5361227879291845e-06, "loss": 0.7104, "step": 14133 }, { "epoch": 1.0211136597612296, "grad_norm": 7.398283073642434, "learning_rate": 2.5358303158295607e-06, "loss": 0.715, "step": 14134 }, { "epoch": 1.0211859049614391, "grad_norm": 6.150681819182351, "learning_rate": 2.5355378432394484e-06, "loss": 0.6504, "step": 14135 }, { "epoch": 1.0212581501616487, "grad_norm": 5.460938023906896, "learning_rate": 2.53524537016285e-06, "loss": 0.6623, "step": 14136 }, { "epoch": 1.0213303953618582, "grad_norm": 6.147407493934689, "learning_rate": 2.5349528966037694e-06, "loss": 0.6571, "step": 14137 }, { "epoch": 1.0214026405620678, "grad_norm": 6.7422876450050895, "learning_rate": 2.5346604225662117e-06, "loss": 0.6044, "step": 14138 }, { "epoch": 1.021474885762277, "grad_norm": 5.884947127345359, "learning_rate": 2.5343679480541792e-06, "loss": 0.6521, "step": 14139 }, { "epoch": 1.0215471309624866, "grad_norm": 6.990824544187702, "learning_rate": 2.534075473071677e-06, "loss": 0.7435, "step": 14140 }, { "epoch": 1.0216193761626962, "grad_norm": 6.278396489685042, "learning_rate": 2.5337829976227067e-06, "loss": 0.6239, "step": 14141 }, { "epoch": 1.0216916213629057, "grad_norm": 6.441587465234453, "learning_rate": 2.533490521711275e-06, "loss": 0.6292, "step": 14142 }, { "epoch": 1.0217638665631152, "grad_norm": 6.547004352466168, "learning_rate": 2.533198045341383e-06, "loss": 0.6239, "step": 14143 }, { "epoch": 1.0218361117633248, "grad_norm": 6.2040067954010825, "learning_rate": 2.5329055685170363e-06, "loss": 0.6663, "step": 14144 }, { "epoch": 1.0219083569635343, "grad_norm": 8.062235509984625, "learning_rate": 2.532613091242237e-06, "loss": 0.67, "step": 14145 }, { "epoch": 1.0219806021637436, "grad_norm": 6.358189277800244, "learning_rate": 2.53232061352099e-06, "loss": 0.637, "step": 14146 }, { "epoch": 1.0220528473639532, "grad_norm": 6.6047312337755395, "learning_rate": 2.532028135357299e-06, "loss": 0.6146, "step": 14147 }, { "epoch": 1.0221250925641627, "grad_norm": 7.661726009240596, "learning_rate": 2.5317356567551676e-06, "loss": 0.6935, "step": 14148 }, { "epoch": 1.0221973377643723, "grad_norm": 6.934852713607105, "learning_rate": 2.5314431777186006e-06, "loss": 0.6953, "step": 14149 }, { "epoch": 1.0222695829645818, "grad_norm": 6.667652089086796, "learning_rate": 2.5311506982516e-06, "loss": 0.6594, "step": 14150 }, { "epoch": 1.0223418281647914, "grad_norm": 9.4139583597715, "learning_rate": 2.5308582183581705e-06, "loss": 0.7414, "step": 14151 }, { "epoch": 1.022414073365001, "grad_norm": 7.944226876268143, "learning_rate": 2.530565738042316e-06, "loss": 0.6291, "step": 14152 }, { "epoch": 1.0224863185652102, "grad_norm": 5.669556198916788, "learning_rate": 2.530273257308041e-06, "loss": 0.6089, "step": 14153 }, { "epoch": 1.0225585637654198, "grad_norm": 6.288756482752789, "learning_rate": 2.529980776159348e-06, "loss": 0.6523, "step": 14154 }, { "epoch": 1.0226308089656293, "grad_norm": 7.515455723876552, "learning_rate": 2.5296882946002404e-06, "loss": 0.7026, "step": 14155 }, { "epoch": 1.0227030541658388, "grad_norm": 7.4448257399251165, "learning_rate": 2.5293958126347245e-06, "loss": 0.7203, "step": 14156 }, { "epoch": 1.0227752993660484, "grad_norm": 6.846576542445319, "learning_rate": 2.5291033302668027e-06, "loss": 0.6303, "step": 14157 }, { "epoch": 1.022847544566258, "grad_norm": 5.956452328426738, "learning_rate": 2.528810847500479e-06, "loss": 0.6417, "step": 14158 }, { "epoch": 1.0229197897664675, "grad_norm": 7.167595411343535, "learning_rate": 2.5285183643397565e-06, "loss": 0.5875, "step": 14159 }, { "epoch": 1.022992034966677, "grad_norm": 7.326614972604136, "learning_rate": 2.5282258807886403e-06, "loss": 0.6956, "step": 14160 }, { "epoch": 1.0230642801668863, "grad_norm": 6.831287760163394, "learning_rate": 2.5279333968511326e-06, "loss": 0.6876, "step": 14161 }, { "epoch": 1.0231365253670959, "grad_norm": 7.479010138472015, "learning_rate": 2.52764091253124e-06, "loss": 0.7425, "step": 14162 }, { "epoch": 1.0232087705673054, "grad_norm": 5.872925087592439, "learning_rate": 2.527348427832964e-06, "loss": 0.6127, "step": 14163 }, { "epoch": 1.023281015767515, "grad_norm": 7.405286766542304, "learning_rate": 2.527055942760309e-06, "loss": 0.5978, "step": 14164 }, { "epoch": 1.0233532609677245, "grad_norm": 6.286471649467653, "learning_rate": 2.5267634573172795e-06, "loss": 0.6128, "step": 14165 }, { "epoch": 1.023425506167934, "grad_norm": 7.060298610348231, "learning_rate": 2.526470971507879e-06, "loss": 0.6753, "step": 14166 }, { "epoch": 1.0234977513681436, "grad_norm": 7.78539487451027, "learning_rate": 2.5261784853361114e-06, "loss": 0.6765, "step": 14167 }, { "epoch": 1.023569996568353, "grad_norm": 10.745578655071313, "learning_rate": 2.5258859988059807e-06, "loss": 0.7368, "step": 14168 }, { "epoch": 1.0236422417685624, "grad_norm": 7.413110316144376, "learning_rate": 2.525593511921491e-06, "loss": 0.6699, "step": 14169 }, { "epoch": 1.023714486968772, "grad_norm": 6.444203858251166, "learning_rate": 2.5253010246866457e-06, "loss": 0.6698, "step": 14170 }, { "epoch": 1.0237867321689815, "grad_norm": 5.737486449913364, "learning_rate": 2.5250085371054487e-06, "loss": 0.6683, "step": 14171 }, { "epoch": 1.023858977369191, "grad_norm": 7.2117026802341, "learning_rate": 2.5247160491819052e-06, "loss": 0.7352, "step": 14172 }, { "epoch": 1.0239312225694006, "grad_norm": 6.3674238477031535, "learning_rate": 2.5244235609200174e-06, "loss": 0.6787, "step": 14173 }, { "epoch": 1.0240034677696102, "grad_norm": 6.469305484528339, "learning_rate": 2.52413107232379e-06, "loss": 0.6567, "step": 14174 }, { "epoch": 1.0240757129698195, "grad_norm": 5.4218187136847815, "learning_rate": 2.523838583397227e-06, "loss": 0.6328, "step": 14175 }, { "epoch": 1.024147958170029, "grad_norm": 6.326050368835437, "learning_rate": 2.523546094144333e-06, "loss": 0.6634, "step": 14176 }, { "epoch": 1.0242202033702386, "grad_norm": 7.402833303402741, "learning_rate": 2.5232536045691103e-06, "loss": 0.6269, "step": 14177 }, { "epoch": 1.024292448570448, "grad_norm": 5.913027149919371, "learning_rate": 2.5229611146755647e-06, "loss": 0.7269, "step": 14178 }, { "epoch": 1.0243646937706576, "grad_norm": 7.290544725115951, "learning_rate": 2.5226686244676982e-06, "loss": 0.6581, "step": 14179 }, { "epoch": 1.0244369389708672, "grad_norm": 9.627702606058369, "learning_rate": 2.5223761339495166e-06, "loss": 0.7227, "step": 14180 }, { "epoch": 1.0245091841710767, "grad_norm": 7.402647276923504, "learning_rate": 2.5220836431250234e-06, "loss": 0.6041, "step": 14181 }, { "epoch": 1.024581429371286, "grad_norm": 7.0971408083684375, "learning_rate": 2.5217911519982215e-06, "loss": 0.6421, "step": 14182 }, { "epoch": 1.0246536745714956, "grad_norm": 6.573789060062786, "learning_rate": 2.5214986605731158e-06, "loss": 0.6537, "step": 14183 }, { "epoch": 1.0247259197717051, "grad_norm": 6.867484128619151, "learning_rate": 2.5212061688537097e-06, "loss": 0.6819, "step": 14184 }, { "epoch": 1.0247981649719147, "grad_norm": 6.352523674678711, "learning_rate": 2.520913676844009e-06, "loss": 0.752, "step": 14185 }, { "epoch": 1.0248704101721242, "grad_norm": 6.388478709151745, "learning_rate": 2.520621184548015e-06, "loss": 0.5811, "step": 14186 }, { "epoch": 1.0249426553723338, "grad_norm": 8.277868192151928, "learning_rate": 2.520328691969734e-06, "loss": 0.6358, "step": 14187 }, { "epoch": 1.0250149005725433, "grad_norm": 7.601124138262919, "learning_rate": 2.5200361991131684e-06, "loss": 0.6525, "step": 14188 }, { "epoch": 1.0250871457727526, "grad_norm": 6.401071530286147, "learning_rate": 2.5197437059823226e-06, "loss": 0.678, "step": 14189 }, { "epoch": 1.0251593909729622, "grad_norm": 6.166213903903383, "learning_rate": 2.5194512125812016e-06, "loss": 0.6851, "step": 14190 }, { "epoch": 1.0252316361731717, "grad_norm": 6.849511077584525, "learning_rate": 2.519158718913808e-06, "loss": 0.6616, "step": 14191 }, { "epoch": 1.0253038813733812, "grad_norm": 7.846590173824362, "learning_rate": 2.518866224984147e-06, "loss": 0.7538, "step": 14192 }, { "epoch": 1.0253761265735908, "grad_norm": 7.729633519416197, "learning_rate": 2.5185737307962204e-06, "loss": 0.7222, "step": 14193 }, { "epoch": 1.0254483717738003, "grad_norm": 8.387622487840947, "learning_rate": 2.5182812363540354e-06, "loss": 0.6474, "step": 14194 }, { "epoch": 1.0255206169740099, "grad_norm": 6.506664674060287, "learning_rate": 2.5179887416615946e-06, "loss": 0.6549, "step": 14195 }, { "epoch": 1.0255928621742192, "grad_norm": 6.986300550773792, "learning_rate": 2.517696246722901e-06, "loss": 0.7123, "step": 14196 }, { "epoch": 1.0256651073744287, "grad_norm": 7.617595642110391, "learning_rate": 2.5174037515419596e-06, "loss": 0.7117, "step": 14197 }, { "epoch": 1.0257373525746383, "grad_norm": 5.653945537713696, "learning_rate": 2.5171112561227746e-06, "loss": 0.5722, "step": 14198 }, { "epoch": 1.0258095977748478, "grad_norm": 5.792106373879704, "learning_rate": 2.51681876046935e-06, "loss": 0.7216, "step": 14199 }, { "epoch": 1.0258818429750574, "grad_norm": 7.297194937445355, "learning_rate": 2.5165262645856893e-06, "loss": 0.6775, "step": 14200 }, { "epoch": 1.025954088175267, "grad_norm": 6.297847852500264, "learning_rate": 2.516233768475797e-06, "loss": 0.6705, "step": 14201 }, { "epoch": 1.0260263333754764, "grad_norm": 6.330654213410758, "learning_rate": 2.5159412721436772e-06, "loss": 0.6445, "step": 14202 }, { "epoch": 1.026098578575686, "grad_norm": 6.017717270076597, "learning_rate": 2.5156487755933336e-06, "loss": 0.5901, "step": 14203 }, { "epoch": 1.0261708237758953, "grad_norm": 6.007682967556783, "learning_rate": 2.5153562788287706e-06, "loss": 0.7304, "step": 14204 }, { "epoch": 1.0262430689761048, "grad_norm": 7.096049066453503, "learning_rate": 2.5150637818539914e-06, "loss": 0.7405, "step": 14205 }, { "epoch": 1.0263153141763144, "grad_norm": 5.8492471063741, "learning_rate": 2.5147712846730016e-06, "loss": 0.5922, "step": 14206 }, { "epoch": 1.026387559376524, "grad_norm": 6.1829891523759315, "learning_rate": 2.5144787872898035e-06, "loss": 0.6364, "step": 14207 }, { "epoch": 1.0264598045767335, "grad_norm": 6.434517178625097, "learning_rate": 2.5141862897084026e-06, "loss": 0.6256, "step": 14208 }, { "epoch": 1.026532049776943, "grad_norm": 7.557073238133508, "learning_rate": 2.513893791932802e-06, "loss": 0.6481, "step": 14209 }, { "epoch": 1.0266042949771526, "grad_norm": 6.119043373189841, "learning_rate": 2.513601293967007e-06, "loss": 0.6175, "step": 14210 }, { "epoch": 1.0266765401773619, "grad_norm": 6.431950223151272, "learning_rate": 2.5133087958150197e-06, "loss": 0.6728, "step": 14211 }, { "epoch": 1.0267487853775714, "grad_norm": 6.594315491061548, "learning_rate": 2.513016297480846e-06, "loss": 0.6353, "step": 14212 }, { "epoch": 1.026821030577781, "grad_norm": 6.803180724841397, "learning_rate": 2.5127237989684892e-06, "loss": 0.7312, "step": 14213 }, { "epoch": 1.0268932757779905, "grad_norm": 6.088683854121468, "learning_rate": 2.512431300281954e-06, "loss": 0.6633, "step": 14214 }, { "epoch": 1.0269655209782, "grad_norm": 7.565398362305259, "learning_rate": 2.5121388014252437e-06, "loss": 0.665, "step": 14215 }, { "epoch": 1.0270377661784096, "grad_norm": 6.8669233564280185, "learning_rate": 2.511846302402362e-06, "loss": 0.6271, "step": 14216 }, { "epoch": 1.0271100113786191, "grad_norm": 6.850499278340873, "learning_rate": 2.5115538032173135e-06, "loss": 0.6989, "step": 14217 }, { "epoch": 1.0271822565788284, "grad_norm": 6.205709461901766, "learning_rate": 2.5112613038741028e-06, "loss": 0.6792, "step": 14218 }, { "epoch": 1.027254501779038, "grad_norm": 6.880890698108524, "learning_rate": 2.5109688043767345e-06, "loss": 0.6494, "step": 14219 }, { "epoch": 1.0273267469792475, "grad_norm": 6.688882560065153, "learning_rate": 2.5106763047292115e-06, "loss": 0.6906, "step": 14220 }, { "epoch": 1.027398992179457, "grad_norm": 6.185417980974708, "learning_rate": 2.510383804935537e-06, "loss": 0.6752, "step": 14221 }, { "epoch": 1.0274712373796666, "grad_norm": 7.642679698804316, "learning_rate": 2.510091304999717e-06, "loss": 0.6489, "step": 14222 }, { "epoch": 1.0275434825798762, "grad_norm": 6.91066576608697, "learning_rate": 2.509798804925755e-06, "loss": 0.5989, "step": 14223 }, { "epoch": 1.0276157277800857, "grad_norm": 5.9850404533733625, "learning_rate": 2.509506304717655e-06, "loss": 0.6887, "step": 14224 }, { "epoch": 1.027687972980295, "grad_norm": 5.799029091435937, "learning_rate": 2.5092138043794205e-06, "loss": 0.6386, "step": 14225 }, { "epoch": 1.0277602181805046, "grad_norm": 7.169548105978878, "learning_rate": 2.508921303915056e-06, "loss": 0.6693, "step": 14226 }, { "epoch": 1.027832463380714, "grad_norm": 5.650938627882861, "learning_rate": 2.5086288033285666e-06, "loss": 0.668, "step": 14227 }, { "epoch": 1.0279047085809236, "grad_norm": 6.327391631405293, "learning_rate": 2.5083363026239553e-06, "loss": 0.7387, "step": 14228 }, { "epoch": 1.0279769537811332, "grad_norm": 6.294199183325469, "learning_rate": 2.508043801805227e-06, "loss": 0.6333, "step": 14229 }, { "epoch": 1.0280491989813427, "grad_norm": 6.442793364234842, "learning_rate": 2.5077513008763846e-06, "loss": 0.7636, "step": 14230 }, { "epoch": 1.0281214441815523, "grad_norm": 7.7432741082048056, "learning_rate": 2.507458799841433e-06, "loss": 0.6031, "step": 14231 }, { "epoch": 1.0281936893817618, "grad_norm": 7.9575768022968605, "learning_rate": 2.507166298704376e-06, "loss": 0.6843, "step": 14232 }, { "epoch": 1.0282659345819711, "grad_norm": 6.537599826145403, "learning_rate": 2.5068737974692188e-06, "loss": 0.6722, "step": 14233 }, { "epoch": 1.0283381797821807, "grad_norm": 7.4835502791304505, "learning_rate": 2.506581296139964e-06, "loss": 0.6199, "step": 14234 }, { "epoch": 1.0284104249823902, "grad_norm": 6.429612744477331, "learning_rate": 2.5062887947206165e-06, "loss": 0.6807, "step": 14235 }, { "epoch": 1.0284826701825998, "grad_norm": 7.0240867917166545, "learning_rate": 2.50599629321518e-06, "loss": 0.6699, "step": 14236 }, { "epoch": 1.0285549153828093, "grad_norm": 6.795013944655055, "learning_rate": 2.505703791627659e-06, "loss": 0.6104, "step": 14237 }, { "epoch": 1.0286271605830188, "grad_norm": 7.3744429038463535, "learning_rate": 2.5054112899620583e-06, "loss": 0.6607, "step": 14238 }, { "epoch": 1.0286994057832284, "grad_norm": 7.5814317547803975, "learning_rate": 2.5051187882223804e-06, "loss": 0.6553, "step": 14239 }, { "epoch": 1.0287716509834377, "grad_norm": 8.32252070535804, "learning_rate": 2.50482628641263e-06, "loss": 0.7156, "step": 14240 }, { "epoch": 1.0288438961836472, "grad_norm": 6.661698811007894, "learning_rate": 2.504533784536812e-06, "loss": 0.6255, "step": 14241 }, { "epoch": 1.0289161413838568, "grad_norm": 5.959508002026737, "learning_rate": 2.5042412825989304e-06, "loss": 0.6626, "step": 14242 }, { "epoch": 1.0289883865840663, "grad_norm": 7.080008350949292, "learning_rate": 2.503948780602988e-06, "loss": 0.7098, "step": 14243 }, { "epoch": 1.0290606317842759, "grad_norm": 6.6953030990125075, "learning_rate": 2.5036562785529912e-06, "loss": 0.6831, "step": 14244 }, { "epoch": 1.0291328769844854, "grad_norm": 7.02608072785156, "learning_rate": 2.503363776452941e-06, "loss": 0.712, "step": 14245 }, { "epoch": 1.029205122184695, "grad_norm": 10.360626559365834, "learning_rate": 2.503071274306845e-06, "loss": 0.741, "step": 14246 }, { "epoch": 1.0292773673849043, "grad_norm": 6.476795739919596, "learning_rate": 2.5027787721187045e-06, "loss": 0.6408, "step": 14247 }, { "epoch": 1.0293496125851138, "grad_norm": 6.944145525114995, "learning_rate": 2.502486269892525e-06, "loss": 0.6415, "step": 14248 }, { "epoch": 1.0294218577853234, "grad_norm": 6.422611537750337, "learning_rate": 2.502193767632311e-06, "loss": 0.5926, "step": 14249 }, { "epoch": 1.029494102985533, "grad_norm": 6.893916513847723, "learning_rate": 2.501901265342065e-06, "loss": 0.684, "step": 14250 }, { "epoch": 1.0295663481857424, "grad_norm": 7.0284271677345025, "learning_rate": 2.501608763025793e-06, "loss": 0.6774, "step": 14251 }, { "epoch": 1.029638593385952, "grad_norm": 5.7997990145882765, "learning_rate": 2.501316260687498e-06, "loss": 0.6143, "step": 14252 }, { "epoch": 1.0297108385861615, "grad_norm": 7.74093245140953, "learning_rate": 2.5010237583311847e-06, "loss": 0.6902, "step": 14253 }, { "epoch": 1.0297830837863708, "grad_norm": 8.69260700975125, "learning_rate": 2.5007312559608564e-06, "loss": 0.7328, "step": 14254 }, { "epoch": 1.0298553289865804, "grad_norm": 6.022786106456949, "learning_rate": 2.500438753580518e-06, "loss": 0.673, "step": 14255 }, { "epoch": 1.02992757418679, "grad_norm": 6.4543042156157195, "learning_rate": 2.5001462511941736e-06, "loss": 0.6733, "step": 14256 }, { "epoch": 1.0299998193869995, "grad_norm": 8.740008071441764, "learning_rate": 2.4998537488058277e-06, "loss": 0.6849, "step": 14257 }, { "epoch": 1.030072064587209, "grad_norm": 6.210637767924885, "learning_rate": 2.4995612464194825e-06, "loss": 0.6312, "step": 14258 }, { "epoch": 1.0301443097874186, "grad_norm": 7.123325619631661, "learning_rate": 2.499268744039144e-06, "loss": 0.6656, "step": 14259 }, { "epoch": 1.030216554987628, "grad_norm": 7.072934974193522, "learning_rate": 2.4989762416688153e-06, "loss": 0.5755, "step": 14260 }, { "epoch": 1.0302888001878374, "grad_norm": 6.179048565460253, "learning_rate": 2.498683739312503e-06, "loss": 0.6725, "step": 14261 }, { "epoch": 1.030361045388047, "grad_norm": 7.138513966704277, "learning_rate": 2.498391236974208e-06, "loss": 0.6988, "step": 14262 }, { "epoch": 1.0304332905882565, "grad_norm": 5.596904866593085, "learning_rate": 2.498098734657935e-06, "loss": 0.665, "step": 14263 }, { "epoch": 1.030505535788466, "grad_norm": 5.541766325931694, "learning_rate": 2.49780623236769e-06, "loss": 0.6472, "step": 14264 }, { "epoch": 1.0305777809886756, "grad_norm": 6.075112034898802, "learning_rate": 2.4975137301074756e-06, "loss": 0.6203, "step": 14265 }, { "epoch": 1.0306500261888851, "grad_norm": 6.898274645497727, "learning_rate": 2.497221227881296e-06, "loss": 0.7123, "step": 14266 }, { "epoch": 1.0307222713890947, "grad_norm": 5.906578741057839, "learning_rate": 2.4969287256931555e-06, "loss": 0.6295, "step": 14267 }, { "epoch": 1.030794516589304, "grad_norm": 7.450214961490179, "learning_rate": 2.4966362235470594e-06, "loss": 0.655, "step": 14268 }, { "epoch": 1.0308667617895135, "grad_norm": 6.242672401281105, "learning_rate": 2.49634372144701e-06, "loss": 0.7276, "step": 14269 }, { "epoch": 1.030939006989723, "grad_norm": 6.083583077656139, "learning_rate": 2.4960512193970123e-06, "loss": 0.6893, "step": 14270 }, { "epoch": 1.0310112521899326, "grad_norm": 6.191394851625943, "learning_rate": 2.4957587174010713e-06, "loss": 0.709, "step": 14271 }, { "epoch": 1.0310834973901422, "grad_norm": 7.792476047629589, "learning_rate": 2.495466215463189e-06, "loss": 0.7321, "step": 14272 }, { "epoch": 1.0311557425903517, "grad_norm": 5.924436312230538, "learning_rate": 2.495173713587371e-06, "loss": 0.7341, "step": 14273 }, { "epoch": 1.0312279877905612, "grad_norm": 7.472235596497899, "learning_rate": 2.49488121177762e-06, "loss": 0.6798, "step": 14274 }, { "epoch": 1.0313002329907708, "grad_norm": 6.290679989009225, "learning_rate": 2.494588710037943e-06, "loss": 0.6146, "step": 14275 }, { "epoch": 1.03137247819098, "grad_norm": 5.900325326517194, "learning_rate": 2.4942962083723415e-06, "loss": 0.6165, "step": 14276 }, { "epoch": 1.0314447233911896, "grad_norm": 6.803385946222068, "learning_rate": 2.4940037067848204e-06, "loss": 0.6484, "step": 14277 }, { "epoch": 1.0315169685913992, "grad_norm": 5.719998582959834, "learning_rate": 2.4937112052793847e-06, "loss": 0.6306, "step": 14278 }, { "epoch": 1.0315892137916087, "grad_norm": 7.7921482964098026, "learning_rate": 2.493418703860037e-06, "loss": 0.6968, "step": 14279 }, { "epoch": 1.0316614589918183, "grad_norm": 5.909691186572637, "learning_rate": 2.493126202530782e-06, "loss": 0.6322, "step": 14280 }, { "epoch": 1.0317337041920278, "grad_norm": 8.064231982239779, "learning_rate": 2.492833701295624e-06, "loss": 0.7131, "step": 14281 }, { "epoch": 1.0318059493922374, "grad_norm": 6.6945934287869795, "learning_rate": 2.4925412001585678e-06, "loss": 0.7071, "step": 14282 }, { "epoch": 1.0318781945924467, "grad_norm": 6.291332143349249, "learning_rate": 2.492248699123616e-06, "loss": 0.7356, "step": 14283 }, { "epoch": 1.0319504397926562, "grad_norm": 6.390540236678082, "learning_rate": 2.4919561981947737e-06, "loss": 0.6377, "step": 14284 }, { "epoch": 1.0320226849928658, "grad_norm": 6.236431499691293, "learning_rate": 2.4916636973760446e-06, "loss": 0.6873, "step": 14285 }, { "epoch": 1.0320949301930753, "grad_norm": 6.852836255602037, "learning_rate": 2.4913711966714342e-06, "loss": 0.6534, "step": 14286 }, { "epoch": 1.0321671753932848, "grad_norm": 5.820162081214863, "learning_rate": 2.4910786960849447e-06, "loss": 0.6899, "step": 14287 }, { "epoch": 1.0322394205934944, "grad_norm": 7.30469820562384, "learning_rate": 2.49078619562058e-06, "loss": 0.63, "step": 14288 }, { "epoch": 1.032311665793704, "grad_norm": 6.057473527360063, "learning_rate": 2.4904936952823464e-06, "loss": 0.7197, "step": 14289 }, { "epoch": 1.0323839109939132, "grad_norm": 5.854647181147866, "learning_rate": 2.490201195074246e-06, "loss": 0.6189, "step": 14290 }, { "epoch": 1.0324561561941228, "grad_norm": 6.833407735031868, "learning_rate": 2.4899086950002837e-06, "loss": 0.7748, "step": 14291 }, { "epoch": 1.0325284013943323, "grad_norm": 7.234666591889414, "learning_rate": 2.4896161950644633e-06, "loss": 0.6758, "step": 14292 }, { "epoch": 1.0326006465945419, "grad_norm": 8.066490427829542, "learning_rate": 2.4893236952707898e-06, "loss": 0.7406, "step": 14293 }, { "epoch": 1.0326728917947514, "grad_norm": 5.99703079506297, "learning_rate": 2.4890311956232663e-06, "loss": 0.6738, "step": 14294 }, { "epoch": 1.032745136994961, "grad_norm": 5.507124880891603, "learning_rate": 2.488738696125897e-06, "loss": 0.6383, "step": 14295 }, { "epoch": 1.0328173821951705, "grad_norm": 6.85456307452393, "learning_rate": 2.488446196782687e-06, "loss": 0.6384, "step": 14296 }, { "epoch": 1.0328896273953798, "grad_norm": 6.199258132357578, "learning_rate": 2.4881536975976387e-06, "loss": 0.616, "step": 14297 }, { "epoch": 1.0329618725955894, "grad_norm": 6.3485417050694295, "learning_rate": 2.487861198574757e-06, "loss": 0.679, "step": 14298 }, { "epoch": 1.033034117795799, "grad_norm": 8.62673402023947, "learning_rate": 2.4875686997180465e-06, "loss": 0.7361, "step": 14299 }, { "epoch": 1.0331063629960084, "grad_norm": 7.023948302910756, "learning_rate": 2.4872762010315116e-06, "loss": 0.6956, "step": 14300 }, { "epoch": 1.033178608196218, "grad_norm": 5.193857181329108, "learning_rate": 2.4869837025191546e-06, "loss": 0.6553, "step": 14301 }, { "epoch": 1.0332508533964275, "grad_norm": 6.307847225083233, "learning_rate": 2.4866912041849807e-06, "loss": 0.6481, "step": 14302 }, { "epoch": 1.033323098596637, "grad_norm": 6.034361357947584, "learning_rate": 2.4863987060329943e-06, "loss": 0.7163, "step": 14303 }, { "epoch": 1.0333953437968466, "grad_norm": 8.817182014081007, "learning_rate": 2.486106208067199e-06, "loss": 0.7624, "step": 14304 }, { "epoch": 1.033467588997056, "grad_norm": 6.28296087697447, "learning_rate": 2.4858137102915982e-06, "loss": 0.6582, "step": 14305 }, { "epoch": 1.0335398341972655, "grad_norm": 7.53566754842533, "learning_rate": 2.485521212710197e-06, "loss": 0.7724, "step": 14306 }, { "epoch": 1.033612079397475, "grad_norm": 5.110007523980919, "learning_rate": 2.4852287153269996e-06, "loss": 0.6859, "step": 14307 }, { "epoch": 1.0336843245976846, "grad_norm": 5.960234474331834, "learning_rate": 2.4849362181460094e-06, "loss": 0.658, "step": 14308 }, { "epoch": 1.033756569797894, "grad_norm": 7.102095345796285, "learning_rate": 2.4846437211712302e-06, "loss": 0.7514, "step": 14309 }, { "epoch": 1.0338288149981036, "grad_norm": 6.270102254468753, "learning_rate": 2.484351224406667e-06, "loss": 0.6953, "step": 14310 }, { "epoch": 1.0339010601983132, "grad_norm": 6.51872929599945, "learning_rate": 2.4840587278563236e-06, "loss": 0.6995, "step": 14311 }, { "epoch": 1.0339733053985225, "grad_norm": 6.033603031319742, "learning_rate": 2.4837662315242035e-06, "loss": 0.732, "step": 14312 }, { "epoch": 1.034045550598732, "grad_norm": 7.327722078808517, "learning_rate": 2.483473735414311e-06, "loss": 0.7022, "step": 14313 }, { "epoch": 1.0341177957989416, "grad_norm": 7.752270273730957, "learning_rate": 2.4831812395306513e-06, "loss": 0.7257, "step": 14314 }, { "epoch": 1.0341900409991511, "grad_norm": 6.572207872610298, "learning_rate": 2.4828887438772266e-06, "loss": 0.6401, "step": 14315 }, { "epoch": 1.0342622861993607, "grad_norm": 7.3303731954998534, "learning_rate": 2.4825962484580412e-06, "loss": 0.6823, "step": 14316 }, { "epoch": 1.0343345313995702, "grad_norm": 6.019207727252679, "learning_rate": 2.4823037532770995e-06, "loss": 0.6206, "step": 14317 }, { "epoch": 1.0344067765997798, "grad_norm": 5.7476061938191725, "learning_rate": 2.482011258338407e-06, "loss": 0.6299, "step": 14318 }, { "epoch": 1.034479021799989, "grad_norm": 5.349067228769471, "learning_rate": 2.4817187636459654e-06, "loss": 0.6542, "step": 14319 }, { "epoch": 1.0345512670001986, "grad_norm": 7.016337810913573, "learning_rate": 2.4814262692037796e-06, "loss": 0.6973, "step": 14320 }, { "epoch": 1.0346235122004082, "grad_norm": 5.73872763271871, "learning_rate": 2.4811337750158544e-06, "loss": 0.6702, "step": 14321 }, { "epoch": 1.0346957574006177, "grad_norm": 7.164679614062862, "learning_rate": 2.480841281086193e-06, "loss": 0.7077, "step": 14322 }, { "epoch": 1.0347680026008272, "grad_norm": 5.267055419062494, "learning_rate": 2.480548787418799e-06, "loss": 0.6431, "step": 14323 }, { "epoch": 1.0348402478010368, "grad_norm": 5.647213456313387, "learning_rate": 2.4802562940176774e-06, "loss": 0.6321, "step": 14324 }, { "epoch": 1.0349124930012463, "grad_norm": 7.139949173946756, "learning_rate": 2.4799638008868324e-06, "loss": 0.6917, "step": 14325 }, { "epoch": 1.0349847382014556, "grad_norm": 7.274822492498582, "learning_rate": 2.479671308030267e-06, "loss": 0.6953, "step": 14326 }, { "epoch": 1.0350569834016652, "grad_norm": 9.290211881555727, "learning_rate": 2.479378815451985e-06, "loss": 0.6396, "step": 14327 }, { "epoch": 1.0351292286018747, "grad_norm": 6.779231601709676, "learning_rate": 2.4790863231559923e-06, "loss": 0.6796, "step": 14328 }, { "epoch": 1.0352014738020843, "grad_norm": 6.785991364752112, "learning_rate": 2.4787938311462907e-06, "loss": 0.7103, "step": 14329 }, { "epoch": 1.0352737190022938, "grad_norm": 5.838313383822815, "learning_rate": 2.478501339426885e-06, "loss": 0.6346, "step": 14330 }, { "epoch": 1.0353459642025034, "grad_norm": 6.829283871284022, "learning_rate": 2.478208848001779e-06, "loss": 0.6906, "step": 14331 }, { "epoch": 1.035418209402713, "grad_norm": 6.061599192599962, "learning_rate": 2.4779163568749783e-06, "loss": 0.6546, "step": 14332 }, { "epoch": 1.0354904546029222, "grad_norm": 6.775113049229067, "learning_rate": 2.477623866050484e-06, "loss": 0.7143, "step": 14333 }, { "epoch": 1.0355626998031318, "grad_norm": 6.490130340725031, "learning_rate": 2.477331375532302e-06, "loss": 0.716, "step": 14334 }, { "epoch": 1.0356349450033413, "grad_norm": 9.45564960154547, "learning_rate": 2.4770388853244366e-06, "loss": 0.772, "step": 14335 }, { "epoch": 1.0357071902035508, "grad_norm": 7.738217410845327, "learning_rate": 2.4767463954308905e-06, "loss": 0.7569, "step": 14336 }, { "epoch": 1.0357794354037604, "grad_norm": 5.7501698344483705, "learning_rate": 2.476453905855668e-06, "loss": 0.6699, "step": 14337 }, { "epoch": 1.03585168060397, "grad_norm": 6.337582150139407, "learning_rate": 2.476161416602773e-06, "loss": 0.6828, "step": 14338 }, { "epoch": 1.0359239258041795, "grad_norm": 7.480035531270708, "learning_rate": 2.475868927676211e-06, "loss": 0.7399, "step": 14339 }, { "epoch": 1.0359961710043888, "grad_norm": 7.655644856107276, "learning_rate": 2.4755764390799835e-06, "loss": 0.6749, "step": 14340 }, { "epoch": 1.0360684162045983, "grad_norm": 6.088155986055784, "learning_rate": 2.4752839508180956e-06, "loss": 0.6224, "step": 14341 }, { "epoch": 1.0361406614048079, "grad_norm": 6.74858163130771, "learning_rate": 2.4749914628945512e-06, "loss": 0.6552, "step": 14342 }, { "epoch": 1.0362129066050174, "grad_norm": 7.89959991987208, "learning_rate": 2.4746989753133556e-06, "loss": 0.6856, "step": 14343 }, { "epoch": 1.036285151805227, "grad_norm": 6.3999903917240495, "learning_rate": 2.47440648807851e-06, "loss": 0.6421, "step": 14344 }, { "epoch": 1.0363573970054365, "grad_norm": 7.698895451973609, "learning_rate": 2.4741140011940197e-06, "loss": 0.6727, "step": 14345 }, { "epoch": 1.036429642205646, "grad_norm": 6.40358533568244, "learning_rate": 2.47382151466389e-06, "loss": 0.5884, "step": 14346 }, { "epoch": 1.0365018874058554, "grad_norm": 5.594983353849982, "learning_rate": 2.473529028492122e-06, "loss": 0.6845, "step": 14347 }, { "epoch": 1.036574132606065, "grad_norm": 7.326429093558434, "learning_rate": 2.4732365426827214e-06, "loss": 0.7141, "step": 14348 }, { "epoch": 1.0366463778062744, "grad_norm": 5.847311474785168, "learning_rate": 2.4729440572396914e-06, "loss": 0.7189, "step": 14349 }, { "epoch": 1.036718623006484, "grad_norm": 7.036580824238781, "learning_rate": 2.472651572167037e-06, "loss": 0.678, "step": 14350 }, { "epoch": 1.0367908682066935, "grad_norm": 6.5598240800855265, "learning_rate": 2.472359087468761e-06, "loss": 0.6879, "step": 14351 }, { "epoch": 1.036863113406903, "grad_norm": 4.852142741569987, "learning_rate": 2.4720666031488674e-06, "loss": 0.6124, "step": 14352 }, { "epoch": 1.0369353586071126, "grad_norm": 6.5777161532313375, "learning_rate": 2.471774119211361e-06, "loss": 0.6327, "step": 14353 }, { "epoch": 1.0370076038073222, "grad_norm": 6.745758136646671, "learning_rate": 2.4714816356602443e-06, "loss": 0.6781, "step": 14354 }, { "epoch": 1.0370798490075315, "grad_norm": 6.332762274177062, "learning_rate": 2.4711891524995218e-06, "loss": 0.7004, "step": 14355 }, { "epoch": 1.037152094207741, "grad_norm": 5.747374889045814, "learning_rate": 2.4708966697331977e-06, "loss": 0.6465, "step": 14356 }, { "epoch": 1.0372243394079506, "grad_norm": 5.970184633093239, "learning_rate": 2.4706041873652763e-06, "loss": 0.7082, "step": 14357 }, { "epoch": 1.03729658460816, "grad_norm": 7.928243451103826, "learning_rate": 2.47031170539976e-06, "loss": 0.6796, "step": 14358 }, { "epoch": 1.0373688298083696, "grad_norm": 7.212800980606621, "learning_rate": 2.4700192238406527e-06, "loss": 0.7227, "step": 14359 }, { "epoch": 1.0374410750085792, "grad_norm": 6.580123923853675, "learning_rate": 2.4697267426919608e-06, "loss": 0.6681, "step": 14360 }, { "epoch": 1.0375133202087887, "grad_norm": 6.3902545998824145, "learning_rate": 2.4694342619576848e-06, "loss": 0.6457, "step": 14361 }, { "epoch": 1.037585565408998, "grad_norm": 7.314457036464241, "learning_rate": 2.4691417816418304e-06, "loss": 0.7059, "step": 14362 }, { "epoch": 1.0376578106092076, "grad_norm": 7.855510269578863, "learning_rate": 2.468849301748401e-06, "loss": 0.7103, "step": 14363 }, { "epoch": 1.0377300558094171, "grad_norm": 5.53221234069774, "learning_rate": 2.468556822281401e-06, "loss": 0.6298, "step": 14364 }, { "epoch": 1.0378023010096267, "grad_norm": 6.7718784249529875, "learning_rate": 2.468264343244833e-06, "loss": 0.6387, "step": 14365 }, { "epoch": 1.0378745462098362, "grad_norm": 5.795058467066467, "learning_rate": 2.4679718646427014e-06, "loss": 0.6598, "step": 14366 }, { "epoch": 1.0379467914100458, "grad_norm": 6.299338036531007, "learning_rate": 2.4676793864790103e-06, "loss": 0.6623, "step": 14367 }, { "epoch": 1.0380190366102553, "grad_norm": 6.210231755304293, "learning_rate": 2.467386908757764e-06, "loss": 0.7236, "step": 14368 }, { "epoch": 1.0380912818104646, "grad_norm": 7.955247157469451, "learning_rate": 2.467094431482965e-06, "loss": 0.6365, "step": 14369 }, { "epoch": 1.0381635270106742, "grad_norm": 7.5817859733800566, "learning_rate": 2.4668019546586176e-06, "loss": 0.6617, "step": 14370 }, { "epoch": 1.0382357722108837, "grad_norm": 6.694467782823641, "learning_rate": 2.4665094782887263e-06, "loss": 0.6767, "step": 14371 }, { "epoch": 1.0383080174110932, "grad_norm": 5.4207509219536485, "learning_rate": 2.4662170023772937e-06, "loss": 0.6685, "step": 14372 }, { "epoch": 1.0383802626113028, "grad_norm": 6.085374289468144, "learning_rate": 2.4659245269283238e-06, "loss": 0.6923, "step": 14373 }, { "epoch": 1.0384525078115123, "grad_norm": 6.174619272209282, "learning_rate": 2.4656320519458203e-06, "loss": 0.6799, "step": 14374 }, { "epoch": 1.0385247530117219, "grad_norm": 6.754917472826539, "learning_rate": 2.465339577433789e-06, "loss": 0.6813, "step": 14375 }, { "epoch": 1.0385969982119312, "grad_norm": 7.091386443300944, "learning_rate": 2.465047103396231e-06, "loss": 0.6449, "step": 14376 }, { "epoch": 1.0386692434121407, "grad_norm": 5.848094936471734, "learning_rate": 2.4647546298371508e-06, "loss": 0.6622, "step": 14377 }, { "epoch": 1.0387414886123503, "grad_norm": 5.885294883165703, "learning_rate": 2.4644621567605532e-06, "loss": 0.6449, "step": 14378 }, { "epoch": 1.0388137338125598, "grad_norm": 6.96885455271205, "learning_rate": 2.46416968417044e-06, "loss": 0.7141, "step": 14379 }, { "epoch": 1.0388859790127694, "grad_norm": 6.715287988868199, "learning_rate": 2.4638772120708164e-06, "loss": 0.6778, "step": 14380 }, { "epoch": 1.038958224212979, "grad_norm": 6.905647355104835, "learning_rate": 2.4635847404656857e-06, "loss": 0.6162, "step": 14381 }, { "epoch": 1.0390304694131884, "grad_norm": 5.863762354824539, "learning_rate": 2.4632922693590518e-06, "loss": 0.6772, "step": 14382 }, { "epoch": 1.039102714613398, "grad_norm": 9.966927962774523, "learning_rate": 2.462999798754918e-06, "loss": 0.6524, "step": 14383 }, { "epoch": 1.0391749598136073, "grad_norm": 5.554997045205444, "learning_rate": 2.4627073286572883e-06, "loss": 0.721, "step": 14384 }, { "epoch": 1.0392472050138168, "grad_norm": 7.495642349781628, "learning_rate": 2.4624148590701675e-06, "loss": 0.7053, "step": 14385 }, { "epoch": 1.0393194502140264, "grad_norm": 6.017870674587041, "learning_rate": 2.4621223899975565e-06, "loss": 0.6416, "step": 14386 }, { "epoch": 1.039391695414236, "grad_norm": 6.322772741381756, "learning_rate": 2.4618299214434606e-06, "loss": 0.6225, "step": 14387 }, { "epoch": 1.0394639406144455, "grad_norm": 7.082541597286593, "learning_rate": 2.4615374534118836e-06, "loss": 0.7558, "step": 14388 }, { "epoch": 1.039536185814655, "grad_norm": 6.458314005504933, "learning_rate": 2.4612449859068304e-06, "loss": 0.6487, "step": 14389 }, { "epoch": 1.0396084310148646, "grad_norm": 6.232398310144263, "learning_rate": 2.460952518932302e-06, "loss": 0.7139, "step": 14390 }, { "epoch": 1.0396806762150739, "grad_norm": 6.083384927330529, "learning_rate": 2.460660052492303e-06, "loss": 0.6814, "step": 14391 }, { "epoch": 1.0397529214152834, "grad_norm": 5.830953530383031, "learning_rate": 2.4603675865908374e-06, "loss": 0.6235, "step": 14392 }, { "epoch": 1.039825166615493, "grad_norm": 7.833661769177119, "learning_rate": 2.46007512123191e-06, "loss": 0.6269, "step": 14393 }, { "epoch": 1.0398974118157025, "grad_norm": 6.580175229785868, "learning_rate": 2.4597826564195218e-06, "loss": 0.6805, "step": 14394 }, { "epoch": 1.039969657015912, "grad_norm": 6.237543128099584, "learning_rate": 2.459490192157678e-06, "loss": 0.6962, "step": 14395 }, { "epoch": 1.0400419022161216, "grad_norm": 6.45382634893722, "learning_rate": 2.459197728450383e-06, "loss": 0.6785, "step": 14396 }, { "epoch": 1.0401141474163311, "grad_norm": 5.789053604180918, "learning_rate": 2.4589052653016384e-06, "loss": 0.5935, "step": 14397 }, { "epoch": 1.0401863926165404, "grad_norm": 6.254082223964707, "learning_rate": 2.458612802715449e-06, "loss": 0.6302, "step": 14398 }, { "epoch": 1.04025863781675, "grad_norm": 8.399397655870647, "learning_rate": 2.4583203406958184e-06, "loss": 0.6526, "step": 14399 }, { "epoch": 1.0403308830169595, "grad_norm": 6.217015623605924, "learning_rate": 2.458027879246751e-06, "loss": 0.6994, "step": 14400 }, { "epoch": 1.040403128217169, "grad_norm": 6.018996049678461, "learning_rate": 2.457735418372248e-06, "loss": 0.6194, "step": 14401 }, { "epoch": 1.0404753734173786, "grad_norm": 7.297096134731385, "learning_rate": 2.4574429580763136e-06, "loss": 0.6563, "step": 14402 }, { "epoch": 1.0405476186175882, "grad_norm": 7.092255960164733, "learning_rate": 2.457150498362954e-06, "loss": 0.6619, "step": 14403 }, { "epoch": 1.0406198638177977, "grad_norm": 5.905650274921276, "learning_rate": 2.45685803923617e-06, "loss": 0.7221, "step": 14404 }, { "epoch": 1.040692109018007, "grad_norm": 8.037007090403206, "learning_rate": 2.4565655806999656e-06, "loss": 0.6591, "step": 14405 }, { "epoch": 1.0407643542182166, "grad_norm": 6.7484680132696075, "learning_rate": 2.4562731227583446e-06, "loss": 0.6468, "step": 14406 }, { "epoch": 1.040836599418426, "grad_norm": 7.218216517168261, "learning_rate": 2.455980665415311e-06, "loss": 0.7385, "step": 14407 }, { "epoch": 1.0409088446186356, "grad_norm": 8.73155404328131, "learning_rate": 2.455688208674868e-06, "loss": 0.6344, "step": 14408 }, { "epoch": 1.0409810898188452, "grad_norm": 6.665795873355569, "learning_rate": 2.4553957525410187e-06, "loss": 0.6469, "step": 14409 }, { "epoch": 1.0410533350190547, "grad_norm": 6.741561701475756, "learning_rate": 2.455103297017767e-06, "loss": 0.6738, "step": 14410 }, { "epoch": 1.0411255802192643, "grad_norm": 5.519586021545743, "learning_rate": 2.4548108421091164e-06, "loss": 0.5566, "step": 14411 }, { "epoch": 1.0411978254194736, "grad_norm": 5.817771899890059, "learning_rate": 2.4545183878190697e-06, "loss": 0.5712, "step": 14412 }, { "epoch": 1.0412700706196831, "grad_norm": 6.689386404776757, "learning_rate": 2.4542259341516316e-06, "loss": 0.6256, "step": 14413 }, { "epoch": 1.0413423158198927, "grad_norm": 7.0942143359696805, "learning_rate": 2.4539334811108056e-06, "loss": 0.6808, "step": 14414 }, { "epoch": 1.0414145610201022, "grad_norm": 6.481360780510422, "learning_rate": 2.4536410287005935e-06, "loss": 0.6358, "step": 14415 }, { "epoch": 1.0414868062203118, "grad_norm": 6.607087012095451, "learning_rate": 2.4533485769249993e-06, "loss": 0.7345, "step": 14416 }, { "epoch": 1.0415590514205213, "grad_norm": 7.30834347221457, "learning_rate": 2.453056125788027e-06, "loss": 0.6282, "step": 14417 }, { "epoch": 1.0416312966207308, "grad_norm": 7.687399514634804, "learning_rate": 2.4527636752936817e-06, "loss": 0.6926, "step": 14418 }, { "epoch": 1.0417035418209402, "grad_norm": 6.207429790245724, "learning_rate": 2.452471225445963e-06, "loss": 0.6404, "step": 14419 }, { "epoch": 1.0417757870211497, "grad_norm": 5.788184713309573, "learning_rate": 2.452178776248877e-06, "loss": 0.6852, "step": 14420 }, { "epoch": 1.0418480322213592, "grad_norm": 6.281836031625042, "learning_rate": 2.4518863277064266e-06, "loss": 0.6216, "step": 14421 }, { "epoch": 1.0419202774215688, "grad_norm": 6.410868250086778, "learning_rate": 2.4515938798226146e-06, "loss": 0.6009, "step": 14422 }, { "epoch": 1.0419925226217783, "grad_norm": 7.4188964006589915, "learning_rate": 2.451301432601445e-06, "loss": 0.6291, "step": 14423 }, { "epoch": 1.0420647678219879, "grad_norm": 7.574699209555082, "learning_rate": 2.4510089860469207e-06, "loss": 0.6995, "step": 14424 }, { "epoch": 1.0421370130221974, "grad_norm": 7.284782895091197, "learning_rate": 2.450716540163046e-06, "loss": 0.6813, "step": 14425 }, { "epoch": 1.042209258222407, "grad_norm": 5.68418066396175, "learning_rate": 2.450424094953823e-06, "loss": 0.6491, "step": 14426 }, { "epoch": 1.0422815034226163, "grad_norm": 6.413694048206084, "learning_rate": 2.450131650423256e-06, "loss": 0.6926, "step": 14427 }, { "epoch": 1.0423537486228258, "grad_norm": 6.946170372004567, "learning_rate": 2.4498392065753484e-06, "loss": 0.7658, "step": 14428 }, { "epoch": 1.0424259938230354, "grad_norm": 8.484814711499395, "learning_rate": 2.4495467634141025e-06, "loss": 0.7412, "step": 14429 }, { "epoch": 1.042498239023245, "grad_norm": 6.469660450723535, "learning_rate": 2.4492543209435217e-06, "loss": 0.6045, "step": 14430 }, { "epoch": 1.0425704842234544, "grad_norm": 7.7009655631919784, "learning_rate": 2.44896187916761e-06, "loss": 0.7607, "step": 14431 }, { "epoch": 1.042642729423664, "grad_norm": 6.803629568485911, "learning_rate": 2.448669438090372e-06, "loss": 0.6795, "step": 14432 }, { "epoch": 1.0427149746238735, "grad_norm": 7.931146196267742, "learning_rate": 2.4483769977158085e-06, "loss": 0.7606, "step": 14433 }, { "epoch": 1.0427872198240828, "grad_norm": 6.115869372772317, "learning_rate": 2.4480845580479234e-06, "loss": 0.6708, "step": 14434 }, { "epoch": 1.0428594650242924, "grad_norm": 6.460830632240416, "learning_rate": 2.4477921190907215e-06, "loss": 0.622, "step": 14435 }, { "epoch": 1.042931710224502, "grad_norm": 6.8214609549073995, "learning_rate": 2.447499680848204e-06, "loss": 0.5923, "step": 14436 }, { "epoch": 1.0430039554247115, "grad_norm": 6.581436592202913, "learning_rate": 2.4472072433243752e-06, "loss": 0.664, "step": 14437 }, { "epoch": 1.043076200624921, "grad_norm": 6.379186116028414, "learning_rate": 2.446914806523238e-06, "loss": 0.5983, "step": 14438 }, { "epoch": 1.0431484458251306, "grad_norm": 6.7299181003887005, "learning_rate": 2.446622370448797e-06, "loss": 0.6704, "step": 14439 }, { "epoch": 1.04322069102534, "grad_norm": 6.286123027144978, "learning_rate": 2.4463299351050533e-06, "loss": 0.6708, "step": 14440 }, { "epoch": 1.0432929362255494, "grad_norm": 5.892256048021777, "learning_rate": 2.446037500496011e-06, "loss": 0.6165, "step": 14441 }, { "epoch": 1.043365181425759, "grad_norm": 6.955584898870863, "learning_rate": 2.4457450666256752e-06, "loss": 0.785, "step": 14442 }, { "epoch": 1.0434374266259685, "grad_norm": 6.223021300791716, "learning_rate": 2.4454526334980458e-06, "loss": 0.6654, "step": 14443 }, { "epoch": 1.043509671826178, "grad_norm": 6.4978381743424025, "learning_rate": 2.4451602011171267e-06, "loss": 0.6741, "step": 14444 }, { "epoch": 1.0435819170263876, "grad_norm": 8.10084111120471, "learning_rate": 2.444867769486923e-06, "loss": 0.6488, "step": 14445 }, { "epoch": 1.0436541622265971, "grad_norm": 7.961690251304358, "learning_rate": 2.4445753386114375e-06, "loss": 0.7373, "step": 14446 }, { "epoch": 1.0437264074268067, "grad_norm": 6.1159872580296355, "learning_rate": 2.4442829084946714e-06, "loss": 0.6917, "step": 14447 }, { "epoch": 1.043798652627016, "grad_norm": 6.81058804341228, "learning_rate": 2.443990479140629e-06, "loss": 0.7675, "step": 14448 }, { "epoch": 1.0438708978272255, "grad_norm": 6.10757177722178, "learning_rate": 2.4436980505533136e-06, "loss": 0.6635, "step": 14449 }, { "epoch": 1.043943143027435, "grad_norm": 7.1366179928403355, "learning_rate": 2.4434056227367285e-06, "loss": 0.6656, "step": 14450 }, { "epoch": 1.0440153882276446, "grad_norm": 5.970831543025271, "learning_rate": 2.4431131956948763e-06, "loss": 0.6221, "step": 14451 }, { "epoch": 1.0440876334278542, "grad_norm": 6.926539655493289, "learning_rate": 2.4428207694317598e-06, "loss": 0.6939, "step": 14452 }, { "epoch": 1.0441598786280637, "grad_norm": 5.960037983510892, "learning_rate": 2.4425283439513835e-06, "loss": 0.7483, "step": 14453 }, { "epoch": 1.0442321238282732, "grad_norm": 6.6430524630331895, "learning_rate": 2.442235919257749e-06, "loss": 0.6894, "step": 14454 }, { "epoch": 1.0443043690284828, "grad_norm": 5.903439776629305, "learning_rate": 2.44194349535486e-06, "loss": 0.6322, "step": 14455 }, { "epoch": 1.044376614228692, "grad_norm": 6.673718950306909, "learning_rate": 2.441651072246719e-06, "loss": 0.6839, "step": 14456 }, { "epoch": 1.0444488594289016, "grad_norm": 5.494178118206587, "learning_rate": 2.4413586499373308e-06, "loss": 0.682, "step": 14457 }, { "epoch": 1.0445211046291112, "grad_norm": 7.396587007473843, "learning_rate": 2.4410662284306956e-06, "loss": 0.6326, "step": 14458 }, { "epoch": 1.0445933498293207, "grad_norm": 5.797207640283427, "learning_rate": 2.4407738077308187e-06, "loss": 0.693, "step": 14459 }, { "epoch": 1.0446655950295303, "grad_norm": 6.37007878660528, "learning_rate": 2.440481387841703e-06, "loss": 0.6153, "step": 14460 }, { "epoch": 1.0447378402297398, "grad_norm": 5.753041002280948, "learning_rate": 2.44018896876735e-06, "loss": 0.5524, "step": 14461 }, { "epoch": 1.0448100854299494, "grad_norm": 7.6610356535625295, "learning_rate": 2.439896550511764e-06, "loss": 0.6576, "step": 14462 }, { "epoch": 1.0448823306301587, "grad_norm": 10.565483800423515, "learning_rate": 2.4396041330789472e-06, "loss": 0.7272, "step": 14463 }, { "epoch": 1.0449545758303682, "grad_norm": 7.469185660835802, "learning_rate": 2.4393117164729034e-06, "loss": 0.7262, "step": 14464 }, { "epoch": 1.0450268210305778, "grad_norm": 7.313056337761085, "learning_rate": 2.439019300697635e-06, "loss": 0.5905, "step": 14465 }, { "epoch": 1.0450990662307873, "grad_norm": 6.350592770280231, "learning_rate": 2.4387268857571446e-06, "loss": 0.6168, "step": 14466 }, { "epoch": 1.0451713114309968, "grad_norm": 5.9422164655486505, "learning_rate": 2.438434471655436e-06, "loss": 0.6499, "step": 14467 }, { "epoch": 1.0452435566312064, "grad_norm": 7.202028137236814, "learning_rate": 2.4381420583965113e-06, "loss": 0.6858, "step": 14468 }, { "epoch": 1.045315801831416, "grad_norm": 7.357896271538802, "learning_rate": 2.4378496459843736e-06, "loss": 0.6566, "step": 14469 }, { "epoch": 1.0453880470316252, "grad_norm": 6.978279201582176, "learning_rate": 2.4375572344230263e-06, "loss": 0.6166, "step": 14470 }, { "epoch": 1.0454602922318348, "grad_norm": 7.699132043049307, "learning_rate": 2.437264823716473e-06, "loss": 0.6262, "step": 14471 }, { "epoch": 1.0455325374320443, "grad_norm": 6.4816138581982115, "learning_rate": 2.4369724138687135e-06, "loss": 0.7122, "step": 14472 }, { "epoch": 1.0456047826322539, "grad_norm": 7.872236463267205, "learning_rate": 2.4366800048837537e-06, "loss": 0.7971, "step": 14473 }, { "epoch": 1.0456770278324634, "grad_norm": 8.22275552938878, "learning_rate": 2.436387596765596e-06, "loss": 0.6572, "step": 14474 }, { "epoch": 1.045749273032673, "grad_norm": 5.897858324731403, "learning_rate": 2.436095189518243e-06, "loss": 0.6253, "step": 14475 }, { "epoch": 1.0458215182328825, "grad_norm": 6.205968555780156, "learning_rate": 2.435802783145696e-06, "loss": 0.6458, "step": 14476 }, { "epoch": 1.0458937634330918, "grad_norm": 5.432292680993234, "learning_rate": 2.4355103776519596e-06, "loss": 0.6497, "step": 14477 }, { "epoch": 1.0459660086333014, "grad_norm": 6.548952487728357, "learning_rate": 2.4352179730410365e-06, "loss": 0.6543, "step": 14478 }, { "epoch": 1.046038253833511, "grad_norm": 8.456964942725048, "learning_rate": 2.434925569316928e-06, "loss": 0.6843, "step": 14479 }, { "epoch": 1.0461104990337204, "grad_norm": 9.2628472266964, "learning_rate": 2.4346331664836382e-06, "loss": 0.6125, "step": 14480 }, { "epoch": 1.04618274423393, "grad_norm": 6.693773695959896, "learning_rate": 2.43434076454517e-06, "loss": 0.7502, "step": 14481 }, { "epoch": 1.0462549894341395, "grad_norm": 7.401985324848164, "learning_rate": 2.434048363505526e-06, "loss": 0.7079, "step": 14482 }, { "epoch": 1.046327234634349, "grad_norm": 6.329318162495832, "learning_rate": 2.4337559633687083e-06, "loss": 0.6733, "step": 14483 }, { "epoch": 1.0463994798345584, "grad_norm": 6.219043513900124, "learning_rate": 2.43346356413872e-06, "loss": 0.6713, "step": 14484 }, { "epoch": 1.046471725034768, "grad_norm": 5.596358729864017, "learning_rate": 2.4331711658195647e-06, "loss": 0.6883, "step": 14485 }, { "epoch": 1.0465439702349775, "grad_norm": 6.280925989097275, "learning_rate": 2.4328787684152428e-06, "loss": 0.6792, "step": 14486 }, { "epoch": 1.046616215435187, "grad_norm": 6.087569481925083, "learning_rate": 2.432586371929759e-06, "loss": 0.6697, "step": 14487 }, { "epoch": 1.0466884606353966, "grad_norm": 7.462220725707575, "learning_rate": 2.4322939763671153e-06, "loss": 0.7716, "step": 14488 }, { "epoch": 1.046760705835606, "grad_norm": 8.820014076827185, "learning_rate": 2.4320015817313154e-06, "loss": 0.6192, "step": 14489 }, { "epoch": 1.0468329510358156, "grad_norm": 8.508697996130516, "learning_rate": 2.4317091880263603e-06, "loss": 0.6488, "step": 14490 }, { "epoch": 1.046905196236025, "grad_norm": 5.991210539552758, "learning_rate": 2.4314167952562535e-06, "loss": 0.6876, "step": 14491 }, { "epoch": 1.0469774414362345, "grad_norm": 5.4887241605646935, "learning_rate": 2.4311244034249975e-06, "loss": 0.6017, "step": 14492 }, { "epoch": 1.047049686636444, "grad_norm": 6.224850523352825, "learning_rate": 2.430832012536595e-06, "loss": 0.6248, "step": 14493 }, { "epoch": 1.0471219318366536, "grad_norm": 8.350773173644237, "learning_rate": 2.430539622595048e-06, "loss": 0.6374, "step": 14494 }, { "epoch": 1.0471941770368631, "grad_norm": 6.954406208768131, "learning_rate": 2.43024723360436e-06, "loss": 0.6496, "step": 14495 }, { "epoch": 1.0472664222370727, "grad_norm": 5.18735862447367, "learning_rate": 2.4299548455685336e-06, "loss": 0.6813, "step": 14496 }, { "epoch": 1.0473386674372822, "grad_norm": 5.792193967384945, "learning_rate": 2.4296624584915707e-06, "loss": 0.5937, "step": 14497 }, { "epoch": 1.0474109126374918, "grad_norm": 8.083160188793446, "learning_rate": 2.429370072377474e-06, "loss": 0.7188, "step": 14498 }, { "epoch": 1.047483157837701, "grad_norm": 6.469725309551765, "learning_rate": 2.429077687230246e-06, "loss": 0.6639, "step": 14499 }, { "epoch": 1.0475554030379106, "grad_norm": 6.042178358135701, "learning_rate": 2.42878530305389e-06, "loss": 0.7263, "step": 14500 }, { "epoch": 1.0476276482381202, "grad_norm": 6.128153397933469, "learning_rate": 2.4284929198524078e-06, "loss": 0.6124, "step": 14501 }, { "epoch": 1.0476998934383297, "grad_norm": 6.396186777428255, "learning_rate": 2.4282005376298012e-06, "loss": 0.6551, "step": 14502 }, { "epoch": 1.0477721386385392, "grad_norm": 6.56576602136153, "learning_rate": 2.427908156390075e-06, "loss": 0.6761, "step": 14503 }, { "epoch": 1.0478443838387488, "grad_norm": 6.4169371246543765, "learning_rate": 2.4276157761372294e-06, "loss": 0.6606, "step": 14504 }, { "epoch": 1.0479166290389583, "grad_norm": 6.506896834966606, "learning_rate": 2.4273233968752673e-06, "loss": 0.6077, "step": 14505 }, { "epoch": 1.0479888742391676, "grad_norm": 7.347247390687937, "learning_rate": 2.4270310186081916e-06, "loss": 0.7181, "step": 14506 }, { "epoch": 1.0480611194393772, "grad_norm": 6.755039170813057, "learning_rate": 2.4267386413400044e-06, "loss": 0.6254, "step": 14507 }, { "epoch": 1.0481333646395867, "grad_norm": 5.64233947228089, "learning_rate": 2.4264462650747085e-06, "loss": 0.6438, "step": 14508 }, { "epoch": 1.0482056098397963, "grad_norm": 7.064483904066209, "learning_rate": 2.426153889816306e-06, "loss": 0.7013, "step": 14509 }, { "epoch": 1.0482778550400058, "grad_norm": 6.194709661913959, "learning_rate": 2.4258615155688e-06, "loss": 0.6467, "step": 14510 }, { "epoch": 1.0483501002402154, "grad_norm": 6.780387014860513, "learning_rate": 2.425569142336192e-06, "loss": 0.6442, "step": 14511 }, { "epoch": 1.048422345440425, "grad_norm": 6.7107527577525605, "learning_rate": 2.425276770122484e-06, "loss": 0.5727, "step": 14512 }, { "epoch": 1.0484945906406342, "grad_norm": 6.730505874008904, "learning_rate": 2.4249843989316795e-06, "loss": 0.6734, "step": 14513 }, { "epoch": 1.0485668358408438, "grad_norm": 6.353832031777132, "learning_rate": 2.4246920287677813e-06, "loss": 0.6835, "step": 14514 }, { "epoch": 1.0486390810410533, "grad_norm": 6.185583260865511, "learning_rate": 2.424399659634789e-06, "loss": 0.6981, "step": 14515 }, { "epoch": 1.0487113262412628, "grad_norm": 5.502730298766858, "learning_rate": 2.4241072915367073e-06, "loss": 0.6797, "step": 14516 }, { "epoch": 1.0487835714414724, "grad_norm": 7.5006914455845495, "learning_rate": 2.4238149244775393e-06, "loss": 0.7091, "step": 14517 }, { "epoch": 1.048855816641682, "grad_norm": 6.767826699980342, "learning_rate": 2.423522558461284e-06, "loss": 0.7302, "step": 14518 }, { "epoch": 1.0489280618418915, "grad_norm": 6.099897990390009, "learning_rate": 2.423230193491946e-06, "loss": 0.6459, "step": 14519 }, { "epoch": 1.0490003070421008, "grad_norm": 6.918190692462545, "learning_rate": 2.422937829573527e-06, "loss": 0.6578, "step": 14520 }, { "epoch": 1.0490725522423103, "grad_norm": 6.67063216843886, "learning_rate": 2.4226454667100295e-06, "loss": 0.6208, "step": 14521 }, { "epoch": 1.0491447974425199, "grad_norm": 6.280222338974403, "learning_rate": 2.4223531049054554e-06, "loss": 0.6584, "step": 14522 }, { "epoch": 1.0492170426427294, "grad_norm": 6.093425096506749, "learning_rate": 2.422060744163807e-06, "loss": 0.6552, "step": 14523 }, { "epoch": 1.049289287842939, "grad_norm": 7.2582232438099075, "learning_rate": 2.421768384489086e-06, "loss": 0.623, "step": 14524 }, { "epoch": 1.0493615330431485, "grad_norm": 7.681765022114415, "learning_rate": 2.421476025885296e-06, "loss": 0.7174, "step": 14525 }, { "epoch": 1.049433778243358, "grad_norm": 8.43063278484077, "learning_rate": 2.4211836683564372e-06, "loss": 0.6383, "step": 14526 }, { "epoch": 1.0495060234435676, "grad_norm": 6.4662090231339855, "learning_rate": 2.420891311906513e-06, "loss": 0.6592, "step": 14527 }, { "epoch": 1.049578268643777, "grad_norm": 9.013065179676971, "learning_rate": 2.420598956539527e-06, "loss": 0.6365, "step": 14528 }, { "epoch": 1.0496505138439864, "grad_norm": 8.442989117995003, "learning_rate": 2.4203066022594775e-06, "loss": 0.7166, "step": 14529 }, { "epoch": 1.049722759044196, "grad_norm": 6.681305199761741, "learning_rate": 2.4200142490703694e-06, "loss": 0.6413, "step": 14530 }, { "epoch": 1.0497950042444055, "grad_norm": 6.65384483761433, "learning_rate": 2.4197218969762043e-06, "loss": 0.6286, "step": 14531 }, { "epoch": 1.049867249444615, "grad_norm": 7.547161088974429, "learning_rate": 2.419429545980985e-06, "loss": 0.711, "step": 14532 }, { "epoch": 1.0499394946448246, "grad_norm": 7.936828104252058, "learning_rate": 2.4191371960887115e-06, "loss": 0.7339, "step": 14533 }, { "epoch": 1.0500117398450342, "grad_norm": 8.316198917096136, "learning_rate": 2.418844847303387e-06, "loss": 0.6336, "step": 14534 }, { "epoch": 1.0500839850452435, "grad_norm": 6.657533906856747, "learning_rate": 2.418552499629014e-06, "loss": 0.6161, "step": 14535 }, { "epoch": 1.050156230245453, "grad_norm": 7.633450437533173, "learning_rate": 2.418260153069594e-06, "loss": 0.6736, "step": 14536 }, { "epoch": 1.0502284754456626, "grad_norm": 5.855698390781311, "learning_rate": 2.4179678076291284e-06, "loss": 0.6557, "step": 14537 }, { "epoch": 1.050300720645872, "grad_norm": 8.72906474658641, "learning_rate": 2.4176754633116202e-06, "loss": 0.6088, "step": 14538 }, { "epoch": 1.0503729658460816, "grad_norm": 6.254214887371705, "learning_rate": 2.417383120121071e-06, "loss": 0.6534, "step": 14539 }, { "epoch": 1.0504452110462912, "grad_norm": 6.124985052596557, "learning_rate": 2.4170907780614826e-06, "loss": 0.6304, "step": 14540 }, { "epoch": 1.0505174562465007, "grad_norm": 6.941160593746812, "learning_rate": 2.4167984371368573e-06, "loss": 0.7275, "step": 14541 }, { "epoch": 1.05058970144671, "grad_norm": 7.726304037401657, "learning_rate": 2.4165060973511977e-06, "loss": 0.7321, "step": 14542 }, { "epoch": 1.0506619466469196, "grad_norm": 5.409560408298918, "learning_rate": 2.416213758708503e-06, "loss": 0.6234, "step": 14543 }, { "epoch": 1.0507341918471291, "grad_norm": 8.394177698755325, "learning_rate": 2.4159214212127778e-06, "loss": 0.6756, "step": 14544 }, { "epoch": 1.0508064370473387, "grad_norm": 7.039619855587491, "learning_rate": 2.4156290848680227e-06, "loss": 0.7404, "step": 14545 }, { "epoch": 1.0508786822475482, "grad_norm": 5.855070358883034, "learning_rate": 2.415336749678241e-06, "loss": 0.6358, "step": 14546 }, { "epoch": 1.0509509274477578, "grad_norm": 8.062251597281685, "learning_rate": 2.4150444156474326e-06, "loss": 0.6932, "step": 14547 }, { "epoch": 1.0510231726479673, "grad_norm": 7.41531679820717, "learning_rate": 2.4147520827796e-06, "loss": 0.764, "step": 14548 }, { "epoch": 1.0510954178481766, "grad_norm": 6.39051367329301, "learning_rate": 2.414459751078745e-06, "loss": 0.6854, "step": 14549 }, { "epoch": 1.0511676630483862, "grad_norm": 7.496624250460195, "learning_rate": 2.4141674205488707e-06, "loss": 0.6771, "step": 14550 }, { "epoch": 1.0512399082485957, "grad_norm": 6.052469351752741, "learning_rate": 2.4138750911939767e-06, "loss": 0.641, "step": 14551 }, { "epoch": 1.0513121534488052, "grad_norm": 6.59788838910362, "learning_rate": 2.4135827630180663e-06, "loss": 0.6471, "step": 14552 }, { "epoch": 1.0513843986490148, "grad_norm": 6.274464752348745, "learning_rate": 2.413290436025141e-06, "loss": 0.699, "step": 14553 }, { "epoch": 1.0514566438492243, "grad_norm": 6.458746357498338, "learning_rate": 2.4129981102192017e-06, "loss": 0.6723, "step": 14554 }, { "epoch": 1.0515288890494339, "grad_norm": 8.862630471370807, "learning_rate": 2.412705785604251e-06, "loss": 0.7241, "step": 14555 }, { "epoch": 1.0516011342496432, "grad_norm": 6.012907132582258, "learning_rate": 2.41241346218429e-06, "loss": 0.6682, "step": 14556 }, { "epoch": 1.0516733794498527, "grad_norm": 6.101710045431168, "learning_rate": 2.4121211399633215e-06, "loss": 0.7036, "step": 14557 }, { "epoch": 1.0517456246500623, "grad_norm": 7.509608154233279, "learning_rate": 2.411828818945346e-06, "loss": 0.7174, "step": 14558 }, { "epoch": 1.0518178698502718, "grad_norm": 6.443007696336302, "learning_rate": 2.411536499134365e-06, "loss": 0.6894, "step": 14559 }, { "epoch": 1.0518901150504814, "grad_norm": 6.315674238659494, "learning_rate": 2.411244180534382e-06, "loss": 0.6852, "step": 14560 }, { "epoch": 1.051962360250691, "grad_norm": 6.8857448882467045, "learning_rate": 2.410951863149396e-06, "loss": 0.6485, "step": 14561 }, { "epoch": 1.0520346054509004, "grad_norm": 7.236168657571513, "learning_rate": 2.41065954698341e-06, "loss": 0.6546, "step": 14562 }, { "epoch": 1.0521068506511098, "grad_norm": 6.5850402293211046, "learning_rate": 2.410367232040425e-06, "loss": 0.6546, "step": 14563 }, { "epoch": 1.0521790958513193, "grad_norm": 6.934049830950558, "learning_rate": 2.4100749183244444e-06, "loss": 0.6913, "step": 14564 }, { "epoch": 1.0522513410515288, "grad_norm": 6.695917553976336, "learning_rate": 2.4097826058394674e-06, "loss": 0.7087, "step": 14565 }, { "epoch": 1.0523235862517384, "grad_norm": 6.531737678974378, "learning_rate": 2.4094902945894965e-06, "loss": 0.6861, "step": 14566 }, { "epoch": 1.052395831451948, "grad_norm": 6.090035113153565, "learning_rate": 2.4091979845785337e-06, "loss": 0.705, "step": 14567 }, { "epoch": 1.0524680766521575, "grad_norm": 8.562281696293324, "learning_rate": 2.4089056758105795e-06, "loss": 0.6629, "step": 14568 }, { "epoch": 1.052540321852367, "grad_norm": 7.987349282814282, "learning_rate": 2.4086133682896356e-06, "loss": 0.7369, "step": 14569 }, { "epoch": 1.0526125670525763, "grad_norm": 5.873075778840581, "learning_rate": 2.408321062019704e-06, "loss": 0.608, "step": 14570 }, { "epoch": 1.0526848122527859, "grad_norm": 6.436221097554182, "learning_rate": 2.4080287570047863e-06, "loss": 0.6139, "step": 14571 }, { "epoch": 1.0527570574529954, "grad_norm": 5.579663831093911, "learning_rate": 2.407736453248883e-06, "loss": 0.6999, "step": 14572 }, { "epoch": 1.052829302653205, "grad_norm": 7.080829701487998, "learning_rate": 2.4074441507559963e-06, "loss": 0.663, "step": 14573 }, { "epoch": 1.0529015478534145, "grad_norm": 5.324595605243181, "learning_rate": 2.407151849530128e-06, "loss": 0.5862, "step": 14574 }, { "epoch": 1.052973793053624, "grad_norm": 5.660464239593326, "learning_rate": 2.406859549575278e-06, "loss": 0.5747, "step": 14575 }, { "epoch": 1.0530460382538336, "grad_norm": 6.1633260115309545, "learning_rate": 2.406567250895448e-06, "loss": 0.6396, "step": 14576 }, { "epoch": 1.0531182834540431, "grad_norm": 7.580421084179728, "learning_rate": 2.4062749534946395e-06, "loss": 0.7372, "step": 14577 }, { "epoch": 1.0531905286542524, "grad_norm": 8.177590000143638, "learning_rate": 2.405982657376856e-06, "loss": 0.6402, "step": 14578 }, { "epoch": 1.053262773854462, "grad_norm": 6.171909612244718, "learning_rate": 2.4056903625460954e-06, "loss": 0.6663, "step": 14579 }, { "epoch": 1.0533350190546715, "grad_norm": 7.090276329778494, "learning_rate": 2.4053980690063603e-06, "loss": 0.6939, "step": 14580 }, { "epoch": 1.053407264254881, "grad_norm": 7.815068913581213, "learning_rate": 2.4051057767616527e-06, "loss": 0.6527, "step": 14581 }, { "epoch": 1.0534795094550906, "grad_norm": 7.307872644894282, "learning_rate": 2.4048134858159743e-06, "loss": 0.6712, "step": 14582 }, { "epoch": 1.0535517546553002, "grad_norm": 5.926371857412762, "learning_rate": 2.404521196173324e-06, "loss": 0.663, "step": 14583 }, { "epoch": 1.0536239998555097, "grad_norm": 8.192878645387522, "learning_rate": 2.4042289078377047e-06, "loss": 0.6292, "step": 14584 }, { "epoch": 1.053696245055719, "grad_norm": 6.617337095693184, "learning_rate": 2.4039366208131176e-06, "loss": 0.6747, "step": 14585 }, { "epoch": 1.0537684902559286, "grad_norm": 7.395974801729808, "learning_rate": 2.4036443351035635e-06, "loss": 0.7052, "step": 14586 }, { "epoch": 1.053840735456138, "grad_norm": 6.819495852454243, "learning_rate": 2.4033520507130434e-06, "loss": 0.6598, "step": 14587 }, { "epoch": 1.0539129806563476, "grad_norm": 6.711692332403486, "learning_rate": 2.4030597676455587e-06, "loss": 0.6898, "step": 14588 }, { "epoch": 1.0539852258565572, "grad_norm": 7.37942914993141, "learning_rate": 2.4027674859051112e-06, "loss": 0.6709, "step": 14589 }, { "epoch": 1.0540574710567667, "grad_norm": 6.515381440291888, "learning_rate": 2.4024752054957006e-06, "loss": 0.6091, "step": 14590 }, { "epoch": 1.0541297162569763, "grad_norm": 6.389840000877675, "learning_rate": 2.4021829264213288e-06, "loss": 0.691, "step": 14591 }, { "epoch": 1.0542019614571856, "grad_norm": 7.055861602446639, "learning_rate": 2.4018906486859974e-06, "loss": 0.7513, "step": 14592 }, { "epoch": 1.0542742066573951, "grad_norm": 5.673349740482428, "learning_rate": 2.401598372293706e-06, "loss": 0.6891, "step": 14593 }, { "epoch": 1.0543464518576047, "grad_norm": 6.967441589964735, "learning_rate": 2.4013060972484566e-06, "loss": 0.6614, "step": 14594 }, { "epoch": 1.0544186970578142, "grad_norm": 5.911997748368058, "learning_rate": 2.4010138235542503e-06, "loss": 0.6314, "step": 14595 }, { "epoch": 1.0544909422580238, "grad_norm": 7.345257024143704, "learning_rate": 2.400721551215088e-06, "loss": 0.7083, "step": 14596 }, { "epoch": 1.0545631874582333, "grad_norm": 7.296449963363766, "learning_rate": 2.4004292802349706e-06, "loss": 0.7303, "step": 14597 }, { "epoch": 1.0546354326584428, "grad_norm": 6.38856170848533, "learning_rate": 2.4001370106178986e-06, "loss": 0.6706, "step": 14598 }, { "epoch": 1.0547076778586522, "grad_norm": 7.834276778309543, "learning_rate": 2.399844742367874e-06, "loss": 0.6963, "step": 14599 }, { "epoch": 1.0547799230588617, "grad_norm": 5.761922003830202, "learning_rate": 2.3995524754888968e-06, "loss": 0.628, "step": 14600 }, { "epoch": 1.0548521682590712, "grad_norm": 8.126533246258152, "learning_rate": 2.399260209984968e-06, "loss": 0.7221, "step": 14601 }, { "epoch": 1.0549244134592808, "grad_norm": 5.868350222360233, "learning_rate": 2.3989679458600886e-06, "loss": 0.5969, "step": 14602 }, { "epoch": 1.0549966586594903, "grad_norm": 6.971989310715448, "learning_rate": 2.398675683118261e-06, "loss": 0.6468, "step": 14603 }, { "epoch": 1.0550689038596999, "grad_norm": 6.629213828401224, "learning_rate": 2.3983834217634834e-06, "loss": 0.6163, "step": 14604 }, { "epoch": 1.0551411490599094, "grad_norm": 5.815422717906193, "learning_rate": 2.3980911617997575e-06, "loss": 0.6853, "step": 14605 }, { "epoch": 1.055213394260119, "grad_norm": 6.511344985898891, "learning_rate": 2.3977989032310847e-06, "loss": 0.6677, "step": 14606 }, { "epoch": 1.0552856394603283, "grad_norm": 6.386206855441521, "learning_rate": 2.3975066460614663e-06, "loss": 0.6174, "step": 14607 }, { "epoch": 1.0553578846605378, "grad_norm": 5.7017686877154965, "learning_rate": 2.3972143902949017e-06, "loss": 0.6663, "step": 14608 }, { "epoch": 1.0554301298607474, "grad_norm": 5.681210236363011, "learning_rate": 2.3969221359353923e-06, "loss": 0.6383, "step": 14609 }, { "epoch": 1.055502375060957, "grad_norm": 5.867342889711596, "learning_rate": 2.3966298829869393e-06, "loss": 0.6945, "step": 14610 }, { "epoch": 1.0555746202611664, "grad_norm": 5.544724254304664, "learning_rate": 2.3963376314535426e-06, "loss": 0.6129, "step": 14611 }, { "epoch": 1.055646865461376, "grad_norm": 6.262026088546947, "learning_rate": 2.396045381339203e-06, "loss": 0.5894, "step": 14612 }, { "epoch": 1.0557191106615855, "grad_norm": 6.264256506275586, "learning_rate": 2.3957531326479216e-06, "loss": 0.6917, "step": 14613 }, { "epoch": 1.0557913558617948, "grad_norm": 6.828916595454639, "learning_rate": 2.3954608853836992e-06, "loss": 0.6826, "step": 14614 }, { "epoch": 1.0558636010620044, "grad_norm": 6.274529804987911, "learning_rate": 2.3951686395505356e-06, "loss": 0.7871, "step": 14615 }, { "epoch": 1.055935846262214, "grad_norm": 6.187301478428293, "learning_rate": 2.3948763951524322e-06, "loss": 0.6609, "step": 14616 }, { "epoch": 1.0560080914624235, "grad_norm": 5.338374775084437, "learning_rate": 2.39458415219339e-06, "loss": 0.5847, "step": 14617 }, { "epoch": 1.056080336662633, "grad_norm": 6.067541484148686, "learning_rate": 2.394291910677408e-06, "loss": 0.5903, "step": 14618 }, { "epoch": 1.0561525818628426, "grad_norm": 6.448468095888465, "learning_rate": 2.3939996706084874e-06, "loss": 0.685, "step": 14619 }, { "epoch": 1.056224827063052, "grad_norm": 8.335360420365896, "learning_rate": 2.3937074319906288e-06, "loss": 0.6957, "step": 14620 }, { "epoch": 1.0562970722632614, "grad_norm": 6.840969687988148, "learning_rate": 2.3934151948278346e-06, "loss": 0.6341, "step": 14621 }, { "epoch": 1.056369317463471, "grad_norm": 7.203840439490364, "learning_rate": 2.3931229591241026e-06, "loss": 0.6889, "step": 14622 }, { "epoch": 1.0564415626636805, "grad_norm": 6.904008182171585, "learning_rate": 2.392830724883434e-06, "loss": 0.7364, "step": 14623 }, { "epoch": 1.05651380786389, "grad_norm": 5.770750997715697, "learning_rate": 2.3925384921098304e-06, "loss": 0.6664, "step": 14624 }, { "epoch": 1.0565860530640996, "grad_norm": 6.202178806043782, "learning_rate": 2.392246260807291e-06, "loss": 0.7006, "step": 14625 }, { "epoch": 1.0566582982643091, "grad_norm": 5.758078166572878, "learning_rate": 2.391954030979816e-06, "loss": 0.5793, "step": 14626 }, { "epoch": 1.0567305434645187, "grad_norm": 6.190828295185447, "learning_rate": 2.3916618026314068e-06, "loss": 0.6831, "step": 14627 }, { "epoch": 1.056802788664728, "grad_norm": 6.431120442826932, "learning_rate": 2.3913695757660637e-06, "loss": 0.6696, "step": 14628 }, { "epoch": 1.0568750338649375, "grad_norm": 6.177588954085933, "learning_rate": 2.3910773503877866e-06, "loss": 0.6527, "step": 14629 }, { "epoch": 1.056947279065147, "grad_norm": 6.276075062854178, "learning_rate": 2.390785126500576e-06, "loss": 0.6303, "step": 14630 }, { "epoch": 1.0570195242653566, "grad_norm": 6.110454493224668, "learning_rate": 2.390492904108432e-06, "loss": 0.607, "step": 14631 }, { "epoch": 1.0570917694655662, "grad_norm": 6.627436639633269, "learning_rate": 2.390200683215356e-06, "loss": 0.6602, "step": 14632 }, { "epoch": 1.0571640146657757, "grad_norm": 7.752473744013025, "learning_rate": 2.389908463825347e-06, "loss": 0.6347, "step": 14633 }, { "epoch": 1.0572362598659852, "grad_norm": 6.842608909757632, "learning_rate": 2.3896162459424045e-06, "loss": 0.6621, "step": 14634 }, { "epoch": 1.0573085050661946, "grad_norm": 8.77848712721479, "learning_rate": 2.3893240295705313e-06, "loss": 0.6827, "step": 14635 }, { "epoch": 1.057380750266404, "grad_norm": 6.745350967449527, "learning_rate": 2.3890318147137255e-06, "loss": 0.7297, "step": 14636 }, { "epoch": 1.0574529954666136, "grad_norm": 5.90076979448279, "learning_rate": 2.3887396013759883e-06, "loss": 0.6567, "step": 14637 }, { "epoch": 1.0575252406668232, "grad_norm": 5.903584842734229, "learning_rate": 2.388447389561319e-06, "loss": 0.631, "step": 14638 }, { "epoch": 1.0575974858670327, "grad_norm": 6.061492521595277, "learning_rate": 2.388155179273719e-06, "loss": 0.6173, "step": 14639 }, { "epoch": 1.0576697310672423, "grad_norm": 7.619843036707038, "learning_rate": 2.3878629705171875e-06, "loss": 0.6889, "step": 14640 }, { "epoch": 1.0577419762674518, "grad_norm": 8.57962884558124, "learning_rate": 2.3875707632957248e-06, "loss": 0.7337, "step": 14641 }, { "epoch": 1.0578142214676611, "grad_norm": 6.863319600952896, "learning_rate": 2.3872785576133315e-06, "loss": 0.6498, "step": 14642 }, { "epoch": 1.0578864666678707, "grad_norm": 5.5153476548099825, "learning_rate": 2.386986353474007e-06, "loss": 0.6858, "step": 14643 }, { "epoch": 1.0579587118680802, "grad_norm": 6.024135999342496, "learning_rate": 2.386694150881751e-06, "loss": 0.6804, "step": 14644 }, { "epoch": 1.0580309570682898, "grad_norm": 6.171802066420335, "learning_rate": 2.386401949840564e-06, "loss": 0.6717, "step": 14645 }, { "epoch": 1.0581032022684993, "grad_norm": 6.909139865720047, "learning_rate": 2.3861097503544476e-06, "loss": 0.687, "step": 14646 }, { "epoch": 1.0581754474687088, "grad_norm": 7.925431816779567, "learning_rate": 2.3858175524273995e-06, "loss": 0.7599, "step": 14647 }, { "epoch": 1.0582476926689184, "grad_norm": 6.192809014000571, "learning_rate": 2.3855253560634194e-06, "loss": 0.6981, "step": 14648 }, { "epoch": 1.058319937869128, "grad_norm": 6.056697310078175, "learning_rate": 2.38523316126651e-06, "loss": 0.633, "step": 14649 }, { "epoch": 1.0583921830693372, "grad_norm": 6.535411914081526, "learning_rate": 2.3849409680406684e-06, "loss": 0.6709, "step": 14650 }, { "epoch": 1.0584644282695468, "grad_norm": 5.984617542102765, "learning_rate": 2.3846487763898955e-06, "loss": 0.6633, "step": 14651 }, { "epoch": 1.0585366734697563, "grad_norm": 6.32065863303975, "learning_rate": 2.3843565863181916e-06, "loss": 0.6291, "step": 14652 }, { "epoch": 1.0586089186699659, "grad_norm": 6.418562208658261, "learning_rate": 2.384064397829557e-06, "loss": 0.7634, "step": 14653 }, { "epoch": 1.0586811638701754, "grad_norm": 5.799830585504774, "learning_rate": 2.38377221092799e-06, "loss": 0.7291, "step": 14654 }, { "epoch": 1.058753409070385, "grad_norm": 6.222750962967997, "learning_rate": 2.383480025617491e-06, "loss": 0.6536, "step": 14655 }, { "epoch": 1.0588256542705945, "grad_norm": 7.739304576455719, "learning_rate": 2.3831878419020598e-06, "loss": 0.6603, "step": 14656 }, { "epoch": 1.0588978994708038, "grad_norm": 8.389277344636687, "learning_rate": 2.3828956597856973e-06, "loss": 0.7107, "step": 14657 }, { "epoch": 1.0589701446710134, "grad_norm": 5.664717769208379, "learning_rate": 2.3826034792724014e-06, "loss": 0.616, "step": 14658 }, { "epoch": 1.059042389871223, "grad_norm": 6.986096061284173, "learning_rate": 2.382311300366173e-06, "loss": 0.6973, "step": 14659 }, { "epoch": 1.0591146350714324, "grad_norm": 6.468536225992897, "learning_rate": 2.3820191230710125e-06, "loss": 0.6283, "step": 14660 }, { "epoch": 1.059186880271642, "grad_norm": 5.753911553799321, "learning_rate": 2.3817269473909176e-06, "loss": 0.6261, "step": 14661 }, { "epoch": 1.0592591254718515, "grad_norm": 6.943227242501546, "learning_rate": 2.3814347733298884e-06, "loss": 0.6964, "step": 14662 }, { "epoch": 1.059331370672061, "grad_norm": 6.629439683715324, "learning_rate": 2.3811426008919256e-06, "loss": 0.6414, "step": 14663 }, { "epoch": 1.0594036158722704, "grad_norm": 6.633248430536198, "learning_rate": 2.3808504300810296e-06, "loss": 0.6666, "step": 14664 }, { "epoch": 1.05947586107248, "grad_norm": 7.8227669354030205, "learning_rate": 2.3805582609011972e-06, "loss": 0.6557, "step": 14665 }, { "epoch": 1.0595481062726895, "grad_norm": 7.961140907990812, "learning_rate": 2.38026609335643e-06, "loss": 0.6345, "step": 14666 }, { "epoch": 1.059620351472899, "grad_norm": 8.299294444050227, "learning_rate": 2.379973927450727e-06, "loss": 0.7269, "step": 14667 }, { "epoch": 1.0596925966731086, "grad_norm": 6.359138053210133, "learning_rate": 2.3796817631880873e-06, "loss": 0.7041, "step": 14668 }, { "epoch": 1.059764841873318, "grad_norm": 7.351071867345397, "learning_rate": 2.379389600572511e-06, "loss": 0.7346, "step": 14669 }, { "epoch": 1.0598370870735276, "grad_norm": 7.806622789337269, "learning_rate": 2.3790974396079976e-06, "loss": 0.6743, "step": 14670 }, { "epoch": 1.059909332273737, "grad_norm": 7.684370691050795, "learning_rate": 2.3788052802985466e-06, "loss": 0.7778, "step": 14671 }, { "epoch": 1.0599815774739465, "grad_norm": 6.783233211943784, "learning_rate": 2.3785131226481565e-06, "loss": 0.5983, "step": 14672 }, { "epoch": 1.060053822674156, "grad_norm": 7.0624962190601055, "learning_rate": 2.3782209666608276e-06, "loss": 0.6475, "step": 14673 }, { "epoch": 1.0601260678743656, "grad_norm": 8.86705370562607, "learning_rate": 2.3779288123405607e-06, "loss": 0.7736, "step": 14674 }, { "epoch": 1.0601983130745751, "grad_norm": 6.298008667625842, "learning_rate": 2.377636659691352e-06, "loss": 0.6361, "step": 14675 }, { "epoch": 1.0602705582747847, "grad_norm": 6.305834886784205, "learning_rate": 2.3773445087172016e-06, "loss": 0.6784, "step": 14676 }, { "epoch": 1.0603428034749942, "grad_norm": 7.997386266979449, "learning_rate": 2.3770523594221106e-06, "loss": 0.6736, "step": 14677 }, { "epoch": 1.0604150486752038, "grad_norm": 7.462426737005583, "learning_rate": 2.3767602118100786e-06, "loss": 0.6789, "step": 14678 }, { "epoch": 1.060487293875413, "grad_norm": 6.601982870769034, "learning_rate": 2.376468065885102e-06, "loss": 0.6953, "step": 14679 }, { "epoch": 1.0605595390756226, "grad_norm": 6.058145119088477, "learning_rate": 2.376175921651182e-06, "loss": 0.6932, "step": 14680 }, { "epoch": 1.0606317842758322, "grad_norm": 5.442847067298332, "learning_rate": 2.3758837791123175e-06, "loss": 0.677, "step": 14681 }, { "epoch": 1.0607040294760417, "grad_norm": 6.037894744289208, "learning_rate": 2.3755916382725084e-06, "loss": 0.5829, "step": 14682 }, { "epoch": 1.0607762746762512, "grad_norm": 5.7903818951304995, "learning_rate": 2.375299499135752e-06, "loss": 0.6753, "step": 14683 }, { "epoch": 1.0608485198764608, "grad_norm": 6.596154520291835, "learning_rate": 2.3750073617060494e-06, "loss": 0.6738, "step": 14684 }, { "epoch": 1.0609207650766703, "grad_norm": 5.996891488057063, "learning_rate": 2.374715225987399e-06, "loss": 0.7181, "step": 14685 }, { "epoch": 1.0609930102768796, "grad_norm": 7.310443556409903, "learning_rate": 2.3744230919837996e-06, "loss": 0.722, "step": 14686 }, { "epoch": 1.0610652554770892, "grad_norm": 5.8217670031373085, "learning_rate": 2.3741309596992503e-06, "loss": 0.6476, "step": 14687 }, { "epoch": 1.0611375006772987, "grad_norm": 5.999013183823444, "learning_rate": 2.3738388291377506e-06, "loss": 0.6078, "step": 14688 }, { "epoch": 1.0612097458775083, "grad_norm": 6.108682188278629, "learning_rate": 2.3735467003033007e-06, "loss": 0.6382, "step": 14689 }, { "epoch": 1.0612819910777178, "grad_norm": 6.850290178153211, "learning_rate": 2.373254573199897e-06, "loss": 0.6804, "step": 14690 }, { "epoch": 1.0613542362779274, "grad_norm": 5.767938909410823, "learning_rate": 2.372962447831539e-06, "loss": 0.6776, "step": 14691 }, { "epoch": 1.061426481478137, "grad_norm": 5.427554099763482, "learning_rate": 2.372670324202228e-06, "loss": 0.6301, "step": 14692 }, { "epoch": 1.0614987266783462, "grad_norm": 7.123368193499899, "learning_rate": 2.372378202315961e-06, "loss": 0.6171, "step": 14693 }, { "epoch": 1.0615709718785558, "grad_norm": 6.577230143446653, "learning_rate": 2.3720860821767366e-06, "loss": 0.6873, "step": 14694 }, { "epoch": 1.0616432170787653, "grad_norm": 7.2807412686078985, "learning_rate": 2.3717939637885548e-06, "loss": 0.6188, "step": 14695 }, { "epoch": 1.0617154622789748, "grad_norm": 7.439190840660665, "learning_rate": 2.3715018471554146e-06, "loss": 0.6357, "step": 14696 }, { "epoch": 1.0617877074791844, "grad_norm": 7.556062086117846, "learning_rate": 2.3712097322813136e-06, "loss": 0.666, "step": 14697 }, { "epoch": 1.061859952679394, "grad_norm": 6.5818318775442615, "learning_rate": 2.3709176191702516e-06, "loss": 0.648, "step": 14698 }, { "epoch": 1.0619321978796035, "grad_norm": 7.130447446846259, "learning_rate": 2.3706255078262274e-06, "loss": 0.6603, "step": 14699 }, { "epoch": 1.0620044430798128, "grad_norm": 6.284324083544046, "learning_rate": 2.3703333982532394e-06, "loss": 0.7122, "step": 14700 }, { "epoch": 1.0620766882800223, "grad_norm": 5.995664619721298, "learning_rate": 2.370041290455286e-06, "loss": 0.5957, "step": 14701 }, { "epoch": 1.0621489334802319, "grad_norm": 5.779744050057899, "learning_rate": 2.369749184436367e-06, "loss": 0.7597, "step": 14702 }, { "epoch": 1.0622211786804414, "grad_norm": 7.022473635109679, "learning_rate": 2.3694570802004814e-06, "loss": 0.6333, "step": 14703 }, { "epoch": 1.062293423880651, "grad_norm": 6.260480409658415, "learning_rate": 2.3691649777516257e-06, "loss": 0.6042, "step": 14704 }, { "epoch": 1.0623656690808605, "grad_norm": 7.24793924443463, "learning_rate": 2.3688728770937997e-06, "loss": 0.6706, "step": 14705 }, { "epoch": 1.06243791428107, "grad_norm": 6.643595096364871, "learning_rate": 2.3685807782310037e-06, "loss": 0.6784, "step": 14706 }, { "epoch": 1.0625101594812794, "grad_norm": 6.133451586426448, "learning_rate": 2.3682886811672333e-06, "loss": 0.666, "step": 14707 }, { "epoch": 1.062582404681489, "grad_norm": 6.838090341806335, "learning_rate": 2.367996585906489e-06, "loss": 0.7305, "step": 14708 }, { "epoch": 1.0626546498816984, "grad_norm": 5.890578697285694, "learning_rate": 2.3677044924527688e-06, "loss": 0.6611, "step": 14709 }, { "epoch": 1.062726895081908, "grad_norm": 6.9088085837836966, "learning_rate": 2.3674124008100723e-06, "loss": 0.6139, "step": 14710 }, { "epoch": 1.0627991402821175, "grad_norm": 5.406272755834063, "learning_rate": 2.3671203109823964e-06, "loss": 0.6783, "step": 14711 }, { "epoch": 1.062871385482327, "grad_norm": 5.872183266547287, "learning_rate": 2.36682822297374e-06, "loss": 0.6579, "step": 14712 }, { "epoch": 1.0629436306825366, "grad_norm": 7.852869818868122, "learning_rate": 2.366536136788102e-06, "loss": 0.6893, "step": 14713 }, { "epoch": 1.063015875882746, "grad_norm": 7.381602806813612, "learning_rate": 2.366244052429481e-06, "loss": 0.6751, "step": 14714 }, { "epoch": 1.0630881210829555, "grad_norm": 5.667668571909937, "learning_rate": 2.3659519699018745e-06, "loss": 0.6796, "step": 14715 }, { "epoch": 1.063160366283165, "grad_norm": 6.3091358247177025, "learning_rate": 2.3656598892092817e-06, "loss": 0.7114, "step": 14716 }, { "epoch": 1.0632326114833746, "grad_norm": 5.5598981965988346, "learning_rate": 2.365367810355702e-06, "loss": 0.6494, "step": 14717 }, { "epoch": 1.063304856683584, "grad_norm": 6.645706350933062, "learning_rate": 2.3650757333451308e-06, "loss": 0.6442, "step": 14718 }, { "epoch": 1.0633771018837936, "grad_norm": 7.64807439766295, "learning_rate": 2.364783658181568e-06, "loss": 0.6579, "step": 14719 }, { "epoch": 1.0634493470840032, "grad_norm": 6.835261127252885, "learning_rate": 2.364491584869012e-06, "loss": 0.7248, "step": 14720 }, { "epoch": 1.0635215922842125, "grad_norm": 6.302188117887484, "learning_rate": 2.3641995134114623e-06, "loss": 0.6562, "step": 14721 }, { "epoch": 1.063593837484422, "grad_norm": 6.216996295497724, "learning_rate": 2.363907443812915e-06, "loss": 0.5578, "step": 14722 }, { "epoch": 1.0636660826846316, "grad_norm": 5.810612854142208, "learning_rate": 2.363615376077369e-06, "loss": 0.7069, "step": 14723 }, { "epoch": 1.0637383278848411, "grad_norm": 5.708627215243363, "learning_rate": 2.363323310208823e-06, "loss": 0.6186, "step": 14724 }, { "epoch": 1.0638105730850507, "grad_norm": 5.969751723348948, "learning_rate": 2.363031246211274e-06, "loss": 0.6752, "step": 14725 }, { "epoch": 1.0638828182852602, "grad_norm": 6.292648373575851, "learning_rate": 2.3627391840887213e-06, "loss": 0.7358, "step": 14726 }, { "epoch": 1.0639550634854698, "grad_norm": 6.313413346188731, "learning_rate": 2.3624471238451622e-06, "loss": 0.6427, "step": 14727 }, { "epoch": 1.0640273086856793, "grad_norm": 6.636439397814115, "learning_rate": 2.362155065484596e-06, "loss": 0.6179, "step": 14728 }, { "epoch": 1.0640995538858886, "grad_norm": 7.629745757540485, "learning_rate": 2.3618630090110192e-06, "loss": 0.6179, "step": 14729 }, { "epoch": 1.0641717990860982, "grad_norm": 6.346175308013407, "learning_rate": 2.361570954428431e-06, "loss": 0.6897, "step": 14730 }, { "epoch": 1.0642440442863077, "grad_norm": 8.066482861318997, "learning_rate": 2.3612789017408296e-06, "loss": 0.6673, "step": 14731 }, { "epoch": 1.0643162894865172, "grad_norm": 7.367346461533986, "learning_rate": 2.3609868509522114e-06, "loss": 0.6963, "step": 14732 }, { "epoch": 1.0643885346867268, "grad_norm": 5.806499378117264, "learning_rate": 2.3606948020665748e-06, "loss": 0.6675, "step": 14733 }, { "epoch": 1.0644607798869363, "grad_norm": 7.187949257783523, "learning_rate": 2.3604027550879184e-06, "loss": 0.6041, "step": 14734 }, { "epoch": 1.0645330250871459, "grad_norm": 6.830195691277985, "learning_rate": 2.3601107100202413e-06, "loss": 0.6163, "step": 14735 }, { "epoch": 1.0646052702873552, "grad_norm": 7.063090071882518, "learning_rate": 2.3598186668675388e-06, "loss": 0.703, "step": 14736 }, { "epoch": 1.0646775154875647, "grad_norm": 6.372373451242449, "learning_rate": 2.3595266256338097e-06, "loss": 0.6544, "step": 14737 }, { "epoch": 1.0647497606877743, "grad_norm": 6.27175200398285, "learning_rate": 2.359234586323052e-06, "loss": 0.6476, "step": 14738 }, { "epoch": 1.0648220058879838, "grad_norm": 7.333442629375363, "learning_rate": 2.3589425489392644e-06, "loss": 0.6881, "step": 14739 }, { "epoch": 1.0648942510881934, "grad_norm": 7.452755964887618, "learning_rate": 2.3586505134864433e-06, "loss": 0.6126, "step": 14740 }, { "epoch": 1.064966496288403, "grad_norm": 10.132759041156206, "learning_rate": 2.3583584799685864e-06, "loss": 0.5863, "step": 14741 }, { "epoch": 1.0650387414886124, "grad_norm": 5.396669929288208, "learning_rate": 2.358066448389693e-06, "loss": 0.6253, "step": 14742 }, { "epoch": 1.0651109866888218, "grad_norm": 6.047829722656553, "learning_rate": 2.3577744187537592e-06, "loss": 0.6305, "step": 14743 }, { "epoch": 1.0651832318890313, "grad_norm": 6.686865375389576, "learning_rate": 2.357482391064783e-06, "loss": 0.6543, "step": 14744 }, { "epoch": 1.0652554770892408, "grad_norm": 6.75106407538118, "learning_rate": 2.3571903653267618e-06, "loss": 0.6331, "step": 14745 }, { "epoch": 1.0653277222894504, "grad_norm": 7.809352637507373, "learning_rate": 2.3568983415436953e-06, "loss": 0.7548, "step": 14746 }, { "epoch": 1.06539996748966, "grad_norm": 5.597255524735287, "learning_rate": 2.3566063197195775e-06, "loss": 0.6271, "step": 14747 }, { "epoch": 1.0654722126898695, "grad_norm": 6.02419077397132, "learning_rate": 2.3563142998584083e-06, "loss": 0.7333, "step": 14748 }, { "epoch": 1.065544457890079, "grad_norm": 7.498335590055553, "learning_rate": 2.356022281964186e-06, "loss": 0.657, "step": 14749 }, { "epoch": 1.0656167030902886, "grad_norm": 5.507914310894129, "learning_rate": 2.355730266040906e-06, "loss": 0.6718, "step": 14750 }, { "epoch": 1.0656889482904979, "grad_norm": 7.383663972063175, "learning_rate": 2.355438252092566e-06, "loss": 0.6865, "step": 14751 }, { "epoch": 1.0657611934907074, "grad_norm": 5.639411267334443, "learning_rate": 2.3551462401231647e-06, "loss": 0.6648, "step": 14752 }, { "epoch": 1.065833438690917, "grad_norm": 7.0055939257454405, "learning_rate": 2.354854230136699e-06, "loss": 0.7173, "step": 14753 }, { "epoch": 1.0659056838911265, "grad_norm": 7.0399354992425955, "learning_rate": 2.3545622221371662e-06, "loss": 0.6717, "step": 14754 }, { "epoch": 1.065977929091336, "grad_norm": 6.842085962615803, "learning_rate": 2.354270216128563e-06, "loss": 0.6938, "step": 14755 }, { "epoch": 1.0660501742915456, "grad_norm": 7.437019653195861, "learning_rate": 2.3539782121148886e-06, "loss": 0.7046, "step": 14756 }, { "epoch": 1.0661224194917551, "grad_norm": 5.412160652217841, "learning_rate": 2.3536862101001377e-06, "loss": 0.5941, "step": 14757 }, { "epoch": 1.0661946646919644, "grad_norm": 6.043583568028988, "learning_rate": 2.3533942100883097e-06, "loss": 0.6408, "step": 14758 }, { "epoch": 1.066266909892174, "grad_norm": 6.55679305657633, "learning_rate": 2.353102212083401e-06, "loss": 0.7218, "step": 14759 }, { "epoch": 1.0663391550923835, "grad_norm": 6.251616612214258, "learning_rate": 2.35281021608941e-06, "loss": 0.5738, "step": 14760 }, { "epoch": 1.066411400292593, "grad_norm": 6.7225708498897285, "learning_rate": 2.3525182221103313e-06, "loss": 0.7068, "step": 14761 }, { "epoch": 1.0664836454928026, "grad_norm": 7.957034605884172, "learning_rate": 2.352226230150164e-06, "loss": 0.6033, "step": 14762 }, { "epoch": 1.0665558906930122, "grad_norm": 8.053996014814052, "learning_rate": 2.3519342402129047e-06, "loss": 0.7202, "step": 14763 }, { "epoch": 1.0666281358932217, "grad_norm": 7.010757898997752, "learning_rate": 2.351642252302552e-06, "loss": 0.8343, "step": 14764 }, { "epoch": 1.066700381093431, "grad_norm": 6.248862506351937, "learning_rate": 2.351350266423101e-06, "loss": 0.685, "step": 14765 }, { "epoch": 1.0667726262936406, "grad_norm": 7.846032771734194, "learning_rate": 2.351058282578549e-06, "loss": 0.7023, "step": 14766 }, { "epoch": 1.06684487149385, "grad_norm": 7.810199124072703, "learning_rate": 2.3507663007728936e-06, "loss": 0.7347, "step": 14767 }, { "epoch": 1.0669171166940596, "grad_norm": 6.397418824873795, "learning_rate": 2.3504743210101315e-06, "loss": 0.6972, "step": 14768 }, { "epoch": 1.0669893618942692, "grad_norm": 6.539095752043591, "learning_rate": 2.35018234329426e-06, "loss": 0.6786, "step": 14769 }, { "epoch": 1.0670616070944787, "grad_norm": 5.867534683053517, "learning_rate": 2.349890367629276e-06, "loss": 0.7053, "step": 14770 }, { "epoch": 1.0671338522946883, "grad_norm": 6.014678167166539, "learning_rate": 2.3495983940191766e-06, "loss": 0.624, "step": 14771 }, { "epoch": 1.0672060974948976, "grad_norm": 5.856301602815105, "learning_rate": 2.349306422467958e-06, "loss": 0.6798, "step": 14772 }, { "epoch": 1.0672783426951071, "grad_norm": 6.984665286455634, "learning_rate": 2.349014452979617e-06, "loss": 0.7174, "step": 14773 }, { "epoch": 1.0673505878953167, "grad_norm": 6.501298334594436, "learning_rate": 2.348722485558153e-06, "loss": 0.6449, "step": 14774 }, { "epoch": 1.0674228330955262, "grad_norm": 8.914947866992348, "learning_rate": 2.3484305202075582e-06, "loss": 0.6796, "step": 14775 }, { "epoch": 1.0674950782957358, "grad_norm": 7.9986525831387345, "learning_rate": 2.3481385569318325e-06, "loss": 0.6767, "step": 14776 }, { "epoch": 1.0675673234959453, "grad_norm": 6.639918390714721, "learning_rate": 2.3478465957349725e-06, "loss": 0.6643, "step": 14777 }, { "epoch": 1.0676395686961548, "grad_norm": 7.529686223315155, "learning_rate": 2.3475546366209755e-06, "loss": 0.6579, "step": 14778 }, { "epoch": 1.0677118138963642, "grad_norm": 7.853250654181512, "learning_rate": 2.3472626795938355e-06, "loss": 0.6419, "step": 14779 }, { "epoch": 1.0677840590965737, "grad_norm": 6.632673893391279, "learning_rate": 2.346970724657551e-06, "loss": 0.6569, "step": 14780 }, { "epoch": 1.0678563042967832, "grad_norm": 7.206703339847177, "learning_rate": 2.3466787718161193e-06, "loss": 0.7699, "step": 14781 }, { "epoch": 1.0679285494969928, "grad_norm": 7.953143946057369, "learning_rate": 2.3463868210735356e-06, "loss": 0.6464, "step": 14782 }, { "epoch": 1.0680007946972023, "grad_norm": 6.662934721027941, "learning_rate": 2.346094872433797e-06, "loss": 0.6486, "step": 14783 }, { "epoch": 1.0680730398974119, "grad_norm": 8.737180201363238, "learning_rate": 2.3458029259009004e-06, "loss": 0.6249, "step": 14784 }, { "epoch": 1.0681452850976214, "grad_norm": 9.429953279834598, "learning_rate": 2.345510981478842e-06, "loss": 0.6774, "step": 14785 }, { "epoch": 1.0682175302978307, "grad_norm": 6.6939084729723675, "learning_rate": 2.345219039171618e-06, "loss": 0.6381, "step": 14786 }, { "epoch": 1.0682897754980403, "grad_norm": 7.510611084966982, "learning_rate": 2.3449270989832253e-06, "loss": 0.6632, "step": 14787 }, { "epoch": 1.0683620206982498, "grad_norm": 6.844576293671697, "learning_rate": 2.34463516091766e-06, "loss": 0.73, "step": 14788 }, { "epoch": 1.0684342658984594, "grad_norm": 5.873641486049402, "learning_rate": 2.344343224978919e-06, "loss": 0.6935, "step": 14789 }, { "epoch": 1.068506511098669, "grad_norm": 6.027468746172309, "learning_rate": 2.3440512911709983e-06, "loss": 0.7136, "step": 14790 }, { "epoch": 1.0685787562988784, "grad_norm": 6.9136572422722145, "learning_rate": 2.343759359497894e-06, "loss": 0.6903, "step": 14791 }, { "epoch": 1.068651001499088, "grad_norm": 6.322268641874473, "learning_rate": 2.343467429963604e-06, "loss": 0.6106, "step": 14792 }, { "epoch": 1.0687232466992973, "grad_norm": 7.941812860424864, "learning_rate": 2.3431755025721226e-06, "loss": 0.6729, "step": 14793 }, { "epoch": 1.0687954918995068, "grad_norm": 6.9701328701670535, "learning_rate": 2.3428835773274465e-06, "loss": 0.6599, "step": 14794 }, { "epoch": 1.0688677370997164, "grad_norm": 6.798304335163004, "learning_rate": 2.342591654233572e-06, "loss": 0.5982, "step": 14795 }, { "epoch": 1.068939982299926, "grad_norm": 6.183512009389459, "learning_rate": 2.3422997332944966e-06, "loss": 0.6894, "step": 14796 }, { "epoch": 1.0690122275001355, "grad_norm": 5.570407347654104, "learning_rate": 2.3420078145142146e-06, "loss": 0.6876, "step": 14797 }, { "epoch": 1.069084472700345, "grad_norm": 6.343078878447211, "learning_rate": 2.341715897896723e-06, "loss": 0.5852, "step": 14798 }, { "epoch": 1.0691567179005546, "grad_norm": 6.534737126054928, "learning_rate": 2.3414239834460183e-06, "loss": 0.7036, "step": 14799 }, { "epoch": 1.069228963100764, "grad_norm": 6.431674728975024, "learning_rate": 2.341132071166096e-06, "loss": 0.6026, "step": 14800 }, { "epoch": 1.0693012083009734, "grad_norm": 6.384028306119114, "learning_rate": 2.3408401610609516e-06, "loss": 0.6428, "step": 14801 }, { "epoch": 1.069373453501183, "grad_norm": 8.75068269518647, "learning_rate": 2.340548253134582e-06, "loss": 0.6787, "step": 14802 }, { "epoch": 1.0694456987013925, "grad_norm": 6.249096614399057, "learning_rate": 2.3402563473909845e-06, "loss": 0.7053, "step": 14803 }, { "epoch": 1.069517943901602, "grad_norm": 7.289811630431731, "learning_rate": 2.3399644438341516e-06, "loss": 0.6652, "step": 14804 }, { "epoch": 1.0695901891018116, "grad_norm": 6.186655931024992, "learning_rate": 2.3396725424680818e-06, "loss": 0.6363, "step": 14805 }, { "epoch": 1.0696624343020211, "grad_norm": 5.897496756002545, "learning_rate": 2.3393806432967713e-06, "loss": 0.6004, "step": 14806 }, { "epoch": 1.0697346795022307, "grad_norm": 5.662696494762621, "learning_rate": 2.3390887463242144e-06, "loss": 0.6479, "step": 14807 }, { "epoch": 1.06980692470244, "grad_norm": 6.266739363282508, "learning_rate": 2.3387968515544073e-06, "loss": 0.612, "step": 14808 }, { "epoch": 1.0698791699026495, "grad_norm": 6.071048961921397, "learning_rate": 2.3385049589913463e-06, "loss": 0.6972, "step": 14809 }, { "epoch": 1.069951415102859, "grad_norm": 6.45541791843944, "learning_rate": 2.3382130686390274e-06, "loss": 0.7264, "step": 14810 }, { "epoch": 1.0700236603030686, "grad_norm": 6.245588653162464, "learning_rate": 2.3379211805014455e-06, "loss": 0.6196, "step": 14811 }, { "epoch": 1.0700959055032782, "grad_norm": 5.549773670427358, "learning_rate": 2.337629294582597e-06, "loss": 0.6272, "step": 14812 }, { "epoch": 1.0701681507034877, "grad_norm": 6.745523734603679, "learning_rate": 2.337337410886478e-06, "loss": 0.7159, "step": 14813 }, { "epoch": 1.0702403959036972, "grad_norm": 6.835593462324813, "learning_rate": 2.3370455294170825e-06, "loss": 0.6444, "step": 14814 }, { "epoch": 1.0703126411039066, "grad_norm": 8.514291753428466, "learning_rate": 2.3367536501784076e-06, "loss": 0.6526, "step": 14815 }, { "epoch": 1.070384886304116, "grad_norm": 8.482980181445024, "learning_rate": 2.3364617731744487e-06, "loss": 0.7362, "step": 14816 }, { "epoch": 1.0704571315043256, "grad_norm": 8.129268991936588, "learning_rate": 2.3361698984092017e-06, "loss": 0.7038, "step": 14817 }, { "epoch": 1.0705293767045352, "grad_norm": 7.896413360736825, "learning_rate": 2.3358780258866603e-06, "loss": 0.6255, "step": 14818 }, { "epoch": 1.0706016219047447, "grad_norm": 7.056840097166892, "learning_rate": 2.3355861556108213e-06, "loss": 0.6579, "step": 14819 }, { "epoch": 1.0706738671049543, "grad_norm": 7.116058092503104, "learning_rate": 2.3352942875856805e-06, "loss": 0.7286, "step": 14820 }, { "epoch": 1.0707461123051638, "grad_norm": 5.895754900119675, "learning_rate": 2.3350024218152344e-06, "loss": 0.6617, "step": 14821 }, { "epoch": 1.0708183575053734, "grad_norm": 7.155467694429581, "learning_rate": 2.3347105583034756e-06, "loss": 0.6833, "step": 14822 }, { "epoch": 1.0708906027055827, "grad_norm": 6.491351022094015, "learning_rate": 2.3344186970544007e-06, "loss": 0.6999, "step": 14823 }, { "epoch": 1.0709628479057922, "grad_norm": 7.778457569508529, "learning_rate": 2.3341268380720062e-06, "loss": 0.6149, "step": 14824 }, { "epoch": 1.0710350931060018, "grad_norm": 6.3269288993061235, "learning_rate": 2.333834981360286e-06, "loss": 0.7062, "step": 14825 }, { "epoch": 1.0711073383062113, "grad_norm": 6.5993073533474185, "learning_rate": 2.333543126923236e-06, "loss": 0.6648, "step": 14826 }, { "epoch": 1.0711795835064208, "grad_norm": 6.880317989363982, "learning_rate": 2.3332512747648507e-06, "loss": 0.6223, "step": 14827 }, { "epoch": 1.0712518287066304, "grad_norm": 7.087853220054925, "learning_rate": 2.3329594248891272e-06, "loss": 0.6441, "step": 14828 }, { "epoch": 1.07132407390684, "grad_norm": 6.9369523115736405, "learning_rate": 2.3326675773000586e-06, "loss": 0.6846, "step": 14829 }, { "epoch": 1.0713963191070492, "grad_norm": 7.344780253115319, "learning_rate": 2.332375732001641e-06, "loss": 0.6541, "step": 14830 }, { "epoch": 1.0714685643072588, "grad_norm": 6.288419816249579, "learning_rate": 2.3320838889978705e-06, "loss": 0.6371, "step": 14831 }, { "epoch": 1.0715408095074683, "grad_norm": 6.329564663107795, "learning_rate": 2.33179204829274e-06, "loss": 0.7339, "step": 14832 }, { "epoch": 1.0716130547076779, "grad_norm": 5.885765114880888, "learning_rate": 2.3315002098902454e-06, "loss": 0.6377, "step": 14833 }, { "epoch": 1.0716852999078874, "grad_norm": 8.506475338274017, "learning_rate": 2.3312083737943827e-06, "loss": 0.6848, "step": 14834 }, { "epoch": 1.071757545108097, "grad_norm": 6.4248960494007585, "learning_rate": 2.3309165400091474e-06, "loss": 0.6288, "step": 14835 }, { "epoch": 1.0718297903083065, "grad_norm": 6.9962930400670205, "learning_rate": 2.330624708538532e-06, "loss": 0.6104, "step": 14836 }, { "epoch": 1.0719020355085158, "grad_norm": 6.669269848254596, "learning_rate": 2.3303328793865332e-06, "loss": 0.6447, "step": 14837 }, { "epoch": 1.0719742807087254, "grad_norm": 6.036552983601496, "learning_rate": 2.330041052557146e-06, "loss": 0.6909, "step": 14838 }, { "epoch": 1.072046525908935, "grad_norm": 6.321816698032725, "learning_rate": 2.329749228054364e-06, "loss": 0.6139, "step": 14839 }, { "epoch": 1.0721187711091444, "grad_norm": 6.72917324888618, "learning_rate": 2.329457405882183e-06, "loss": 0.6685, "step": 14840 }, { "epoch": 1.072191016309354, "grad_norm": 6.738484202106659, "learning_rate": 2.329165586044598e-06, "loss": 0.6405, "step": 14841 }, { "epoch": 1.0722632615095635, "grad_norm": 6.14936218676011, "learning_rate": 2.3288737685456035e-06, "loss": 0.6389, "step": 14842 }, { "epoch": 1.072335506709773, "grad_norm": 6.487329310999671, "learning_rate": 2.328581953389194e-06, "loss": 0.668, "step": 14843 }, { "epoch": 1.0724077519099824, "grad_norm": 6.407558400590854, "learning_rate": 2.3282901405793646e-06, "loss": 0.6705, "step": 14844 }, { "epoch": 1.072479997110192, "grad_norm": 7.561924179475921, "learning_rate": 2.3279983301201098e-06, "loss": 0.7319, "step": 14845 }, { "epoch": 1.0725522423104015, "grad_norm": 6.247840813556549, "learning_rate": 2.327706522015425e-06, "loss": 0.7048, "step": 14846 }, { "epoch": 1.072624487510611, "grad_norm": 6.2768463336487885, "learning_rate": 2.3274147162693027e-06, "loss": 0.6679, "step": 14847 }, { "epoch": 1.0726967327108206, "grad_norm": 6.167842581040164, "learning_rate": 2.3271229128857396e-06, "loss": 0.6738, "step": 14848 }, { "epoch": 1.07276897791103, "grad_norm": 7.438273926546914, "learning_rate": 2.3268311118687307e-06, "loss": 0.6701, "step": 14849 }, { "epoch": 1.0728412231112396, "grad_norm": 6.163796384068654, "learning_rate": 2.3265393132222685e-06, "loss": 0.5884, "step": 14850 }, { "epoch": 1.072913468311449, "grad_norm": 6.507230698314298, "learning_rate": 2.3262475169503484e-06, "loss": 0.67, "step": 14851 }, { "epoch": 1.0729857135116585, "grad_norm": 6.106600782769182, "learning_rate": 2.3259557230569637e-06, "loss": 0.6315, "step": 14852 }, { "epoch": 1.073057958711868, "grad_norm": 7.429603552118403, "learning_rate": 2.325663931546112e-06, "loss": 0.6687, "step": 14853 }, { "epoch": 1.0731302039120776, "grad_norm": 8.098658660218158, "learning_rate": 2.3253721424217853e-06, "loss": 0.6944, "step": 14854 }, { "epoch": 1.0732024491122871, "grad_norm": 7.259292697973647, "learning_rate": 2.3250803556879775e-06, "loss": 0.6576, "step": 14855 }, { "epoch": 1.0732746943124967, "grad_norm": 5.460818921112892, "learning_rate": 2.3247885713486844e-06, "loss": 0.6605, "step": 14856 }, { "epoch": 1.0733469395127062, "grad_norm": 7.879484080741715, "learning_rate": 2.3244967894078998e-06, "loss": 0.6555, "step": 14857 }, { "epoch": 1.0734191847129155, "grad_norm": 6.506861659604157, "learning_rate": 2.3242050098696174e-06, "loss": 0.6447, "step": 14858 }, { "epoch": 1.073491429913125, "grad_norm": 7.233014170319114, "learning_rate": 2.323913232737832e-06, "loss": 0.7339, "step": 14859 }, { "epoch": 1.0735636751133346, "grad_norm": 6.904787486988902, "learning_rate": 2.3236214580165385e-06, "loss": 0.6341, "step": 14860 }, { "epoch": 1.0736359203135442, "grad_norm": 8.566322976823937, "learning_rate": 2.323329685709729e-06, "loss": 0.7563, "step": 14861 }, { "epoch": 1.0737081655137537, "grad_norm": 6.443088513081467, "learning_rate": 2.3230379158213996e-06, "loss": 0.6162, "step": 14862 }, { "epoch": 1.0737804107139632, "grad_norm": 6.751378801597215, "learning_rate": 2.322746148355545e-06, "loss": 0.7449, "step": 14863 }, { "epoch": 1.0738526559141728, "grad_norm": 6.453474945111935, "learning_rate": 2.3224543833161563e-06, "loss": 0.6594, "step": 14864 }, { "epoch": 1.073924901114382, "grad_norm": 5.4188267826681376, "learning_rate": 2.3221626207072296e-06, "loss": 0.6336, "step": 14865 }, { "epoch": 1.0739971463145916, "grad_norm": 5.178892048649257, "learning_rate": 2.3218708605327582e-06, "loss": 0.6568, "step": 14866 }, { "epoch": 1.0740693915148012, "grad_norm": 6.517974356774976, "learning_rate": 2.3215791027967375e-06, "loss": 0.6918, "step": 14867 }, { "epoch": 1.0741416367150107, "grad_norm": 5.723705708485851, "learning_rate": 2.3212873475031597e-06, "loss": 0.671, "step": 14868 }, { "epoch": 1.0742138819152203, "grad_norm": 6.247209154726644, "learning_rate": 2.320995594656019e-06, "loss": 0.6343, "step": 14869 }, { "epoch": 1.0742861271154298, "grad_norm": 6.3836118087914615, "learning_rate": 2.32070384425931e-06, "loss": 0.6649, "step": 14870 }, { "epoch": 1.0743583723156394, "grad_norm": 7.029896245546198, "learning_rate": 2.3204120963170263e-06, "loss": 0.6625, "step": 14871 }, { "epoch": 1.0744306175158487, "grad_norm": 6.827048908698239, "learning_rate": 2.3201203508331615e-06, "loss": 0.6745, "step": 14872 }, { "epoch": 1.0745028627160582, "grad_norm": 6.291055039072434, "learning_rate": 2.3198286078117086e-06, "loss": 0.5865, "step": 14873 }, { "epoch": 1.0745751079162678, "grad_norm": 5.35730830345807, "learning_rate": 2.3195368672566637e-06, "loss": 0.638, "step": 14874 }, { "epoch": 1.0746473531164773, "grad_norm": 6.799186927206351, "learning_rate": 2.319245129172018e-06, "loss": 0.6278, "step": 14875 }, { "epoch": 1.0747195983166868, "grad_norm": 7.22957944836656, "learning_rate": 2.3189533935617658e-06, "loss": 0.6936, "step": 14876 }, { "epoch": 1.0747918435168964, "grad_norm": 6.782964393023986, "learning_rate": 2.3186616604299016e-06, "loss": 0.6888, "step": 14877 }, { "epoch": 1.074864088717106, "grad_norm": 6.555834482297226, "learning_rate": 2.3183699297804195e-06, "loss": 0.7229, "step": 14878 }, { "epoch": 1.0749363339173155, "grad_norm": 6.375673146331112, "learning_rate": 2.3180782016173107e-06, "loss": 0.5926, "step": 14879 }, { "epoch": 1.0750085791175248, "grad_norm": 6.029396523411231, "learning_rate": 2.31778647594457e-06, "loss": 0.6187, "step": 14880 }, { "epoch": 1.0750808243177343, "grad_norm": 6.46686736644229, "learning_rate": 2.3174947527661916e-06, "loss": 0.7121, "step": 14881 }, { "epoch": 1.0751530695179439, "grad_norm": 6.484027605058008, "learning_rate": 2.317203032086168e-06, "loss": 0.5988, "step": 14882 }, { "epoch": 1.0752253147181534, "grad_norm": 6.296629642978841, "learning_rate": 2.316911313908493e-06, "loss": 0.6934, "step": 14883 }, { "epoch": 1.075297559918363, "grad_norm": 6.597685448518131, "learning_rate": 2.3166195982371596e-06, "loss": 0.6333, "step": 14884 }, { "epoch": 1.0753698051185725, "grad_norm": 7.457867343204617, "learning_rate": 2.3163278850761622e-06, "loss": 0.6827, "step": 14885 }, { "epoch": 1.075442050318782, "grad_norm": 6.880637891606429, "learning_rate": 2.316036174429493e-06, "loss": 0.635, "step": 14886 }, { "epoch": 1.0755142955189914, "grad_norm": 6.917029068732855, "learning_rate": 2.3157444663011455e-06, "loss": 0.7231, "step": 14887 }, { "epoch": 1.075586540719201, "grad_norm": 5.933094518477314, "learning_rate": 2.3154527606951136e-06, "loss": 0.6752, "step": 14888 }, { "epoch": 1.0756587859194104, "grad_norm": 6.5163503558701645, "learning_rate": 2.31516105761539e-06, "loss": 0.6653, "step": 14889 }, { "epoch": 1.07573103111962, "grad_norm": 8.271311197641076, "learning_rate": 2.314869357065968e-06, "loss": 0.6615, "step": 14890 }, { "epoch": 1.0758032763198295, "grad_norm": 10.852378732126326, "learning_rate": 2.314577659050841e-06, "loss": 0.6403, "step": 14891 }, { "epoch": 1.075875521520039, "grad_norm": 7.371679819457415, "learning_rate": 2.314285963574002e-06, "loss": 0.6345, "step": 14892 }, { "epoch": 1.0759477667202486, "grad_norm": 5.110027866474172, "learning_rate": 2.3139942706394438e-06, "loss": 0.731, "step": 14893 }, { "epoch": 1.076020011920458, "grad_norm": 5.395444799547201, "learning_rate": 2.3137025802511593e-06, "loss": 0.5744, "step": 14894 }, { "epoch": 1.0760922571206675, "grad_norm": 7.2020474701609105, "learning_rate": 2.313410892413141e-06, "loss": 0.7, "step": 14895 }, { "epoch": 1.076164502320877, "grad_norm": 6.773153085688229, "learning_rate": 2.3131192071293845e-06, "loss": 0.6986, "step": 14896 }, { "epoch": 1.0762367475210866, "grad_norm": 7.280379738597678, "learning_rate": 2.31282752440388e-06, "loss": 0.6527, "step": 14897 }, { "epoch": 1.076308992721296, "grad_norm": 6.987124727438827, "learning_rate": 2.3125358442406216e-06, "loss": 0.5603, "step": 14898 }, { "epoch": 1.0763812379215056, "grad_norm": 6.732300051479754, "learning_rate": 2.312244166643602e-06, "loss": 0.6193, "step": 14899 }, { "epoch": 1.0764534831217152, "grad_norm": 6.2013490378236265, "learning_rate": 2.311952491616814e-06, "loss": 0.6514, "step": 14900 }, { "epoch": 1.0765257283219247, "grad_norm": 6.37977989367182, "learning_rate": 2.3116608191642495e-06, "loss": 0.6744, "step": 14901 }, { "epoch": 1.076597973522134, "grad_norm": 6.103664998172173, "learning_rate": 2.3113691492899028e-06, "loss": 0.6677, "step": 14902 }, { "epoch": 1.0766702187223436, "grad_norm": 7.038391826488053, "learning_rate": 2.3110774819977664e-06, "loss": 0.6834, "step": 14903 }, { "epoch": 1.0767424639225531, "grad_norm": 7.084693269517357, "learning_rate": 2.3107858172918317e-06, "loss": 0.6847, "step": 14904 }, { "epoch": 1.0768147091227627, "grad_norm": 6.803498366785684, "learning_rate": 2.310494155176093e-06, "loss": 0.6194, "step": 14905 }, { "epoch": 1.0768869543229722, "grad_norm": 7.783656962144831, "learning_rate": 2.3102024956545428e-06, "loss": 0.6318, "step": 14906 }, { "epoch": 1.0769591995231818, "grad_norm": 5.942057897983516, "learning_rate": 2.309910838731172e-06, "loss": 0.715, "step": 14907 }, { "epoch": 1.0770314447233913, "grad_norm": 7.770890168364823, "learning_rate": 2.309619184409974e-06, "loss": 0.6689, "step": 14908 }, { "epoch": 1.0771036899236006, "grad_norm": 8.365751255409332, "learning_rate": 2.3093275326949414e-06, "loss": 0.6421, "step": 14909 }, { "epoch": 1.0771759351238102, "grad_norm": 6.48572939780355, "learning_rate": 2.3090358835900682e-06, "loss": 0.6625, "step": 14910 }, { "epoch": 1.0772481803240197, "grad_norm": 5.517871085972481, "learning_rate": 2.3087442370993446e-06, "loss": 0.6757, "step": 14911 }, { "epoch": 1.0773204255242292, "grad_norm": 8.083597657381631, "learning_rate": 2.3084525932267637e-06, "loss": 0.7001, "step": 14912 }, { "epoch": 1.0773926707244388, "grad_norm": 7.58718676352567, "learning_rate": 2.3081609519763187e-06, "loss": 0.707, "step": 14913 }, { "epoch": 1.0774649159246483, "grad_norm": 6.211960656684434, "learning_rate": 2.3078693133520006e-06, "loss": 0.6015, "step": 14914 }, { "epoch": 1.0775371611248579, "grad_norm": 6.754140643525239, "learning_rate": 2.3075776773578028e-06, "loss": 0.6295, "step": 14915 }, { "epoch": 1.0776094063250672, "grad_norm": 6.5144721092958076, "learning_rate": 2.307286043997717e-06, "loss": 0.6309, "step": 14916 }, { "epoch": 1.0776816515252767, "grad_norm": 7.2974317448538555, "learning_rate": 2.306994413275736e-06, "loss": 0.6833, "step": 14917 }, { "epoch": 1.0777538967254863, "grad_norm": 6.136462327054361, "learning_rate": 2.306702785195851e-06, "loss": 0.7139, "step": 14918 }, { "epoch": 1.0778261419256958, "grad_norm": 7.700151407313797, "learning_rate": 2.306411159762055e-06, "loss": 0.7135, "step": 14919 }, { "epoch": 1.0778983871259054, "grad_norm": 6.672135144774626, "learning_rate": 2.30611953697834e-06, "loss": 0.642, "step": 14920 }, { "epoch": 1.077970632326115, "grad_norm": 5.740293520025341, "learning_rate": 2.305827916848699e-06, "loss": 0.6453, "step": 14921 }, { "epoch": 1.0780428775263244, "grad_norm": 6.88054863095165, "learning_rate": 2.3055362993771218e-06, "loss": 0.6287, "step": 14922 }, { "epoch": 1.0781151227265338, "grad_norm": 5.4952538559566335, "learning_rate": 2.3052446845676017e-06, "loss": 0.6875, "step": 14923 }, { "epoch": 1.0781873679267433, "grad_norm": 5.718638267884761, "learning_rate": 2.304953072424132e-06, "loss": 0.5956, "step": 14924 }, { "epoch": 1.0782596131269528, "grad_norm": 6.044396496252231, "learning_rate": 2.3046614629507016e-06, "loss": 0.6678, "step": 14925 }, { "epoch": 1.0783318583271624, "grad_norm": 6.696736739560872, "learning_rate": 2.304369856151305e-06, "loss": 0.6555, "step": 14926 }, { "epoch": 1.078404103527372, "grad_norm": 7.120280794328822, "learning_rate": 2.3040782520299324e-06, "loss": 0.6514, "step": 14927 }, { "epoch": 1.0784763487275815, "grad_norm": 8.185139789075029, "learning_rate": 2.303786650590577e-06, "loss": 0.7035, "step": 14928 }, { "epoch": 1.078548593927791, "grad_norm": 6.340930631668727, "learning_rate": 2.3034950518372302e-06, "loss": 0.7274, "step": 14929 }, { "epoch": 1.0786208391280003, "grad_norm": 5.731983985617629, "learning_rate": 2.303203455773883e-06, "loss": 0.6274, "step": 14930 }, { "epoch": 1.0786930843282099, "grad_norm": 7.634896281898763, "learning_rate": 2.3029118624045286e-06, "loss": 0.6601, "step": 14931 }, { "epoch": 1.0787653295284194, "grad_norm": 7.482349480040015, "learning_rate": 2.302620271733157e-06, "loss": 0.6312, "step": 14932 }, { "epoch": 1.078837574728629, "grad_norm": 6.087441959926888, "learning_rate": 2.3023286837637602e-06, "loss": 0.6073, "step": 14933 }, { "epoch": 1.0789098199288385, "grad_norm": 8.310327231600995, "learning_rate": 2.3020370985003308e-06, "loss": 0.6799, "step": 14934 }, { "epoch": 1.078982065129048, "grad_norm": 7.630888869280088, "learning_rate": 2.301745515946861e-06, "loss": 0.6031, "step": 14935 }, { "epoch": 1.0790543103292576, "grad_norm": 6.575095450865202, "learning_rate": 2.3014539361073395e-06, "loss": 0.6871, "step": 14936 }, { "epoch": 1.079126555529467, "grad_norm": 6.969828569227785, "learning_rate": 2.3011623589857593e-06, "loss": 0.7149, "step": 14937 }, { "epoch": 1.0791988007296764, "grad_norm": 5.965674122068092, "learning_rate": 2.3008707845861135e-06, "loss": 0.6145, "step": 14938 }, { "epoch": 1.079271045929886, "grad_norm": 6.93338165388631, "learning_rate": 2.3005792129123913e-06, "loss": 0.7204, "step": 14939 }, { "epoch": 1.0793432911300955, "grad_norm": 6.984087978669715, "learning_rate": 2.3002876439685847e-06, "loss": 0.7144, "step": 14940 }, { "epoch": 1.079415536330305, "grad_norm": 7.001628141429633, "learning_rate": 2.299996077758685e-06, "loss": 0.6568, "step": 14941 }, { "epoch": 1.0794877815305146, "grad_norm": 7.491484576237601, "learning_rate": 2.299704514286684e-06, "loss": 0.6453, "step": 14942 }, { "epoch": 1.0795600267307242, "grad_norm": 6.077821856394477, "learning_rate": 2.2994129535565725e-06, "loss": 0.6133, "step": 14943 }, { "epoch": 1.0796322719309335, "grad_norm": 6.703082602484653, "learning_rate": 2.299121395572342e-06, "loss": 0.6499, "step": 14944 }, { "epoch": 1.079704517131143, "grad_norm": 6.280564607642492, "learning_rate": 2.2988298403379837e-06, "loss": 0.5589, "step": 14945 }, { "epoch": 1.0797767623313526, "grad_norm": 7.504133229990474, "learning_rate": 2.298538287857489e-06, "loss": 0.6863, "step": 14946 }, { "epoch": 1.079849007531562, "grad_norm": 6.118359761559874, "learning_rate": 2.298246738134848e-06, "loss": 0.7034, "step": 14947 }, { "epoch": 1.0799212527317716, "grad_norm": 6.61411297745326, "learning_rate": 2.2979551911740526e-06, "loss": 0.6567, "step": 14948 }, { "epoch": 1.0799934979319812, "grad_norm": 6.877114542979189, "learning_rate": 2.297663646979095e-06, "loss": 0.773, "step": 14949 }, { "epoch": 1.0800657431321907, "grad_norm": 7.866982147658164, "learning_rate": 2.297372105553964e-06, "loss": 0.6645, "step": 14950 }, { "epoch": 1.0801379883324003, "grad_norm": 7.84980351876422, "learning_rate": 2.2970805669026504e-06, "loss": 0.6987, "step": 14951 }, { "epoch": 1.0802102335326096, "grad_norm": 8.488255184324911, "learning_rate": 2.2967890310291476e-06, "loss": 0.6584, "step": 14952 }, { "epoch": 1.0802824787328191, "grad_norm": 8.015687343659843, "learning_rate": 2.2964974979374458e-06, "loss": 0.6509, "step": 14953 }, { "epoch": 1.0803547239330287, "grad_norm": 6.1721469481225695, "learning_rate": 2.296205967631534e-06, "loss": 0.6106, "step": 14954 }, { "epoch": 1.0804269691332382, "grad_norm": 6.642786585232387, "learning_rate": 2.2959144401154046e-06, "loss": 0.7477, "step": 14955 }, { "epoch": 1.0804992143334478, "grad_norm": 8.579583049326638, "learning_rate": 2.2956229153930486e-06, "loss": 0.6592, "step": 14956 }, { "epoch": 1.0805714595336573, "grad_norm": 7.184322019483722, "learning_rate": 2.2953313934684555e-06, "loss": 0.6534, "step": 14957 }, { "epoch": 1.0806437047338668, "grad_norm": 5.987605007580798, "learning_rate": 2.295039874345617e-06, "loss": 0.6024, "step": 14958 }, { "epoch": 1.0807159499340762, "grad_norm": 6.655316927705481, "learning_rate": 2.294748358028523e-06, "loss": 0.6896, "step": 14959 }, { "epoch": 1.0807881951342857, "grad_norm": 6.188241625180863, "learning_rate": 2.2944568445211658e-06, "loss": 0.573, "step": 14960 }, { "epoch": 1.0808604403344952, "grad_norm": 6.255192850068871, "learning_rate": 2.2941653338275337e-06, "loss": 0.5918, "step": 14961 }, { "epoch": 1.0809326855347048, "grad_norm": 6.203636013861586, "learning_rate": 2.2938738259516186e-06, "loss": 0.5661, "step": 14962 }, { "epoch": 1.0810049307349143, "grad_norm": 6.31444617759825, "learning_rate": 2.2935823208974117e-06, "loss": 0.6837, "step": 14963 }, { "epoch": 1.0810771759351239, "grad_norm": 6.553108689010906, "learning_rate": 2.2932908186689016e-06, "loss": 0.6683, "step": 14964 }, { "epoch": 1.0811494211353334, "grad_norm": 6.427037894790587, "learning_rate": 2.292999319270079e-06, "loss": 0.6452, "step": 14965 }, { "epoch": 1.0812216663355427, "grad_norm": 6.54825317152511, "learning_rate": 2.292707822704936e-06, "loss": 0.6334, "step": 14966 }, { "epoch": 1.0812939115357523, "grad_norm": 8.325692399474296, "learning_rate": 2.2924163289774627e-06, "loss": 0.7474, "step": 14967 }, { "epoch": 1.0813661567359618, "grad_norm": 8.354856036148416, "learning_rate": 2.2921248380916474e-06, "loss": 0.5975, "step": 14968 }, { "epoch": 1.0814384019361714, "grad_norm": 8.175190065373432, "learning_rate": 2.291833350051482e-06, "loss": 0.6606, "step": 14969 }, { "epoch": 1.081510647136381, "grad_norm": 5.808989090026498, "learning_rate": 2.2915418648609567e-06, "loss": 0.6293, "step": 14970 }, { "epoch": 1.0815828923365904, "grad_norm": 6.755265619235544, "learning_rate": 2.2912503825240608e-06, "loss": 0.7196, "step": 14971 }, { "epoch": 1.0816551375368, "grad_norm": 6.72636429232149, "learning_rate": 2.2909589030447855e-06, "loss": 0.6208, "step": 14972 }, { "epoch": 1.0817273827370095, "grad_norm": 6.657750493378381, "learning_rate": 2.29066742642712e-06, "loss": 0.6832, "step": 14973 }, { "epoch": 1.0817996279372188, "grad_norm": 5.880204513003446, "learning_rate": 2.2903759526750558e-06, "loss": 0.6531, "step": 14974 }, { "epoch": 1.0818718731374284, "grad_norm": 6.9286376433094805, "learning_rate": 2.2900844817925817e-06, "loss": 0.7075, "step": 14975 }, { "epoch": 1.081944118337638, "grad_norm": 8.173807356977798, "learning_rate": 2.2897930137836876e-06, "loss": 0.6416, "step": 14976 }, { "epoch": 1.0820163635378475, "grad_norm": 6.845250352805084, "learning_rate": 2.289501548652364e-06, "loss": 0.6602, "step": 14977 }, { "epoch": 1.082088608738057, "grad_norm": 7.009366172618299, "learning_rate": 2.289210086402602e-06, "loss": 0.6678, "step": 14978 }, { "epoch": 1.0821608539382666, "grad_norm": 6.01827223358085, "learning_rate": 2.288918627038389e-06, "loss": 0.6594, "step": 14979 }, { "epoch": 1.082233099138476, "grad_norm": 7.577467553633416, "learning_rate": 2.2886271705637158e-06, "loss": 0.6545, "step": 14980 }, { "epoch": 1.0823053443386854, "grad_norm": 5.535079061741265, "learning_rate": 2.288335716982574e-06, "loss": 0.6193, "step": 14981 }, { "epoch": 1.082377589538895, "grad_norm": 5.6104781147334775, "learning_rate": 2.2880442662989508e-06, "loss": 0.6841, "step": 14982 }, { "epoch": 1.0824498347391045, "grad_norm": 6.28870219270675, "learning_rate": 2.2877528185168367e-06, "loss": 0.6445, "step": 14983 }, { "epoch": 1.082522079939314, "grad_norm": 6.831279104716991, "learning_rate": 2.2874613736402223e-06, "loss": 0.6867, "step": 14984 }, { "epoch": 1.0825943251395236, "grad_norm": 6.3766142353482005, "learning_rate": 2.287169931673097e-06, "loss": 0.7331, "step": 14985 }, { "epoch": 1.0826665703397331, "grad_norm": 6.049153216663624, "learning_rate": 2.286878492619449e-06, "loss": 0.6817, "step": 14986 }, { "epoch": 1.0827388155399427, "grad_norm": 6.051873400970724, "learning_rate": 2.28658705648327e-06, "loss": 0.6035, "step": 14987 }, { "epoch": 1.082811060740152, "grad_norm": 6.626628171833272, "learning_rate": 2.2862956232685483e-06, "loss": 0.651, "step": 14988 }, { "epoch": 1.0828833059403615, "grad_norm": 7.078156528023739, "learning_rate": 2.286004192979273e-06, "loss": 0.6462, "step": 14989 }, { "epoch": 1.082955551140571, "grad_norm": 6.884930461235951, "learning_rate": 2.2857127656194344e-06, "loss": 0.7065, "step": 14990 }, { "epoch": 1.0830277963407806, "grad_norm": 7.608894975314177, "learning_rate": 2.285421341193022e-06, "loss": 0.6936, "step": 14991 }, { "epoch": 1.0831000415409902, "grad_norm": 5.747223556925661, "learning_rate": 2.285129919704025e-06, "loss": 0.6381, "step": 14992 }, { "epoch": 1.0831722867411997, "grad_norm": 6.835191644578463, "learning_rate": 2.284838501156432e-06, "loss": 0.6635, "step": 14993 }, { "epoch": 1.0832445319414092, "grad_norm": 8.021479381618242, "learning_rate": 2.2845470855542326e-06, "loss": 0.7593, "step": 14994 }, { "epoch": 1.0833167771416186, "grad_norm": 6.591623261602591, "learning_rate": 2.284255672901418e-06, "loss": 0.5894, "step": 14995 }, { "epoch": 1.083389022341828, "grad_norm": 6.249288289555585, "learning_rate": 2.283964263201974e-06, "loss": 0.7371, "step": 14996 }, { "epoch": 1.0834612675420376, "grad_norm": 6.382022056988574, "learning_rate": 2.283672856459892e-06, "loss": 0.6282, "step": 14997 }, { "epoch": 1.0835335127422472, "grad_norm": 6.756411298013886, "learning_rate": 2.2833814526791604e-06, "loss": 0.6299, "step": 14998 }, { "epoch": 1.0836057579424567, "grad_norm": 5.56464644605388, "learning_rate": 2.283090051863769e-06, "loss": 0.6557, "step": 14999 }, { "epoch": 1.0836780031426663, "grad_norm": 5.754349431867446, "learning_rate": 2.282798654017706e-06, "loss": 0.5909, "step": 15000 }, { "epoch": 1.0837502483428758, "grad_norm": 6.2145515723274185, "learning_rate": 2.2825072591449607e-06, "loss": 0.654, "step": 15001 }, { "epoch": 1.0838224935430851, "grad_norm": 6.36462515543065, "learning_rate": 2.282215867249522e-06, "loss": 0.572, "step": 15002 }, { "epoch": 1.0838947387432947, "grad_norm": 7.860230416373704, "learning_rate": 2.28192447833538e-06, "loss": 0.6552, "step": 15003 }, { "epoch": 1.0839669839435042, "grad_norm": 7.2860908624688685, "learning_rate": 2.2816330924065213e-06, "loss": 0.6428, "step": 15004 }, { "epoch": 1.0840392291437138, "grad_norm": 8.036688124972027, "learning_rate": 2.2813417094669367e-06, "loss": 0.6714, "step": 15005 }, { "epoch": 1.0841114743439233, "grad_norm": 6.555824008480992, "learning_rate": 2.281050329520615e-06, "loss": 0.6526, "step": 15006 }, { "epoch": 1.0841837195441328, "grad_norm": 6.596910632175944, "learning_rate": 2.280758952571543e-06, "loss": 0.603, "step": 15007 }, { "epoch": 1.0842559647443424, "grad_norm": 7.406721011608229, "learning_rate": 2.2804675786237105e-06, "loss": 0.7142, "step": 15008 }, { "epoch": 1.0843282099445517, "grad_norm": 7.239658741363867, "learning_rate": 2.2801762076811072e-06, "loss": 0.7226, "step": 15009 }, { "epoch": 1.0844004551447612, "grad_norm": 7.328449559293919, "learning_rate": 2.2798848397477214e-06, "loss": 0.6494, "step": 15010 }, { "epoch": 1.0844727003449708, "grad_norm": 7.686373496286403, "learning_rate": 2.279593474827541e-06, "loss": 0.7002, "step": 15011 }, { "epoch": 1.0845449455451803, "grad_norm": 6.634940121803804, "learning_rate": 2.2793021129245542e-06, "loss": 0.6318, "step": 15012 }, { "epoch": 1.0846171907453899, "grad_norm": 7.07409634160385, "learning_rate": 2.2790107540427506e-06, "loss": 0.6944, "step": 15013 }, { "epoch": 1.0846894359455994, "grad_norm": 5.715585635692597, "learning_rate": 2.278719398186118e-06, "loss": 0.6684, "step": 15014 }, { "epoch": 1.084761681145809, "grad_norm": 7.076715526918245, "learning_rate": 2.278428045358645e-06, "loss": 0.6819, "step": 15015 }, { "epoch": 1.0848339263460183, "grad_norm": 8.697833385563749, "learning_rate": 2.2781366955643193e-06, "loss": 0.7073, "step": 15016 }, { "epoch": 1.0849061715462278, "grad_norm": 6.274976643674629, "learning_rate": 2.277845348807131e-06, "loss": 0.6698, "step": 15017 }, { "epoch": 1.0849784167464374, "grad_norm": 6.85618680578856, "learning_rate": 2.277554005091067e-06, "loss": 0.734, "step": 15018 }, { "epoch": 1.085050661946647, "grad_norm": 6.4795062435086805, "learning_rate": 2.277262664420116e-06, "loss": 0.6605, "step": 15019 }, { "epoch": 1.0851229071468564, "grad_norm": 7.862652741663359, "learning_rate": 2.276971326798267e-06, "loss": 0.7412, "step": 15020 }, { "epoch": 1.085195152347066, "grad_norm": 7.22214471172245, "learning_rate": 2.276679992229506e-06, "loss": 0.7746, "step": 15021 }, { "epoch": 1.0852673975472755, "grad_norm": 6.877491170197355, "learning_rate": 2.2763886607178226e-06, "loss": 0.607, "step": 15022 }, { "epoch": 1.085339642747485, "grad_norm": 7.869696875118508, "learning_rate": 2.276097332267205e-06, "loss": 0.6626, "step": 15023 }, { "epoch": 1.0854118879476944, "grad_norm": 7.68571463841677, "learning_rate": 2.2758060068816426e-06, "loss": 0.7086, "step": 15024 }, { "epoch": 1.085484133147904, "grad_norm": 7.280221498075121, "learning_rate": 2.2755146845651204e-06, "loss": 0.6244, "step": 15025 }, { "epoch": 1.0855563783481135, "grad_norm": 6.857308669583236, "learning_rate": 2.275223365321628e-06, "loss": 0.639, "step": 15026 }, { "epoch": 1.085628623548323, "grad_norm": 7.403203538870146, "learning_rate": 2.274932049155153e-06, "loss": 0.662, "step": 15027 }, { "epoch": 1.0857008687485326, "grad_norm": 7.534713771676649, "learning_rate": 2.2746407360696843e-06, "loss": 0.6085, "step": 15028 }, { "epoch": 1.085773113948742, "grad_norm": 7.22749835578744, "learning_rate": 2.274349426069208e-06, "loss": 0.6487, "step": 15029 }, { "epoch": 1.0858453591489516, "grad_norm": 7.36783730481971, "learning_rate": 2.274058119157713e-06, "loss": 0.6228, "step": 15030 }, { "epoch": 1.085917604349161, "grad_norm": 6.224915788073242, "learning_rate": 2.2737668153391877e-06, "loss": 0.6746, "step": 15031 }, { "epoch": 1.0859898495493705, "grad_norm": 6.660510781436279, "learning_rate": 2.2734755146176186e-06, "loss": 0.6337, "step": 15032 }, { "epoch": 1.08606209474958, "grad_norm": 6.760470428801621, "learning_rate": 2.273184216996994e-06, "loss": 0.6779, "step": 15033 }, { "epoch": 1.0861343399497896, "grad_norm": 5.88986338743576, "learning_rate": 2.2728929224813005e-06, "loss": 0.5813, "step": 15034 }, { "epoch": 1.0862065851499991, "grad_norm": 7.984939465660568, "learning_rate": 2.272601631074528e-06, "loss": 0.6379, "step": 15035 }, { "epoch": 1.0862788303502087, "grad_norm": 6.2951903254890444, "learning_rate": 2.272310342780661e-06, "loss": 0.5981, "step": 15036 }, { "epoch": 1.0863510755504182, "grad_norm": 7.4686907522493104, "learning_rate": 2.2720190576036893e-06, "loss": 0.6813, "step": 15037 }, { "epoch": 1.0864233207506275, "grad_norm": 6.5148597469149445, "learning_rate": 2.2717277755476007e-06, "loss": 0.657, "step": 15038 }, { "epoch": 1.086495565950837, "grad_norm": 7.067694450108033, "learning_rate": 2.2714364966163806e-06, "loss": 0.6243, "step": 15039 }, { "epoch": 1.0865678111510466, "grad_norm": 6.710006347289402, "learning_rate": 2.271145220814017e-06, "loss": 0.6772, "step": 15040 }, { "epoch": 1.0866400563512562, "grad_norm": 6.207378476167433, "learning_rate": 2.2708539481444983e-06, "loss": 0.6823, "step": 15041 }, { "epoch": 1.0867123015514657, "grad_norm": 5.392592096086122, "learning_rate": 2.2705626786118113e-06, "loss": 0.6869, "step": 15042 }, { "epoch": 1.0867845467516752, "grad_norm": 7.229271557254933, "learning_rate": 2.2702714122199424e-06, "loss": 0.622, "step": 15043 }, { "epoch": 1.0868567919518848, "grad_norm": 6.0720193591506595, "learning_rate": 2.26998014897288e-06, "loss": 0.6764, "step": 15044 }, { "epoch": 1.0869290371520943, "grad_norm": 18.02812286710206, "learning_rate": 2.2696888888746106e-06, "loss": 0.6876, "step": 15045 }, { "epoch": 1.0870012823523036, "grad_norm": 6.50330298206332, "learning_rate": 2.2693976319291217e-06, "loss": 0.5993, "step": 15046 }, { "epoch": 1.0870735275525132, "grad_norm": 7.442934503093117, "learning_rate": 2.2691063781403996e-06, "loss": 0.6696, "step": 15047 }, { "epoch": 1.0871457727527227, "grad_norm": 6.225467906356611, "learning_rate": 2.268815127512432e-06, "loss": 0.6397, "step": 15048 }, { "epoch": 1.0872180179529323, "grad_norm": 7.672814461238574, "learning_rate": 2.2685238800492073e-06, "loss": 0.6938, "step": 15049 }, { "epoch": 1.0872902631531418, "grad_norm": 6.211116533877956, "learning_rate": 2.268232635754709e-06, "loss": 0.7458, "step": 15050 }, { "epoch": 1.0873625083533514, "grad_norm": 6.987681312460645, "learning_rate": 2.2679413946329263e-06, "loss": 0.643, "step": 15051 }, { "epoch": 1.087434753553561, "grad_norm": 6.851275482603614, "learning_rate": 2.2676501566878457e-06, "loss": 0.6627, "step": 15052 }, { "epoch": 1.0875069987537702, "grad_norm": 6.035162254750249, "learning_rate": 2.2673589219234553e-06, "loss": 0.6966, "step": 15053 }, { "epoch": 1.0875792439539798, "grad_norm": 6.842242906707184, "learning_rate": 2.26706769034374e-06, "loss": 0.6884, "step": 15054 }, { "epoch": 1.0876514891541893, "grad_norm": 6.770041770637156, "learning_rate": 2.2667764619526864e-06, "loss": 0.6631, "step": 15055 }, { "epoch": 1.0877237343543988, "grad_norm": 8.22764657060695, "learning_rate": 2.266485236754283e-06, "loss": 0.6997, "step": 15056 }, { "epoch": 1.0877959795546084, "grad_norm": 6.305886609559134, "learning_rate": 2.2661940147525148e-06, "loss": 0.5712, "step": 15057 }, { "epoch": 1.087868224754818, "grad_norm": 7.331526042297519, "learning_rate": 2.2659027959513686e-06, "loss": 0.6461, "step": 15058 }, { "epoch": 1.0879404699550275, "grad_norm": 5.902712453842228, "learning_rate": 2.265611580354832e-06, "loss": 0.6338, "step": 15059 }, { "epoch": 1.0880127151552368, "grad_norm": 7.402330866578498, "learning_rate": 2.2653203679668907e-06, "loss": 0.7973, "step": 15060 }, { "epoch": 1.0880849603554463, "grad_norm": 6.5528784564077736, "learning_rate": 2.2650291587915316e-06, "loss": 0.6691, "step": 15061 }, { "epoch": 1.0881572055556559, "grad_norm": 6.407707532363856, "learning_rate": 2.2647379528327402e-06, "loss": 0.6693, "step": 15062 }, { "epoch": 1.0882294507558654, "grad_norm": 7.01662894968994, "learning_rate": 2.264446750094505e-06, "loss": 0.6192, "step": 15063 }, { "epoch": 1.088301695956075, "grad_norm": 7.235616161416494, "learning_rate": 2.264155550580809e-06, "loss": 0.7373, "step": 15064 }, { "epoch": 1.0883739411562845, "grad_norm": 6.092144407367951, "learning_rate": 2.2638643542956413e-06, "loss": 0.6618, "step": 15065 }, { "epoch": 1.088446186356494, "grad_norm": 6.996300128254872, "learning_rate": 2.2635731612429872e-06, "loss": 0.6537, "step": 15066 }, { "epoch": 1.0885184315567034, "grad_norm": 6.369604183337657, "learning_rate": 2.263281971426834e-06, "loss": 0.6341, "step": 15067 }, { "epoch": 1.088590676756913, "grad_norm": 6.826640720665819, "learning_rate": 2.2629907848511654e-06, "loss": 0.6447, "step": 15068 }, { "epoch": 1.0886629219571224, "grad_norm": 6.649992531220107, "learning_rate": 2.262699601519969e-06, "loss": 0.6367, "step": 15069 }, { "epoch": 1.088735167157332, "grad_norm": 7.274518351652922, "learning_rate": 2.2624084214372317e-06, "loss": 0.6513, "step": 15070 }, { "epoch": 1.0888074123575415, "grad_norm": 7.039057893571606, "learning_rate": 2.2621172446069374e-06, "loss": 0.7045, "step": 15071 }, { "epoch": 1.088879657557751, "grad_norm": 6.940172385237733, "learning_rate": 2.261826071033074e-06, "loss": 0.7459, "step": 15072 }, { "epoch": 1.0889519027579606, "grad_norm": 5.439572870932834, "learning_rate": 2.2615349007196267e-06, "loss": 0.5842, "step": 15073 }, { "epoch": 1.08902414795817, "grad_norm": 6.334926538681272, "learning_rate": 2.2612437336705818e-06, "loss": 0.6872, "step": 15074 }, { "epoch": 1.0890963931583795, "grad_norm": 8.018669754780431, "learning_rate": 2.2609525698899244e-06, "loss": 0.6409, "step": 15075 }, { "epoch": 1.089168638358589, "grad_norm": 5.622309910404948, "learning_rate": 2.2606614093816403e-06, "loss": 0.6319, "step": 15076 }, { "epoch": 1.0892408835587986, "grad_norm": 7.940408729391386, "learning_rate": 2.2603702521497174e-06, "loss": 0.755, "step": 15077 }, { "epoch": 1.089313128759008, "grad_norm": 7.253178656041438, "learning_rate": 2.2600790981981375e-06, "loss": 0.6358, "step": 15078 }, { "epoch": 1.0893853739592176, "grad_norm": 6.774571940845374, "learning_rate": 2.2597879475308893e-06, "loss": 0.7162, "step": 15079 }, { "epoch": 1.0894576191594272, "grad_norm": 7.851301350212314, "learning_rate": 2.2594968001519572e-06, "loss": 0.753, "step": 15080 }, { "epoch": 1.0895298643596365, "grad_norm": 8.182344866668103, "learning_rate": 2.2592056560653288e-06, "loss": 0.7054, "step": 15081 }, { "epoch": 1.089602109559846, "grad_norm": 5.5973645684647595, "learning_rate": 2.2589145152749866e-06, "loss": 0.6065, "step": 15082 }, { "epoch": 1.0896743547600556, "grad_norm": 6.839314734949779, "learning_rate": 2.2586233777849178e-06, "loss": 0.6272, "step": 15083 }, { "epoch": 1.0897465999602651, "grad_norm": 5.962512846798922, "learning_rate": 2.258332243599107e-06, "loss": 0.6098, "step": 15084 }, { "epoch": 1.0898188451604747, "grad_norm": 6.00582253545612, "learning_rate": 2.258041112721541e-06, "loss": 0.5758, "step": 15085 }, { "epoch": 1.0898910903606842, "grad_norm": 6.916103876343178, "learning_rate": 2.257749985156204e-06, "loss": 0.6637, "step": 15086 }, { "epoch": 1.0899633355608938, "grad_norm": 6.665402419697096, "learning_rate": 2.2574588609070815e-06, "loss": 0.6026, "step": 15087 }, { "epoch": 1.090035580761103, "grad_norm": 5.962383609811337, "learning_rate": 2.2571677399781593e-06, "loss": 0.6517, "step": 15088 }, { "epoch": 1.0901078259613126, "grad_norm": 5.616860498240049, "learning_rate": 2.2568766223734213e-06, "loss": 0.6133, "step": 15089 }, { "epoch": 1.0901800711615222, "grad_norm": 7.345361670723265, "learning_rate": 2.256585508096854e-06, "loss": 0.6671, "step": 15090 }, { "epoch": 1.0902523163617317, "grad_norm": 5.980304180051171, "learning_rate": 2.256294397152442e-06, "loss": 0.627, "step": 15091 }, { "epoch": 1.0903245615619412, "grad_norm": 6.217297561718215, "learning_rate": 2.2560032895441713e-06, "loss": 0.6403, "step": 15092 }, { "epoch": 1.0903968067621508, "grad_norm": 6.662103935335657, "learning_rate": 2.255712185276025e-06, "loss": 0.6171, "step": 15093 }, { "epoch": 1.0904690519623603, "grad_norm": 6.146281115065447, "learning_rate": 2.2554210843519895e-06, "loss": 0.6782, "step": 15094 }, { "epoch": 1.0905412971625696, "grad_norm": 8.291204493947138, "learning_rate": 2.2551299867760503e-06, "loss": 0.6843, "step": 15095 }, { "epoch": 1.0906135423627792, "grad_norm": 6.907965123480054, "learning_rate": 2.254838892552191e-06, "loss": 0.6933, "step": 15096 }, { "epoch": 1.0906857875629887, "grad_norm": 7.653900159564919, "learning_rate": 2.2545478016843965e-06, "loss": 0.6716, "step": 15097 }, { "epoch": 1.0907580327631983, "grad_norm": 7.473663629089249, "learning_rate": 2.2542567141766517e-06, "loss": 0.7301, "step": 15098 }, { "epoch": 1.0908302779634078, "grad_norm": 6.653346614990024, "learning_rate": 2.2539656300329428e-06, "loss": 0.7213, "step": 15099 }, { "epoch": 1.0909025231636174, "grad_norm": 7.003602871810705, "learning_rate": 2.2536745492572524e-06, "loss": 0.6585, "step": 15100 }, { "epoch": 1.090974768363827, "grad_norm": 6.810371836455132, "learning_rate": 2.2533834718535663e-06, "loss": 0.6782, "step": 15101 }, { "epoch": 1.0910470135640364, "grad_norm": 5.952221418876816, "learning_rate": 2.2530923978258693e-06, "loss": 0.5818, "step": 15102 }, { "epoch": 1.0911192587642458, "grad_norm": 7.786045542378254, "learning_rate": 2.2528013271781455e-06, "loss": 0.6538, "step": 15103 }, { "epoch": 1.0911915039644553, "grad_norm": 7.7074903147460585, "learning_rate": 2.2525102599143798e-06, "loss": 0.6817, "step": 15104 }, { "epoch": 1.0912637491646648, "grad_norm": 5.59699636697302, "learning_rate": 2.252219196038556e-06, "loss": 0.6389, "step": 15105 }, { "epoch": 1.0913359943648744, "grad_norm": 5.900531240566263, "learning_rate": 2.25192813555466e-06, "loss": 0.6801, "step": 15106 }, { "epoch": 1.091408239565084, "grad_norm": 6.179335322514564, "learning_rate": 2.2516370784666743e-06, "loss": 0.6154, "step": 15107 }, { "epoch": 1.0914804847652935, "grad_norm": 9.166266785193619, "learning_rate": 2.251346024778584e-06, "loss": 0.7113, "step": 15108 }, { "epoch": 1.091552729965503, "grad_norm": 6.220763527473625, "learning_rate": 2.251054974494374e-06, "loss": 0.6269, "step": 15109 }, { "epoch": 1.0916249751657123, "grad_norm": 7.3949573553273025, "learning_rate": 2.250763927618029e-06, "loss": 0.7471, "step": 15110 }, { "epoch": 1.0916972203659219, "grad_norm": 6.544416234477882, "learning_rate": 2.2504728841535313e-06, "loss": 0.6776, "step": 15111 }, { "epoch": 1.0917694655661314, "grad_norm": 5.812811074085224, "learning_rate": 2.2501818441048655e-06, "loss": 0.6326, "step": 15112 }, { "epoch": 1.091841710766341, "grad_norm": 5.5373668577148605, "learning_rate": 2.2498908074760177e-06, "loss": 0.6729, "step": 15113 }, { "epoch": 1.0919139559665505, "grad_norm": 5.3154400039227925, "learning_rate": 2.2495997742709695e-06, "loss": 0.6348, "step": 15114 }, { "epoch": 1.09198620116676, "grad_norm": 7.126471116630718, "learning_rate": 2.249308744493706e-06, "loss": 0.6996, "step": 15115 }, { "epoch": 1.0920584463669696, "grad_norm": 5.780156542496208, "learning_rate": 2.2490177181482114e-06, "loss": 0.6369, "step": 15116 }, { "epoch": 1.092130691567179, "grad_norm": 7.577590892099371, "learning_rate": 2.2487266952384695e-06, "loss": 0.7381, "step": 15117 }, { "epoch": 1.0922029367673884, "grad_norm": 7.386746378765754, "learning_rate": 2.2484356757684637e-06, "loss": 0.709, "step": 15118 }, { "epoch": 1.092275181967598, "grad_norm": 7.727212444621064, "learning_rate": 2.2481446597421783e-06, "loss": 0.6806, "step": 15119 }, { "epoch": 1.0923474271678075, "grad_norm": 6.6704734741717475, "learning_rate": 2.2478536471635983e-06, "loss": 0.6177, "step": 15120 }, { "epoch": 1.092419672368017, "grad_norm": 6.3446293864970515, "learning_rate": 2.2475626380367037e-06, "loss": 0.6686, "step": 15121 }, { "epoch": 1.0924919175682266, "grad_norm": 5.809963862557883, "learning_rate": 2.2472716323654817e-06, "loss": 0.6261, "step": 15122 }, { "epoch": 1.0925641627684362, "grad_norm": 7.074075850123519, "learning_rate": 2.2469806301539144e-06, "loss": 0.7294, "step": 15123 }, { "epoch": 1.0926364079686457, "grad_norm": 6.5577052447782815, "learning_rate": 2.2466896314059873e-06, "loss": 0.6735, "step": 15124 }, { "epoch": 1.092708653168855, "grad_norm": 6.821639623351479, "learning_rate": 2.2463986361256814e-06, "loss": 0.5988, "step": 15125 }, { "epoch": 1.0927808983690646, "grad_norm": 7.077573371595823, "learning_rate": 2.2461076443169807e-06, "loss": 0.6485, "step": 15126 }, { "epoch": 1.092853143569274, "grad_norm": 6.133060367701997, "learning_rate": 2.2458166559838704e-06, "loss": 0.619, "step": 15127 }, { "epoch": 1.0929253887694836, "grad_norm": 6.525379517724758, "learning_rate": 2.245525671130332e-06, "loss": 0.6483, "step": 15128 }, { "epoch": 1.0929976339696932, "grad_norm": 8.076404498992584, "learning_rate": 2.2452346897603494e-06, "loss": 0.724, "step": 15129 }, { "epoch": 1.0930698791699027, "grad_norm": 6.4232681137017815, "learning_rate": 2.244943711877906e-06, "loss": 0.7432, "step": 15130 }, { "epoch": 1.0931421243701123, "grad_norm": 5.20811557568872, "learning_rate": 2.2446527374869858e-06, "loss": 0.6486, "step": 15131 }, { "epoch": 1.0932143695703216, "grad_norm": 5.969245290927633, "learning_rate": 2.244361766591571e-06, "loss": 0.627, "step": 15132 }, { "epoch": 1.0932866147705311, "grad_norm": 7.894912974751922, "learning_rate": 2.244070799195645e-06, "loss": 0.7182, "step": 15133 }, { "epoch": 1.0933588599707407, "grad_norm": 7.065313268412841, "learning_rate": 2.243779835303191e-06, "loss": 0.6536, "step": 15134 }, { "epoch": 1.0934311051709502, "grad_norm": 6.368523093721469, "learning_rate": 2.243488874918193e-06, "loss": 0.7079, "step": 15135 }, { "epoch": 1.0935033503711598, "grad_norm": 6.216438822452996, "learning_rate": 2.243197918044632e-06, "loss": 0.6642, "step": 15136 }, { "epoch": 1.0935755955713693, "grad_norm": 6.395083637182746, "learning_rate": 2.2429069646864927e-06, "loss": 0.7123, "step": 15137 }, { "epoch": 1.0936478407715788, "grad_norm": 6.4654535564925455, "learning_rate": 2.242616014847758e-06, "loss": 0.6692, "step": 15138 }, { "epoch": 1.0937200859717882, "grad_norm": 7.671990854045043, "learning_rate": 2.2423250685324093e-06, "loss": 0.7381, "step": 15139 }, { "epoch": 1.0937923311719977, "grad_norm": 5.869211795013583, "learning_rate": 2.2420341257444305e-06, "loss": 0.6754, "step": 15140 }, { "epoch": 1.0938645763722072, "grad_norm": 7.287849546133625, "learning_rate": 2.2417431864878035e-06, "loss": 0.6796, "step": 15141 }, { "epoch": 1.0939368215724168, "grad_norm": 6.528895779727155, "learning_rate": 2.2414522507665134e-06, "loss": 0.7311, "step": 15142 }, { "epoch": 1.0940090667726263, "grad_norm": 7.526835180422908, "learning_rate": 2.2411613185845403e-06, "loss": 0.7104, "step": 15143 }, { "epoch": 1.0940813119728359, "grad_norm": 5.4177363977754815, "learning_rate": 2.2408703899458678e-06, "loss": 0.6006, "step": 15144 }, { "epoch": 1.0941535571730454, "grad_norm": 6.967135802661278, "learning_rate": 2.2405794648544786e-06, "loss": 0.6383, "step": 15145 }, { "epoch": 1.0942258023732547, "grad_norm": 6.324592112782168, "learning_rate": 2.240288543314355e-06, "loss": 0.7444, "step": 15146 }, { "epoch": 1.0942980475734643, "grad_norm": 5.554170007649834, "learning_rate": 2.2399976253294796e-06, "loss": 0.5962, "step": 15147 }, { "epoch": 1.0943702927736738, "grad_norm": 5.151277861340459, "learning_rate": 2.2397067109038345e-06, "loss": 0.6306, "step": 15148 }, { "epoch": 1.0944425379738834, "grad_norm": 6.576569506930812, "learning_rate": 2.2394158000414032e-06, "loss": 0.6825, "step": 15149 }, { "epoch": 1.094514783174093, "grad_norm": 7.169837279874143, "learning_rate": 2.2391248927461665e-06, "loss": 0.7059, "step": 15150 }, { "epoch": 1.0945870283743024, "grad_norm": 7.806314445822099, "learning_rate": 2.238833989022108e-06, "loss": 0.6583, "step": 15151 }, { "epoch": 1.094659273574512, "grad_norm": 6.551029025483295, "learning_rate": 2.23854308887321e-06, "loss": 0.6728, "step": 15152 }, { "epoch": 1.0947315187747213, "grad_norm": 5.891803492027876, "learning_rate": 2.238252192303453e-06, "loss": 0.6108, "step": 15153 }, { "epoch": 1.0948037639749308, "grad_norm": 6.785228493876335, "learning_rate": 2.2379612993168203e-06, "loss": 0.6114, "step": 15154 }, { "epoch": 1.0948760091751404, "grad_norm": 7.232685855886554, "learning_rate": 2.2376704099172934e-06, "loss": 0.6876, "step": 15155 }, { "epoch": 1.09494825437535, "grad_norm": 6.005675492611851, "learning_rate": 2.2373795241088567e-06, "loss": 0.6448, "step": 15156 }, { "epoch": 1.0950204995755595, "grad_norm": 7.748962086733349, "learning_rate": 2.2370886418954895e-06, "loss": 0.7687, "step": 15157 }, { "epoch": 1.095092744775769, "grad_norm": 7.34224578386169, "learning_rate": 2.2367977632811742e-06, "loss": 0.7074, "step": 15158 }, { "epoch": 1.0951649899759786, "grad_norm": 5.4687658691176, "learning_rate": 2.236506888269893e-06, "loss": 0.7069, "step": 15159 }, { "epoch": 1.0952372351761879, "grad_norm": 7.773003678221221, "learning_rate": 2.236216016865629e-06, "loss": 0.6495, "step": 15160 }, { "epoch": 1.0953094803763974, "grad_norm": 6.514258371740337, "learning_rate": 2.235925149072362e-06, "loss": 0.679, "step": 15161 }, { "epoch": 1.095381725576607, "grad_norm": 5.8365473294580505, "learning_rate": 2.235634284894075e-06, "loss": 0.6326, "step": 15162 }, { "epoch": 1.0954539707768165, "grad_norm": 6.758877885221332, "learning_rate": 2.23534342433475e-06, "loss": 0.6634, "step": 15163 }, { "epoch": 1.095526215977026, "grad_norm": 6.394893349361428, "learning_rate": 2.235052567398367e-06, "loss": 0.6439, "step": 15164 }, { "epoch": 1.0955984611772356, "grad_norm": 7.486873583947556, "learning_rate": 2.234761714088909e-06, "loss": 0.6739, "step": 15165 }, { "epoch": 1.0956707063774451, "grad_norm": 6.226089517010506, "learning_rate": 2.234470864410357e-06, "loss": 0.6889, "step": 15166 }, { "epoch": 1.0957429515776544, "grad_norm": 6.492482753608838, "learning_rate": 2.234180018366694e-06, "loss": 0.6585, "step": 15167 }, { "epoch": 1.095815196777864, "grad_norm": 6.828177794507712, "learning_rate": 2.233889175961899e-06, "loss": 0.7224, "step": 15168 }, { "epoch": 1.0958874419780735, "grad_norm": 6.2400812407853765, "learning_rate": 2.2335983371999543e-06, "loss": 0.6862, "step": 15169 }, { "epoch": 1.095959687178283, "grad_norm": 5.930776222207376, "learning_rate": 2.233307502084842e-06, "loss": 0.6159, "step": 15170 }, { "epoch": 1.0960319323784926, "grad_norm": 6.129803700625586, "learning_rate": 2.233016670620543e-06, "loss": 0.6949, "step": 15171 }, { "epoch": 1.0961041775787022, "grad_norm": 6.167468697569372, "learning_rate": 2.232725842811038e-06, "loss": 0.701, "step": 15172 }, { "epoch": 1.0961764227789117, "grad_norm": 6.750978187043237, "learning_rate": 2.2324350186603085e-06, "loss": 0.7385, "step": 15173 }, { "epoch": 1.0962486679791212, "grad_norm": 6.16024418334513, "learning_rate": 2.2321441981723367e-06, "loss": 0.7045, "step": 15174 }, { "epoch": 1.0963209131793306, "grad_norm": 5.867202454000584, "learning_rate": 2.2318533813511023e-06, "loss": 0.628, "step": 15175 }, { "epoch": 1.09639315837954, "grad_norm": 6.819323560891139, "learning_rate": 2.231562568200587e-06, "loss": 0.7203, "step": 15176 }, { "epoch": 1.0964654035797496, "grad_norm": 7.609134615443945, "learning_rate": 2.231271758724772e-06, "loss": 0.7049, "step": 15177 }, { "epoch": 1.0965376487799592, "grad_norm": 6.746273849018231, "learning_rate": 2.2309809529276377e-06, "loss": 0.6594, "step": 15178 }, { "epoch": 1.0966098939801687, "grad_norm": 5.57905258886369, "learning_rate": 2.230690150813165e-06, "loss": 0.6566, "step": 15179 }, { "epoch": 1.0966821391803783, "grad_norm": 6.701893086372097, "learning_rate": 2.2303993523853352e-06, "loss": 0.6191, "step": 15180 }, { "epoch": 1.0967543843805878, "grad_norm": 6.322856301569943, "learning_rate": 2.23010855764813e-06, "loss": 0.6575, "step": 15181 }, { "epoch": 1.0968266295807971, "grad_norm": 8.466785386021616, "learning_rate": 2.2298177666055283e-06, "loss": 0.6743, "step": 15182 }, { "epoch": 1.0968988747810067, "grad_norm": 6.009851315606182, "learning_rate": 2.229526979261511e-06, "loss": 0.654, "step": 15183 }, { "epoch": 1.0969711199812162, "grad_norm": 5.8389245984435005, "learning_rate": 2.22923619562006e-06, "loss": 0.6543, "step": 15184 }, { "epoch": 1.0970433651814258, "grad_norm": 5.147855271339058, "learning_rate": 2.228945415685155e-06, "loss": 0.5757, "step": 15185 }, { "epoch": 1.0971156103816353, "grad_norm": 6.272625064142803, "learning_rate": 2.228654639460777e-06, "loss": 0.6558, "step": 15186 }, { "epoch": 1.0971878555818448, "grad_norm": 7.3572100716958255, "learning_rate": 2.228363866950906e-06, "loss": 0.6549, "step": 15187 }, { "epoch": 1.0972601007820544, "grad_norm": 5.65978690996498, "learning_rate": 2.228073098159523e-06, "loss": 0.6688, "step": 15188 }, { "epoch": 1.0973323459822637, "grad_norm": 7.4455138556520755, "learning_rate": 2.227782333090608e-06, "loss": 0.6645, "step": 15189 }, { "epoch": 1.0974045911824732, "grad_norm": 7.226217527661348, "learning_rate": 2.2274915717481413e-06, "loss": 0.679, "step": 15190 }, { "epoch": 1.0974768363826828, "grad_norm": 6.588361083761236, "learning_rate": 2.2272008141361036e-06, "loss": 0.6646, "step": 15191 }, { "epoch": 1.0975490815828923, "grad_norm": 7.174674358355054, "learning_rate": 2.2269100602584754e-06, "loss": 0.6342, "step": 15192 }, { "epoch": 1.0976213267831019, "grad_norm": 6.512618215004185, "learning_rate": 2.2266193101192355e-06, "loss": 0.6799, "step": 15193 }, { "epoch": 1.0976935719833114, "grad_norm": 7.2932629681714065, "learning_rate": 2.2263285637223655e-06, "loss": 0.6159, "step": 15194 }, { "epoch": 1.097765817183521, "grad_norm": 6.403779832933175, "learning_rate": 2.226037821071846e-06, "loss": 0.7156, "step": 15195 }, { "epoch": 1.0978380623837305, "grad_norm": 6.306004572195518, "learning_rate": 2.2257470821716547e-06, "loss": 0.6585, "step": 15196 }, { "epoch": 1.0979103075839398, "grad_norm": 6.439917545620034, "learning_rate": 2.225456347025773e-06, "loss": 0.6352, "step": 15197 }, { "epoch": 1.0979825527841494, "grad_norm": 4.713987056139856, "learning_rate": 2.2251656156381802e-06, "loss": 0.5852, "step": 15198 }, { "epoch": 1.098054797984359, "grad_norm": 8.12872698474848, "learning_rate": 2.224874888012858e-06, "loss": 0.6897, "step": 15199 }, { "epoch": 1.0981270431845684, "grad_norm": 8.55549591285867, "learning_rate": 2.2245841641537848e-06, "loss": 0.6165, "step": 15200 }, { "epoch": 1.098199288384778, "grad_norm": 6.29491975401376, "learning_rate": 2.22429344406494e-06, "loss": 0.6592, "step": 15201 }, { "epoch": 1.0982715335849875, "grad_norm": 6.142821251647121, "learning_rate": 2.224002727750304e-06, "loss": 0.6626, "step": 15202 }, { "epoch": 1.098343778785197, "grad_norm": 7.2056080806049465, "learning_rate": 2.2237120152138564e-06, "loss": 0.7126, "step": 15203 }, { "epoch": 1.0984160239854064, "grad_norm": 5.905601183257582, "learning_rate": 2.2234213064595765e-06, "loss": 0.6356, "step": 15204 }, { "epoch": 1.098488269185616, "grad_norm": 6.772841342619427, "learning_rate": 2.223130601491444e-06, "loss": 0.6728, "step": 15205 }, { "epoch": 1.0985605143858255, "grad_norm": 6.250129088022371, "learning_rate": 2.2228399003134395e-06, "loss": 0.661, "step": 15206 }, { "epoch": 1.098632759586035, "grad_norm": 6.581589029098461, "learning_rate": 2.222549202929541e-06, "loss": 0.685, "step": 15207 }, { "epoch": 1.0987050047862446, "grad_norm": 8.50984339999173, "learning_rate": 2.2222585093437286e-06, "loss": 0.7333, "step": 15208 }, { "epoch": 1.098777249986454, "grad_norm": 5.563466877432548, "learning_rate": 2.2219678195599823e-06, "loss": 0.6662, "step": 15209 }, { "epoch": 1.0988494951866636, "grad_norm": 8.30276538786551, "learning_rate": 2.22167713358228e-06, "loss": 0.6876, "step": 15210 }, { "epoch": 1.098921740386873, "grad_norm": 6.4572519032738676, "learning_rate": 2.221386451414601e-06, "loss": 0.686, "step": 15211 }, { "epoch": 1.0989939855870825, "grad_norm": 7.52598647435068, "learning_rate": 2.221095773060925e-06, "loss": 0.6668, "step": 15212 }, { "epoch": 1.099066230787292, "grad_norm": 6.394590308791108, "learning_rate": 2.220805098525233e-06, "loss": 0.653, "step": 15213 }, { "epoch": 1.0991384759875016, "grad_norm": 7.9061395087079225, "learning_rate": 2.2205144278115013e-06, "loss": 0.7085, "step": 15214 }, { "epoch": 1.0992107211877111, "grad_norm": 6.978876394431969, "learning_rate": 2.22022376092371e-06, "loss": 0.621, "step": 15215 }, { "epoch": 1.0992829663879207, "grad_norm": 7.919711628297499, "learning_rate": 2.219933097865838e-06, "loss": 0.6728, "step": 15216 }, { "epoch": 1.0993552115881302, "grad_norm": 6.529040679332823, "learning_rate": 2.2196424386418654e-06, "loss": 0.698, "step": 15217 }, { "epoch": 1.0994274567883395, "grad_norm": 6.7848006414981015, "learning_rate": 2.219351783255769e-06, "loss": 0.6648, "step": 15218 }, { "epoch": 1.099499701988549, "grad_norm": 5.7287796537134135, "learning_rate": 2.2190611317115295e-06, "loss": 0.5914, "step": 15219 }, { "epoch": 1.0995719471887586, "grad_norm": 6.882742943125407, "learning_rate": 2.218770484013125e-06, "loss": 0.7459, "step": 15220 }, { "epoch": 1.0996441923889682, "grad_norm": 5.722657249893257, "learning_rate": 2.218479840164534e-06, "loss": 0.7022, "step": 15221 }, { "epoch": 1.0997164375891777, "grad_norm": 5.898807418036134, "learning_rate": 2.2181892001697352e-06, "loss": 0.5693, "step": 15222 }, { "epoch": 1.0997886827893872, "grad_norm": 6.445386185658342, "learning_rate": 2.2178985640327076e-06, "loss": 0.6593, "step": 15223 }, { "epoch": 1.0998609279895968, "grad_norm": 6.158243389100848, "learning_rate": 2.2176079317574307e-06, "loss": 0.6718, "step": 15224 }, { "epoch": 1.099933173189806, "grad_norm": 8.753978151896812, "learning_rate": 2.2173173033478808e-06, "loss": 0.6661, "step": 15225 }, { "epoch": 1.1000054183900156, "grad_norm": 7.440636750732269, "learning_rate": 2.2170266788080365e-06, "loss": 0.6378, "step": 15226 }, { "epoch": 1.1000776635902252, "grad_norm": 7.684359025104761, "learning_rate": 2.2167360581418797e-06, "loss": 0.6697, "step": 15227 }, { "epoch": 1.1001499087904347, "grad_norm": 6.910933757576564, "learning_rate": 2.2164454413533843e-06, "loss": 0.6839, "step": 15228 }, { "epoch": 1.1002221539906443, "grad_norm": 6.654568311990291, "learning_rate": 2.2161548284465313e-06, "loss": 0.6736, "step": 15229 }, { "epoch": 1.1002943991908538, "grad_norm": 6.259674214426175, "learning_rate": 2.2158642194252984e-06, "loss": 0.6613, "step": 15230 }, { "epoch": 1.1003666443910634, "grad_norm": 6.960814195561947, "learning_rate": 2.215573614293664e-06, "loss": 0.6687, "step": 15231 }, { "epoch": 1.1004388895912727, "grad_norm": 6.77174012983268, "learning_rate": 2.2152830130556055e-06, "loss": 0.6505, "step": 15232 }, { "epoch": 1.1005111347914822, "grad_norm": 7.3241859374265, "learning_rate": 2.2149924157151013e-06, "loss": 0.6922, "step": 15233 }, { "epoch": 1.1005833799916918, "grad_norm": 6.749837237620993, "learning_rate": 2.214701822276131e-06, "loss": 0.7033, "step": 15234 }, { "epoch": 1.1006556251919013, "grad_norm": 7.659585778811966, "learning_rate": 2.2144112327426696e-06, "loss": 0.6654, "step": 15235 }, { "epoch": 1.1007278703921108, "grad_norm": 6.3064228680662655, "learning_rate": 2.214120647118697e-06, "loss": 0.6455, "step": 15236 }, { "epoch": 1.1008001155923204, "grad_norm": 6.291052613598184, "learning_rate": 2.213830065408191e-06, "loss": 0.6363, "step": 15237 }, { "epoch": 1.10087236079253, "grad_norm": 6.084730153428257, "learning_rate": 2.2135394876151304e-06, "loss": 0.7064, "step": 15238 }, { "epoch": 1.1009446059927392, "grad_norm": 6.563257373520729, "learning_rate": 2.2132489137434905e-06, "loss": 0.6515, "step": 15239 }, { "epoch": 1.1010168511929488, "grad_norm": 6.7989257524949815, "learning_rate": 2.2129583437972494e-06, "loss": 0.6415, "step": 15240 }, { "epoch": 1.1010890963931583, "grad_norm": 7.0207153068907555, "learning_rate": 2.2126677777803866e-06, "loss": 0.6846, "step": 15241 }, { "epoch": 1.1011613415933679, "grad_norm": 5.647889929857879, "learning_rate": 2.21237721569688e-06, "loss": 0.6811, "step": 15242 }, { "epoch": 1.1012335867935774, "grad_norm": 5.956254111705854, "learning_rate": 2.212086657550705e-06, "loss": 0.6222, "step": 15243 }, { "epoch": 1.101305831993787, "grad_norm": 8.857257139110297, "learning_rate": 2.21179610334584e-06, "loss": 0.6604, "step": 15244 }, { "epoch": 1.1013780771939965, "grad_norm": 7.043338674942545, "learning_rate": 2.211505553086263e-06, "loss": 0.7025, "step": 15245 }, { "epoch": 1.101450322394206, "grad_norm": 6.223286723102147, "learning_rate": 2.2112150067759505e-06, "loss": 0.6694, "step": 15246 }, { "epoch": 1.1015225675944154, "grad_norm": 6.124446610344398, "learning_rate": 2.2109244644188803e-06, "loss": 0.6635, "step": 15247 }, { "epoch": 1.101594812794625, "grad_norm": 6.600713298283949, "learning_rate": 2.2106339260190295e-06, "loss": 0.647, "step": 15248 }, { "epoch": 1.1016670579948344, "grad_norm": 6.282011830452045, "learning_rate": 2.2103433915803766e-06, "loss": 0.6661, "step": 15249 }, { "epoch": 1.101739303195044, "grad_norm": 6.527245567035364, "learning_rate": 2.210052861106897e-06, "loss": 0.6569, "step": 15250 }, { "epoch": 1.1018115483952535, "grad_norm": 8.454472404455249, "learning_rate": 2.2097623346025686e-06, "loss": 0.6642, "step": 15251 }, { "epoch": 1.101883793595463, "grad_norm": 6.577276542112684, "learning_rate": 2.2094718120713697e-06, "loss": 0.6758, "step": 15252 }, { "epoch": 1.1019560387956726, "grad_norm": 6.516703344893917, "learning_rate": 2.2091812935172747e-06, "loss": 0.712, "step": 15253 }, { "epoch": 1.102028283995882, "grad_norm": 7.335447382372839, "learning_rate": 2.2088907789442617e-06, "loss": 0.7335, "step": 15254 }, { "epoch": 1.1021005291960915, "grad_norm": 6.913588547491376, "learning_rate": 2.2086002683563087e-06, "loss": 0.7213, "step": 15255 }, { "epoch": 1.102172774396301, "grad_norm": 7.122443292565449, "learning_rate": 2.208309761757392e-06, "loss": 0.7078, "step": 15256 }, { "epoch": 1.1022450195965106, "grad_norm": 7.377741675818382, "learning_rate": 2.208019259151488e-06, "loss": 0.7099, "step": 15257 }, { "epoch": 1.10231726479672, "grad_norm": 6.877049366072519, "learning_rate": 2.207728760542573e-06, "loss": 0.7119, "step": 15258 }, { "epoch": 1.1023895099969296, "grad_norm": 5.709829909268177, "learning_rate": 2.207438265934625e-06, "loss": 0.6475, "step": 15259 }, { "epoch": 1.1024617551971392, "grad_norm": 6.669709687327659, "learning_rate": 2.20714777533162e-06, "loss": 0.7068, "step": 15260 }, { "epoch": 1.1025340003973485, "grad_norm": 6.646609493202301, "learning_rate": 2.206857288737534e-06, "loss": 0.6955, "step": 15261 }, { "epoch": 1.102606245597558, "grad_norm": 8.345035982505792, "learning_rate": 2.2065668061563443e-06, "loss": 0.663, "step": 15262 }, { "epoch": 1.1026784907977676, "grad_norm": 6.521133692184792, "learning_rate": 2.2062763275920275e-06, "loss": 0.6516, "step": 15263 }, { "epoch": 1.1027507359979771, "grad_norm": 5.562764365096889, "learning_rate": 2.2059858530485595e-06, "loss": 0.6796, "step": 15264 }, { "epoch": 1.1028229811981867, "grad_norm": 6.987455571942697, "learning_rate": 2.2056953825299164e-06, "loss": 0.7205, "step": 15265 }, { "epoch": 1.1028952263983962, "grad_norm": 5.983137276641961, "learning_rate": 2.205404916040076e-06, "loss": 0.68, "step": 15266 }, { "epoch": 1.1029674715986058, "grad_norm": 5.710186659773314, "learning_rate": 2.2051144535830137e-06, "loss": 0.6174, "step": 15267 }, { "epoch": 1.1030397167988153, "grad_norm": 6.763022400145018, "learning_rate": 2.2048239951627044e-06, "loss": 0.6296, "step": 15268 }, { "epoch": 1.1031119619990246, "grad_norm": 10.627996044728178, "learning_rate": 2.2045335407831255e-06, "loss": 0.6212, "step": 15269 }, { "epoch": 1.1031842071992342, "grad_norm": 6.719212715716252, "learning_rate": 2.2042430904482543e-06, "loss": 0.6488, "step": 15270 }, { "epoch": 1.1032564523994437, "grad_norm": 7.359150034711783, "learning_rate": 2.2039526441620647e-06, "loss": 0.6536, "step": 15271 }, { "epoch": 1.1033286975996532, "grad_norm": 8.109191539651645, "learning_rate": 2.203662201928533e-06, "loss": 0.7152, "step": 15272 }, { "epoch": 1.1034009427998628, "grad_norm": 7.284012037322789, "learning_rate": 2.2033717637516364e-06, "loss": 0.587, "step": 15273 }, { "epoch": 1.1034731880000723, "grad_norm": 6.002832062229793, "learning_rate": 2.20308132963535e-06, "loss": 0.6019, "step": 15274 }, { "epoch": 1.1035454332002819, "grad_norm": 6.5888234034274085, "learning_rate": 2.2027908995836493e-06, "loss": 0.5993, "step": 15275 }, { "epoch": 1.1036176784004912, "grad_norm": 6.911375327475446, "learning_rate": 2.202500473600511e-06, "loss": 0.6896, "step": 15276 }, { "epoch": 1.1036899236007007, "grad_norm": 6.20245157809501, "learning_rate": 2.2022100516899096e-06, "loss": 0.7027, "step": 15277 }, { "epoch": 1.1037621688009103, "grad_norm": 5.901943029084125, "learning_rate": 2.2019196338558218e-06, "loss": 0.6379, "step": 15278 }, { "epoch": 1.1038344140011198, "grad_norm": 6.809473327795119, "learning_rate": 2.201629220102222e-06, "loss": 0.6674, "step": 15279 }, { "epoch": 1.1039066592013294, "grad_norm": 7.263827062489167, "learning_rate": 2.201338810433087e-06, "loss": 0.66, "step": 15280 }, { "epoch": 1.103978904401539, "grad_norm": 6.743439205669214, "learning_rate": 2.201048404852393e-06, "loss": 0.6556, "step": 15281 }, { "epoch": 1.1040511496017484, "grad_norm": 5.852416766852772, "learning_rate": 2.2007580033641128e-06, "loss": 0.6048, "step": 15282 }, { "epoch": 1.1041233948019578, "grad_norm": 6.367655394043019, "learning_rate": 2.2004676059722228e-06, "loss": 0.7014, "step": 15283 }, { "epoch": 1.1041956400021673, "grad_norm": 5.72422620049928, "learning_rate": 2.2001772126807003e-06, "loss": 0.5501, "step": 15284 }, { "epoch": 1.1042678852023768, "grad_norm": 5.470293797164365, "learning_rate": 2.199886823493518e-06, "loss": 0.6487, "step": 15285 }, { "epoch": 1.1043401304025864, "grad_norm": 7.219196784655214, "learning_rate": 2.199596438414652e-06, "loss": 0.6158, "step": 15286 }, { "epoch": 1.104412375602796, "grad_norm": 10.385399443759749, "learning_rate": 2.199306057448077e-06, "loss": 0.6675, "step": 15287 }, { "epoch": 1.1044846208030055, "grad_norm": 6.202201255605873, "learning_rate": 2.19901568059777e-06, "loss": 0.6627, "step": 15288 }, { "epoch": 1.104556866003215, "grad_norm": 6.01687918135372, "learning_rate": 2.1987253078677037e-06, "loss": 0.6896, "step": 15289 }, { "epoch": 1.1046291112034243, "grad_norm": 5.664496548562956, "learning_rate": 2.198434939261854e-06, "loss": 0.6167, "step": 15290 }, { "epoch": 1.1047013564036339, "grad_norm": 5.999062146958173, "learning_rate": 2.1981445747841957e-06, "loss": 0.6549, "step": 15291 }, { "epoch": 1.1047736016038434, "grad_norm": 6.977796216941709, "learning_rate": 2.1978542144387045e-06, "loss": 0.6265, "step": 15292 }, { "epoch": 1.104845846804053, "grad_norm": 6.658753116951384, "learning_rate": 2.197563858229354e-06, "loss": 0.7126, "step": 15293 }, { "epoch": 1.1049180920042625, "grad_norm": 6.791885684485793, "learning_rate": 2.1972735061601192e-06, "loss": 0.6207, "step": 15294 }, { "epoch": 1.104990337204472, "grad_norm": 7.719570100900996, "learning_rate": 2.1969831582349767e-06, "loss": 0.692, "step": 15295 }, { "epoch": 1.1050625824046816, "grad_norm": 5.403024758798487, "learning_rate": 2.1966928144578976e-06, "loss": 0.6371, "step": 15296 }, { "epoch": 1.105134827604891, "grad_norm": 6.519944037916917, "learning_rate": 2.1964024748328584e-06, "loss": 0.7075, "step": 15297 }, { "epoch": 1.1052070728051004, "grad_norm": 6.042084287043527, "learning_rate": 2.1961121393638342e-06, "loss": 0.6337, "step": 15298 }, { "epoch": 1.10527931800531, "grad_norm": 7.373194586682623, "learning_rate": 2.1958218080547994e-06, "loss": 0.6714, "step": 15299 }, { "epoch": 1.1053515632055195, "grad_norm": 7.838395559442574, "learning_rate": 2.195531480909727e-06, "loss": 0.6609, "step": 15300 }, { "epoch": 1.105423808405729, "grad_norm": 6.1478964580883275, "learning_rate": 2.1952411579325925e-06, "loss": 0.5931, "step": 15301 }, { "epoch": 1.1054960536059386, "grad_norm": 7.581380683546214, "learning_rate": 2.1949508391273703e-06, "loss": 0.6072, "step": 15302 }, { "epoch": 1.1055682988061482, "grad_norm": 6.494952075713901, "learning_rate": 2.1946605244980336e-06, "loss": 0.6155, "step": 15303 }, { "epoch": 1.1056405440063575, "grad_norm": 7.192188889633664, "learning_rate": 2.1943702140485573e-06, "loss": 0.7468, "step": 15304 }, { "epoch": 1.105712789206567, "grad_norm": 8.986593645895274, "learning_rate": 2.1940799077829155e-06, "loss": 0.6433, "step": 15305 }, { "epoch": 1.1057850344067766, "grad_norm": 7.505372983850235, "learning_rate": 2.193789605705083e-06, "loss": 0.6487, "step": 15306 }, { "epoch": 1.105857279606986, "grad_norm": 7.02563089273255, "learning_rate": 2.193499307819032e-06, "loss": 0.6884, "step": 15307 }, { "epoch": 1.1059295248071956, "grad_norm": 6.903623055432843, "learning_rate": 2.193209014128738e-06, "loss": 0.6915, "step": 15308 }, { "epoch": 1.1060017700074052, "grad_norm": 6.721112922517842, "learning_rate": 2.1929187246381753e-06, "loss": 0.6331, "step": 15309 }, { "epoch": 1.1060740152076147, "grad_norm": 5.787649540792121, "learning_rate": 2.1926284393513157e-06, "loss": 0.7298, "step": 15310 }, { "epoch": 1.106146260407824, "grad_norm": 6.494834314454588, "learning_rate": 2.1923381582721334e-06, "loss": 0.6651, "step": 15311 }, { "epoch": 1.1062185056080336, "grad_norm": 7.090308072777552, "learning_rate": 2.192047881404604e-06, "loss": 0.752, "step": 15312 }, { "epoch": 1.1062907508082431, "grad_norm": 7.483178921177645, "learning_rate": 2.1917576087527e-06, "loss": 0.601, "step": 15313 }, { "epoch": 1.1063629960084527, "grad_norm": 7.34271336807538, "learning_rate": 2.1914673403203947e-06, "loss": 0.7017, "step": 15314 }, { "epoch": 1.1064352412086622, "grad_norm": 6.762734727434778, "learning_rate": 2.191177076111662e-06, "loss": 0.6254, "step": 15315 }, { "epoch": 1.1065074864088718, "grad_norm": 6.313286458492314, "learning_rate": 2.1908868161304754e-06, "loss": 0.6282, "step": 15316 }, { "epoch": 1.1065797316090813, "grad_norm": 6.827127134966228, "learning_rate": 2.190596560380808e-06, "loss": 0.7732, "step": 15317 }, { "epoch": 1.1066519768092906, "grad_norm": 8.222481812282732, "learning_rate": 2.1903063088666333e-06, "loss": 0.6092, "step": 15318 }, { "epoch": 1.1067242220095002, "grad_norm": 6.313557149161827, "learning_rate": 2.190016061591925e-06, "loss": 0.6107, "step": 15319 }, { "epoch": 1.1067964672097097, "grad_norm": 5.814979926588815, "learning_rate": 2.1897258185606564e-06, "loss": 0.6456, "step": 15320 }, { "epoch": 1.1068687124099192, "grad_norm": 7.1518775875530585, "learning_rate": 2.1894355797768e-06, "loss": 0.6147, "step": 15321 }, { "epoch": 1.1069409576101288, "grad_norm": 6.4652440987253925, "learning_rate": 2.1891453452443296e-06, "loss": 0.68, "step": 15322 }, { "epoch": 1.1070132028103383, "grad_norm": 5.570499112078379, "learning_rate": 2.188855114967218e-06, "loss": 0.6131, "step": 15323 }, { "epoch": 1.1070854480105479, "grad_norm": 6.092044846067217, "learning_rate": 2.1885648889494394e-06, "loss": 0.5504, "step": 15324 }, { "epoch": 1.1071576932107574, "grad_norm": 7.106885134480313, "learning_rate": 2.1882746671949637e-06, "loss": 0.6591, "step": 15325 }, { "epoch": 1.1072299384109667, "grad_norm": 8.210891970318583, "learning_rate": 2.1879844497077666e-06, "loss": 0.6458, "step": 15326 }, { "epoch": 1.1073021836111763, "grad_norm": 7.979938625778485, "learning_rate": 2.187694236491821e-06, "loss": 0.6899, "step": 15327 }, { "epoch": 1.1073744288113858, "grad_norm": 7.081221082313527, "learning_rate": 2.187404027551098e-06, "loss": 0.6502, "step": 15328 }, { "epoch": 1.1074466740115954, "grad_norm": 6.91418387958428, "learning_rate": 2.187113822889571e-06, "loss": 0.7501, "step": 15329 }, { "epoch": 1.107518919211805, "grad_norm": 6.863683924317862, "learning_rate": 2.1868236225112127e-06, "loss": 0.6736, "step": 15330 }, { "epoch": 1.1075911644120144, "grad_norm": 6.632491859980311, "learning_rate": 2.1865334264199967e-06, "loss": 0.5809, "step": 15331 }, { "epoch": 1.107663409612224, "grad_norm": 8.064771701823869, "learning_rate": 2.1862432346198937e-06, "loss": 0.7099, "step": 15332 }, { "epoch": 1.1077356548124333, "grad_norm": 6.342985962206447, "learning_rate": 2.1859530471148775e-06, "loss": 0.6517, "step": 15333 }, { "epoch": 1.1078079000126428, "grad_norm": 5.8230187185917845, "learning_rate": 2.1856628639089207e-06, "loss": 0.6485, "step": 15334 }, { "epoch": 1.1078801452128524, "grad_norm": 6.284334099331566, "learning_rate": 2.1853726850059947e-06, "loss": 0.6431, "step": 15335 }, { "epoch": 1.107952390413062, "grad_norm": 6.788074346869151, "learning_rate": 2.1850825104100727e-06, "loss": 0.7178, "step": 15336 }, { "epoch": 1.1080246356132715, "grad_norm": 7.044385516819073, "learning_rate": 2.1847923401251262e-06, "loss": 0.6571, "step": 15337 }, { "epoch": 1.108096880813481, "grad_norm": 6.976808823426551, "learning_rate": 2.184502174155129e-06, "loss": 0.6226, "step": 15338 }, { "epoch": 1.1081691260136906, "grad_norm": 5.69882148471921, "learning_rate": 2.1842120125040504e-06, "loss": 0.6239, "step": 15339 }, { "epoch": 1.1082413712138999, "grad_norm": 6.29097014691727, "learning_rate": 2.183921855175865e-06, "loss": 0.7242, "step": 15340 }, { "epoch": 1.1083136164141094, "grad_norm": 7.298415223636658, "learning_rate": 2.1836317021745444e-06, "loss": 0.6905, "step": 15341 }, { "epoch": 1.108385861614319, "grad_norm": 7.941593586390974, "learning_rate": 2.1833415535040598e-06, "loss": 0.6501, "step": 15342 }, { "epoch": 1.1084581068145285, "grad_norm": 7.37949066513446, "learning_rate": 2.183051409168383e-06, "loss": 0.6875, "step": 15343 }, { "epoch": 1.108530352014738, "grad_norm": 6.751155330542998, "learning_rate": 2.1827612691714866e-06, "loss": 0.7027, "step": 15344 }, { "epoch": 1.1086025972149476, "grad_norm": 7.846827416254819, "learning_rate": 2.1824711335173424e-06, "loss": 0.7229, "step": 15345 }, { "epoch": 1.1086748424151571, "grad_norm": 6.930498043510546, "learning_rate": 2.1821810022099214e-06, "loss": 0.6545, "step": 15346 }, { "epoch": 1.1087470876153667, "grad_norm": 6.7631930237654005, "learning_rate": 2.1818908752531958e-06, "loss": 0.5882, "step": 15347 }, { "epoch": 1.108819332815576, "grad_norm": 6.629815420858143, "learning_rate": 2.181600752651137e-06, "loss": 0.6989, "step": 15348 }, { "epoch": 1.1088915780157855, "grad_norm": 6.248617095541177, "learning_rate": 2.181310634407717e-06, "loss": 0.7364, "step": 15349 }, { "epoch": 1.108963823215995, "grad_norm": 6.351012634274388, "learning_rate": 2.181020520526907e-06, "loss": 0.6055, "step": 15350 }, { "epoch": 1.1090360684162046, "grad_norm": 7.166937298063588, "learning_rate": 2.180730411012678e-06, "loss": 0.6644, "step": 15351 }, { "epoch": 1.1091083136164142, "grad_norm": 6.072940920153738, "learning_rate": 2.1804403058690028e-06, "loss": 0.6495, "step": 15352 }, { "epoch": 1.1091805588166237, "grad_norm": 6.443632593574572, "learning_rate": 2.1801502050998504e-06, "loss": 0.5911, "step": 15353 }, { "epoch": 1.1092528040168332, "grad_norm": 5.996183771209421, "learning_rate": 2.179860108709194e-06, "loss": 0.6464, "step": 15354 }, { "epoch": 1.1093250492170426, "grad_norm": 5.56959595425269, "learning_rate": 2.1795700167010035e-06, "loss": 0.7184, "step": 15355 }, { "epoch": 1.109397294417252, "grad_norm": 7.966839969085457, "learning_rate": 2.1792799290792518e-06, "loss": 0.6747, "step": 15356 }, { "epoch": 1.1094695396174616, "grad_norm": 6.258042920617361, "learning_rate": 2.178989845847908e-06, "loss": 0.6477, "step": 15357 }, { "epoch": 1.1095417848176712, "grad_norm": 5.491372016716457, "learning_rate": 2.178699767010944e-06, "loss": 0.6246, "step": 15358 }, { "epoch": 1.1096140300178807, "grad_norm": 5.560564883236118, "learning_rate": 2.1784096925723315e-06, "loss": 0.6507, "step": 15359 }, { "epoch": 1.1096862752180903, "grad_norm": 5.927103356549365, "learning_rate": 2.1781196225360395e-06, "loss": 0.6016, "step": 15360 }, { "epoch": 1.1097585204182998, "grad_norm": 5.808840019787796, "learning_rate": 2.1778295569060404e-06, "loss": 0.6621, "step": 15361 }, { "epoch": 1.1098307656185091, "grad_norm": 5.667282893763926, "learning_rate": 2.1775394956863043e-06, "loss": 0.6505, "step": 15362 }, { "epoch": 1.1099030108187187, "grad_norm": 7.522162688918306, "learning_rate": 2.177249438880803e-06, "loss": 0.7381, "step": 15363 }, { "epoch": 1.1099752560189282, "grad_norm": 7.548166610730688, "learning_rate": 2.1769593864935055e-06, "loss": 0.6128, "step": 15364 }, { "epoch": 1.1100475012191378, "grad_norm": 5.794317539898364, "learning_rate": 2.176669338528383e-06, "loss": 0.6636, "step": 15365 }, { "epoch": 1.1101197464193473, "grad_norm": 5.8583179385044915, "learning_rate": 2.1763792949894074e-06, "loss": 0.663, "step": 15366 }, { "epoch": 1.1101919916195568, "grad_norm": 6.382610789442041, "learning_rate": 2.1760892558805465e-06, "loss": 0.6339, "step": 15367 }, { "epoch": 1.1102642368197664, "grad_norm": 6.96278543486462, "learning_rate": 2.1757992212057727e-06, "loss": 0.6779, "step": 15368 }, { "epoch": 1.1103364820199757, "grad_norm": 5.859486652582042, "learning_rate": 2.175509190969056e-06, "loss": 0.6577, "step": 15369 }, { "epoch": 1.1104087272201852, "grad_norm": 6.1746977326703005, "learning_rate": 2.175219165174367e-06, "loss": 0.6722, "step": 15370 }, { "epoch": 1.1104809724203948, "grad_norm": 6.390495466864187, "learning_rate": 2.174929143825675e-06, "loss": 0.6132, "step": 15371 }, { "epoch": 1.1105532176206043, "grad_norm": 7.031226942236499, "learning_rate": 2.1746391269269502e-06, "loss": 0.6254, "step": 15372 }, { "epoch": 1.1106254628208139, "grad_norm": 7.131127917943982, "learning_rate": 2.1743491144821635e-06, "loss": 0.7094, "step": 15373 }, { "epoch": 1.1106977080210234, "grad_norm": 7.50363172975208, "learning_rate": 2.174059106495285e-06, "loss": 0.6036, "step": 15374 }, { "epoch": 1.110769953221233, "grad_norm": 5.789336616317214, "learning_rate": 2.1737691029702836e-06, "loss": 0.6546, "step": 15375 }, { "epoch": 1.1108421984214423, "grad_norm": 6.346453010959153, "learning_rate": 2.17347910391113e-06, "loss": 0.681, "step": 15376 }, { "epoch": 1.1109144436216518, "grad_norm": 7.051763154447672, "learning_rate": 2.1731891093217946e-06, "loss": 0.6411, "step": 15377 }, { "epoch": 1.1109866888218614, "grad_norm": 6.8767104361998435, "learning_rate": 2.1728991192062456e-06, "loss": 0.6474, "step": 15378 }, { "epoch": 1.111058934022071, "grad_norm": 7.807471281958295, "learning_rate": 2.1726091335684537e-06, "loss": 0.6778, "step": 15379 }, { "epoch": 1.1111311792222804, "grad_norm": 6.9342379762332325, "learning_rate": 2.172319152412389e-06, "loss": 0.6605, "step": 15380 }, { "epoch": 1.11120342442249, "grad_norm": 8.223329841768688, "learning_rate": 2.1720291757420214e-06, "loss": 0.6752, "step": 15381 }, { "epoch": 1.1112756696226995, "grad_norm": 5.009164422381164, "learning_rate": 2.171739203561318e-06, "loss": 0.645, "step": 15382 }, { "epoch": 1.1113479148229088, "grad_norm": 6.848505745403461, "learning_rate": 2.1714492358742507e-06, "loss": 0.5843, "step": 15383 }, { "epoch": 1.1114201600231184, "grad_norm": 7.411049362197352, "learning_rate": 2.17115927268479e-06, "loss": 0.6969, "step": 15384 }, { "epoch": 1.111492405223328, "grad_norm": 7.097799212580571, "learning_rate": 2.1708693139969012e-06, "loss": 0.6801, "step": 15385 }, { "epoch": 1.1115646504235375, "grad_norm": 6.455180951336837, "learning_rate": 2.170579359814557e-06, "loss": 0.6497, "step": 15386 }, { "epoch": 1.111636895623747, "grad_norm": 6.836380286998874, "learning_rate": 2.170289410141725e-06, "loss": 0.7175, "step": 15387 }, { "epoch": 1.1117091408239566, "grad_norm": 7.488391029695435, "learning_rate": 2.169999464982376e-06, "loss": 0.5516, "step": 15388 }, { "epoch": 1.111781386024166, "grad_norm": 6.807980499401023, "learning_rate": 2.169709524340477e-06, "loss": 0.6204, "step": 15389 }, { "epoch": 1.1118536312243754, "grad_norm": 5.9774355979905796, "learning_rate": 2.1694195882199984e-06, "loss": 0.5831, "step": 15390 }, { "epoch": 1.111925876424585, "grad_norm": 7.0045143284729745, "learning_rate": 2.1691296566249093e-06, "loss": 0.6515, "step": 15391 }, { "epoch": 1.1119981216247945, "grad_norm": 5.950245238307081, "learning_rate": 2.168839729559178e-06, "loss": 0.6024, "step": 15392 }, { "epoch": 1.112070366825004, "grad_norm": 8.935023678330362, "learning_rate": 2.1685498070267735e-06, "loss": 0.6251, "step": 15393 }, { "epoch": 1.1121426120252136, "grad_norm": 6.030858160542983, "learning_rate": 2.168259889031665e-06, "loss": 0.682, "step": 15394 }, { "epoch": 1.1122148572254231, "grad_norm": 6.624997984687931, "learning_rate": 2.167969975577822e-06, "loss": 0.6628, "step": 15395 }, { "epoch": 1.1122871024256327, "grad_norm": 6.3663202174861, "learning_rate": 2.1676800666692106e-06, "loss": 0.6521, "step": 15396 }, { "epoch": 1.1123593476258422, "grad_norm": 9.147131848679612, "learning_rate": 2.1673901623098018e-06, "loss": 0.6238, "step": 15397 }, { "epoch": 1.1124315928260515, "grad_norm": 8.212990719143324, "learning_rate": 2.1671002625035635e-06, "loss": 0.7094, "step": 15398 }, { "epoch": 1.112503838026261, "grad_norm": 5.845937717164694, "learning_rate": 2.166810367254465e-06, "loss": 0.6696, "step": 15399 }, { "epoch": 1.1125760832264706, "grad_norm": 6.072841358099562, "learning_rate": 2.166520476566473e-06, "loss": 0.7259, "step": 15400 }, { "epoch": 1.1126483284266802, "grad_norm": 6.5700460010236155, "learning_rate": 2.166230590443556e-06, "loss": 0.6466, "step": 15401 }, { "epoch": 1.1127205736268897, "grad_norm": 6.400999420332856, "learning_rate": 2.165940708889684e-06, "loss": 0.6786, "step": 15402 }, { "epoch": 1.1127928188270992, "grad_norm": 6.467569289512748, "learning_rate": 2.165650831908824e-06, "loss": 0.6094, "step": 15403 }, { "epoch": 1.1128650640273088, "grad_norm": 6.340942062038642, "learning_rate": 2.1653609595049443e-06, "loss": 0.659, "step": 15404 }, { "epoch": 1.112937309227518, "grad_norm": 10.002333178131373, "learning_rate": 2.1650710916820132e-06, "loss": 0.6865, "step": 15405 }, { "epoch": 1.1130095544277276, "grad_norm": 6.492055292539237, "learning_rate": 2.164781228444e-06, "loss": 0.6337, "step": 15406 }, { "epoch": 1.1130817996279372, "grad_norm": 6.29041619747851, "learning_rate": 2.16449136979487e-06, "loss": 0.6966, "step": 15407 }, { "epoch": 1.1131540448281467, "grad_norm": 6.243437106031633, "learning_rate": 2.1642015157385933e-06, "loss": 0.6817, "step": 15408 }, { "epoch": 1.1132262900283563, "grad_norm": 6.0975367591190945, "learning_rate": 2.163911666279138e-06, "loss": 0.7048, "step": 15409 }, { "epoch": 1.1132985352285658, "grad_norm": 7.0503865562138515, "learning_rate": 2.163621821420469e-06, "loss": 0.6583, "step": 15410 }, { "epoch": 1.1133707804287754, "grad_norm": 5.963746854980765, "learning_rate": 2.163331981166557e-06, "loss": 0.6634, "step": 15411 }, { "epoch": 1.1134430256289847, "grad_norm": 6.318223530224366, "learning_rate": 2.163042145521369e-06, "loss": 0.5923, "step": 15412 }, { "epoch": 1.1135152708291942, "grad_norm": 6.613497266942319, "learning_rate": 2.162752314488873e-06, "loss": 0.7693, "step": 15413 }, { "epoch": 1.1135875160294038, "grad_norm": 5.95081354557913, "learning_rate": 2.1624624880730353e-06, "loss": 0.7194, "step": 15414 }, { "epoch": 1.1136597612296133, "grad_norm": 6.57995232144506, "learning_rate": 2.162172666277824e-06, "loss": 0.6236, "step": 15415 }, { "epoch": 1.1137320064298228, "grad_norm": 7.364378700129158, "learning_rate": 2.1618828491072068e-06, "loss": 0.7524, "step": 15416 }, { "epoch": 1.1138042516300324, "grad_norm": 5.933028936971684, "learning_rate": 2.161593036565151e-06, "loss": 0.6252, "step": 15417 }, { "epoch": 1.113876496830242, "grad_norm": 7.1825231155676805, "learning_rate": 2.161303228655623e-06, "loss": 0.6606, "step": 15418 }, { "epoch": 1.1139487420304515, "grad_norm": 5.51471994555307, "learning_rate": 2.1610134253825913e-06, "loss": 0.6336, "step": 15419 }, { "epoch": 1.1140209872306608, "grad_norm": 7.324467964510102, "learning_rate": 2.160723626750023e-06, "loss": 0.7366, "step": 15420 }, { "epoch": 1.1140932324308703, "grad_norm": 8.129365188570423, "learning_rate": 2.160433832761884e-06, "loss": 0.6789, "step": 15421 }, { "epoch": 1.1141654776310799, "grad_norm": 5.655852077249419, "learning_rate": 2.1601440434221427e-06, "loss": 0.6648, "step": 15422 }, { "epoch": 1.1142377228312894, "grad_norm": 6.798614630104568, "learning_rate": 2.1598542587347652e-06, "loss": 0.6756, "step": 15423 }, { "epoch": 1.114309968031499, "grad_norm": 7.351717129417899, "learning_rate": 2.1595644787037194e-06, "loss": 0.6914, "step": 15424 }, { "epoch": 1.1143822132317085, "grad_norm": 7.739029780256519, "learning_rate": 2.159274703332971e-06, "loss": 0.6732, "step": 15425 }, { "epoch": 1.114454458431918, "grad_norm": 6.179110301149311, "learning_rate": 2.158984932626487e-06, "loss": 0.6536, "step": 15426 }, { "epoch": 1.1145267036321274, "grad_norm": 5.759304976386922, "learning_rate": 2.1586951665882362e-06, "loss": 0.6551, "step": 15427 }, { "epoch": 1.114598948832337, "grad_norm": 6.854726689272092, "learning_rate": 2.1584054052221826e-06, "loss": 0.6706, "step": 15428 }, { "epoch": 1.1146711940325464, "grad_norm": 6.342534592428515, "learning_rate": 2.158115648532293e-06, "loss": 0.6473, "step": 15429 }, { "epoch": 1.114743439232756, "grad_norm": 6.983114585394124, "learning_rate": 2.157825896522535e-06, "loss": 0.7092, "step": 15430 }, { "epoch": 1.1148156844329655, "grad_norm": 6.690447291544677, "learning_rate": 2.1575361491968757e-06, "loss": 0.6978, "step": 15431 }, { "epoch": 1.114887929633175, "grad_norm": 6.840839481258833, "learning_rate": 2.15724640655928e-06, "loss": 0.6632, "step": 15432 }, { "epoch": 1.1149601748333846, "grad_norm": 6.533654167446051, "learning_rate": 2.1569566686137145e-06, "loss": 0.6626, "step": 15433 }, { "epoch": 1.115032420033594, "grad_norm": 7.718664500886542, "learning_rate": 2.1566669353641467e-06, "loss": 0.6498, "step": 15434 }, { "epoch": 1.1151046652338035, "grad_norm": 7.25611356595466, "learning_rate": 2.1563772068145415e-06, "loss": 0.6903, "step": 15435 }, { "epoch": 1.115176910434013, "grad_norm": 5.781994580958073, "learning_rate": 2.156087482968865e-06, "loss": 0.5421, "step": 15436 }, { "epoch": 1.1152491556342226, "grad_norm": 6.625520217911872, "learning_rate": 2.1557977638310844e-06, "loss": 0.6832, "step": 15437 }, { "epoch": 1.115321400834432, "grad_norm": 6.237497260144689, "learning_rate": 2.1555080494051657e-06, "loss": 0.625, "step": 15438 }, { "epoch": 1.1153936460346416, "grad_norm": 6.703332146420493, "learning_rate": 2.1552183396950734e-06, "loss": 0.6584, "step": 15439 }, { "epoch": 1.1154658912348512, "grad_norm": 8.17942353472123, "learning_rate": 2.1549286347047744e-06, "loss": 0.6972, "step": 15440 }, { "epoch": 1.1155381364350605, "grad_norm": 6.88015165649757, "learning_rate": 2.154638934438236e-06, "loss": 0.6653, "step": 15441 }, { "epoch": 1.11561038163527, "grad_norm": 8.03997922069085, "learning_rate": 2.1543492388994206e-06, "loss": 0.7356, "step": 15442 }, { "epoch": 1.1156826268354796, "grad_norm": 6.142981157286688, "learning_rate": 2.154059548092296e-06, "loss": 0.6378, "step": 15443 }, { "epoch": 1.1157548720356891, "grad_norm": 6.277853279822532, "learning_rate": 2.1537698620208275e-06, "loss": 0.7737, "step": 15444 }, { "epoch": 1.1158271172358987, "grad_norm": 5.526272705694155, "learning_rate": 2.153480180688982e-06, "loss": 0.6943, "step": 15445 }, { "epoch": 1.1158993624361082, "grad_norm": 5.979358771912101, "learning_rate": 2.1531905041007226e-06, "loss": 0.7305, "step": 15446 }, { "epoch": 1.1159716076363178, "grad_norm": 6.773170545128657, "learning_rate": 2.152900832260016e-06, "loss": 0.6579, "step": 15447 }, { "epoch": 1.116043852836527, "grad_norm": 7.890111321073712, "learning_rate": 2.152611165170828e-06, "loss": 0.7451, "step": 15448 }, { "epoch": 1.1161160980367366, "grad_norm": 6.074079015173276, "learning_rate": 2.1523215028371235e-06, "loss": 0.625, "step": 15449 }, { "epoch": 1.1161883432369462, "grad_norm": 7.433153437104016, "learning_rate": 2.152031845262867e-06, "loss": 0.6272, "step": 15450 }, { "epoch": 1.1162605884371557, "grad_norm": 6.49012299360867, "learning_rate": 2.1517421924520245e-06, "loss": 0.6896, "step": 15451 }, { "epoch": 1.1163328336373652, "grad_norm": 7.210567444205181, "learning_rate": 2.1514525444085616e-06, "loss": 0.6907, "step": 15452 }, { "epoch": 1.1164050788375748, "grad_norm": 7.621751687466628, "learning_rate": 2.151162901136442e-06, "loss": 0.6093, "step": 15453 }, { "epoch": 1.1164773240377843, "grad_norm": 5.3218807200268365, "learning_rate": 2.1508732626396312e-06, "loss": 0.6692, "step": 15454 }, { "epoch": 1.1165495692379936, "grad_norm": 6.074368215604813, "learning_rate": 2.150583628922095e-06, "loss": 0.6901, "step": 15455 }, { "epoch": 1.1166218144382032, "grad_norm": 7.358884369841747, "learning_rate": 2.1502939999877983e-06, "loss": 0.6122, "step": 15456 }, { "epoch": 1.1166940596384127, "grad_norm": 6.6722817933473095, "learning_rate": 2.1500043758407044e-06, "loss": 0.6599, "step": 15457 }, { "epoch": 1.1167663048386223, "grad_norm": 7.467257681798382, "learning_rate": 2.1497147564847787e-06, "loss": 0.7119, "step": 15458 }, { "epoch": 1.1168385500388318, "grad_norm": 6.963364234856967, "learning_rate": 2.1494251419239865e-06, "loss": 0.7366, "step": 15459 }, { "epoch": 1.1169107952390414, "grad_norm": 5.773756955281553, "learning_rate": 2.1491355321622915e-06, "loss": 0.695, "step": 15460 }, { "epoch": 1.116983040439251, "grad_norm": 5.269164211874519, "learning_rate": 2.1488459272036584e-06, "loss": 0.5834, "step": 15461 }, { "epoch": 1.1170552856394602, "grad_norm": 5.889246124052549, "learning_rate": 2.148556327052052e-06, "loss": 0.6796, "step": 15462 }, { "epoch": 1.1171275308396698, "grad_norm": 6.5743339288224485, "learning_rate": 2.1482667317114373e-06, "loss": 0.6464, "step": 15463 }, { "epoch": 1.1171997760398793, "grad_norm": 5.781134526929557, "learning_rate": 2.147977141185777e-06, "loss": 0.6784, "step": 15464 }, { "epoch": 1.1172720212400888, "grad_norm": 6.540036656454873, "learning_rate": 2.1476875554790366e-06, "loss": 0.6401, "step": 15465 }, { "epoch": 1.1173442664402984, "grad_norm": 6.538622915425461, "learning_rate": 2.1473979745951804e-06, "loss": 0.6209, "step": 15466 }, { "epoch": 1.117416511640508, "grad_norm": 6.163436180949765, "learning_rate": 2.147108398538172e-06, "loss": 0.6238, "step": 15467 }, { "epoch": 1.1174887568407175, "grad_norm": 6.861505477354914, "learning_rate": 2.146818827311975e-06, "loss": 0.6739, "step": 15468 }, { "epoch": 1.117561002040927, "grad_norm": 6.84052831338349, "learning_rate": 2.1465292609205545e-06, "loss": 0.5943, "step": 15469 }, { "epoch": 1.1176332472411363, "grad_norm": 6.534554991393589, "learning_rate": 2.1462396993678753e-06, "loss": 0.672, "step": 15470 }, { "epoch": 1.1177054924413459, "grad_norm": 6.22384511506813, "learning_rate": 2.145950142657898e-06, "loss": 0.6896, "step": 15471 }, { "epoch": 1.1177777376415554, "grad_norm": 7.480516175600315, "learning_rate": 2.1456605907945894e-06, "loss": 0.7314, "step": 15472 }, { "epoch": 1.117849982841765, "grad_norm": 6.049536934778078, "learning_rate": 2.145371043781912e-06, "loss": 0.6917, "step": 15473 }, { "epoch": 1.1179222280419745, "grad_norm": 5.945872617545891, "learning_rate": 2.1450815016238293e-06, "loss": 0.6725, "step": 15474 }, { "epoch": 1.117994473242184, "grad_norm": 6.524421590256284, "learning_rate": 2.144791964324305e-06, "loss": 0.7558, "step": 15475 }, { "epoch": 1.1180667184423936, "grad_norm": 6.816829880281539, "learning_rate": 2.1445024318873034e-06, "loss": 0.6151, "step": 15476 }, { "epoch": 1.118138963642603, "grad_norm": 5.656643679918121, "learning_rate": 2.1442129043167877e-06, "loss": 0.677, "step": 15477 }, { "epoch": 1.1182112088428124, "grad_norm": 6.297158511762716, "learning_rate": 2.143923381616721e-06, "loss": 0.6276, "step": 15478 }, { "epoch": 1.118283454043022, "grad_norm": 7.347213123350352, "learning_rate": 2.143633863791066e-06, "loss": 0.6288, "step": 15479 }, { "epoch": 1.1183556992432315, "grad_norm": 6.343362730636005, "learning_rate": 2.143344350843787e-06, "loss": 0.6499, "step": 15480 }, { "epoch": 1.118427944443441, "grad_norm": 6.202311657064438, "learning_rate": 2.1430548427788474e-06, "loss": 0.6484, "step": 15481 }, { "epoch": 1.1185001896436506, "grad_norm": 6.483401011658812, "learning_rate": 2.1427653396002094e-06, "loss": 0.7296, "step": 15482 }, { "epoch": 1.1185724348438602, "grad_norm": 6.309693874181906, "learning_rate": 2.142475841311837e-06, "loss": 0.6827, "step": 15483 }, { "epoch": 1.1186446800440695, "grad_norm": 6.599992590235394, "learning_rate": 2.142186347917693e-06, "loss": 0.7558, "step": 15484 }, { "epoch": 1.118716925244279, "grad_norm": 6.976028268144361, "learning_rate": 2.1418968594217393e-06, "loss": 0.6915, "step": 15485 }, { "epoch": 1.1187891704444886, "grad_norm": 5.873072531225138, "learning_rate": 2.1416073758279397e-06, "loss": 0.6176, "step": 15486 }, { "epoch": 1.118861415644698, "grad_norm": 7.7202744928291835, "learning_rate": 2.141317897140256e-06, "loss": 0.6827, "step": 15487 }, { "epoch": 1.1189336608449076, "grad_norm": 6.949539871901761, "learning_rate": 2.141028423362654e-06, "loss": 0.6789, "step": 15488 }, { "epoch": 1.1190059060451172, "grad_norm": 7.024615196467865, "learning_rate": 2.1407389544990927e-06, "loss": 0.6831, "step": 15489 }, { "epoch": 1.1190781512453267, "grad_norm": 6.861771497292422, "learning_rate": 2.140449490553536e-06, "loss": 0.6849, "step": 15490 }, { "epoch": 1.1191503964455363, "grad_norm": 6.61551058620286, "learning_rate": 2.1401600315299472e-06, "loss": 0.6616, "step": 15491 }, { "epoch": 1.1192226416457456, "grad_norm": 6.713127302981462, "learning_rate": 2.139870577432288e-06, "loss": 0.6326, "step": 15492 }, { "epoch": 1.1192948868459551, "grad_norm": 6.187196049786696, "learning_rate": 2.1395811282645207e-06, "loss": 0.7243, "step": 15493 }, { "epoch": 1.1193671320461647, "grad_norm": 5.844827318079965, "learning_rate": 2.139291684030608e-06, "loss": 0.6149, "step": 15494 }, { "epoch": 1.1194393772463742, "grad_norm": 7.8659114162598645, "learning_rate": 2.1390022447345125e-06, "loss": 0.6668, "step": 15495 }, { "epoch": 1.1195116224465838, "grad_norm": 6.740536873373559, "learning_rate": 2.138712810380195e-06, "loss": 0.6804, "step": 15496 }, { "epoch": 1.1195838676467933, "grad_norm": 5.252472613125721, "learning_rate": 2.1384233809716195e-06, "loss": 0.6077, "step": 15497 }, { "epoch": 1.1196561128470028, "grad_norm": 7.023870639365799, "learning_rate": 2.1381339565127474e-06, "loss": 0.6763, "step": 15498 }, { "epoch": 1.1197283580472122, "grad_norm": 7.974281934022727, "learning_rate": 2.13784453700754e-06, "loss": 0.7059, "step": 15499 }, { "epoch": 1.1198006032474217, "grad_norm": 7.794222516233955, "learning_rate": 2.1375551224599593e-06, "loss": 0.7348, "step": 15500 }, { "epoch": 1.1198728484476312, "grad_norm": 6.788242935913713, "learning_rate": 2.1372657128739673e-06, "loss": 0.6184, "step": 15501 }, { "epoch": 1.1199450936478408, "grad_norm": 6.815895564232869, "learning_rate": 2.1369763082535276e-06, "loss": 0.6806, "step": 15502 }, { "epoch": 1.1200173388480503, "grad_norm": 6.424550782170342, "learning_rate": 2.1366869086025993e-06, "loss": 0.6378, "step": 15503 }, { "epoch": 1.1200895840482599, "grad_norm": 6.325136736571639, "learning_rate": 2.1363975139251452e-06, "loss": 0.6223, "step": 15504 }, { "epoch": 1.1201618292484694, "grad_norm": 5.430875755207981, "learning_rate": 2.1361081242251265e-06, "loss": 0.6621, "step": 15505 }, { "epoch": 1.1202340744486787, "grad_norm": 6.1251195779143774, "learning_rate": 2.135818739506506e-06, "loss": 0.6015, "step": 15506 }, { "epoch": 1.1203063196488883, "grad_norm": 7.0617313473100936, "learning_rate": 2.1355293597732437e-06, "loss": 0.6569, "step": 15507 }, { "epoch": 1.1203785648490978, "grad_norm": 6.402170361631708, "learning_rate": 2.135239985029301e-06, "loss": 0.6413, "step": 15508 }, { "epoch": 1.1204508100493074, "grad_norm": 5.948106467653139, "learning_rate": 2.1349506152786404e-06, "loss": 0.6399, "step": 15509 }, { "epoch": 1.120523055249517, "grad_norm": 6.022429821069485, "learning_rate": 2.1346612505252217e-06, "loss": 0.609, "step": 15510 }, { "epoch": 1.1205953004497264, "grad_norm": 6.525276043778513, "learning_rate": 2.134371890773007e-06, "loss": 0.6484, "step": 15511 }, { "epoch": 1.120667545649936, "grad_norm": 6.452699661092229, "learning_rate": 2.1340825360259575e-06, "loss": 0.6636, "step": 15512 }, { "epoch": 1.1207397908501453, "grad_norm": 6.414504376336723, "learning_rate": 2.1337931862880347e-06, "loss": 0.5878, "step": 15513 }, { "epoch": 1.1208120360503548, "grad_norm": 6.051550031354151, "learning_rate": 2.133503841563198e-06, "loss": 0.6189, "step": 15514 }, { "epoch": 1.1208842812505644, "grad_norm": 6.263807380335798, "learning_rate": 2.1332145018554086e-06, "loss": 0.664, "step": 15515 }, { "epoch": 1.120956526450774, "grad_norm": 7.167257979491186, "learning_rate": 2.132925167168629e-06, "loss": 0.7028, "step": 15516 }, { "epoch": 1.1210287716509835, "grad_norm": 8.21145549849773, "learning_rate": 2.1326358375068184e-06, "loss": 0.6938, "step": 15517 }, { "epoch": 1.121101016851193, "grad_norm": 5.436079694791101, "learning_rate": 2.1323465128739377e-06, "loss": 0.7038, "step": 15518 }, { "epoch": 1.1211732620514026, "grad_norm": 7.453931848769716, "learning_rate": 2.1320571932739477e-06, "loss": 0.7277, "step": 15519 }, { "epoch": 1.1212455072516119, "grad_norm": 5.901610474698272, "learning_rate": 2.1317678787108095e-06, "loss": 0.5775, "step": 15520 }, { "epoch": 1.1213177524518214, "grad_norm": 6.468867056704323, "learning_rate": 2.1314785691884825e-06, "loss": 0.7107, "step": 15521 }, { "epoch": 1.121389997652031, "grad_norm": 5.578067554851265, "learning_rate": 2.1311892647109283e-06, "loss": 0.6558, "step": 15522 }, { "epoch": 1.1214622428522405, "grad_norm": 6.39320765308456, "learning_rate": 2.1308999652821064e-06, "loss": 0.6048, "step": 15523 }, { "epoch": 1.12153448805245, "grad_norm": 6.457863608065061, "learning_rate": 2.1306106709059775e-06, "loss": 0.6322, "step": 15524 }, { "epoch": 1.1216067332526596, "grad_norm": 6.113950143118528, "learning_rate": 2.130321381586502e-06, "loss": 0.6873, "step": 15525 }, { "epoch": 1.1216789784528691, "grad_norm": 5.90830804400643, "learning_rate": 2.130032097327639e-06, "loss": 0.6539, "step": 15526 }, { "epoch": 1.1217512236530784, "grad_norm": 6.344873488587589, "learning_rate": 2.1297428181333507e-06, "loss": 0.6556, "step": 15527 }, { "epoch": 1.121823468853288, "grad_norm": 6.861215540013551, "learning_rate": 2.1294535440075946e-06, "loss": 0.7036, "step": 15528 }, { "epoch": 1.1218957140534975, "grad_norm": 7.37535818490959, "learning_rate": 2.1291642749543314e-06, "loss": 0.6858, "step": 15529 }, { "epoch": 1.121967959253707, "grad_norm": 6.641951843546608, "learning_rate": 2.128875010977522e-06, "loss": 0.6412, "step": 15530 }, { "epoch": 1.1220402044539166, "grad_norm": 5.960399278412938, "learning_rate": 2.128585752081126e-06, "loss": 0.6304, "step": 15531 }, { "epoch": 1.1221124496541262, "grad_norm": 5.670109581805076, "learning_rate": 2.128296498269102e-06, "loss": 0.642, "step": 15532 }, { "epoch": 1.1221846948543357, "grad_norm": 6.17591528653566, "learning_rate": 2.1280072495454106e-06, "loss": 0.6041, "step": 15533 }, { "epoch": 1.122256940054545, "grad_norm": 6.492594094518144, "learning_rate": 2.127718005914011e-06, "loss": 0.6193, "step": 15534 }, { "epoch": 1.1223291852547546, "grad_norm": 7.108058933389418, "learning_rate": 2.127428767378863e-06, "loss": 0.6924, "step": 15535 }, { "epoch": 1.122401430454964, "grad_norm": 7.464685842619494, "learning_rate": 2.1271395339439256e-06, "loss": 0.6107, "step": 15536 }, { "epoch": 1.1224736756551736, "grad_norm": 5.938646065379191, "learning_rate": 2.1268503056131582e-06, "loss": 0.6818, "step": 15537 }, { "epoch": 1.1225459208553832, "grad_norm": 7.17264089047751, "learning_rate": 2.1265610823905213e-06, "loss": 0.657, "step": 15538 }, { "epoch": 1.1226181660555927, "grad_norm": 6.714947711346179, "learning_rate": 2.1262718642799726e-06, "loss": 0.6778, "step": 15539 }, { "epoch": 1.1226904112558023, "grad_norm": 6.465077707719042, "learning_rate": 2.125982651285472e-06, "loss": 0.6505, "step": 15540 }, { "epoch": 1.1227626564560116, "grad_norm": 6.220576185910498, "learning_rate": 2.1256934434109794e-06, "loss": 0.7589, "step": 15541 }, { "epoch": 1.1228349016562211, "grad_norm": 6.080370176992336, "learning_rate": 2.125404240660452e-06, "loss": 0.6898, "step": 15542 }, { "epoch": 1.1229071468564307, "grad_norm": 6.91470192576426, "learning_rate": 2.1251150430378486e-06, "loss": 0.6779, "step": 15543 }, { "epoch": 1.1229793920566402, "grad_norm": 5.947228101790494, "learning_rate": 2.1248258505471303e-06, "loss": 0.624, "step": 15544 }, { "epoch": 1.1230516372568498, "grad_norm": 7.437672845450856, "learning_rate": 2.1245366631922556e-06, "loss": 0.6586, "step": 15545 }, { "epoch": 1.1231238824570593, "grad_norm": 6.0084858332242606, "learning_rate": 2.124247480977182e-06, "loss": 0.5941, "step": 15546 }, { "epoch": 1.1231961276572688, "grad_norm": 6.193298089317559, "learning_rate": 2.123958303905868e-06, "loss": 0.6544, "step": 15547 }, { "epoch": 1.1232683728574784, "grad_norm": 7.113598451449645, "learning_rate": 2.1236691319822732e-06, "loss": 0.7925, "step": 15548 }, { "epoch": 1.1233406180576877, "grad_norm": 5.291821975631582, "learning_rate": 2.1233799652103555e-06, "loss": 0.6512, "step": 15549 }, { "epoch": 1.1234128632578972, "grad_norm": 5.534455658181123, "learning_rate": 2.123090803594074e-06, "loss": 0.5971, "step": 15550 }, { "epoch": 1.1234851084581068, "grad_norm": 6.399357954245293, "learning_rate": 2.122801647137386e-06, "loss": 0.707, "step": 15551 }, { "epoch": 1.1235573536583163, "grad_norm": 5.048324518524531, "learning_rate": 2.122512495844252e-06, "loss": 0.5638, "step": 15552 }, { "epoch": 1.1236295988585259, "grad_norm": 6.4382098649118635, "learning_rate": 2.122223349718628e-06, "loss": 0.6583, "step": 15553 }, { "epoch": 1.1237018440587354, "grad_norm": 7.663061731853736, "learning_rate": 2.1219342087644726e-06, "loss": 0.6262, "step": 15554 }, { "epoch": 1.123774089258945, "grad_norm": 5.773826658273104, "learning_rate": 2.1216450729857458e-06, "loss": 0.6592, "step": 15555 }, { "epoch": 1.1238463344591543, "grad_norm": 5.8744769573728925, "learning_rate": 2.1213559423864026e-06, "loss": 0.6899, "step": 15556 }, { "epoch": 1.1239185796593638, "grad_norm": 6.113657823365744, "learning_rate": 2.121066816970403e-06, "loss": 0.6921, "step": 15557 }, { "epoch": 1.1239908248595734, "grad_norm": 6.205162962710877, "learning_rate": 2.120777696741704e-06, "loss": 0.7072, "step": 15558 }, { "epoch": 1.124063070059783, "grad_norm": 8.008029722663528, "learning_rate": 2.1204885817042654e-06, "loss": 0.6868, "step": 15559 }, { "epoch": 1.1241353152599924, "grad_norm": 8.057344429486125, "learning_rate": 2.1201994718620423e-06, "loss": 0.6752, "step": 15560 }, { "epoch": 1.124207560460202, "grad_norm": 6.454574015868, "learning_rate": 2.1199103672189934e-06, "loss": 0.6568, "step": 15561 }, { "epoch": 1.1242798056604115, "grad_norm": 6.248362517425659, "learning_rate": 2.119621267779077e-06, "loss": 0.6361, "step": 15562 }, { "epoch": 1.1243520508606208, "grad_norm": 8.220305669071482, "learning_rate": 2.11933217354625e-06, "loss": 0.6567, "step": 15563 }, { "epoch": 1.1244242960608304, "grad_norm": 7.859914469168636, "learning_rate": 2.1190430845244696e-06, "loss": 0.6867, "step": 15564 }, { "epoch": 1.12449654126104, "grad_norm": 6.398708463527675, "learning_rate": 2.118754000717694e-06, "loss": 0.7001, "step": 15565 }, { "epoch": 1.1245687864612495, "grad_norm": 6.366799259635881, "learning_rate": 2.11846492212988e-06, "loss": 0.7214, "step": 15566 }, { "epoch": 1.124641031661459, "grad_norm": 6.435944008042504, "learning_rate": 2.1181758487649846e-06, "loss": 0.6668, "step": 15567 }, { "epoch": 1.1247132768616686, "grad_norm": 7.792386706804207, "learning_rate": 2.1178867806269657e-06, "loss": 0.615, "step": 15568 }, { "epoch": 1.124785522061878, "grad_norm": 7.607239096408228, "learning_rate": 2.1175977177197797e-06, "loss": 0.6922, "step": 15569 }, { "epoch": 1.1248577672620876, "grad_norm": 5.752884182922435, "learning_rate": 2.1173086600473857e-06, "loss": 0.6995, "step": 15570 }, { "epoch": 1.124930012462297, "grad_norm": 7.133725094361559, "learning_rate": 2.117019607613737e-06, "loss": 0.6528, "step": 15571 }, { "epoch": 1.1250022576625065, "grad_norm": 6.558090617799477, "learning_rate": 2.116730560422793e-06, "loss": 0.6301, "step": 15572 }, { "epoch": 1.125074502862716, "grad_norm": 6.26319773550408, "learning_rate": 2.1164415184785107e-06, "loss": 0.615, "step": 15573 }, { "epoch": 1.1251467480629256, "grad_norm": 6.559288183530998, "learning_rate": 2.116152481784846e-06, "loss": 0.5809, "step": 15574 }, { "epoch": 1.1252189932631351, "grad_norm": 7.760714599898273, "learning_rate": 2.115863450345755e-06, "loss": 0.7582, "step": 15575 }, { "epoch": 1.1252912384633447, "grad_norm": 5.670567049066521, "learning_rate": 2.1155744241651957e-06, "loss": 0.6588, "step": 15576 }, { "epoch": 1.1253634836635542, "grad_norm": 7.037139189501094, "learning_rate": 2.1152854032471244e-06, "loss": 0.6432, "step": 15577 }, { "epoch": 1.1254357288637635, "grad_norm": 5.826979455532284, "learning_rate": 2.1149963875954966e-06, "loss": 0.6292, "step": 15578 }, { "epoch": 1.125507974063973, "grad_norm": 8.129176078985159, "learning_rate": 2.114707377214269e-06, "loss": 0.6534, "step": 15579 }, { "epoch": 1.1255802192641826, "grad_norm": 7.169951669272065, "learning_rate": 2.1144183721073993e-06, "loss": 0.6301, "step": 15580 }, { "epoch": 1.1256524644643922, "grad_norm": 7.265213361995125, "learning_rate": 2.114129372278842e-06, "loss": 0.6808, "step": 15581 }, { "epoch": 1.1257247096646017, "grad_norm": 7.834107813864819, "learning_rate": 2.1138403777325537e-06, "loss": 0.6675, "step": 15582 }, { "epoch": 1.1257969548648112, "grad_norm": 6.016656326468222, "learning_rate": 2.1135513884724913e-06, "loss": 0.6418, "step": 15583 }, { "epoch": 1.1258692000650208, "grad_norm": 6.702030830624302, "learning_rate": 2.1132624045026113e-06, "loss": 0.6132, "step": 15584 }, { "epoch": 1.12594144526523, "grad_norm": 5.945027288144867, "learning_rate": 2.1129734258268676e-06, "loss": 0.7105, "step": 15585 }, { "epoch": 1.1260136904654396, "grad_norm": 7.131911021295332, "learning_rate": 2.1126844524492163e-06, "loss": 0.7602, "step": 15586 }, { "epoch": 1.1260859356656492, "grad_norm": 5.534376564650311, "learning_rate": 2.112395484373615e-06, "loss": 0.6602, "step": 15587 }, { "epoch": 1.1261581808658587, "grad_norm": 6.512161908004472, "learning_rate": 2.1121065216040195e-06, "loss": 0.6602, "step": 15588 }, { "epoch": 1.1262304260660683, "grad_norm": 6.940480459770143, "learning_rate": 2.1118175641443836e-06, "loss": 0.6345, "step": 15589 }, { "epoch": 1.1263026712662778, "grad_norm": 5.768640902759525, "learning_rate": 2.1115286119986634e-06, "loss": 0.6809, "step": 15590 }, { "epoch": 1.1263749164664874, "grad_norm": 6.279947477959329, "learning_rate": 2.111239665170816e-06, "loss": 0.7372, "step": 15591 }, { "epoch": 1.1264471616666967, "grad_norm": 8.70477903801044, "learning_rate": 2.110950723664795e-06, "loss": 0.7341, "step": 15592 }, { "epoch": 1.1265194068669062, "grad_norm": 7.164402744612541, "learning_rate": 2.1106617874845566e-06, "loss": 0.5243, "step": 15593 }, { "epoch": 1.1265916520671158, "grad_norm": 8.372907234737731, "learning_rate": 2.1103728566340558e-06, "loss": 0.6989, "step": 15594 }, { "epoch": 1.1266638972673253, "grad_norm": 7.252749743732707, "learning_rate": 2.110083931117249e-06, "loss": 0.657, "step": 15595 }, { "epoch": 1.1267361424675348, "grad_norm": 5.9765625, "learning_rate": 2.1097950109380894e-06, "loss": 0.6629, "step": 15596 }, { "epoch": 1.1268083876677444, "grad_norm": 7.781389472661302, "learning_rate": 2.109506096100533e-06, "loss": 0.6421, "step": 15597 }, { "epoch": 1.126880632867954, "grad_norm": 6.277608090848389, "learning_rate": 2.1092171866085364e-06, "loss": 0.5725, "step": 15598 }, { "epoch": 1.1269528780681632, "grad_norm": 6.7456577604119135, "learning_rate": 2.108928282466052e-06, "loss": 0.6255, "step": 15599 }, { "epoch": 1.1270251232683728, "grad_norm": 8.751615974523068, "learning_rate": 2.1086393836770354e-06, "loss": 0.6161, "step": 15600 }, { "epoch": 1.1270973684685823, "grad_norm": 7.497673691139712, "learning_rate": 2.108350490245442e-06, "loss": 0.766, "step": 15601 }, { "epoch": 1.1271696136687919, "grad_norm": 7.047900478409471, "learning_rate": 2.1080616021752274e-06, "loss": 0.6066, "step": 15602 }, { "epoch": 1.1272418588690014, "grad_norm": 7.0527668278469635, "learning_rate": 2.107772719470344e-06, "loss": 0.6548, "step": 15603 }, { "epoch": 1.127314104069211, "grad_norm": 6.137296206320915, "learning_rate": 2.1074838421347475e-06, "loss": 0.6657, "step": 15604 }, { "epoch": 1.1273863492694205, "grad_norm": 7.197264564959216, "learning_rate": 2.1071949701723933e-06, "loss": 0.6499, "step": 15605 }, { "epoch": 1.1274585944696298, "grad_norm": 6.336163574399949, "learning_rate": 2.1069061035872343e-06, "loss": 0.6416, "step": 15606 }, { "epoch": 1.1275308396698394, "grad_norm": 7.78130147526272, "learning_rate": 2.1066172423832256e-06, "loss": 0.6695, "step": 15607 }, { "epoch": 1.127603084870049, "grad_norm": 6.313735085701797, "learning_rate": 2.106328386564321e-06, "loss": 0.6883, "step": 15608 }, { "epoch": 1.1276753300702584, "grad_norm": 6.525226936924811, "learning_rate": 2.1060395361344763e-06, "loss": 0.6036, "step": 15609 }, { "epoch": 1.127747575270468, "grad_norm": 8.145213175888282, "learning_rate": 2.105750691097643e-06, "loss": 0.6809, "step": 15610 }, { "epoch": 1.1278198204706775, "grad_norm": 8.034764571001803, "learning_rate": 2.1054618514577775e-06, "loss": 0.6559, "step": 15611 }, { "epoch": 1.127892065670887, "grad_norm": 6.4792418971298975, "learning_rate": 2.1051730172188327e-06, "loss": 0.634, "step": 15612 }, { "epoch": 1.1279643108710964, "grad_norm": 6.35814458017757, "learning_rate": 2.1048841883847636e-06, "loss": 0.6869, "step": 15613 }, { "epoch": 1.128036556071306, "grad_norm": 6.478247116270179, "learning_rate": 2.1045953649595217e-06, "loss": 0.6907, "step": 15614 }, { "epoch": 1.1281088012715155, "grad_norm": 6.220744517621314, "learning_rate": 2.104306546947063e-06, "loss": 0.7105, "step": 15615 }, { "epoch": 1.128181046471725, "grad_norm": 7.1400757960295405, "learning_rate": 2.104017734351341e-06, "loss": 0.7804, "step": 15616 }, { "epoch": 1.1282532916719346, "grad_norm": 6.537917534822435, "learning_rate": 2.103728927176308e-06, "loss": 0.7033, "step": 15617 }, { "epoch": 1.128325536872144, "grad_norm": 6.758322212533076, "learning_rate": 2.1034401254259185e-06, "loss": 0.6703, "step": 15618 }, { "epoch": 1.1283977820723536, "grad_norm": 6.975802423951514, "learning_rate": 2.1031513291041253e-06, "loss": 0.6889, "step": 15619 }, { "epoch": 1.128470027272563, "grad_norm": 6.0090343487894655, "learning_rate": 2.102862538214883e-06, "loss": 0.6787, "step": 15620 }, { "epoch": 1.1285422724727725, "grad_norm": 6.000204082832894, "learning_rate": 2.102573752762144e-06, "loss": 0.6532, "step": 15621 }, { "epoch": 1.128614517672982, "grad_norm": 6.938442131885896, "learning_rate": 2.102284972749861e-06, "loss": 0.6979, "step": 15622 }, { "epoch": 1.1286867628731916, "grad_norm": 7.121818769232134, "learning_rate": 2.101996198181989e-06, "loss": 0.6219, "step": 15623 }, { "epoch": 1.1287590080734011, "grad_norm": 6.018438617578042, "learning_rate": 2.1017074290624796e-06, "loss": 0.616, "step": 15624 }, { "epoch": 1.1288312532736107, "grad_norm": 6.676927426839478, "learning_rate": 2.101418665395286e-06, "loss": 0.7029, "step": 15625 }, { "epoch": 1.1289034984738202, "grad_norm": 7.421872430098239, "learning_rate": 2.1011299071843612e-06, "loss": 0.7085, "step": 15626 }, { "epoch": 1.1289757436740298, "grad_norm": 6.329575209985832, "learning_rate": 2.1008411544336595e-06, "loss": 0.6594, "step": 15627 }, { "epoch": 1.129047988874239, "grad_norm": 7.890945760420387, "learning_rate": 2.100552407147131e-06, "loss": 0.7025, "step": 15628 }, { "epoch": 1.1291202340744486, "grad_norm": 6.200505316046137, "learning_rate": 2.1002636653287304e-06, "loss": 0.6432, "step": 15629 }, { "epoch": 1.1291924792746582, "grad_norm": 6.19237503510886, "learning_rate": 2.0999749289824107e-06, "loss": 0.6472, "step": 15630 }, { "epoch": 1.1292647244748677, "grad_norm": 6.153524192310579, "learning_rate": 2.0996861981121227e-06, "loss": 0.6473, "step": 15631 }, { "epoch": 1.1293369696750772, "grad_norm": 6.221035178049023, "learning_rate": 2.0993974727218198e-06, "loss": 0.6961, "step": 15632 }, { "epoch": 1.1294092148752868, "grad_norm": 7.485412143529312, "learning_rate": 2.0991087528154546e-06, "loss": 0.659, "step": 15633 }, { "epoch": 1.1294814600754963, "grad_norm": 7.054816193109122, "learning_rate": 2.09882003839698e-06, "loss": 0.6928, "step": 15634 }, { "epoch": 1.1295537052757059, "grad_norm": 6.319680237708611, "learning_rate": 2.0985313294703463e-06, "loss": 0.6579, "step": 15635 }, { "epoch": 1.1296259504759152, "grad_norm": 5.698859974142444, "learning_rate": 2.0982426260395077e-06, "loss": 0.619, "step": 15636 }, { "epoch": 1.1296981956761247, "grad_norm": 6.2565575340208985, "learning_rate": 2.0979539281084154e-06, "loss": 0.6448, "step": 15637 }, { "epoch": 1.1297704408763343, "grad_norm": 6.80814131143406, "learning_rate": 2.0976652356810217e-06, "loss": 0.6906, "step": 15638 }, { "epoch": 1.1298426860765438, "grad_norm": 7.257900274462509, "learning_rate": 2.0973765487612786e-06, "loss": 0.6832, "step": 15639 }, { "epoch": 1.1299149312767534, "grad_norm": 6.32931966925245, "learning_rate": 2.0970878673531377e-06, "loss": 0.7015, "step": 15640 }, { "epoch": 1.129987176476963, "grad_norm": 5.849537966488184, "learning_rate": 2.0967991914605523e-06, "loss": 0.6787, "step": 15641 }, { "epoch": 1.1300594216771724, "grad_norm": 6.646230687745974, "learning_rate": 2.096510521087471e-06, "loss": 0.6307, "step": 15642 }, { "epoch": 1.1301316668773818, "grad_norm": 6.288009726033927, "learning_rate": 2.096221856237848e-06, "loss": 0.6403, "step": 15643 }, { "epoch": 1.1302039120775913, "grad_norm": 6.175448925672833, "learning_rate": 2.095933196915635e-06, "loss": 0.6515, "step": 15644 }, { "epoch": 1.1302761572778008, "grad_norm": 6.375878067880111, "learning_rate": 2.095644543124783e-06, "loss": 0.6597, "step": 15645 }, { "epoch": 1.1303484024780104, "grad_norm": 6.139778214714185, "learning_rate": 2.0953558948692423e-06, "loss": 0.619, "step": 15646 }, { "epoch": 1.13042064767822, "grad_norm": 6.532668256319093, "learning_rate": 2.095067252152965e-06, "loss": 0.6606, "step": 15647 }, { "epoch": 1.1304928928784295, "grad_norm": 7.281370497594679, "learning_rate": 2.0947786149799036e-06, "loss": 0.6717, "step": 15648 }, { "epoch": 1.130565138078639, "grad_norm": 5.851405713332608, "learning_rate": 2.094489983354008e-06, "loss": 0.6381, "step": 15649 }, { "epoch": 1.1306373832788483, "grad_norm": 6.071122163365663, "learning_rate": 2.094201357279229e-06, "loss": 0.6774, "step": 15650 }, { "epoch": 1.1307096284790579, "grad_norm": 5.2496790106601585, "learning_rate": 2.0939127367595188e-06, "loss": 0.6622, "step": 15651 }, { "epoch": 1.1307818736792674, "grad_norm": 6.929340134508266, "learning_rate": 2.0936241217988286e-06, "loss": 0.6892, "step": 15652 }, { "epoch": 1.130854118879477, "grad_norm": 7.36514218236519, "learning_rate": 2.093335512401108e-06, "loss": 0.6356, "step": 15653 }, { "epoch": 1.1309263640796865, "grad_norm": 6.862371880563487, "learning_rate": 2.0930469085703077e-06, "loss": 0.6314, "step": 15654 }, { "epoch": 1.130998609279896, "grad_norm": 7.9932938124204895, "learning_rate": 2.092758310310381e-06, "loss": 0.6863, "step": 15655 }, { "epoch": 1.1310708544801056, "grad_norm": 5.851409950865341, "learning_rate": 2.092469717625275e-06, "loss": 0.6021, "step": 15656 }, { "epoch": 1.131143099680315, "grad_norm": 7.69526763748027, "learning_rate": 2.092181130518943e-06, "loss": 0.6601, "step": 15657 }, { "epoch": 1.1312153448805244, "grad_norm": 7.197000609944466, "learning_rate": 2.0918925489953348e-06, "loss": 0.6274, "step": 15658 }, { "epoch": 1.131287590080734, "grad_norm": 7.677782817911916, "learning_rate": 2.091603973058401e-06, "loss": 0.633, "step": 15659 }, { "epoch": 1.1313598352809435, "grad_norm": 7.376487711192173, "learning_rate": 2.0913154027120912e-06, "loss": 0.7233, "step": 15660 }, { "epoch": 1.131432080481153, "grad_norm": 6.1595103349144535, "learning_rate": 2.0910268379603564e-06, "loss": 0.6039, "step": 15661 }, { "epoch": 1.1315043256813626, "grad_norm": 7.308383924366419, "learning_rate": 2.090738278807146e-06, "loss": 0.6549, "step": 15662 }, { "epoch": 1.1315765708815722, "grad_norm": 6.578483952313014, "learning_rate": 2.090449725256412e-06, "loss": 0.6219, "step": 15663 }, { "epoch": 1.1316488160817815, "grad_norm": 6.933206415237402, "learning_rate": 2.0901611773121024e-06, "loss": 0.6689, "step": 15664 }, { "epoch": 1.131721061281991, "grad_norm": 7.054548800806398, "learning_rate": 2.089872634978168e-06, "loss": 0.6275, "step": 15665 }, { "epoch": 1.1317933064822006, "grad_norm": 7.303672222224071, "learning_rate": 2.0895840982585598e-06, "loss": 0.6086, "step": 15666 }, { "epoch": 1.13186555168241, "grad_norm": 6.362614586778207, "learning_rate": 2.0892955671572256e-06, "loss": 0.6385, "step": 15667 }, { "epoch": 1.1319377968826196, "grad_norm": 7.118303866720277, "learning_rate": 2.089007041678116e-06, "loss": 0.612, "step": 15668 }, { "epoch": 1.1320100420828292, "grad_norm": 8.116455750071646, "learning_rate": 2.0887185218251814e-06, "loss": 0.7192, "step": 15669 }, { "epoch": 1.1320822872830387, "grad_norm": 6.210869324507905, "learning_rate": 2.0884300076023716e-06, "loss": 0.6135, "step": 15670 }, { "epoch": 1.132154532483248, "grad_norm": 6.714942314485302, "learning_rate": 2.088141499013634e-06, "loss": 0.6714, "step": 15671 }, { "epoch": 1.1322267776834576, "grad_norm": 7.273439597878323, "learning_rate": 2.0878529960629203e-06, "loss": 0.6222, "step": 15672 }, { "epoch": 1.1322990228836671, "grad_norm": 7.107947573046095, "learning_rate": 2.08756449875418e-06, "loss": 0.7135, "step": 15673 }, { "epoch": 1.1323712680838767, "grad_norm": 7.091261641534683, "learning_rate": 2.08727600709136e-06, "loss": 0.7242, "step": 15674 }, { "epoch": 1.1324435132840862, "grad_norm": 6.548330359195698, "learning_rate": 2.086987521078411e-06, "loss": 0.624, "step": 15675 }, { "epoch": 1.1325157584842958, "grad_norm": 6.4798191473884, "learning_rate": 2.0866990407192828e-06, "loss": 0.6347, "step": 15676 }, { "epoch": 1.1325880036845053, "grad_norm": 6.329740040339057, "learning_rate": 2.086410566017924e-06, "loss": 0.5957, "step": 15677 }, { "epoch": 1.1326602488847146, "grad_norm": 7.131171782563623, "learning_rate": 2.0861220969782827e-06, "loss": 0.631, "step": 15678 }, { "epoch": 1.1327324940849242, "grad_norm": 7.201719862778168, "learning_rate": 2.085833633604309e-06, "loss": 0.6805, "step": 15679 }, { "epoch": 1.1328047392851337, "grad_norm": 6.590220297574023, "learning_rate": 2.085545175899951e-06, "loss": 0.6912, "step": 15680 }, { "epoch": 1.1328769844853432, "grad_norm": 6.756166255407213, "learning_rate": 2.085256723869158e-06, "loss": 0.6472, "step": 15681 }, { "epoch": 1.1329492296855528, "grad_norm": 6.808716169384955, "learning_rate": 2.084968277515878e-06, "loss": 0.6836, "step": 15682 }, { "epoch": 1.1330214748857623, "grad_norm": 6.240482866237884, "learning_rate": 2.08467983684406e-06, "loss": 0.6906, "step": 15683 }, { "epoch": 1.1330937200859719, "grad_norm": 5.725255715601985, "learning_rate": 2.0843914018576543e-06, "loss": 0.709, "step": 15684 }, { "epoch": 1.1331659652861812, "grad_norm": 6.4553280965215265, "learning_rate": 2.0841029725606056e-06, "loss": 0.6171, "step": 15685 }, { "epoch": 1.1332382104863907, "grad_norm": 6.602382414874656, "learning_rate": 2.0838145489568644e-06, "loss": 0.5969, "step": 15686 }, { "epoch": 1.1333104556866003, "grad_norm": 6.073528522882042, "learning_rate": 2.0835261310503803e-06, "loss": 0.6291, "step": 15687 }, { "epoch": 1.1333827008868098, "grad_norm": 9.789784958126873, "learning_rate": 2.0832377188450992e-06, "loss": 0.5838, "step": 15688 }, { "epoch": 1.1334549460870194, "grad_norm": 7.044277211372652, "learning_rate": 2.08294931234497e-06, "loss": 0.6993, "step": 15689 }, { "epoch": 1.133527191287229, "grad_norm": 6.56014216617891, "learning_rate": 2.0826609115539407e-06, "loss": 0.689, "step": 15690 }, { "epoch": 1.1335994364874384, "grad_norm": 7.334951771250597, "learning_rate": 2.08237251647596e-06, "loss": 0.7113, "step": 15691 }, { "epoch": 1.1336716816876478, "grad_norm": 6.075078127009362, "learning_rate": 2.082084127114975e-06, "loss": 0.6723, "step": 15692 }, { "epoch": 1.1337439268878573, "grad_norm": 8.252963834183442, "learning_rate": 2.0817957434749335e-06, "loss": 0.7107, "step": 15693 }, { "epoch": 1.1338161720880668, "grad_norm": 7.769587712684481, "learning_rate": 2.081507365559784e-06, "loss": 0.6533, "step": 15694 }, { "epoch": 1.1338884172882764, "grad_norm": 7.141668531431699, "learning_rate": 2.081218993373474e-06, "loss": 0.7571, "step": 15695 }, { "epoch": 1.133960662488486, "grad_norm": 7.953369616510724, "learning_rate": 2.0809306269199504e-06, "loss": 0.703, "step": 15696 }, { "epoch": 1.1340329076886955, "grad_norm": 6.180422964590711, "learning_rate": 2.0806422662031607e-06, "loss": 0.6774, "step": 15697 }, { "epoch": 1.134105152888905, "grad_norm": 6.021993699064478, "learning_rate": 2.080353911227054e-06, "loss": 0.6548, "step": 15698 }, { "epoch": 1.1341773980891143, "grad_norm": 6.911424450382831, "learning_rate": 2.080065561995575e-06, "loss": 0.6563, "step": 15699 }, { "epoch": 1.1342496432893239, "grad_norm": 6.543350035532258, "learning_rate": 2.079777218512673e-06, "loss": 0.6354, "step": 15700 }, { "epoch": 1.1343218884895334, "grad_norm": 6.47515934928499, "learning_rate": 2.0794888807822947e-06, "loss": 0.611, "step": 15701 }, { "epoch": 1.134394133689743, "grad_norm": 6.470315200226586, "learning_rate": 2.079200548808388e-06, "loss": 0.6899, "step": 15702 }, { "epoch": 1.1344663788899525, "grad_norm": 7.290007077228057, "learning_rate": 2.078912222594898e-06, "loss": 0.6574, "step": 15703 }, { "epoch": 1.134538624090162, "grad_norm": 6.2014640679192325, "learning_rate": 2.0786239021457727e-06, "loss": 0.6657, "step": 15704 }, { "epoch": 1.1346108692903716, "grad_norm": 9.629950105056563, "learning_rate": 2.0783355874649598e-06, "loss": 0.7134, "step": 15705 }, { "epoch": 1.1346831144905811, "grad_norm": 6.716444462712628, "learning_rate": 2.078047278556404e-06, "loss": 0.6803, "step": 15706 }, { "epoch": 1.1347553596907907, "grad_norm": 8.429855839954909, "learning_rate": 2.077758975424054e-06, "loss": 0.6651, "step": 15707 }, { "epoch": 1.134827604891, "grad_norm": 5.875234071654381, "learning_rate": 2.0774706780718557e-06, "loss": 0.6093, "step": 15708 }, { "epoch": 1.1348998500912095, "grad_norm": 6.411290711712259, "learning_rate": 2.077182386503756e-06, "loss": 0.6234, "step": 15709 }, { "epoch": 1.134972095291419, "grad_norm": 8.284061746275995, "learning_rate": 2.076894100723701e-06, "loss": 0.6718, "step": 15710 }, { "epoch": 1.1350443404916286, "grad_norm": 8.443218638588244, "learning_rate": 2.076605820735637e-06, "loss": 0.6588, "step": 15711 }, { "epoch": 1.1351165856918382, "grad_norm": 7.808565415468852, "learning_rate": 2.0763175465435117e-06, "loss": 0.6431, "step": 15712 }, { "epoch": 1.1351888308920477, "grad_norm": 6.515448185925245, "learning_rate": 2.0760292781512685e-06, "loss": 0.5892, "step": 15713 }, { "epoch": 1.1352610760922572, "grad_norm": 5.7758348001964315, "learning_rate": 2.075741015562856e-06, "loss": 0.6896, "step": 15714 }, { "epoch": 1.1353333212924666, "grad_norm": 5.072185528944999, "learning_rate": 2.0754527587822195e-06, "loss": 0.6415, "step": 15715 }, { "epoch": 1.135405566492676, "grad_norm": 6.003832546717424, "learning_rate": 2.0751645078133064e-06, "loss": 0.6944, "step": 15716 }, { "epoch": 1.1354778116928856, "grad_norm": 8.481637305775267, "learning_rate": 2.07487626266006e-06, "loss": 0.7404, "step": 15717 }, { "epoch": 1.1355500568930952, "grad_norm": 5.197449484135149, "learning_rate": 2.0745880233264273e-06, "loss": 0.6408, "step": 15718 }, { "epoch": 1.1356223020933047, "grad_norm": 6.998723594914133, "learning_rate": 2.074299789816354e-06, "loss": 0.6922, "step": 15719 }, { "epoch": 1.1356945472935143, "grad_norm": 5.298144163146783, "learning_rate": 2.0740115621337873e-06, "loss": 0.66, "step": 15720 }, { "epoch": 1.1357667924937238, "grad_norm": 6.328164182941964, "learning_rate": 2.073723340282671e-06, "loss": 0.6932, "step": 15721 }, { "epoch": 1.1358390376939331, "grad_norm": 5.910310510007662, "learning_rate": 2.0734351242669508e-06, "loss": 0.6841, "step": 15722 }, { "epoch": 1.1359112828941427, "grad_norm": 5.627425963407498, "learning_rate": 2.0731469140905735e-06, "loss": 0.6413, "step": 15723 }, { "epoch": 1.1359835280943522, "grad_norm": 6.469853256424598, "learning_rate": 2.072858709757482e-06, "loss": 0.6437, "step": 15724 }, { "epoch": 1.1360557732945618, "grad_norm": 6.869158395804423, "learning_rate": 2.072570511271624e-06, "loss": 0.6702, "step": 15725 }, { "epoch": 1.1361280184947713, "grad_norm": 6.822938254130528, "learning_rate": 2.072282318636944e-06, "loss": 0.6335, "step": 15726 }, { "epoch": 1.1362002636949808, "grad_norm": 7.502249825312821, "learning_rate": 2.071994131857387e-06, "loss": 0.7014, "step": 15727 }, { "epoch": 1.1362725088951904, "grad_norm": 6.064843737420295, "learning_rate": 2.0717059509368974e-06, "loss": 0.6876, "step": 15728 }, { "epoch": 1.1363447540953997, "grad_norm": 6.473495438206206, "learning_rate": 2.071417775879421e-06, "loss": 0.6153, "step": 15729 }, { "epoch": 1.1364169992956092, "grad_norm": 6.299300491008934, "learning_rate": 2.0711296066889037e-06, "loss": 0.7204, "step": 15730 }, { "epoch": 1.1364892444958188, "grad_norm": 6.30176439555386, "learning_rate": 2.070841443369288e-06, "loss": 0.6049, "step": 15731 }, { "epoch": 1.1365614896960283, "grad_norm": 6.4993051010761915, "learning_rate": 2.070553285924519e-06, "loss": 0.6749, "step": 15732 }, { "epoch": 1.1366337348962379, "grad_norm": 6.403134067393074, "learning_rate": 2.070265134358542e-06, "loss": 0.6858, "step": 15733 }, { "epoch": 1.1367059800964474, "grad_norm": 6.826786564777413, "learning_rate": 2.069976988675304e-06, "loss": 0.7306, "step": 15734 }, { "epoch": 1.136778225296657, "grad_norm": 7.910884177540396, "learning_rate": 2.069688848878745e-06, "loss": 0.6388, "step": 15735 }, { "epoch": 1.1368504704968663, "grad_norm": 6.161198984353994, "learning_rate": 2.069400714972812e-06, "loss": 0.6693, "step": 15736 }, { "epoch": 1.1369227156970758, "grad_norm": 7.413927179514547, "learning_rate": 2.0691125869614492e-06, "loss": 0.639, "step": 15737 }, { "epoch": 1.1369949608972854, "grad_norm": 6.352511364394843, "learning_rate": 2.0688244648486e-06, "loss": 0.6057, "step": 15738 }, { "epoch": 1.137067206097495, "grad_norm": 6.68149189809993, "learning_rate": 2.0685363486382096e-06, "loss": 0.618, "step": 15739 }, { "epoch": 1.1371394512977044, "grad_norm": 6.70511254497064, "learning_rate": 2.0682482383342212e-06, "loss": 0.6136, "step": 15740 }, { "epoch": 1.137211696497914, "grad_norm": 6.3523798530443125, "learning_rate": 2.0679601339405797e-06, "loss": 0.6896, "step": 15741 }, { "epoch": 1.1372839416981235, "grad_norm": 6.984092348255074, "learning_rate": 2.0676720354612277e-06, "loss": 0.6699, "step": 15742 }, { "epoch": 1.1373561868983328, "grad_norm": 6.11385904817387, "learning_rate": 2.0673839429001104e-06, "loss": 0.6742, "step": 15743 }, { "epoch": 1.1374284320985424, "grad_norm": 6.761733982326864, "learning_rate": 2.067095856261171e-06, "loss": 0.637, "step": 15744 }, { "epoch": 1.137500677298752, "grad_norm": 8.38558928191576, "learning_rate": 2.066807775548354e-06, "loss": 0.6498, "step": 15745 }, { "epoch": 1.1375729224989615, "grad_norm": 6.843198989215925, "learning_rate": 2.066519700765601e-06, "loss": 0.6729, "step": 15746 }, { "epoch": 1.137645167699171, "grad_norm": 8.890415182470345, "learning_rate": 2.0662316319168567e-06, "loss": 0.7615, "step": 15747 }, { "epoch": 1.1377174128993806, "grad_norm": 6.39924379888044, "learning_rate": 2.0659435690060656e-06, "loss": 0.6505, "step": 15748 }, { "epoch": 1.13778965809959, "grad_norm": 6.197013246318651, "learning_rate": 2.065655512037169e-06, "loss": 0.5943, "step": 15749 }, { "epoch": 1.1378619032997994, "grad_norm": 6.664226784866847, "learning_rate": 2.0653674610141113e-06, "loss": 0.6648, "step": 15750 }, { "epoch": 1.137934148500009, "grad_norm": 6.073111459564444, "learning_rate": 2.065079415940836e-06, "loss": 0.7024, "step": 15751 }, { "epoch": 1.1380063937002185, "grad_norm": 6.793357623015989, "learning_rate": 2.064791376821286e-06, "loss": 0.6296, "step": 15752 }, { "epoch": 1.138078638900428, "grad_norm": 6.232853983571056, "learning_rate": 2.0645033436594035e-06, "loss": 0.6943, "step": 15753 }, { "epoch": 1.1381508841006376, "grad_norm": 6.1192737203248075, "learning_rate": 2.064215316459132e-06, "loss": 0.6127, "step": 15754 }, { "epoch": 1.1382231293008471, "grad_norm": 6.166590647185926, "learning_rate": 2.0639272952244153e-06, "loss": 0.6186, "step": 15755 }, { "epoch": 1.1382953745010567, "grad_norm": 6.275513112036014, "learning_rate": 2.0636392799591944e-06, "loss": 0.6243, "step": 15756 }, { "epoch": 1.138367619701266, "grad_norm": 6.41734610919538, "learning_rate": 2.0633512706674133e-06, "loss": 0.5934, "step": 15757 }, { "epoch": 1.1384398649014755, "grad_norm": 6.504551101196467, "learning_rate": 2.063063267353014e-06, "loss": 0.6523, "step": 15758 }, { "epoch": 1.138512110101685, "grad_norm": 6.370109028264934, "learning_rate": 2.0627752700199407e-06, "loss": 0.7143, "step": 15759 }, { "epoch": 1.1385843553018946, "grad_norm": 6.718751135537695, "learning_rate": 2.0624872786721333e-06, "loss": 0.659, "step": 15760 }, { "epoch": 1.1386566005021042, "grad_norm": 6.011801874628368, "learning_rate": 2.0621992933135354e-06, "loss": 0.6822, "step": 15761 }, { "epoch": 1.1387288457023137, "grad_norm": 6.036854092184295, "learning_rate": 2.0619113139480894e-06, "loss": 0.6898, "step": 15762 }, { "epoch": 1.1388010909025232, "grad_norm": 7.392113952980409, "learning_rate": 2.061623340579737e-06, "loss": 0.5987, "step": 15763 }, { "epoch": 1.1388733361027326, "grad_norm": 6.119767114035411, "learning_rate": 2.061335373212421e-06, "loss": 0.6068, "step": 15764 }, { "epoch": 1.138945581302942, "grad_norm": 6.0821152952606665, "learning_rate": 2.061047411850083e-06, "loss": 0.6005, "step": 15765 }, { "epoch": 1.1390178265031516, "grad_norm": 5.816172434332452, "learning_rate": 2.0607594564966655e-06, "loss": 0.6624, "step": 15766 }, { "epoch": 1.1390900717033612, "grad_norm": 7.089378589682018, "learning_rate": 2.0604715071561095e-06, "loss": 0.6131, "step": 15767 }, { "epoch": 1.1391623169035707, "grad_norm": 8.0867187122621, "learning_rate": 2.0601835638323576e-06, "loss": 0.6736, "step": 15768 }, { "epoch": 1.1392345621037803, "grad_norm": 6.526910389657509, "learning_rate": 2.0598956265293507e-06, "loss": 0.6341, "step": 15769 }, { "epoch": 1.1393068073039898, "grad_norm": 7.51099987825473, "learning_rate": 2.059607695251032e-06, "loss": 0.6819, "step": 15770 }, { "epoch": 1.1393790525041991, "grad_norm": 7.145926012452092, "learning_rate": 2.059319770001341e-06, "loss": 0.6395, "step": 15771 }, { "epoch": 1.1394512977044087, "grad_norm": 5.5086653678111075, "learning_rate": 2.0590318507842206e-06, "loss": 0.6587, "step": 15772 }, { "epoch": 1.1395235429046182, "grad_norm": 6.590304518568238, "learning_rate": 2.0587439376036125e-06, "loss": 0.6987, "step": 15773 }, { "epoch": 1.1395957881048278, "grad_norm": 9.09910651211687, "learning_rate": 2.0584560304634564e-06, "loss": 0.6514, "step": 15774 }, { "epoch": 1.1396680333050373, "grad_norm": 6.34075195426055, "learning_rate": 2.0581681293676944e-06, "loss": 0.6823, "step": 15775 }, { "epoch": 1.1397402785052468, "grad_norm": 6.990514322768116, "learning_rate": 2.057880234320267e-06, "loss": 0.6543, "step": 15776 }, { "epoch": 1.1398125237054564, "grad_norm": 7.268439947388905, "learning_rate": 2.0575923453251176e-06, "loss": 0.6509, "step": 15777 }, { "epoch": 1.139884768905666, "grad_norm": 6.534608406411702, "learning_rate": 2.0573044623861844e-06, "loss": 0.6312, "step": 15778 }, { "epoch": 1.1399570141058755, "grad_norm": 6.5049437282512175, "learning_rate": 2.057016585507409e-06, "loss": 0.6549, "step": 15779 }, { "epoch": 1.1400292593060848, "grad_norm": 6.357795687934733, "learning_rate": 2.056728714692734e-06, "loss": 0.6313, "step": 15780 }, { "epoch": 1.1401015045062943, "grad_norm": 7.987603357926861, "learning_rate": 2.0564408499460974e-06, "loss": 0.7, "step": 15781 }, { "epoch": 1.1401737497065039, "grad_norm": 6.521927161892357, "learning_rate": 2.0561529912714416e-06, "loss": 0.7176, "step": 15782 }, { "epoch": 1.1402459949067134, "grad_norm": 7.491748339498134, "learning_rate": 2.0558651386727064e-06, "loss": 0.7172, "step": 15783 }, { "epoch": 1.140318240106923, "grad_norm": 7.361118766092923, "learning_rate": 2.0555772921538335e-06, "loss": 0.7957, "step": 15784 }, { "epoch": 1.1403904853071325, "grad_norm": 7.400813558035413, "learning_rate": 2.0552894517187617e-06, "loss": 0.7465, "step": 15785 }, { "epoch": 1.140462730507342, "grad_norm": 5.64119525256514, "learning_rate": 2.055001617371432e-06, "loss": 0.6407, "step": 15786 }, { "epoch": 1.1405349757075514, "grad_norm": 8.45060559873383, "learning_rate": 2.0547137891157855e-06, "loss": 0.7474, "step": 15787 }, { "epoch": 1.140607220907761, "grad_norm": 5.949814404069349, "learning_rate": 2.0544259669557608e-06, "loss": 0.6138, "step": 15788 }, { "epoch": 1.1406794661079704, "grad_norm": 5.570465043000278, "learning_rate": 2.0541381508952986e-06, "loss": 0.6233, "step": 15789 }, { "epoch": 1.14075171130818, "grad_norm": 6.555384384047012, "learning_rate": 2.0538503409383382e-06, "loss": 0.66, "step": 15790 }, { "epoch": 1.1408239565083895, "grad_norm": 7.527580918681957, "learning_rate": 2.0535625370888216e-06, "loss": 0.5767, "step": 15791 }, { "epoch": 1.140896201708599, "grad_norm": 6.595154526803893, "learning_rate": 2.0532747393506867e-06, "loss": 0.628, "step": 15792 }, { "epoch": 1.1409684469088086, "grad_norm": 5.919877734597069, "learning_rate": 2.0529869477278735e-06, "loss": 0.578, "step": 15793 }, { "epoch": 1.141040692109018, "grad_norm": 6.960557167289236, "learning_rate": 2.052699162224322e-06, "loss": 0.6321, "step": 15794 }, { "epoch": 1.1411129373092275, "grad_norm": 6.756407345784294, "learning_rate": 2.052411382843972e-06, "loss": 0.6306, "step": 15795 }, { "epoch": 1.141185182509437, "grad_norm": 6.906603773903442, "learning_rate": 2.052123609590762e-06, "loss": 0.6839, "step": 15796 }, { "epoch": 1.1412574277096466, "grad_norm": 6.767919138233695, "learning_rate": 2.051835842468632e-06, "loss": 0.599, "step": 15797 }, { "epoch": 1.141329672909856, "grad_norm": 5.933148204758491, "learning_rate": 2.051548081481522e-06, "loss": 0.623, "step": 15798 }, { "epoch": 1.1414019181100656, "grad_norm": 6.476277417579726, "learning_rate": 2.0512603266333706e-06, "loss": 0.6168, "step": 15799 }, { "epoch": 1.1414741633102752, "grad_norm": 7.246303800021569, "learning_rate": 2.0509725779281163e-06, "loss": 0.6774, "step": 15800 }, { "epoch": 1.1415464085104845, "grad_norm": 7.94541020366119, "learning_rate": 2.050684835369699e-06, "loss": 0.7139, "step": 15801 }, { "epoch": 1.141618653710694, "grad_norm": 7.06136481690279, "learning_rate": 2.0503970989620585e-06, "loss": 0.6335, "step": 15802 }, { "epoch": 1.1416908989109036, "grad_norm": 5.989773300033233, "learning_rate": 2.050109368709131e-06, "loss": 0.5343, "step": 15803 }, { "epoch": 1.1417631441111131, "grad_norm": 5.344686682629173, "learning_rate": 2.049821644614857e-06, "loss": 0.6656, "step": 15804 }, { "epoch": 1.1418353893113227, "grad_norm": 7.942504026359712, "learning_rate": 2.0495339266831766e-06, "loss": 0.612, "step": 15805 }, { "epoch": 1.1419076345115322, "grad_norm": 7.855186605294867, "learning_rate": 2.0492462149180257e-06, "loss": 0.5424, "step": 15806 }, { "epoch": 1.1419798797117418, "grad_norm": 6.335382665383813, "learning_rate": 2.0489585093233446e-06, "loss": 0.6261, "step": 15807 }, { "epoch": 1.142052124911951, "grad_norm": 7.788595019117772, "learning_rate": 2.0486708099030712e-06, "loss": 0.6297, "step": 15808 }, { "epoch": 1.1421243701121606, "grad_norm": 7.091235820205366, "learning_rate": 2.048383116661144e-06, "loss": 0.7061, "step": 15809 }, { "epoch": 1.1421966153123702, "grad_norm": 6.199124908217735, "learning_rate": 2.048095429601501e-06, "loss": 0.5847, "step": 15810 }, { "epoch": 1.1422688605125797, "grad_norm": 5.648473969021273, "learning_rate": 2.047807748728081e-06, "loss": 0.5899, "step": 15811 }, { "epoch": 1.1423411057127892, "grad_norm": 6.596956314171255, "learning_rate": 2.047520074044822e-06, "loss": 0.6971, "step": 15812 }, { "epoch": 1.1424133509129988, "grad_norm": 5.629444485728189, "learning_rate": 2.0472324055556614e-06, "loss": 0.6246, "step": 15813 }, { "epoch": 1.1424855961132083, "grad_norm": 6.919895965587962, "learning_rate": 2.0469447432645377e-06, "loss": 0.6203, "step": 15814 }, { "epoch": 1.1425578413134176, "grad_norm": 6.332551138242551, "learning_rate": 2.046657087175389e-06, "loss": 0.686, "step": 15815 }, { "epoch": 1.1426300865136272, "grad_norm": 6.6669929742428495, "learning_rate": 2.0463694372921535e-06, "loss": 0.7485, "step": 15816 }, { "epoch": 1.1427023317138367, "grad_norm": 6.886266181775834, "learning_rate": 2.046081793618767e-06, "loss": 0.5864, "step": 15817 }, { "epoch": 1.1427745769140463, "grad_norm": 7.763366002229336, "learning_rate": 2.0457941561591676e-06, "loss": 0.6677, "step": 15818 }, { "epoch": 1.1428468221142558, "grad_norm": 6.758956618285924, "learning_rate": 2.0455065249172957e-06, "loss": 0.6212, "step": 15819 }, { "epoch": 1.1429190673144654, "grad_norm": 5.969925529999536, "learning_rate": 2.045218899897085e-06, "loss": 0.6106, "step": 15820 }, { "epoch": 1.142991312514675, "grad_norm": 8.464278613749189, "learning_rate": 2.0449312811024744e-06, "loss": 0.6591, "step": 15821 }, { "epoch": 1.1430635577148842, "grad_norm": 6.886838949780577, "learning_rate": 2.0446436685374016e-06, "loss": 0.7045, "step": 15822 }, { "epoch": 1.1431358029150938, "grad_norm": 8.761823676506141, "learning_rate": 2.044356062205804e-06, "loss": 0.6378, "step": 15823 }, { "epoch": 1.1432080481153033, "grad_norm": 7.395472930580123, "learning_rate": 2.0440684621116175e-06, "loss": 0.689, "step": 15824 }, { "epoch": 1.1432802933155128, "grad_norm": 7.884414857815797, "learning_rate": 2.0437808682587794e-06, "loss": 0.7313, "step": 15825 }, { "epoch": 1.1433525385157224, "grad_norm": 6.02653390186775, "learning_rate": 2.0434932806512275e-06, "loss": 0.691, "step": 15826 }, { "epoch": 1.143424783715932, "grad_norm": 5.426190420586873, "learning_rate": 2.0432056992928983e-06, "loss": 0.7035, "step": 15827 }, { "epoch": 1.1434970289161415, "grad_norm": 8.67147336837391, "learning_rate": 2.042918124187728e-06, "loss": 0.7234, "step": 15828 }, { "epoch": 1.1435692741163508, "grad_norm": 6.026055030731004, "learning_rate": 2.0426305553396536e-06, "loss": 0.5902, "step": 15829 }, { "epoch": 1.1436415193165603, "grad_norm": 6.646751826642146, "learning_rate": 2.042342992752613e-06, "loss": 0.6715, "step": 15830 }, { "epoch": 1.1437137645167699, "grad_norm": 7.073907062777437, "learning_rate": 2.0420554364305405e-06, "loss": 0.6614, "step": 15831 }, { "epoch": 1.1437860097169794, "grad_norm": 7.656451288808062, "learning_rate": 2.0417678863773725e-06, "loss": 0.7453, "step": 15832 }, { "epoch": 1.143858254917189, "grad_norm": 5.838053001986528, "learning_rate": 2.041480342597047e-06, "loss": 0.5757, "step": 15833 }, { "epoch": 1.1439305001173985, "grad_norm": 6.993399914526693, "learning_rate": 2.041192805093501e-06, "loss": 0.7168, "step": 15834 }, { "epoch": 1.144002745317608, "grad_norm": 6.444551328160019, "learning_rate": 2.040905273870668e-06, "loss": 0.6685, "step": 15835 }, { "epoch": 1.1440749905178174, "grad_norm": 7.572037664627088, "learning_rate": 2.0406177489324854e-06, "loss": 0.6361, "step": 15836 }, { "epoch": 1.144147235718027, "grad_norm": 6.929595292949817, "learning_rate": 2.04033023028289e-06, "loss": 0.6591, "step": 15837 }, { "epoch": 1.1442194809182364, "grad_norm": 4.906848457914757, "learning_rate": 2.0400427179258157e-06, "loss": 0.621, "step": 15838 }, { "epoch": 1.144291726118446, "grad_norm": 5.744066577111129, "learning_rate": 2.0397552118652e-06, "loss": 0.6455, "step": 15839 }, { "epoch": 1.1443639713186555, "grad_norm": 7.719991607266148, "learning_rate": 2.0394677121049773e-06, "loss": 0.6311, "step": 15840 }, { "epoch": 1.144436216518865, "grad_norm": 6.062214952826841, "learning_rate": 2.039180218649085e-06, "loss": 0.6545, "step": 15841 }, { "epoch": 1.1445084617190746, "grad_norm": 6.106128815587567, "learning_rate": 2.038892731501457e-06, "loss": 0.6439, "step": 15842 }, { "epoch": 1.144580706919284, "grad_norm": 5.730776954689, "learning_rate": 2.03860525066603e-06, "loss": 0.64, "step": 15843 }, { "epoch": 1.1446529521194935, "grad_norm": 6.002081509975733, "learning_rate": 2.0383177761467397e-06, "loss": 0.7373, "step": 15844 }, { "epoch": 1.144725197319703, "grad_norm": 6.75830499695632, "learning_rate": 2.0380303079475196e-06, "loss": 0.6512, "step": 15845 }, { "epoch": 1.1447974425199126, "grad_norm": 6.196221572352109, "learning_rate": 2.0377428460723055e-06, "loss": 0.7246, "step": 15846 }, { "epoch": 1.144869687720122, "grad_norm": 6.672575652521524, "learning_rate": 2.0374553905250327e-06, "loss": 0.653, "step": 15847 }, { "epoch": 1.1449419329203316, "grad_norm": 7.455969181832077, "learning_rate": 2.0371679413096378e-06, "loss": 0.6135, "step": 15848 }, { "epoch": 1.1450141781205412, "grad_norm": 7.854137336278195, "learning_rate": 2.0368804984300536e-06, "loss": 0.5675, "step": 15849 }, { "epoch": 1.1450864233207507, "grad_norm": 7.4823097134963374, "learning_rate": 2.0365930618902158e-06, "loss": 0.7047, "step": 15850 }, { "epoch": 1.14515866852096, "grad_norm": 5.624081345882847, "learning_rate": 2.036305631694059e-06, "loss": 0.6619, "step": 15851 }, { "epoch": 1.1452309137211696, "grad_norm": 7.756368911901352, "learning_rate": 2.0360182078455186e-06, "loss": 0.659, "step": 15852 }, { "epoch": 1.1453031589213791, "grad_norm": 6.751271163491957, "learning_rate": 2.035730790348528e-06, "loss": 0.599, "step": 15853 }, { "epoch": 1.1453754041215887, "grad_norm": 7.500037638251912, "learning_rate": 2.035443379207023e-06, "loss": 0.6554, "step": 15854 }, { "epoch": 1.1454476493217982, "grad_norm": 7.111157539666456, "learning_rate": 2.0351559744249376e-06, "loss": 0.6579, "step": 15855 }, { "epoch": 1.1455198945220078, "grad_norm": 6.283819024348308, "learning_rate": 2.0348685760062055e-06, "loss": 0.7155, "step": 15856 }, { "epoch": 1.1455921397222173, "grad_norm": 6.725371464676924, "learning_rate": 2.034581183954761e-06, "loss": 0.6586, "step": 15857 }, { "epoch": 1.1456643849224268, "grad_norm": 6.849099494049895, "learning_rate": 2.0342937982745394e-06, "loss": 0.6807, "step": 15858 }, { "epoch": 1.1457366301226362, "grad_norm": 6.339218851577327, "learning_rate": 2.0340064189694746e-06, "loss": 0.6479, "step": 15859 }, { "epoch": 1.1458088753228457, "grad_norm": 6.275190324670627, "learning_rate": 2.0337190460434993e-06, "loss": 0.6491, "step": 15860 }, { "epoch": 1.1458811205230552, "grad_norm": 6.338668516880697, "learning_rate": 2.033431679500548e-06, "loss": 0.6552, "step": 15861 }, { "epoch": 1.1459533657232648, "grad_norm": 6.426101815712107, "learning_rate": 2.033144319344556e-06, "loss": 0.5776, "step": 15862 }, { "epoch": 1.1460256109234743, "grad_norm": 6.515875283243772, "learning_rate": 2.032856965579455e-06, "loss": 0.6405, "step": 15863 }, { "epoch": 1.1460978561236839, "grad_norm": 7.531243921807714, "learning_rate": 2.0325696182091785e-06, "loss": 0.6909, "step": 15864 }, { "epoch": 1.1461701013238934, "grad_norm": 6.324915093014126, "learning_rate": 2.0322822772376618e-06, "loss": 0.647, "step": 15865 }, { "epoch": 1.1462423465241027, "grad_norm": 7.958686487906969, "learning_rate": 2.0319949426688382e-06, "loss": 0.6929, "step": 15866 }, { "epoch": 1.1463145917243123, "grad_norm": 7.589570578991036, "learning_rate": 2.0317076145066395e-06, "loss": 0.6171, "step": 15867 }, { "epoch": 1.1463868369245218, "grad_norm": 6.310299026972847, "learning_rate": 2.0314202927550003e-06, "loss": 0.6329, "step": 15868 }, { "epoch": 1.1464590821247314, "grad_norm": 5.9479907065179765, "learning_rate": 2.0311329774178536e-06, "loss": 0.6038, "step": 15869 }, { "epoch": 1.146531327324941, "grad_norm": 6.323205311789192, "learning_rate": 2.0308456684991325e-06, "loss": 0.5826, "step": 15870 }, { "epoch": 1.1466035725251504, "grad_norm": 14.69100741448792, "learning_rate": 2.030558366002769e-06, "loss": 0.7269, "step": 15871 }, { "epoch": 1.14667581772536, "grad_norm": 5.778721601881404, "learning_rate": 2.030271069932698e-06, "loss": 0.6433, "step": 15872 }, { "epoch": 1.1467480629255693, "grad_norm": 6.309529427031478, "learning_rate": 2.0299837802928516e-06, "loss": 0.6449, "step": 15873 }, { "epoch": 1.1468203081257788, "grad_norm": 5.440311976350659, "learning_rate": 2.0296964970871615e-06, "loss": 0.5678, "step": 15874 }, { "epoch": 1.1468925533259884, "grad_norm": 10.0417383806087, "learning_rate": 2.0294092203195605e-06, "loss": 0.6759, "step": 15875 }, { "epoch": 1.146964798526198, "grad_norm": 7.107570544955124, "learning_rate": 2.0291219499939827e-06, "loss": 0.6093, "step": 15876 }, { "epoch": 1.1470370437264075, "grad_norm": 5.380665122401117, "learning_rate": 2.0288346861143607e-06, "loss": 0.6402, "step": 15877 }, { "epoch": 1.147109288926617, "grad_norm": 6.79376275729023, "learning_rate": 2.028547428684625e-06, "loss": 0.5896, "step": 15878 }, { "epoch": 1.1471815341268266, "grad_norm": 6.411801496092418, "learning_rate": 2.028260177708709e-06, "loss": 0.6939, "step": 15879 }, { "epoch": 1.1472537793270359, "grad_norm": 8.171408645422666, "learning_rate": 2.0279729331905454e-06, "loss": 0.6867, "step": 15880 }, { "epoch": 1.1473260245272454, "grad_norm": 6.603402399391621, "learning_rate": 2.027685695134065e-06, "loss": 0.6617, "step": 15881 }, { "epoch": 1.147398269727455, "grad_norm": 7.0706229968941905, "learning_rate": 2.027398463543201e-06, "loss": 0.6914, "step": 15882 }, { "epoch": 1.1474705149276645, "grad_norm": 7.942151726569542, "learning_rate": 2.027111238421885e-06, "loss": 0.66, "step": 15883 }, { "epoch": 1.147542760127874, "grad_norm": 7.307874210889083, "learning_rate": 2.02682401977405e-06, "loss": 0.6523, "step": 15884 }, { "epoch": 1.1476150053280836, "grad_norm": 6.585134654134715, "learning_rate": 2.0265368076036255e-06, "loss": 0.7165, "step": 15885 }, { "epoch": 1.1476872505282931, "grad_norm": 6.50475460167242, "learning_rate": 2.026249601914545e-06, "loss": 0.6586, "step": 15886 }, { "epoch": 1.1477594957285024, "grad_norm": 7.07432120473848, "learning_rate": 2.02596240271074e-06, "loss": 0.7138, "step": 15887 }, { "epoch": 1.147831740928712, "grad_norm": 5.385102115891953, "learning_rate": 2.0256752099961413e-06, "loss": 0.5897, "step": 15888 }, { "epoch": 1.1479039861289215, "grad_norm": 5.868788336972927, "learning_rate": 2.02538802377468e-06, "loss": 0.6109, "step": 15889 }, { "epoch": 1.147976231329131, "grad_norm": 6.395742144436797, "learning_rate": 2.0251008440502883e-06, "loss": 0.7039, "step": 15890 }, { "epoch": 1.1480484765293406, "grad_norm": 6.034840518136805, "learning_rate": 2.0248136708268985e-06, "loss": 0.6416, "step": 15891 }, { "epoch": 1.1481207217295502, "grad_norm": 6.199097832291903, "learning_rate": 2.0245265041084395e-06, "loss": 0.6653, "step": 15892 }, { "epoch": 1.1481929669297597, "grad_norm": 6.636959293849322, "learning_rate": 2.024239343898844e-06, "loss": 0.6409, "step": 15893 }, { "epoch": 1.148265212129969, "grad_norm": 5.940692244243958, "learning_rate": 2.0239521902020428e-06, "loss": 0.7099, "step": 15894 }, { "epoch": 1.1483374573301786, "grad_norm": 6.956572563047924, "learning_rate": 2.0236650430219654e-06, "loss": 0.6882, "step": 15895 }, { "epoch": 1.148409702530388, "grad_norm": 6.169749992659656, "learning_rate": 2.023377902362544e-06, "loss": 0.635, "step": 15896 }, { "epoch": 1.1484819477305976, "grad_norm": 6.189870669195535, "learning_rate": 2.0230907682277093e-06, "loss": 0.6434, "step": 15897 }, { "epoch": 1.1485541929308072, "grad_norm": 6.534814765248766, "learning_rate": 2.022803640621392e-06, "loss": 0.6208, "step": 15898 }, { "epoch": 1.1486264381310167, "grad_norm": 6.733149370506839, "learning_rate": 2.0225165195475217e-06, "loss": 0.6868, "step": 15899 }, { "epoch": 1.1486986833312263, "grad_norm": 8.348667481596193, "learning_rate": 2.02222940501003e-06, "loss": 0.7701, "step": 15900 }, { "epoch": 1.1487709285314356, "grad_norm": 7.229522197969759, "learning_rate": 2.021942297012846e-06, "loss": 0.6145, "step": 15901 }, { "epoch": 1.1488431737316451, "grad_norm": 8.44693211950027, "learning_rate": 2.0216551955599026e-06, "loss": 0.6709, "step": 15902 }, { "epoch": 1.1489154189318547, "grad_norm": 5.692297955304421, "learning_rate": 2.021368100655126e-06, "loss": 0.6198, "step": 15903 }, { "epoch": 1.1489876641320642, "grad_norm": 6.309329303577863, "learning_rate": 2.0210810123024494e-06, "loss": 0.7119, "step": 15904 }, { "epoch": 1.1490599093322738, "grad_norm": 7.132446145951776, "learning_rate": 2.0207939305058028e-06, "loss": 0.7164, "step": 15905 }, { "epoch": 1.1491321545324833, "grad_norm": 7.136209069871483, "learning_rate": 2.020506855269114e-06, "loss": 0.728, "step": 15906 }, { "epoch": 1.1492043997326928, "grad_norm": 5.61602575945041, "learning_rate": 2.0202197865963143e-06, "loss": 0.6576, "step": 15907 }, { "epoch": 1.1492766449329022, "grad_norm": 7.313599071885014, "learning_rate": 2.019932724491333e-06, "loss": 0.701, "step": 15908 }, { "epoch": 1.1493488901331117, "grad_norm": 9.330543191844878, "learning_rate": 2.0196456689581007e-06, "loss": 0.7219, "step": 15909 }, { "epoch": 1.1494211353333212, "grad_norm": 6.697137750765948, "learning_rate": 2.0193586200005454e-06, "loss": 0.6206, "step": 15910 }, { "epoch": 1.1494933805335308, "grad_norm": 6.102386770608342, "learning_rate": 2.0190715776225976e-06, "loss": 0.6052, "step": 15911 }, { "epoch": 1.1495656257337403, "grad_norm": 7.065889878434982, "learning_rate": 2.018784541828187e-06, "loss": 0.6803, "step": 15912 }, { "epoch": 1.1496378709339499, "grad_norm": 5.354586672605855, "learning_rate": 2.018497512621242e-06, "loss": 0.577, "step": 15913 }, { "epoch": 1.1497101161341594, "grad_norm": 6.425532503734066, "learning_rate": 2.0182104900056922e-06, "loss": 0.6122, "step": 15914 }, { "epoch": 1.1497823613343687, "grad_norm": 8.678332424741312, "learning_rate": 2.017923473985466e-06, "loss": 0.7322, "step": 15915 }, { "epoch": 1.1498546065345783, "grad_norm": 6.959014524211985, "learning_rate": 2.017636464564495e-06, "loss": 0.6616, "step": 15916 }, { "epoch": 1.1499268517347878, "grad_norm": 8.37246255724449, "learning_rate": 2.0173494617467043e-06, "loss": 0.7078, "step": 15917 }, { "epoch": 1.1499990969349974, "grad_norm": 7.035359855725971, "learning_rate": 2.0170624655360256e-06, "loss": 0.6675, "step": 15918 }, { "epoch": 1.150071342135207, "grad_norm": 7.875051104667388, "learning_rate": 2.0167754759363873e-06, "loss": 0.6506, "step": 15919 }, { "epoch": 1.1501435873354164, "grad_norm": 7.064104016204589, "learning_rate": 2.0164884929517168e-06, "loss": 0.7061, "step": 15920 }, { "epoch": 1.150215832535626, "grad_norm": 6.096520992073029, "learning_rate": 2.0162015165859434e-06, "loss": 0.6371, "step": 15921 }, { "epoch": 1.1502880777358353, "grad_norm": 6.949929589662728, "learning_rate": 2.0159145468429956e-06, "loss": 0.6881, "step": 15922 }, { "epoch": 1.1503603229360448, "grad_norm": 9.231769226389728, "learning_rate": 2.015627583726802e-06, "loss": 0.6602, "step": 15923 }, { "epoch": 1.1504325681362544, "grad_norm": 6.899866804965842, "learning_rate": 2.0153406272412906e-06, "loss": 0.6002, "step": 15924 }, { "epoch": 1.150504813336464, "grad_norm": 9.25839429870182, "learning_rate": 2.0150536773903894e-06, "loss": 0.6432, "step": 15925 }, { "epoch": 1.1505770585366735, "grad_norm": 10.7223719936138, "learning_rate": 2.0147667341780277e-06, "loss": 0.6766, "step": 15926 }, { "epoch": 1.150649303736883, "grad_norm": 6.962573132225042, "learning_rate": 2.0144797976081318e-06, "loss": 0.6941, "step": 15927 }, { "epoch": 1.1507215489370926, "grad_norm": 6.171583569563286, "learning_rate": 2.0141928676846307e-06, "loss": 0.6456, "step": 15928 }, { "epoch": 1.150793794137302, "grad_norm": 6.826457992036647, "learning_rate": 2.0139059444114516e-06, "loss": 0.6849, "step": 15929 }, { "epoch": 1.1508660393375116, "grad_norm": 7.841152536183895, "learning_rate": 2.0136190277925244e-06, "loss": 0.6819, "step": 15930 }, { "epoch": 1.150938284537721, "grad_norm": 8.952634574946055, "learning_rate": 2.013332117831773e-06, "loss": 0.6871, "step": 15931 }, { "epoch": 1.1510105297379305, "grad_norm": 7.898749974500267, "learning_rate": 2.0130452145331276e-06, "loss": 0.6821, "step": 15932 }, { "epoch": 1.15108277493814, "grad_norm": 5.704848439295477, "learning_rate": 2.0127583179005154e-06, "loss": 0.6769, "step": 15933 }, { "epoch": 1.1511550201383496, "grad_norm": 7.657428012450992, "learning_rate": 2.012471427937864e-06, "loss": 0.6381, "step": 15934 }, { "epoch": 1.1512272653385591, "grad_norm": 6.321698728735381, "learning_rate": 2.0121845446491e-06, "loss": 0.6347, "step": 15935 }, { "epoch": 1.1512995105387687, "grad_norm": 5.94344056744985, "learning_rate": 2.01189766803815e-06, "loss": 0.6646, "step": 15936 }, { "epoch": 1.1513717557389782, "grad_norm": 6.874547492654318, "learning_rate": 2.011610798108943e-06, "loss": 0.6827, "step": 15937 }, { "epoch": 1.1514440009391875, "grad_norm": 6.1080420715641806, "learning_rate": 2.011323934865404e-06, "loss": 0.5908, "step": 15938 }, { "epoch": 1.151516246139397, "grad_norm": 6.411275836775752, "learning_rate": 2.011037078311461e-06, "loss": 0.6459, "step": 15939 }, { "epoch": 1.1515884913396066, "grad_norm": 7.234320424082072, "learning_rate": 2.0107502284510414e-06, "loss": 0.6646, "step": 15940 }, { "epoch": 1.1516607365398162, "grad_norm": 8.308916050695181, "learning_rate": 2.0104633852880714e-06, "loss": 0.6926, "step": 15941 }, { "epoch": 1.1517329817400257, "grad_norm": 7.288421112004177, "learning_rate": 2.010176548826477e-06, "loss": 0.6553, "step": 15942 }, { "epoch": 1.1518052269402352, "grad_norm": 7.357042594998951, "learning_rate": 2.009889719070185e-06, "loss": 0.6193, "step": 15943 }, { "epoch": 1.1518774721404448, "grad_norm": 6.5204012629127766, "learning_rate": 2.0096028960231233e-06, "loss": 0.6081, "step": 15944 }, { "epoch": 1.151949717340654, "grad_norm": 7.642903555715124, "learning_rate": 2.0093160796892163e-06, "loss": 0.7451, "step": 15945 }, { "epoch": 1.1520219625408636, "grad_norm": 6.430576193519836, "learning_rate": 2.009029270072391e-06, "loss": 0.6291, "step": 15946 }, { "epoch": 1.1520942077410732, "grad_norm": 8.065056447294518, "learning_rate": 2.008742467176574e-06, "loss": 0.6241, "step": 15947 }, { "epoch": 1.1521664529412827, "grad_norm": 7.294414881562158, "learning_rate": 2.0084556710056922e-06, "loss": 0.6101, "step": 15948 }, { "epoch": 1.1522386981414923, "grad_norm": 6.734321470114253, "learning_rate": 2.0081688815636697e-06, "loss": 0.622, "step": 15949 }, { "epoch": 1.1523109433417018, "grad_norm": 7.133090863013717, "learning_rate": 2.007882098854433e-06, "loss": 0.6383, "step": 15950 }, { "epoch": 1.1523831885419114, "grad_norm": 6.083844864471886, "learning_rate": 2.0075953228819093e-06, "loss": 0.6284, "step": 15951 }, { "epoch": 1.1524554337421207, "grad_norm": 8.067291019122035, "learning_rate": 2.0073085536500227e-06, "loss": 0.6787, "step": 15952 }, { "epoch": 1.1525276789423302, "grad_norm": 8.266943900269029, "learning_rate": 2.007021791162699e-06, "loss": 0.6819, "step": 15953 }, { "epoch": 1.1525999241425398, "grad_norm": 8.1887043074508, "learning_rate": 2.0067350354238645e-06, "loss": 0.6259, "step": 15954 }, { "epoch": 1.1526721693427493, "grad_norm": 6.83985434661515, "learning_rate": 2.006448286437445e-06, "loss": 0.7, "step": 15955 }, { "epoch": 1.1527444145429588, "grad_norm": 5.7261175006158735, "learning_rate": 2.0061615442073645e-06, "loss": 0.6097, "step": 15956 }, { "epoch": 1.1528166597431684, "grad_norm": 7.19461636069893, "learning_rate": 2.005874808737549e-06, "loss": 0.6875, "step": 15957 }, { "epoch": 1.152888904943378, "grad_norm": 5.939938335676611, "learning_rate": 2.0055880800319237e-06, "loss": 0.7516, "step": 15958 }, { "epoch": 1.1529611501435872, "grad_norm": 8.568870235297942, "learning_rate": 2.005301358094415e-06, "loss": 0.7234, "step": 15959 }, { "epoch": 1.1530333953437968, "grad_norm": 7.45928815416362, "learning_rate": 2.0050146429289447e-06, "loss": 0.6454, "step": 15960 }, { "epoch": 1.1531056405440063, "grad_norm": 6.236260227081055, "learning_rate": 2.0047279345394405e-06, "loss": 0.6325, "step": 15961 }, { "epoch": 1.1531778857442159, "grad_norm": 7.368065111268136, "learning_rate": 2.004441232929827e-06, "loss": 0.7, "step": 15962 }, { "epoch": 1.1532501309444254, "grad_norm": 6.482135284791717, "learning_rate": 2.0041545381040274e-06, "loss": 0.6696, "step": 15963 }, { "epoch": 1.153322376144635, "grad_norm": 6.788242092978905, "learning_rate": 2.003867850065967e-06, "loss": 0.7121, "step": 15964 }, { "epoch": 1.1533946213448445, "grad_norm": 5.769215528027869, "learning_rate": 2.003581168819571e-06, "loss": 0.6421, "step": 15965 }, { "epoch": 1.1534668665450538, "grad_norm": 6.683345181398362, "learning_rate": 2.003294494368763e-06, "loss": 0.6928, "step": 15966 }, { "epoch": 1.1535391117452634, "grad_norm": 6.265410992779905, "learning_rate": 2.0030078267174678e-06, "loss": 0.6023, "step": 15967 }, { "epoch": 1.153611356945473, "grad_norm": 6.705948241769322, "learning_rate": 2.0027211658696097e-06, "loss": 0.6495, "step": 15968 }, { "epoch": 1.1536836021456824, "grad_norm": 7.621247916175191, "learning_rate": 2.002434511829113e-06, "loss": 0.6441, "step": 15969 }, { "epoch": 1.153755847345892, "grad_norm": 7.871149968781861, "learning_rate": 2.0021478645999008e-06, "loss": 0.6412, "step": 15970 }, { "epoch": 1.1538280925461015, "grad_norm": 5.8255684898540565, "learning_rate": 2.001861224185898e-06, "loss": 0.6277, "step": 15971 }, { "epoch": 1.153900337746311, "grad_norm": 6.767291491494908, "learning_rate": 2.0015745905910282e-06, "loss": 0.7104, "step": 15972 }, { "epoch": 1.1539725829465204, "grad_norm": 6.334245448770788, "learning_rate": 2.0012879638192167e-06, "loss": 0.6449, "step": 15973 }, { "epoch": 1.15404482814673, "grad_norm": 5.732229886584823, "learning_rate": 2.0010013438743835e-06, "loss": 0.6272, "step": 15974 }, { "epoch": 1.1541170733469395, "grad_norm": 6.858764899369581, "learning_rate": 2.0007147307604556e-06, "loss": 0.6801, "step": 15975 }, { "epoch": 1.154189318547149, "grad_norm": 6.012815775678321, "learning_rate": 2.000428124481356e-06, "loss": 0.6429, "step": 15976 }, { "epoch": 1.1542615637473586, "grad_norm": 5.903598412209835, "learning_rate": 2.0001415250410064e-06, "loss": 0.6317, "step": 15977 }, { "epoch": 1.154333808947568, "grad_norm": 6.580819273617208, "learning_rate": 1.9998549324433312e-06, "loss": 0.6665, "step": 15978 }, { "epoch": 1.1544060541477776, "grad_norm": 7.4808514609182195, "learning_rate": 1.999568346692254e-06, "loss": 0.6591, "step": 15979 }, { "epoch": 1.154478299347987, "grad_norm": 6.576244384438387, "learning_rate": 1.9992817677916977e-06, "loss": 0.5402, "step": 15980 }, { "epoch": 1.1545505445481965, "grad_norm": 8.090626942563256, "learning_rate": 1.998995195745585e-06, "loss": 0.6519, "step": 15981 }, { "epoch": 1.154622789748406, "grad_norm": 6.645165899199271, "learning_rate": 1.998708630557839e-06, "loss": 0.6353, "step": 15982 }, { "epoch": 1.1546950349486156, "grad_norm": 7.953989278001971, "learning_rate": 1.998422072232383e-06, "loss": 0.6194, "step": 15983 }, { "epoch": 1.1547672801488251, "grad_norm": 6.096448095598307, "learning_rate": 1.998135520773139e-06, "loss": 0.6004, "step": 15984 }, { "epoch": 1.1548395253490347, "grad_norm": 6.872862604982213, "learning_rate": 1.99784897618403e-06, "loss": 0.6557, "step": 15985 }, { "epoch": 1.1549117705492442, "grad_norm": 6.240226733661205, "learning_rate": 1.9975624384689785e-06, "loss": 0.6709, "step": 15986 }, { "epoch": 1.1549840157494535, "grad_norm": 5.9028688471133, "learning_rate": 1.9972759076319085e-06, "loss": 0.5708, "step": 15987 }, { "epoch": 1.155056260949663, "grad_norm": 8.182507106406124, "learning_rate": 1.996989383676739e-06, "loss": 0.7274, "step": 15988 }, { "epoch": 1.1551285061498726, "grad_norm": 6.256161513641222, "learning_rate": 1.996702866607395e-06, "loss": 0.6035, "step": 15989 }, { "epoch": 1.1552007513500822, "grad_norm": 5.8638501790307656, "learning_rate": 1.996416356427798e-06, "loss": 0.709, "step": 15990 }, { "epoch": 1.1552729965502917, "grad_norm": 5.599652293173511, "learning_rate": 1.996129853141871e-06, "loss": 0.6067, "step": 15991 }, { "epoch": 1.1553452417505012, "grad_norm": 6.4074268283602756, "learning_rate": 1.9958433567535342e-06, "loss": 0.602, "step": 15992 }, { "epoch": 1.1554174869507108, "grad_norm": 5.486896163633685, "learning_rate": 1.9955568672667103e-06, "loss": 0.6244, "step": 15993 }, { "epoch": 1.15548973215092, "grad_norm": 9.403732013474757, "learning_rate": 1.9952703846853216e-06, "loss": 0.8005, "step": 15994 }, { "epoch": 1.1555619773511296, "grad_norm": 7.035490800103418, "learning_rate": 1.994983909013289e-06, "loss": 0.6234, "step": 15995 }, { "epoch": 1.1556342225513392, "grad_norm": 6.6841205364798295, "learning_rate": 1.994697440254535e-06, "loss": 0.6608, "step": 15996 }, { "epoch": 1.1557064677515487, "grad_norm": 6.286194330933443, "learning_rate": 1.99441097841298e-06, "loss": 0.6549, "step": 15997 }, { "epoch": 1.1557787129517583, "grad_norm": 7.525190899730888, "learning_rate": 1.994124523492547e-06, "loss": 0.7648, "step": 15998 }, { "epoch": 1.1558509581519678, "grad_norm": 6.5681860132675, "learning_rate": 1.993838075497156e-06, "loss": 0.6131, "step": 15999 }, { "epoch": 1.1559232033521774, "grad_norm": 7.175505340538034, "learning_rate": 1.9935516344307285e-06, "loss": 0.6907, "step": 16000 }, { "epoch": 1.155995448552387, "grad_norm": 5.238881462921636, "learning_rate": 1.993265200297187e-06, "loss": 0.6594, "step": 16001 }, { "epoch": 1.1560676937525962, "grad_norm": 7.88913245862351, "learning_rate": 1.9929787731004503e-06, "loss": 0.606, "step": 16002 }, { "epoch": 1.1561399389528058, "grad_norm": 7.36577378237249, "learning_rate": 1.9926923528444404e-06, "loss": 0.6517, "step": 16003 }, { "epoch": 1.1562121841530153, "grad_norm": 6.06572582508938, "learning_rate": 1.9924059395330787e-06, "loss": 0.6686, "step": 16004 }, { "epoch": 1.1562844293532248, "grad_norm": 6.544302277818988, "learning_rate": 1.9921195331702866e-06, "loss": 0.6605, "step": 16005 }, { "epoch": 1.1563566745534344, "grad_norm": 6.8557441851719885, "learning_rate": 1.991833133759983e-06, "loss": 0.6558, "step": 16006 }, { "epoch": 1.156428919753644, "grad_norm": 6.3734667561077725, "learning_rate": 1.9915467413060884e-06, "loss": 0.6415, "step": 16007 }, { "epoch": 1.1565011649538535, "grad_norm": 5.193589462571845, "learning_rate": 1.991260355812524e-06, "loss": 0.6775, "step": 16008 }, { "epoch": 1.156573410154063, "grad_norm": 6.479318435052602, "learning_rate": 1.9909739772832123e-06, "loss": 0.6627, "step": 16009 }, { "epoch": 1.1566456553542723, "grad_norm": 6.065176461491719, "learning_rate": 1.99068760572207e-06, "loss": 0.6351, "step": 16010 }, { "epoch": 1.1567179005544819, "grad_norm": 6.2545912091716795, "learning_rate": 1.990401241133019e-06, "loss": 0.665, "step": 16011 }, { "epoch": 1.1567901457546914, "grad_norm": 7.38691912076203, "learning_rate": 1.99011488351998e-06, "loss": 0.6573, "step": 16012 }, { "epoch": 1.156862390954901, "grad_norm": 7.057631443379299, "learning_rate": 1.989828532886872e-06, "loss": 0.6088, "step": 16013 }, { "epoch": 1.1569346361551105, "grad_norm": 6.264543321044259, "learning_rate": 1.989542189237615e-06, "loss": 0.6635, "step": 16014 }, { "epoch": 1.15700688135532, "grad_norm": 7.36108974557651, "learning_rate": 1.9892558525761295e-06, "loss": 0.6631, "step": 16015 }, { "epoch": 1.1570791265555296, "grad_norm": 6.042895523150383, "learning_rate": 1.988969522906335e-06, "loss": 0.6616, "step": 16016 }, { "epoch": 1.157151371755739, "grad_norm": 7.120370532049008, "learning_rate": 1.9886832002321503e-06, "loss": 0.6181, "step": 16017 }, { "epoch": 1.1572236169559484, "grad_norm": 7.811213028762747, "learning_rate": 1.988396884557496e-06, "loss": 0.6366, "step": 16018 }, { "epoch": 1.157295862156158, "grad_norm": 6.067679797802626, "learning_rate": 1.9881105758862917e-06, "loss": 0.6187, "step": 16019 }, { "epoch": 1.1573681073563675, "grad_norm": 6.037182040164433, "learning_rate": 1.987824274222455e-06, "loss": 0.6472, "step": 16020 }, { "epoch": 1.157440352556577, "grad_norm": 6.603874641323858, "learning_rate": 1.987537979569907e-06, "loss": 0.6866, "step": 16021 }, { "epoch": 1.1575125977567866, "grad_norm": 5.588336733437693, "learning_rate": 1.987251691932565e-06, "loss": 0.6953, "step": 16022 }, { "epoch": 1.1575848429569962, "grad_norm": 6.087939813223649, "learning_rate": 1.986965411314351e-06, "loss": 0.6551, "step": 16023 }, { "epoch": 1.1576570881572055, "grad_norm": 6.747238724672104, "learning_rate": 1.986679137719181e-06, "loss": 0.7055, "step": 16024 }, { "epoch": 1.157729333357415, "grad_norm": 6.9028093562010415, "learning_rate": 1.9863928711509754e-06, "loss": 0.6567, "step": 16025 }, { "epoch": 1.1578015785576246, "grad_norm": 8.63603013777322, "learning_rate": 1.986106611613653e-06, "loss": 0.7276, "step": 16026 }, { "epoch": 1.157873823757834, "grad_norm": 6.772799099907296, "learning_rate": 1.9858203591111315e-06, "loss": 0.6396, "step": 16027 }, { "epoch": 1.1579460689580436, "grad_norm": 5.89540582012261, "learning_rate": 1.9855341136473295e-06, "loss": 0.68, "step": 16028 }, { "epoch": 1.1580183141582532, "grad_norm": 4.9008039632374585, "learning_rate": 1.985247875226167e-06, "loss": 0.6788, "step": 16029 }, { "epoch": 1.1580905593584627, "grad_norm": 7.124099406830202, "learning_rate": 1.984961643851561e-06, "loss": 0.7136, "step": 16030 }, { "epoch": 1.158162804558672, "grad_norm": 7.284208163425537, "learning_rate": 1.98467541952743e-06, "loss": 0.5957, "step": 16031 }, { "epoch": 1.1582350497588816, "grad_norm": 6.809357364486399, "learning_rate": 1.984389202257693e-06, "loss": 0.6842, "step": 16032 }, { "epoch": 1.1583072949590911, "grad_norm": 7.685347340961286, "learning_rate": 1.9841029920462667e-06, "loss": 0.7301, "step": 16033 }, { "epoch": 1.1583795401593007, "grad_norm": 6.9721556411678, "learning_rate": 1.9838167888970713e-06, "loss": 0.662, "step": 16034 }, { "epoch": 1.1584517853595102, "grad_norm": 6.442654222175335, "learning_rate": 1.983530592814022e-06, "loss": 0.6699, "step": 16035 }, { "epoch": 1.1585240305597198, "grad_norm": 7.164397153869204, "learning_rate": 1.9832444038010384e-06, "loss": 0.6449, "step": 16036 }, { "epoch": 1.1585962757599293, "grad_norm": 6.4946992240961094, "learning_rate": 1.9829582218620376e-06, "loss": 0.6742, "step": 16037 }, { "epoch": 1.1586685209601386, "grad_norm": 5.795242449845411, "learning_rate": 1.982672047000937e-06, "loss": 0.6963, "step": 16038 }, { "epoch": 1.1587407661603482, "grad_norm": 6.987674488489005, "learning_rate": 1.9823858792216545e-06, "loss": 0.6689, "step": 16039 }, { "epoch": 1.1588130113605577, "grad_norm": 5.915999827170224, "learning_rate": 1.982099718528107e-06, "loss": 0.6803, "step": 16040 }, { "epoch": 1.1588852565607672, "grad_norm": 7.0924587334880185, "learning_rate": 1.981813564924213e-06, "loss": 0.585, "step": 16041 }, { "epoch": 1.1589575017609768, "grad_norm": 5.522514557514889, "learning_rate": 1.9815274184138884e-06, "loss": 0.6673, "step": 16042 }, { "epoch": 1.1590297469611863, "grad_norm": 6.579063511676554, "learning_rate": 1.981241279001051e-06, "loss": 0.6032, "step": 16043 }, { "epoch": 1.1591019921613959, "grad_norm": 6.148180324890695, "learning_rate": 1.980955146689618e-06, "loss": 0.6603, "step": 16044 }, { "epoch": 1.1591742373616052, "grad_norm": 8.703528677380703, "learning_rate": 1.980669021483506e-06, "loss": 0.6625, "step": 16045 }, { "epoch": 1.1592464825618147, "grad_norm": 5.576861583057011, "learning_rate": 1.980382903386631e-06, "loss": 0.6228, "step": 16046 }, { "epoch": 1.1593187277620243, "grad_norm": 5.560062002755662, "learning_rate": 1.9800967924029112e-06, "loss": 0.6484, "step": 16047 }, { "epoch": 1.1593909729622338, "grad_norm": 7.411136608597822, "learning_rate": 1.979810688536264e-06, "loss": 0.6665, "step": 16048 }, { "epoch": 1.1594632181624434, "grad_norm": 7.879412913261115, "learning_rate": 1.979524591790603e-06, "loss": 0.7633, "step": 16049 }, { "epoch": 1.159535463362653, "grad_norm": 7.225397921365322, "learning_rate": 1.9792385021698464e-06, "loss": 0.7578, "step": 16050 }, { "epoch": 1.1596077085628624, "grad_norm": 6.314175525041662, "learning_rate": 1.978952419677911e-06, "loss": 0.6389, "step": 16051 }, { "epoch": 1.1596799537630718, "grad_norm": 7.040018946015267, "learning_rate": 1.9786663443187115e-06, "loss": 0.7292, "step": 16052 }, { "epoch": 1.1597521989632813, "grad_norm": 4.947081720945283, "learning_rate": 1.978380276096165e-06, "loss": 0.5909, "step": 16053 }, { "epoch": 1.1598244441634908, "grad_norm": 5.166984158677139, "learning_rate": 1.978094215014188e-06, "loss": 0.6129, "step": 16054 }, { "epoch": 1.1598966893637004, "grad_norm": 5.564267681294778, "learning_rate": 1.9778081610766957e-06, "loss": 0.6346, "step": 16055 }, { "epoch": 1.15996893456391, "grad_norm": 7.648550217823657, "learning_rate": 1.9775221142876046e-06, "loss": 0.665, "step": 16056 }, { "epoch": 1.1600411797641195, "grad_norm": 6.698750097492244, "learning_rate": 1.9772360746508293e-06, "loss": 0.7082, "step": 16057 }, { "epoch": 1.160113424964329, "grad_norm": 6.971539542054235, "learning_rate": 1.9769500421702876e-06, "loss": 0.6533, "step": 16058 }, { "epoch": 1.1601856701645383, "grad_norm": 7.075484231613863, "learning_rate": 1.976664016849892e-06, "loss": 0.6673, "step": 16059 }, { "epoch": 1.1602579153647479, "grad_norm": 5.810752031567281, "learning_rate": 1.9763779986935606e-06, "loss": 0.6389, "step": 16060 }, { "epoch": 1.1603301605649574, "grad_norm": 5.807320532848194, "learning_rate": 1.9760919877052077e-06, "loss": 0.5918, "step": 16061 }, { "epoch": 1.160402405765167, "grad_norm": 5.109742877560501, "learning_rate": 1.9758059838887493e-06, "loss": 0.5923, "step": 16062 }, { "epoch": 1.1604746509653765, "grad_norm": 5.765590595256429, "learning_rate": 1.9755199872480995e-06, "loss": 0.5989, "step": 16063 }, { "epoch": 1.160546896165586, "grad_norm": 6.71549106782091, "learning_rate": 1.9752339977871733e-06, "loss": 0.6791, "step": 16064 }, { "epoch": 1.1606191413657956, "grad_norm": 6.665333455609171, "learning_rate": 1.974948015509886e-06, "loss": 0.6344, "step": 16065 }, { "epoch": 1.160691386566005, "grad_norm": 7.096753261904458, "learning_rate": 1.9746620404201545e-06, "loss": 0.6473, "step": 16066 }, { "epoch": 1.1607636317662144, "grad_norm": 7.25696045439281, "learning_rate": 1.97437607252189e-06, "loss": 0.65, "step": 16067 }, { "epoch": 1.160835876966424, "grad_norm": 6.487067635595382, "learning_rate": 1.97409011181901e-06, "loss": 0.7572, "step": 16068 }, { "epoch": 1.1609081221666335, "grad_norm": 7.394675953170131, "learning_rate": 1.9738041583154276e-06, "loss": 0.6628, "step": 16069 }, { "epoch": 1.160980367366843, "grad_norm": 6.166477441032418, "learning_rate": 1.9735182120150575e-06, "loss": 0.6794, "step": 16070 }, { "epoch": 1.1610526125670526, "grad_norm": 8.087546073393202, "learning_rate": 1.9732322729218143e-06, "loss": 0.6975, "step": 16071 }, { "epoch": 1.1611248577672622, "grad_norm": 6.129581528485894, "learning_rate": 1.972946341039612e-06, "loss": 0.6495, "step": 16072 }, { "epoch": 1.1611971029674717, "grad_norm": 7.518455686064812, "learning_rate": 1.972660416372366e-06, "loss": 0.637, "step": 16073 }, { "epoch": 1.161269348167681, "grad_norm": 6.3997799835533895, "learning_rate": 1.972374498923989e-06, "loss": 0.5813, "step": 16074 }, { "epoch": 1.1613415933678906, "grad_norm": 5.6424070803082005, "learning_rate": 1.9720885886983954e-06, "loss": 0.5887, "step": 16075 }, { "epoch": 1.1614138385681, "grad_norm": 7.043017496711504, "learning_rate": 1.9718026856995e-06, "loss": 0.6825, "step": 16076 }, { "epoch": 1.1614860837683096, "grad_norm": 6.73715258889863, "learning_rate": 1.971516789931215e-06, "loss": 0.6982, "step": 16077 }, { "epoch": 1.1615583289685192, "grad_norm": 6.942303893053506, "learning_rate": 1.971230901397454e-06, "loss": 0.6852, "step": 16078 }, { "epoch": 1.1616305741687287, "grad_norm": 5.928206066488246, "learning_rate": 1.9709450201021313e-06, "loss": 0.6326, "step": 16079 }, { "epoch": 1.1617028193689383, "grad_norm": 8.873021724664444, "learning_rate": 1.970659146049162e-06, "loss": 0.706, "step": 16080 }, { "epoch": 1.1617750645691478, "grad_norm": 7.511510535660837, "learning_rate": 1.9703732792424572e-06, "loss": 0.6634, "step": 16081 }, { "epoch": 1.1618473097693571, "grad_norm": 7.367905129748657, "learning_rate": 1.970087419685931e-06, "loss": 0.6871, "step": 16082 }, { "epoch": 1.1619195549695667, "grad_norm": 7.855485503540687, "learning_rate": 1.9698015673834968e-06, "loss": 0.6605, "step": 16083 }, { "epoch": 1.1619918001697762, "grad_norm": 5.435071205437764, "learning_rate": 1.969515722339067e-06, "loss": 0.5958, "step": 16084 }, { "epoch": 1.1620640453699858, "grad_norm": 6.509376804976046, "learning_rate": 1.9692298845565554e-06, "loss": 0.6667, "step": 16085 }, { "epoch": 1.1621362905701953, "grad_norm": 6.621101239855646, "learning_rate": 1.968944054039874e-06, "loss": 0.6405, "step": 16086 }, { "epoch": 1.1622085357704048, "grad_norm": 6.223375296850514, "learning_rate": 1.968658230792937e-06, "loss": 0.676, "step": 16087 }, { "epoch": 1.1622807809706144, "grad_norm": 6.162402182029032, "learning_rate": 1.9683724148196557e-06, "loss": 0.7026, "step": 16088 }, { "epoch": 1.1623530261708237, "grad_norm": 6.262863654201889, "learning_rate": 1.9680866061239435e-06, "loss": 0.629, "step": 16089 }, { "epoch": 1.1624252713710332, "grad_norm": 5.3230122397764195, "learning_rate": 1.9678008047097124e-06, "loss": 0.6048, "step": 16090 }, { "epoch": 1.1624975165712428, "grad_norm": 7.515037466071282, "learning_rate": 1.967515010580876e-06, "loss": 0.6302, "step": 16091 }, { "epoch": 1.1625697617714523, "grad_norm": 7.0332074640711415, "learning_rate": 1.9672292237413446e-06, "loss": 0.5999, "step": 16092 }, { "epoch": 1.1626420069716619, "grad_norm": 6.173002888436432, "learning_rate": 1.9669434441950313e-06, "loss": 0.6912, "step": 16093 }, { "epoch": 1.1627142521718714, "grad_norm": 6.589750550430155, "learning_rate": 1.9666576719458495e-06, "loss": 0.735, "step": 16094 }, { "epoch": 1.162786497372081, "grad_norm": 7.091073089872785, "learning_rate": 1.9663719069977094e-06, "loss": 0.6406, "step": 16095 }, { "epoch": 1.1628587425722903, "grad_norm": 6.96202385471096, "learning_rate": 1.9660861493545237e-06, "loss": 0.6547, "step": 16096 }, { "epoch": 1.1629309877724998, "grad_norm": 6.04770893191755, "learning_rate": 1.9658003990202036e-06, "loss": 0.6843, "step": 16097 }, { "epoch": 1.1630032329727094, "grad_norm": 5.8787628460499795, "learning_rate": 1.965514655998662e-06, "loss": 0.6703, "step": 16098 }, { "epoch": 1.163075478172919, "grad_norm": 5.900257764455017, "learning_rate": 1.965228920293809e-06, "loss": 0.6623, "step": 16099 }, { "epoch": 1.1631477233731284, "grad_norm": 6.820747632525452, "learning_rate": 1.9649431919095572e-06, "loss": 0.7075, "step": 16100 }, { "epoch": 1.163219968573338, "grad_norm": 6.073817749524871, "learning_rate": 1.9646574708498177e-06, "loss": 0.6925, "step": 16101 }, { "epoch": 1.1632922137735475, "grad_norm": 6.989856182141481, "learning_rate": 1.9643717571185017e-06, "loss": 0.6775, "step": 16102 }, { "epoch": 1.1633644589737568, "grad_norm": 6.870707055900291, "learning_rate": 1.96408605071952e-06, "loss": 0.6447, "step": 16103 }, { "epoch": 1.1634367041739664, "grad_norm": 7.028749284033593, "learning_rate": 1.9638003516567845e-06, "loss": 0.6918, "step": 16104 }, { "epoch": 1.163508949374176, "grad_norm": 6.117935777988663, "learning_rate": 1.963514659934207e-06, "loss": 0.6051, "step": 16105 }, { "epoch": 1.1635811945743855, "grad_norm": 6.530673791026078, "learning_rate": 1.963228975555696e-06, "loss": 0.7126, "step": 16106 }, { "epoch": 1.163653439774595, "grad_norm": 7.120585094371223, "learning_rate": 1.962943298525163e-06, "loss": 0.6504, "step": 16107 }, { "epoch": 1.1637256849748046, "grad_norm": 5.9017995385279525, "learning_rate": 1.962657628846521e-06, "loss": 0.6143, "step": 16108 }, { "epoch": 1.163797930175014, "grad_norm": 6.220870533461679, "learning_rate": 1.962371966523678e-06, "loss": 0.6577, "step": 16109 }, { "epoch": 1.1638701753752234, "grad_norm": 7.01440826943808, "learning_rate": 1.962086311560545e-06, "loss": 0.6908, "step": 16110 }, { "epoch": 1.163942420575433, "grad_norm": 8.309512870824054, "learning_rate": 1.9618006639610325e-06, "loss": 0.7209, "step": 16111 }, { "epoch": 1.1640146657756425, "grad_norm": 6.793747316024797, "learning_rate": 1.961515023729052e-06, "loss": 0.634, "step": 16112 }, { "epoch": 1.164086910975852, "grad_norm": 6.799638839555279, "learning_rate": 1.961229390868512e-06, "loss": 0.5973, "step": 16113 }, { "epoch": 1.1641591561760616, "grad_norm": 7.061466107605926, "learning_rate": 1.9609437653833235e-06, "loss": 0.6439, "step": 16114 }, { "epoch": 1.1642314013762711, "grad_norm": 6.930168883452541, "learning_rate": 1.9606581472773957e-06, "loss": 0.7037, "step": 16115 }, { "epoch": 1.1643036465764807, "grad_norm": 6.670414284532478, "learning_rate": 1.9603725365546404e-06, "loss": 0.6325, "step": 16116 }, { "epoch": 1.16437589177669, "grad_norm": 6.064990289213327, "learning_rate": 1.960086933218965e-06, "loss": 0.6304, "step": 16117 }, { "epoch": 1.1644481369768995, "grad_norm": 6.234022546428188, "learning_rate": 1.9598013372742806e-06, "loss": 0.5925, "step": 16118 }, { "epoch": 1.164520382177109, "grad_norm": 6.8065229320886615, "learning_rate": 1.9595157487244973e-06, "loss": 0.6331, "step": 16119 }, { "epoch": 1.1645926273773186, "grad_norm": 7.271814085242556, "learning_rate": 1.9592301675735227e-06, "loss": 0.7466, "step": 16120 }, { "epoch": 1.1646648725775282, "grad_norm": 8.185314556548724, "learning_rate": 1.9589445938252666e-06, "loss": 0.6523, "step": 16121 }, { "epoch": 1.1647371177777377, "grad_norm": 6.91799632087956, "learning_rate": 1.95865902748364e-06, "loss": 0.6746, "step": 16122 }, { "epoch": 1.1648093629779472, "grad_norm": 7.214419168103935, "learning_rate": 1.9583734685525514e-06, "loss": 0.712, "step": 16123 }, { "epoch": 1.1648816081781566, "grad_norm": 5.944714790606055, "learning_rate": 1.9580879170359083e-06, "loss": 0.6919, "step": 16124 }, { "epoch": 1.164953853378366, "grad_norm": 6.18559436159543, "learning_rate": 1.9578023729376213e-06, "loss": 0.6207, "step": 16125 }, { "epoch": 1.1650260985785756, "grad_norm": 7.870233697971958, "learning_rate": 1.9575168362615993e-06, "loss": 0.6151, "step": 16126 }, { "epoch": 1.1650983437787852, "grad_norm": 7.488523476373775, "learning_rate": 1.9572313070117503e-06, "loss": 0.6822, "step": 16127 }, { "epoch": 1.1651705889789947, "grad_norm": 6.2794451041991275, "learning_rate": 1.956945785191983e-06, "loss": 0.6417, "step": 16128 }, { "epoch": 1.1652428341792043, "grad_norm": 6.659704896729542, "learning_rate": 1.956660270806206e-06, "loss": 0.5882, "step": 16129 }, { "epoch": 1.1653150793794138, "grad_norm": 6.950146943878519, "learning_rate": 1.956374763858329e-06, "loss": 0.615, "step": 16130 }, { "epoch": 1.1653873245796231, "grad_norm": 6.408647488688806, "learning_rate": 1.9560892643522584e-06, "loss": 0.665, "step": 16131 }, { "epoch": 1.1654595697798327, "grad_norm": 5.820421624920802, "learning_rate": 1.9558037722919038e-06, "loss": 0.5865, "step": 16132 }, { "epoch": 1.1655318149800422, "grad_norm": 6.42630275476866, "learning_rate": 1.955518287681174e-06, "loss": 0.6865, "step": 16133 }, { "epoch": 1.1656040601802518, "grad_norm": 7.420849795557947, "learning_rate": 1.9552328105239747e-06, "loss": 0.6421, "step": 16134 }, { "epoch": 1.1656763053804613, "grad_norm": 6.983752877948496, "learning_rate": 1.954947340824216e-06, "loss": 0.6651, "step": 16135 }, { "epoch": 1.1657485505806708, "grad_norm": 6.016329478839172, "learning_rate": 1.954661878585804e-06, "loss": 0.6335, "step": 16136 }, { "epoch": 1.1658207957808804, "grad_norm": 6.325085774318814, "learning_rate": 1.954376423812649e-06, "loss": 0.6335, "step": 16137 }, { "epoch": 1.1658930409810897, "grad_norm": 6.890009361670436, "learning_rate": 1.954090976508656e-06, "loss": 0.628, "step": 16138 }, { "epoch": 1.1659652861812992, "grad_norm": 8.051305762116954, "learning_rate": 1.9538055366777335e-06, "loss": 0.6403, "step": 16139 }, { "epoch": 1.1660375313815088, "grad_norm": 7.148961515971109, "learning_rate": 1.9535201043237895e-06, "loss": 0.7226, "step": 16140 }, { "epoch": 1.1661097765817183, "grad_norm": 6.43599438884104, "learning_rate": 1.953234679450731e-06, "loss": 0.6787, "step": 16141 }, { "epoch": 1.1661820217819279, "grad_norm": 6.631737503986217, "learning_rate": 1.9529492620624654e-06, "loss": 0.6574, "step": 16142 }, { "epoch": 1.1662542669821374, "grad_norm": 7.59640921039972, "learning_rate": 1.9526638521628994e-06, "loss": 0.6996, "step": 16143 }, { "epoch": 1.166326512182347, "grad_norm": 5.997731415711257, "learning_rate": 1.9523784497559407e-06, "loss": 0.6629, "step": 16144 }, { "epoch": 1.1663987573825563, "grad_norm": 6.28170729153962, "learning_rate": 1.952093054845495e-06, "loss": 0.6479, "step": 16145 }, { "epoch": 1.1664710025827658, "grad_norm": 7.217095255587434, "learning_rate": 1.9518076674354703e-06, "loss": 0.6291, "step": 16146 }, { "epoch": 1.1665432477829754, "grad_norm": 6.990282943740183, "learning_rate": 1.951522287529773e-06, "loss": 0.6151, "step": 16147 }, { "epoch": 1.166615492983185, "grad_norm": 7.171338296060434, "learning_rate": 1.9512369151323106e-06, "loss": 0.6451, "step": 16148 }, { "epoch": 1.1666877381833944, "grad_norm": 6.849658662408216, "learning_rate": 1.950951550246988e-06, "loss": 0.7172, "step": 16149 }, { "epoch": 1.166759983383604, "grad_norm": 7.238032757219331, "learning_rate": 1.9506661928777116e-06, "loss": 0.6636, "step": 16150 }, { "epoch": 1.1668322285838135, "grad_norm": 7.704496868918426, "learning_rate": 1.95038084302839e-06, "loss": 0.5911, "step": 16151 }, { "epoch": 1.166904473784023, "grad_norm": 5.862070994347769, "learning_rate": 1.9500955007029268e-06, "loss": 0.5449, "step": 16152 }, { "epoch": 1.1669767189842326, "grad_norm": 5.888406269922126, "learning_rate": 1.9498101659052295e-06, "loss": 0.5775, "step": 16153 }, { "epoch": 1.167048964184442, "grad_norm": 7.9949680238505385, "learning_rate": 1.9495248386392035e-06, "loss": 0.7077, "step": 16154 }, { "epoch": 1.1671212093846515, "grad_norm": 6.553268770124291, "learning_rate": 1.949239518908756e-06, "loss": 0.6674, "step": 16155 }, { "epoch": 1.167193454584861, "grad_norm": 5.620007821617249, "learning_rate": 1.948954206717791e-06, "loss": 0.6424, "step": 16156 }, { "epoch": 1.1672656997850706, "grad_norm": 6.295597221277628, "learning_rate": 1.948668902070215e-06, "loss": 0.6294, "step": 16157 }, { "epoch": 1.16733794498528, "grad_norm": 5.949922115553475, "learning_rate": 1.9483836049699345e-06, "loss": 0.5591, "step": 16158 }, { "epoch": 1.1674101901854896, "grad_norm": 6.437292891939229, "learning_rate": 1.9480983154208534e-06, "loss": 0.6256, "step": 16159 }, { "epoch": 1.1674824353856992, "grad_norm": 6.031942219121007, "learning_rate": 1.947813033426878e-06, "loss": 0.5829, "step": 16160 }, { "epoch": 1.1675546805859085, "grad_norm": 7.4558898787654675, "learning_rate": 1.947527758991913e-06, "loss": 0.6433, "step": 16161 }, { "epoch": 1.167626925786118, "grad_norm": 5.993217767627427, "learning_rate": 1.947242492119866e-06, "loss": 0.6719, "step": 16162 }, { "epoch": 1.1676991709863276, "grad_norm": 6.443166368547626, "learning_rate": 1.946957232814638e-06, "loss": 0.6588, "step": 16163 }, { "epoch": 1.1677714161865371, "grad_norm": 7.222506251822645, "learning_rate": 1.946671981080136e-06, "loss": 0.6869, "step": 16164 }, { "epoch": 1.1678436613867467, "grad_norm": 6.789474351899244, "learning_rate": 1.9463867369202656e-06, "loss": 0.7121, "step": 16165 }, { "epoch": 1.1679159065869562, "grad_norm": 5.979217458192461, "learning_rate": 1.9461015003389316e-06, "loss": 0.6718, "step": 16166 }, { "epoch": 1.1679881517871658, "grad_norm": 6.0710335675139255, "learning_rate": 1.945816271340037e-06, "loss": 0.6172, "step": 16167 }, { "epoch": 1.168060396987375, "grad_norm": 7.896526886634268, "learning_rate": 1.9455310499274877e-06, "loss": 0.672, "step": 16168 }, { "epoch": 1.1681326421875846, "grad_norm": 7.920010740484797, "learning_rate": 1.945245836105188e-06, "loss": 0.6375, "step": 16169 }, { "epoch": 1.1682048873877942, "grad_norm": 6.4565922841186, "learning_rate": 1.944960629877042e-06, "loss": 0.7154, "step": 16170 }, { "epoch": 1.1682771325880037, "grad_norm": 5.9345998005325065, "learning_rate": 1.9446754312469534e-06, "loss": 0.5935, "step": 16171 }, { "epoch": 1.1683493777882132, "grad_norm": 7.055123857455868, "learning_rate": 1.9443902402188273e-06, "loss": 0.6795, "step": 16172 }, { "epoch": 1.1684216229884228, "grad_norm": 7.247482915189041, "learning_rate": 1.944105056796568e-06, "loss": 0.7044, "step": 16173 }, { "epoch": 1.1684938681886323, "grad_norm": 6.72298904472978, "learning_rate": 1.943819880984078e-06, "loss": 0.7123, "step": 16174 }, { "epoch": 1.1685661133888416, "grad_norm": 6.155194651967823, "learning_rate": 1.943534712785262e-06, "loss": 0.5876, "step": 16175 }, { "epoch": 1.1686383585890512, "grad_norm": 6.219570393337915, "learning_rate": 1.9432495522040253e-06, "loss": 0.6511, "step": 16176 }, { "epoch": 1.1687106037892607, "grad_norm": 7.449415391347939, "learning_rate": 1.9429643992442686e-06, "loss": 0.6958, "step": 16177 }, { "epoch": 1.1687828489894703, "grad_norm": 6.895421441291848, "learning_rate": 1.942679253909896e-06, "loss": 0.6906, "step": 16178 }, { "epoch": 1.1688550941896798, "grad_norm": 7.986641937158135, "learning_rate": 1.9423941162048122e-06, "loss": 0.647, "step": 16179 }, { "epoch": 1.1689273393898894, "grad_norm": 6.361634249139463, "learning_rate": 1.9421089861329213e-06, "loss": 0.626, "step": 16180 }, { "epoch": 1.168999584590099, "grad_norm": 6.741117779329695, "learning_rate": 1.941823863698124e-06, "loss": 0.6916, "step": 16181 }, { "epoch": 1.1690718297903082, "grad_norm": 6.820372066696495, "learning_rate": 1.941538748904325e-06, "loss": 0.6495, "step": 16182 }, { "epoch": 1.1691440749905178, "grad_norm": 6.139330545517088, "learning_rate": 1.941253641755427e-06, "loss": 0.6406, "step": 16183 }, { "epoch": 1.1692163201907273, "grad_norm": 7.094299832852302, "learning_rate": 1.9409685422553323e-06, "loss": 0.6889, "step": 16184 }, { "epoch": 1.1692885653909368, "grad_norm": 6.534549153660225, "learning_rate": 1.9406834504079444e-06, "loss": 0.6706, "step": 16185 }, { "epoch": 1.1693608105911464, "grad_norm": 6.578381313667354, "learning_rate": 1.9403983662171656e-06, "loss": 0.6205, "step": 16186 }, { "epoch": 1.169433055791356, "grad_norm": 6.118071393576925, "learning_rate": 1.9401132896868993e-06, "loss": 0.6386, "step": 16187 }, { "epoch": 1.1695053009915655, "grad_norm": 7.264038962894084, "learning_rate": 1.9398282208210467e-06, "loss": 0.6854, "step": 16188 }, { "epoch": 1.1695775461917748, "grad_norm": 6.6631064763396255, "learning_rate": 1.939543159623511e-06, "loss": 0.6415, "step": 16189 }, { "epoch": 1.1696497913919843, "grad_norm": 6.703734470216431, "learning_rate": 1.9392581060981953e-06, "loss": 0.6839, "step": 16190 }, { "epoch": 1.1697220365921939, "grad_norm": 5.753076476680386, "learning_rate": 1.9389730602489994e-06, "loss": 0.6619, "step": 16191 }, { "epoch": 1.1697942817924034, "grad_norm": 8.154414061657945, "learning_rate": 1.9386880220798266e-06, "loss": 0.7792, "step": 16192 }, { "epoch": 1.169866526992613, "grad_norm": 6.086079156326371, "learning_rate": 1.9384029915945793e-06, "loss": 0.6268, "step": 16193 }, { "epoch": 1.1699387721928225, "grad_norm": 7.376675948757578, "learning_rate": 1.9381179687971597e-06, "loss": 0.6338, "step": 16194 }, { "epoch": 1.170011017393032, "grad_norm": 6.89568504642733, "learning_rate": 1.9378329536914685e-06, "loss": 0.6413, "step": 16195 }, { "epoch": 1.1700832625932414, "grad_norm": 7.362757980412299, "learning_rate": 1.937547946281407e-06, "loss": 0.6408, "step": 16196 }, { "epoch": 1.170155507793451, "grad_norm": 6.584841527284702, "learning_rate": 1.937262946570878e-06, "loss": 0.6192, "step": 16197 }, { "epoch": 1.1702277529936604, "grad_norm": 7.136088526668459, "learning_rate": 1.9369779545637823e-06, "loss": 0.6718, "step": 16198 }, { "epoch": 1.17029999819387, "grad_norm": 7.667749356101064, "learning_rate": 1.9366929702640207e-06, "loss": 0.6797, "step": 16199 }, { "epoch": 1.1703722433940795, "grad_norm": 7.161302939838712, "learning_rate": 1.9364079936754955e-06, "loss": 0.6047, "step": 16200 }, { "epoch": 1.170444488594289, "grad_norm": 7.965133504568864, "learning_rate": 1.9361230248021072e-06, "loss": 0.6345, "step": 16201 }, { "epoch": 1.1705167337944986, "grad_norm": 6.756428800717148, "learning_rate": 1.935838063647757e-06, "loss": 0.6265, "step": 16202 }, { "epoch": 1.170588978994708, "grad_norm": 8.636382622218823, "learning_rate": 1.935553110216345e-06, "loss": 0.725, "step": 16203 }, { "epoch": 1.1706612241949175, "grad_norm": 7.063670643698394, "learning_rate": 1.935268164511773e-06, "loss": 0.6789, "step": 16204 }, { "epoch": 1.170733469395127, "grad_norm": 9.328032577798007, "learning_rate": 1.9349832265379426e-06, "loss": 0.6718, "step": 16205 }, { "epoch": 1.1708057145953366, "grad_norm": 6.646055052435976, "learning_rate": 1.934698296298751e-06, "loss": 0.61, "step": 16206 }, { "epoch": 1.170877959795546, "grad_norm": 7.5107492345931375, "learning_rate": 1.9344133737981017e-06, "loss": 0.7028, "step": 16207 }, { "epoch": 1.1709502049957556, "grad_norm": 6.205924298517166, "learning_rate": 1.934128459039895e-06, "loss": 0.6977, "step": 16208 }, { "epoch": 1.1710224501959652, "grad_norm": 5.549542712368853, "learning_rate": 1.9338435520280296e-06, "loss": 0.6089, "step": 16209 }, { "epoch": 1.1710946953961745, "grad_norm": 5.797997543350231, "learning_rate": 1.933558652766406e-06, "loss": 0.6209, "step": 16210 }, { "epoch": 1.171166940596384, "grad_norm": 6.940734659056048, "learning_rate": 1.9332737612589246e-06, "loss": 0.6334, "step": 16211 }, { "epoch": 1.1712391857965936, "grad_norm": 8.75231336257891, "learning_rate": 1.9329888775094862e-06, "loss": 0.6037, "step": 16212 }, { "epoch": 1.1713114309968031, "grad_norm": 7.5446506168070835, "learning_rate": 1.9327040015219893e-06, "loss": 0.6039, "step": 16213 }, { "epoch": 1.1713836761970127, "grad_norm": 7.461776478515826, "learning_rate": 1.932419133300334e-06, "loss": 0.706, "step": 16214 }, { "epoch": 1.1714559213972222, "grad_norm": 6.5006584420860065, "learning_rate": 1.9321342728484207e-06, "loss": 0.6343, "step": 16215 }, { "epoch": 1.1715281665974318, "grad_norm": 6.505464164482142, "learning_rate": 1.9318494201701477e-06, "loss": 0.6751, "step": 16216 }, { "epoch": 1.171600411797641, "grad_norm": 6.82402114276361, "learning_rate": 1.931564575269415e-06, "loss": 0.6554, "step": 16217 }, { "epoch": 1.1716726569978506, "grad_norm": 6.875990640186732, "learning_rate": 1.931279738150122e-06, "loss": 0.6423, "step": 16218 }, { "epoch": 1.1717449021980602, "grad_norm": 7.34241541646543, "learning_rate": 1.9309949088161687e-06, "loss": 0.6761, "step": 16219 }, { "epoch": 1.1718171473982697, "grad_norm": 7.388861350993737, "learning_rate": 1.9307100872714515e-06, "loss": 0.6689, "step": 16220 }, { "epoch": 1.1718893925984792, "grad_norm": 6.858585251375872, "learning_rate": 1.930425273519872e-06, "loss": 0.6567, "step": 16221 }, { "epoch": 1.1719616377986888, "grad_norm": 7.228483169594121, "learning_rate": 1.9301404675653283e-06, "loss": 0.5839, "step": 16222 }, { "epoch": 1.1720338829988983, "grad_norm": 7.822784490438036, "learning_rate": 1.9298556694117202e-06, "loss": 0.6845, "step": 16223 }, { "epoch": 1.1721061281991079, "grad_norm": 7.038790444590005, "learning_rate": 1.9295708790629443e-06, "loss": 0.6454, "step": 16224 }, { "epoch": 1.1721783733993172, "grad_norm": 7.807619569359392, "learning_rate": 1.9292860965229e-06, "loss": 0.6499, "step": 16225 }, { "epoch": 1.1722506185995267, "grad_norm": 6.9828215029870355, "learning_rate": 1.929001321795486e-06, "loss": 0.6906, "step": 16226 }, { "epoch": 1.1723228637997363, "grad_norm": 6.103680310278228, "learning_rate": 1.9287165548846005e-06, "loss": 0.6441, "step": 16227 }, { "epoch": 1.1723951089999458, "grad_norm": 7.278445772118249, "learning_rate": 1.928431795794142e-06, "loss": 0.6593, "step": 16228 }, { "epoch": 1.1724673542001554, "grad_norm": 6.964309988573409, "learning_rate": 1.9281470445280084e-06, "loss": 0.6501, "step": 16229 }, { "epoch": 1.172539599400365, "grad_norm": 9.69333674658602, "learning_rate": 1.9278623010900978e-06, "loss": 0.761, "step": 16230 }, { "epoch": 1.1726118446005744, "grad_norm": 7.730230403886289, "learning_rate": 1.927577565484308e-06, "loss": 0.6973, "step": 16231 }, { "epoch": 1.172684089800784, "grad_norm": 5.8135089613839765, "learning_rate": 1.9272928377145366e-06, "loss": 0.6358, "step": 16232 }, { "epoch": 1.1727563350009933, "grad_norm": 7.035588125711894, "learning_rate": 1.9270081177846825e-06, "loss": 0.6367, "step": 16233 }, { "epoch": 1.1728285802012028, "grad_norm": 6.046096791110005, "learning_rate": 1.9267234056986415e-06, "loss": 0.6355, "step": 16234 }, { "epoch": 1.1729008254014124, "grad_norm": 5.53113085947121, "learning_rate": 1.9264387014603116e-06, "loss": 0.6083, "step": 16235 }, { "epoch": 1.172973070601622, "grad_norm": 8.143931239139487, "learning_rate": 1.9261540050735904e-06, "loss": 0.6953, "step": 16236 }, { "epoch": 1.1730453158018315, "grad_norm": 6.937770803853832, "learning_rate": 1.925869316542377e-06, "loss": 0.5998, "step": 16237 }, { "epoch": 1.173117561002041, "grad_norm": 6.403821531421595, "learning_rate": 1.9255846358705657e-06, "loss": 0.6569, "step": 16238 }, { "epoch": 1.1731898062022506, "grad_norm": 7.216630368512193, "learning_rate": 1.925299963062055e-06, "loss": 0.6694, "step": 16239 }, { "epoch": 1.1732620514024599, "grad_norm": 6.750195818285946, "learning_rate": 1.9250152981207416e-06, "loss": 0.6702, "step": 16240 }, { "epoch": 1.1733342966026694, "grad_norm": 7.461520346998451, "learning_rate": 1.924730641050522e-06, "loss": 0.6761, "step": 16241 }, { "epoch": 1.173406541802879, "grad_norm": 6.583035844608206, "learning_rate": 1.9244459918552934e-06, "loss": 0.6002, "step": 16242 }, { "epoch": 1.1734787870030885, "grad_norm": 5.892595280365316, "learning_rate": 1.924161350538952e-06, "loss": 0.6819, "step": 16243 }, { "epoch": 1.173551032203298, "grad_norm": 6.2099783678645535, "learning_rate": 1.923876717105395e-06, "loss": 0.6585, "step": 16244 }, { "epoch": 1.1736232774035076, "grad_norm": 7.3771759961038095, "learning_rate": 1.923592091558518e-06, "loss": 0.7318, "step": 16245 }, { "epoch": 1.1736955226037171, "grad_norm": 6.000316611519476, "learning_rate": 1.9233074739022186e-06, "loss": 0.6743, "step": 16246 }, { "epoch": 1.1737677678039264, "grad_norm": 7.551022481839147, "learning_rate": 1.923022864140391e-06, "loss": 0.6181, "step": 16247 }, { "epoch": 1.173840013004136, "grad_norm": 6.604559981931548, "learning_rate": 1.922738262276934e-06, "loss": 0.6975, "step": 16248 }, { "epoch": 1.1739122582043455, "grad_norm": 6.615452346386787, "learning_rate": 1.9224536683157403e-06, "loss": 0.677, "step": 16249 }, { "epoch": 1.173984503404555, "grad_norm": 5.872657471190175, "learning_rate": 1.9221690822607077e-06, "loss": 0.641, "step": 16250 }, { "epoch": 1.1740567486047646, "grad_norm": 7.295363733271455, "learning_rate": 1.921884504115733e-06, "loss": 0.6696, "step": 16251 }, { "epoch": 1.1741289938049742, "grad_norm": 6.72731104284524, "learning_rate": 1.9215999338847094e-06, "loss": 0.5643, "step": 16252 }, { "epoch": 1.1742012390051837, "grad_norm": 7.48949153951774, "learning_rate": 1.921315371571534e-06, "loss": 0.6179, "step": 16253 }, { "epoch": 1.174273484205393, "grad_norm": 6.584650640235599, "learning_rate": 1.921030817180101e-06, "loss": 0.6246, "step": 16254 }, { "epoch": 1.1743457294056026, "grad_norm": 8.760369396346778, "learning_rate": 1.9207462707143076e-06, "loss": 0.7043, "step": 16255 }, { "epoch": 1.174417974605812, "grad_norm": 6.993579372202959, "learning_rate": 1.9204617321780478e-06, "loss": 0.5886, "step": 16256 }, { "epoch": 1.1744902198060216, "grad_norm": 4.697128173339772, "learning_rate": 1.9201772015752163e-06, "loss": 0.6131, "step": 16257 }, { "epoch": 1.1745624650062312, "grad_norm": 6.312943546839773, "learning_rate": 1.9198926789097095e-06, "loss": 0.7589, "step": 16258 }, { "epoch": 1.1746347102064407, "grad_norm": 5.510411205421246, "learning_rate": 1.919608164185421e-06, "loss": 0.6547, "step": 16259 }, { "epoch": 1.1747069554066503, "grad_norm": 7.119597946988145, "learning_rate": 1.9193236574062465e-06, "loss": 0.7401, "step": 16260 }, { "epoch": 1.1747792006068596, "grad_norm": 6.4052423940826095, "learning_rate": 1.9190391585760795e-06, "loss": 0.6209, "step": 16261 }, { "epoch": 1.1748514458070691, "grad_norm": 6.702490147402337, "learning_rate": 1.9187546676988167e-06, "loss": 0.6651, "step": 16262 }, { "epoch": 1.1749236910072787, "grad_norm": 6.908079155393885, "learning_rate": 1.9184701847783498e-06, "loss": 0.618, "step": 16263 }, { "epoch": 1.1749959362074882, "grad_norm": 6.334706140942123, "learning_rate": 1.9181857098185746e-06, "loss": 0.5673, "step": 16264 }, { "epoch": 1.1750681814076978, "grad_norm": 6.314085506330271, "learning_rate": 1.9179012428233867e-06, "loss": 0.6443, "step": 16265 }, { "epoch": 1.1751404266079073, "grad_norm": 7.474322040139613, "learning_rate": 1.9176167837966778e-06, "loss": 0.6286, "step": 16266 }, { "epoch": 1.1752126718081168, "grad_norm": 7.5310823413975605, "learning_rate": 1.917332332742343e-06, "loss": 0.6638, "step": 16267 }, { "epoch": 1.1752849170083262, "grad_norm": 6.668954393137946, "learning_rate": 1.917047889664276e-06, "loss": 0.6618, "step": 16268 }, { "epoch": 1.1753571622085357, "grad_norm": 6.0734010201181245, "learning_rate": 1.916763454566371e-06, "loss": 0.701, "step": 16269 }, { "epoch": 1.1754294074087452, "grad_norm": 7.9860709044692895, "learning_rate": 1.9164790274525215e-06, "loss": 0.767, "step": 16270 }, { "epoch": 1.1755016526089548, "grad_norm": 9.203630261888978, "learning_rate": 1.9161946083266205e-06, "loss": 0.5687, "step": 16271 }, { "epoch": 1.1755738978091643, "grad_norm": 6.7644010850772975, "learning_rate": 1.915910197192562e-06, "loss": 0.6617, "step": 16272 }, { "epoch": 1.1756461430093739, "grad_norm": 6.6790824036053325, "learning_rate": 1.9156257940542407e-06, "loss": 0.6005, "step": 16273 }, { "epoch": 1.1757183882095834, "grad_norm": 6.5284315534884785, "learning_rate": 1.9153413989155475e-06, "loss": 0.6333, "step": 16274 }, { "epoch": 1.1757906334097927, "grad_norm": 6.929156535728635, "learning_rate": 1.9150570117803762e-06, "loss": 0.6112, "step": 16275 }, { "epoch": 1.1758628786100023, "grad_norm": 6.444237007807283, "learning_rate": 1.9147726326526216e-06, "loss": 0.6267, "step": 16276 }, { "epoch": 1.1759351238102118, "grad_norm": 6.812203497077557, "learning_rate": 1.914488261536174e-06, "loss": 0.6374, "step": 16277 }, { "epoch": 1.1760073690104214, "grad_norm": 6.362093257830939, "learning_rate": 1.9142038984349275e-06, "loss": 0.5733, "step": 16278 }, { "epoch": 1.176079614210631, "grad_norm": 7.294006437289904, "learning_rate": 1.913919543352775e-06, "loss": 0.6795, "step": 16279 }, { "epoch": 1.1761518594108404, "grad_norm": 7.584137535071401, "learning_rate": 1.9136351962936104e-06, "loss": 0.5446, "step": 16280 }, { "epoch": 1.17622410461105, "grad_norm": 8.311186034102295, "learning_rate": 1.913350857261323e-06, "loss": 0.6571, "step": 16281 }, { "epoch": 1.1762963498112593, "grad_norm": 7.554861703814937, "learning_rate": 1.913066526259807e-06, "loss": 0.6028, "step": 16282 }, { "epoch": 1.1763685950114688, "grad_norm": 6.820563907075282, "learning_rate": 1.9127822032929553e-06, "loss": 0.6738, "step": 16283 }, { "epoch": 1.1764408402116784, "grad_norm": 6.967066266343327, "learning_rate": 1.9124978883646586e-06, "loss": 0.5738, "step": 16284 }, { "epoch": 1.176513085411888, "grad_norm": 6.699531641290045, "learning_rate": 1.9122135814788096e-06, "loss": 0.6472, "step": 16285 }, { "epoch": 1.1765853306120975, "grad_norm": 8.419093421099634, "learning_rate": 1.9119292826393002e-06, "loss": 0.6975, "step": 16286 }, { "epoch": 1.176657575812307, "grad_norm": 7.092083033506474, "learning_rate": 1.911644991850023e-06, "loss": 0.5925, "step": 16287 }, { "epoch": 1.1767298210125166, "grad_norm": 7.066547415584854, "learning_rate": 1.9113607091148684e-06, "loss": 0.6447, "step": 16288 }, { "epoch": 1.1768020662127259, "grad_norm": 7.507888968214248, "learning_rate": 1.9110764344377285e-06, "loss": 0.6685, "step": 16289 }, { "epoch": 1.1768743114129354, "grad_norm": 6.088183555372345, "learning_rate": 1.9107921678224966e-06, "loss": 0.6657, "step": 16290 }, { "epoch": 1.176946556613145, "grad_norm": 7.1413656639428975, "learning_rate": 1.91050790927306e-06, "loss": 0.6832, "step": 16291 }, { "epoch": 1.1770188018133545, "grad_norm": 6.428543526725163, "learning_rate": 1.9102236587933137e-06, "loss": 0.6347, "step": 16292 }, { "epoch": 1.177091047013564, "grad_norm": 8.04288195451589, "learning_rate": 1.909939416387147e-06, "loss": 0.7274, "step": 16293 }, { "epoch": 1.1771632922137736, "grad_norm": 6.835178808344447, "learning_rate": 1.909655182058453e-06, "loss": 0.6339, "step": 16294 }, { "epoch": 1.1772355374139831, "grad_norm": 7.980712050408972, "learning_rate": 1.90937095581112e-06, "loss": 0.6872, "step": 16295 }, { "epoch": 1.1773077826141927, "grad_norm": 7.036085576806228, "learning_rate": 1.90908673764904e-06, "loss": 0.7006, "step": 16296 }, { "epoch": 1.177380027814402, "grad_norm": 5.948493818172293, "learning_rate": 1.908802527576104e-06, "loss": 0.6503, "step": 16297 }, { "epoch": 1.1774522730146115, "grad_norm": 6.197998080533652, "learning_rate": 1.9085183255962027e-06, "loss": 0.6691, "step": 16298 }, { "epoch": 1.177524518214821, "grad_norm": 6.031346769989542, "learning_rate": 1.9082341317132255e-06, "loss": 0.5998, "step": 16299 }, { "epoch": 1.1775967634150306, "grad_norm": 6.408388255478829, "learning_rate": 1.907949945931064e-06, "loss": 0.6537, "step": 16300 }, { "epoch": 1.1776690086152402, "grad_norm": 9.862125551104194, "learning_rate": 1.9076657682536083e-06, "loss": 0.6764, "step": 16301 }, { "epoch": 1.1777412538154497, "grad_norm": 6.327696985911052, "learning_rate": 1.907381598684748e-06, "loss": 0.5897, "step": 16302 }, { "epoch": 1.1778134990156592, "grad_norm": 6.164140166553639, "learning_rate": 1.9070974372283728e-06, "loss": 0.6999, "step": 16303 }, { "epoch": 1.1778857442158688, "grad_norm": 6.023598674987773, "learning_rate": 1.9068132838883738e-06, "loss": 0.5747, "step": 16304 }, { "epoch": 1.177957989416078, "grad_norm": 6.076448734049296, "learning_rate": 1.906529138668641e-06, "loss": 0.6554, "step": 16305 }, { "epoch": 1.1780302346162876, "grad_norm": 6.250871521267304, "learning_rate": 1.9062450015730626e-06, "loss": 0.6038, "step": 16306 }, { "epoch": 1.1781024798164972, "grad_norm": 7.309972603429552, "learning_rate": 1.9059608726055294e-06, "loss": 0.6439, "step": 16307 }, { "epoch": 1.1781747250167067, "grad_norm": 6.11549854956778, "learning_rate": 1.9056767517699318e-06, "loss": 0.6997, "step": 16308 }, { "epoch": 1.1782469702169163, "grad_norm": 7.274318817863018, "learning_rate": 1.9053926390701569e-06, "loss": 0.6747, "step": 16309 }, { "epoch": 1.1783192154171258, "grad_norm": 8.185966056807414, "learning_rate": 1.9051085345100951e-06, "loss": 0.6573, "step": 16310 }, { "epoch": 1.1783914606173354, "grad_norm": 7.150210834942621, "learning_rate": 1.904824438093635e-06, "loss": 0.6558, "step": 16311 }, { "epoch": 1.1784637058175447, "grad_norm": 6.714161144132956, "learning_rate": 1.9045403498246673e-06, "loss": 0.6437, "step": 16312 }, { "epoch": 1.1785359510177542, "grad_norm": 7.011078650709572, "learning_rate": 1.9042562697070794e-06, "loss": 0.6586, "step": 16313 }, { "epoch": 1.1786081962179638, "grad_norm": 7.78910167994158, "learning_rate": 1.9039721977447602e-06, "loss": 0.6502, "step": 16314 }, { "epoch": 1.1786804414181733, "grad_norm": 7.174397608760193, "learning_rate": 1.9036881339415996e-06, "loss": 0.6276, "step": 16315 }, { "epoch": 1.1787526866183828, "grad_norm": 6.218395433909051, "learning_rate": 1.903404078301485e-06, "loss": 0.6353, "step": 16316 }, { "epoch": 1.1788249318185924, "grad_norm": 6.24864273592752, "learning_rate": 1.9031200308283051e-06, "loss": 0.7045, "step": 16317 }, { "epoch": 1.178897177018802, "grad_norm": 6.411693809352004, "learning_rate": 1.9028359915259486e-06, "loss": 0.6654, "step": 16318 }, { "epoch": 1.1789694222190112, "grad_norm": 7.921533821779694, "learning_rate": 1.9025519603983046e-06, "loss": 0.6655, "step": 16319 }, { "epoch": 1.1790416674192208, "grad_norm": 6.421841735011168, "learning_rate": 1.9022679374492595e-06, "loss": 0.7112, "step": 16320 }, { "epoch": 1.1791139126194303, "grad_norm": 6.704673321508254, "learning_rate": 1.9019839226827023e-06, "loss": 0.7134, "step": 16321 }, { "epoch": 1.1791861578196399, "grad_norm": 6.768776950095673, "learning_rate": 1.9016999161025217e-06, "loss": 0.7061, "step": 16322 }, { "epoch": 1.1792584030198494, "grad_norm": 6.664166108697803, "learning_rate": 1.9014159177126041e-06, "loss": 0.648, "step": 16323 }, { "epoch": 1.179330648220059, "grad_norm": 6.441170544125938, "learning_rate": 1.9011319275168372e-06, "loss": 0.6697, "step": 16324 }, { "epoch": 1.1794028934202685, "grad_norm": 5.307322773141893, "learning_rate": 1.9008479455191097e-06, "loss": 0.6215, "step": 16325 }, { "epoch": 1.1794751386204778, "grad_norm": 6.79451877487141, "learning_rate": 1.900563971723309e-06, "loss": 0.682, "step": 16326 }, { "epoch": 1.1795473838206874, "grad_norm": 6.0738576309456, "learning_rate": 1.9002800061333214e-06, "loss": 0.6314, "step": 16327 }, { "epoch": 1.179619629020897, "grad_norm": 8.052551282253365, "learning_rate": 1.8999960487530352e-06, "loss": 0.6637, "step": 16328 }, { "epoch": 1.1796918742211064, "grad_norm": 6.397480838414513, "learning_rate": 1.8997120995863371e-06, "loss": 0.64, "step": 16329 }, { "epoch": 1.179764119421316, "grad_norm": 6.8326897977170145, "learning_rate": 1.899428158637115e-06, "loss": 0.5913, "step": 16330 }, { "epoch": 1.1798363646215255, "grad_norm": 6.053957239227457, "learning_rate": 1.899144225909254e-06, "loss": 0.6407, "step": 16331 }, { "epoch": 1.179908609821735, "grad_norm": 6.2981192067149845, "learning_rate": 1.8988603014066425e-06, "loss": 0.6878, "step": 16332 }, { "epoch": 1.1799808550219444, "grad_norm": 7.736136557603584, "learning_rate": 1.898576385133167e-06, "loss": 0.7363, "step": 16333 }, { "epoch": 1.180053100222154, "grad_norm": 7.793869142455293, "learning_rate": 1.8982924770927133e-06, "loss": 0.7265, "step": 16334 }, { "epoch": 1.1801253454223635, "grad_norm": 7.223581786038009, "learning_rate": 1.8980085772891685e-06, "loss": 0.637, "step": 16335 }, { "epoch": 1.180197590622573, "grad_norm": 7.369468295458206, "learning_rate": 1.897724685726419e-06, "loss": 0.6615, "step": 16336 }, { "epoch": 1.1802698358227826, "grad_norm": 7.852381603729275, "learning_rate": 1.8974408024083519e-06, "loss": 0.6337, "step": 16337 }, { "epoch": 1.180342081022992, "grad_norm": 7.494749647442003, "learning_rate": 1.897156927338851e-06, "loss": 0.7047, "step": 16338 }, { "epoch": 1.1804143262232016, "grad_norm": 7.748462893734226, "learning_rate": 1.896873060521804e-06, "loss": 0.648, "step": 16339 }, { "epoch": 1.180486571423411, "grad_norm": 7.0280228054000125, "learning_rate": 1.8965892019610968e-06, "loss": 0.6492, "step": 16340 }, { "epoch": 1.1805588166236205, "grad_norm": 8.530247444239876, "learning_rate": 1.8963053516606145e-06, "loss": 0.6825, "step": 16341 }, { "epoch": 1.18063106182383, "grad_norm": 7.172381347866184, "learning_rate": 1.8960215096242433e-06, "loss": 0.6266, "step": 16342 }, { "epoch": 1.1807033070240396, "grad_norm": 6.358641036134688, "learning_rate": 1.8957376758558684e-06, "loss": 0.6723, "step": 16343 }, { "epoch": 1.1807755522242491, "grad_norm": 7.1152531395580185, "learning_rate": 1.8954538503593762e-06, "loss": 0.7002, "step": 16344 }, { "epoch": 1.1808477974244587, "grad_norm": 6.883767381088178, "learning_rate": 1.8951700331386507e-06, "loss": 0.6337, "step": 16345 }, { "epoch": 1.1809200426246682, "grad_norm": 6.429475097114388, "learning_rate": 1.8948862241975783e-06, "loss": 0.6236, "step": 16346 }, { "epoch": 1.1809922878248775, "grad_norm": 6.623591723202286, "learning_rate": 1.8946024235400445e-06, "loss": 0.6284, "step": 16347 }, { "epoch": 1.181064533025087, "grad_norm": 5.819928416437864, "learning_rate": 1.8943186311699317e-06, "loss": 0.6172, "step": 16348 }, { "epoch": 1.1811367782252966, "grad_norm": 6.4528246941322625, "learning_rate": 1.894034847091127e-06, "loss": 0.6092, "step": 16349 }, { "epoch": 1.1812090234255062, "grad_norm": 7.300227634590972, "learning_rate": 1.8937510713075151e-06, "loss": 0.5914, "step": 16350 }, { "epoch": 1.1812812686257157, "grad_norm": 6.621237784200313, "learning_rate": 1.893467303822981e-06, "loss": 0.6586, "step": 16351 }, { "epoch": 1.1813535138259252, "grad_norm": 5.680849651671181, "learning_rate": 1.8931835446414078e-06, "loss": 0.7406, "step": 16352 }, { "epoch": 1.1814257590261348, "grad_norm": 9.008639533396583, "learning_rate": 1.8928997937666808e-06, "loss": 0.6729, "step": 16353 }, { "epoch": 1.181498004226344, "grad_norm": 5.80619650693957, "learning_rate": 1.8926160512026833e-06, "loss": 0.6127, "step": 16354 }, { "epoch": 1.1815702494265536, "grad_norm": 6.7747217212690565, "learning_rate": 1.8923323169533025e-06, "loss": 0.6241, "step": 16355 }, { "epoch": 1.1816424946267632, "grad_norm": 7.852972801667561, "learning_rate": 1.892048591022419e-06, "loss": 0.6512, "step": 16356 }, { "epoch": 1.1817147398269727, "grad_norm": 5.954308099403906, "learning_rate": 1.8917648734139185e-06, "loss": 0.6032, "step": 16357 }, { "epoch": 1.1817869850271823, "grad_norm": 5.845423820799942, "learning_rate": 1.891481164131685e-06, "loss": 0.6075, "step": 16358 }, { "epoch": 1.1818592302273918, "grad_norm": 6.102110775822589, "learning_rate": 1.8911974631796015e-06, "loss": 0.6261, "step": 16359 }, { "epoch": 1.1819314754276014, "grad_norm": 5.951472175615787, "learning_rate": 1.8909137705615519e-06, "loss": 0.6682, "step": 16360 }, { "epoch": 1.1820037206278107, "grad_norm": 6.917573095775743, "learning_rate": 1.8906300862814197e-06, "loss": 0.6183, "step": 16361 }, { "epoch": 1.1820759658280202, "grad_norm": 5.728881865849681, "learning_rate": 1.8903464103430894e-06, "loss": 0.6716, "step": 16362 }, { "epoch": 1.1821482110282298, "grad_norm": 7.005120992766008, "learning_rate": 1.8900627427504425e-06, "loss": 0.6132, "step": 16363 }, { "epoch": 1.1822204562284393, "grad_norm": 8.118088350065713, "learning_rate": 1.8897790835073633e-06, "loss": 0.6455, "step": 16364 }, { "epoch": 1.1822927014286488, "grad_norm": 8.723959839474372, "learning_rate": 1.8894954326177356e-06, "loss": 0.7122, "step": 16365 }, { "epoch": 1.1823649466288584, "grad_norm": 6.393608907595244, "learning_rate": 1.8892117900854403e-06, "loss": 0.6823, "step": 16366 }, { "epoch": 1.182437191829068, "grad_norm": 7.311945706739797, "learning_rate": 1.8889281559143615e-06, "loss": 0.6352, "step": 16367 }, { "epoch": 1.1825094370292772, "grad_norm": 6.86172368673681, "learning_rate": 1.888644530108381e-06, "loss": 0.633, "step": 16368 }, { "epoch": 1.1825816822294868, "grad_norm": 6.1992138271877115, "learning_rate": 1.8883609126713842e-06, "loss": 0.6684, "step": 16369 }, { "epoch": 1.1826539274296963, "grad_norm": 6.706871141889936, "learning_rate": 1.8880773036072503e-06, "loss": 0.652, "step": 16370 }, { "epoch": 1.1827261726299059, "grad_norm": 5.927129744184324, "learning_rate": 1.887793702919863e-06, "loss": 0.532, "step": 16371 }, { "epoch": 1.1827984178301154, "grad_norm": 6.5747354432464915, "learning_rate": 1.8875101106131049e-06, "loss": 0.5694, "step": 16372 }, { "epoch": 1.182870663030325, "grad_norm": 9.645448834944855, "learning_rate": 1.8872265266908574e-06, "loss": 0.6804, "step": 16373 }, { "epoch": 1.1829429082305345, "grad_norm": 7.704995692287184, "learning_rate": 1.886942951157003e-06, "loss": 0.5972, "step": 16374 }, { "epoch": 1.183015153430744, "grad_norm": 8.602459187764184, "learning_rate": 1.8866593840154235e-06, "loss": 0.63, "step": 16375 }, { "epoch": 1.1830873986309536, "grad_norm": 7.099561554171822, "learning_rate": 1.8863758252700014e-06, "loss": 0.623, "step": 16376 }, { "epoch": 1.183159643831163, "grad_norm": 8.525513449501837, "learning_rate": 1.8860922749246175e-06, "loss": 0.6538, "step": 16377 }, { "epoch": 1.1832318890313724, "grad_norm": 7.532431355494993, "learning_rate": 1.8858087329831534e-06, "loss": 0.6328, "step": 16378 }, { "epoch": 1.183304134231582, "grad_norm": 6.427988080396443, "learning_rate": 1.8855251994494912e-06, "loss": 0.6101, "step": 16379 }, { "epoch": 1.1833763794317915, "grad_norm": 6.517239816474414, "learning_rate": 1.8852416743275126e-06, "loss": 0.6098, "step": 16380 }, { "epoch": 1.183448624632001, "grad_norm": 11.013892677395203, "learning_rate": 1.8849581576210972e-06, "loss": 0.6144, "step": 16381 }, { "epoch": 1.1835208698322106, "grad_norm": 8.930130060664885, "learning_rate": 1.8846746493341263e-06, "loss": 0.6814, "step": 16382 }, { "epoch": 1.1835931150324202, "grad_norm": 6.166954377482074, "learning_rate": 1.8843911494704837e-06, "loss": 0.6141, "step": 16383 }, { "epoch": 1.1836653602326295, "grad_norm": 7.603539706646941, "learning_rate": 1.884107658034047e-06, "loss": 0.6575, "step": 16384 }, { "epoch": 1.183737605432839, "grad_norm": 6.239498623977193, "learning_rate": 1.8838241750286983e-06, "loss": 0.6699, "step": 16385 }, { "epoch": 1.1838098506330486, "grad_norm": 6.482105271531827, "learning_rate": 1.8835407004583184e-06, "loss": 0.6347, "step": 16386 }, { "epoch": 1.183882095833258, "grad_norm": 7.333995818066971, "learning_rate": 1.883257234326788e-06, "loss": 0.6094, "step": 16387 }, { "epoch": 1.1839543410334676, "grad_norm": 6.625842922496258, "learning_rate": 1.8829737766379862e-06, "loss": 0.6097, "step": 16388 }, { "epoch": 1.1840265862336772, "grad_norm": 7.587083189826358, "learning_rate": 1.882690327395795e-06, "loss": 0.6691, "step": 16389 }, { "epoch": 1.1840988314338867, "grad_norm": 5.837505084082536, "learning_rate": 1.8824068866040943e-06, "loss": 0.6619, "step": 16390 }, { "epoch": 1.184171076634096, "grad_norm": 7.653017942241321, "learning_rate": 1.8821234542667632e-06, "loss": 0.6885, "step": 16391 }, { "epoch": 1.1842433218343056, "grad_norm": 7.577058005183543, "learning_rate": 1.8818400303876822e-06, "loss": 0.5879, "step": 16392 }, { "epoch": 1.1843155670345151, "grad_norm": 7.218798880803797, "learning_rate": 1.8815566149707312e-06, "loss": 0.6312, "step": 16393 }, { "epoch": 1.1843878122347247, "grad_norm": 6.112682803176204, "learning_rate": 1.881273208019791e-06, "loss": 0.6079, "step": 16394 }, { "epoch": 1.1844600574349342, "grad_norm": 6.164647605211148, "learning_rate": 1.880989809538739e-06, "loss": 0.5929, "step": 16395 }, { "epoch": 1.1845323026351438, "grad_norm": 7.210543372727223, "learning_rate": 1.8807064195314555e-06, "loss": 0.5885, "step": 16396 }, { "epoch": 1.1846045478353533, "grad_norm": 6.364849311860833, "learning_rate": 1.8804230380018221e-06, "loss": 0.625, "step": 16397 }, { "epoch": 1.1846767930355626, "grad_norm": 7.452622114765919, "learning_rate": 1.880139664953715e-06, "loss": 0.6539, "step": 16398 }, { "epoch": 1.1847490382357722, "grad_norm": 7.245556752339132, "learning_rate": 1.8798563003910144e-06, "loss": 0.7231, "step": 16399 }, { "epoch": 1.1848212834359817, "grad_norm": 6.789492612148563, "learning_rate": 1.8795729443175997e-06, "loss": 0.6112, "step": 16400 }, { "epoch": 1.1848935286361912, "grad_norm": 7.8506454008080455, "learning_rate": 1.8792895967373501e-06, "loss": 0.6353, "step": 16401 }, { "epoch": 1.1849657738364008, "grad_norm": 5.96565334019591, "learning_rate": 1.8790062576541435e-06, "loss": 0.6886, "step": 16402 }, { "epoch": 1.1850380190366103, "grad_norm": 6.195177343753498, "learning_rate": 1.878722927071859e-06, "loss": 0.5872, "step": 16403 }, { "epoch": 1.1851102642368199, "grad_norm": 5.872456426089372, "learning_rate": 1.8784396049943754e-06, "loss": 0.6198, "step": 16404 }, { "epoch": 1.1851825094370292, "grad_norm": 7.200221206657798, "learning_rate": 1.8781562914255714e-06, "loss": 0.6419, "step": 16405 }, { "epoch": 1.1852547546372387, "grad_norm": 7.007365302847343, "learning_rate": 1.8778729863693247e-06, "loss": 0.5838, "step": 16406 }, { "epoch": 1.1853269998374483, "grad_norm": 6.302948325408788, "learning_rate": 1.8775896898295131e-06, "loss": 0.6111, "step": 16407 }, { "epoch": 1.1853992450376578, "grad_norm": 6.257817986303775, "learning_rate": 1.877306401810017e-06, "loss": 0.6422, "step": 16408 }, { "epoch": 1.1854714902378674, "grad_norm": 10.150660646482995, "learning_rate": 1.8770231223147117e-06, "loss": 0.6746, "step": 16409 }, { "epoch": 1.185543735438077, "grad_norm": 6.686845693933176, "learning_rate": 1.8767398513474756e-06, "loss": 0.6271, "step": 16410 }, { "epoch": 1.1856159806382864, "grad_norm": 6.592297724028137, "learning_rate": 1.8764565889121872e-06, "loss": 0.6514, "step": 16411 }, { "epoch": 1.1856882258384958, "grad_norm": 7.800695182925305, "learning_rate": 1.8761733350127253e-06, "loss": 0.6457, "step": 16412 }, { "epoch": 1.1857604710387053, "grad_norm": 6.7430135103803455, "learning_rate": 1.8758900896529646e-06, "loss": 0.6874, "step": 16413 }, { "epoch": 1.1858327162389148, "grad_norm": 6.290652700743158, "learning_rate": 1.8756068528367847e-06, "loss": 0.6266, "step": 16414 }, { "epoch": 1.1859049614391244, "grad_norm": 6.277057216255288, "learning_rate": 1.8753236245680622e-06, "loss": 0.6653, "step": 16415 }, { "epoch": 1.185977206639334, "grad_norm": 6.001073741203593, "learning_rate": 1.8750404048506738e-06, "loss": 0.6047, "step": 16416 }, { "epoch": 1.1860494518395435, "grad_norm": 7.4325878687545845, "learning_rate": 1.874757193688497e-06, "loss": 0.7165, "step": 16417 }, { "epoch": 1.186121697039753, "grad_norm": 5.881825479532667, "learning_rate": 1.8744739910854087e-06, "loss": 0.6125, "step": 16418 }, { "epoch": 1.1861939422399623, "grad_norm": 6.665582409743651, "learning_rate": 1.8741907970452866e-06, "loss": 0.6251, "step": 16419 }, { "epoch": 1.1862661874401719, "grad_norm": 6.5616748109353065, "learning_rate": 1.8739076115720055e-06, "loss": 0.6396, "step": 16420 }, { "epoch": 1.1863384326403814, "grad_norm": 5.546646156425947, "learning_rate": 1.8736244346694439e-06, "loss": 0.6144, "step": 16421 }, { "epoch": 1.186410677840591, "grad_norm": 7.360090281720607, "learning_rate": 1.8733412663414778e-06, "loss": 0.6945, "step": 16422 }, { "epoch": 1.1864829230408005, "grad_norm": 8.511560544508393, "learning_rate": 1.8730581065919822e-06, "loss": 0.6674, "step": 16423 }, { "epoch": 1.18655516824101, "grad_norm": 8.316432854290467, "learning_rate": 1.8727749554248344e-06, "loss": 0.6836, "step": 16424 }, { "epoch": 1.1866274134412196, "grad_norm": 6.082017137808772, "learning_rate": 1.8724918128439102e-06, "loss": 0.5716, "step": 16425 }, { "epoch": 1.186699658641429, "grad_norm": 6.825511301192393, "learning_rate": 1.872208678853087e-06, "loss": 0.6321, "step": 16426 }, { "epoch": 1.1867719038416384, "grad_norm": 6.42535499156102, "learning_rate": 1.871925553456239e-06, "loss": 0.6613, "step": 16427 }, { "epoch": 1.186844149041848, "grad_norm": 7.296801286933618, "learning_rate": 1.8716424366572423e-06, "loss": 0.6798, "step": 16428 }, { "epoch": 1.1869163942420575, "grad_norm": 7.428769156161545, "learning_rate": 1.8713593284599728e-06, "loss": 0.6723, "step": 16429 }, { "epoch": 1.186988639442267, "grad_norm": 6.677574135709062, "learning_rate": 1.8710762288683059e-06, "loss": 0.6901, "step": 16430 }, { "epoch": 1.1870608846424766, "grad_norm": 6.6389729677601785, "learning_rate": 1.870793137886117e-06, "loss": 0.6679, "step": 16431 }, { "epoch": 1.1871331298426862, "grad_norm": 6.7393618925133945, "learning_rate": 1.8705100555172816e-06, "loss": 0.702, "step": 16432 }, { "epoch": 1.1872053750428955, "grad_norm": 7.595473105767636, "learning_rate": 1.8702269817656755e-06, "loss": 0.7719, "step": 16433 }, { "epoch": 1.187277620243105, "grad_norm": 6.447599322059511, "learning_rate": 1.8699439166351724e-06, "loss": 0.63, "step": 16434 }, { "epoch": 1.1873498654433146, "grad_norm": 7.195854837561811, "learning_rate": 1.8696608601296477e-06, "loss": 0.6787, "step": 16435 }, { "epoch": 1.187422110643524, "grad_norm": 6.695227320873715, "learning_rate": 1.8693778122529762e-06, "loss": 0.6642, "step": 16436 }, { "epoch": 1.1874943558437336, "grad_norm": 6.09581420366993, "learning_rate": 1.8690947730090343e-06, "loss": 0.6544, "step": 16437 }, { "epoch": 1.1875666010439432, "grad_norm": 7.886737130055353, "learning_rate": 1.8688117424016938e-06, "loss": 0.6326, "step": 16438 }, { "epoch": 1.1876388462441527, "grad_norm": 6.317931906907692, "learning_rate": 1.8685287204348298e-06, "loss": 0.7106, "step": 16439 }, { "epoch": 1.187711091444362, "grad_norm": 6.864872356653438, "learning_rate": 1.8682457071123192e-06, "loss": 0.6559, "step": 16440 }, { "epoch": 1.1877833366445716, "grad_norm": 7.345192625507553, "learning_rate": 1.867962702438033e-06, "loss": 0.6533, "step": 16441 }, { "epoch": 1.1878555818447811, "grad_norm": 7.305370017412438, "learning_rate": 1.867679706415847e-06, "loss": 0.5932, "step": 16442 }, { "epoch": 1.1879278270449907, "grad_norm": 7.059669129828696, "learning_rate": 1.8673967190496344e-06, "loss": 0.6527, "step": 16443 }, { "epoch": 1.1880000722452002, "grad_norm": 6.2934260992443845, "learning_rate": 1.8671137403432704e-06, "loss": 0.621, "step": 16444 }, { "epoch": 1.1880723174454098, "grad_norm": 5.737388047931639, "learning_rate": 1.8668307703006271e-06, "loss": 0.6314, "step": 16445 }, { "epoch": 1.1881445626456193, "grad_norm": 5.934838270572465, "learning_rate": 1.8665478089255796e-06, "loss": 0.594, "step": 16446 }, { "epoch": 1.1882168078458288, "grad_norm": 7.272132502424065, "learning_rate": 1.8662648562220006e-06, "loss": 0.6511, "step": 16447 }, { "epoch": 1.1882890530460382, "grad_norm": 6.673720950906354, "learning_rate": 1.8659819121937633e-06, "loss": 0.6667, "step": 16448 }, { "epoch": 1.1883612982462477, "grad_norm": 6.030690221553404, "learning_rate": 1.8656989768447414e-06, "loss": 0.6448, "step": 16449 }, { "epoch": 1.1884335434464572, "grad_norm": 6.6712946404858915, "learning_rate": 1.865416050178808e-06, "loss": 0.6571, "step": 16450 }, { "epoch": 1.1885057886466668, "grad_norm": 6.700845682093015, "learning_rate": 1.8651331321998374e-06, "loss": 0.6631, "step": 16451 }, { "epoch": 1.1885780338468763, "grad_norm": 6.416300412248838, "learning_rate": 1.8648502229116997e-06, "loss": 0.6052, "step": 16452 }, { "epoch": 1.1886502790470859, "grad_norm": 7.2978185660342305, "learning_rate": 1.8645673223182692e-06, "loss": 0.7384, "step": 16453 }, { "epoch": 1.1887225242472954, "grad_norm": 6.1880181606152, "learning_rate": 1.8642844304234199e-06, "loss": 0.679, "step": 16454 }, { "epoch": 1.188794769447505, "grad_norm": 7.511302061619455, "learning_rate": 1.8640015472310224e-06, "loss": 0.639, "step": 16455 }, { "epoch": 1.1888670146477143, "grad_norm": 6.94830251038906, "learning_rate": 1.8637186727449496e-06, "loss": 0.6053, "step": 16456 }, { "epoch": 1.1889392598479238, "grad_norm": 5.800334710296022, "learning_rate": 1.8634358069690744e-06, "loss": 0.6257, "step": 16457 }, { "epoch": 1.1890115050481334, "grad_norm": 6.283295103946754, "learning_rate": 1.8631529499072692e-06, "loss": 0.5755, "step": 16458 }, { "epoch": 1.189083750248343, "grad_norm": 6.893789243889035, "learning_rate": 1.862870101563405e-06, "loss": 0.6406, "step": 16459 }, { "epoch": 1.1891559954485524, "grad_norm": 6.201479446112568, "learning_rate": 1.862587261941355e-06, "loss": 0.6792, "step": 16460 }, { "epoch": 1.189228240648762, "grad_norm": 7.804397409601971, "learning_rate": 1.8623044310449895e-06, "loss": 0.6397, "step": 16461 }, { "epoch": 1.1893004858489715, "grad_norm": 6.046574074997642, "learning_rate": 1.8620216088781823e-06, "loss": 0.611, "step": 16462 }, { "epoch": 1.1893727310491808, "grad_norm": 7.069875731070752, "learning_rate": 1.8617387954448036e-06, "loss": 0.6315, "step": 16463 }, { "epoch": 1.1894449762493904, "grad_norm": 6.892156659949988, "learning_rate": 1.8614559907487245e-06, "loss": 0.7052, "step": 16464 }, { "epoch": 1.1895172214496, "grad_norm": 6.517353953666553, "learning_rate": 1.8611731947938188e-06, "loss": 0.6139, "step": 16465 }, { "epoch": 1.1895894666498095, "grad_norm": 6.083294628838863, "learning_rate": 1.8608904075839545e-06, "loss": 0.6007, "step": 16466 }, { "epoch": 1.189661711850019, "grad_norm": 7.773353093134705, "learning_rate": 1.860607629123004e-06, "loss": 0.6793, "step": 16467 }, { "epoch": 1.1897339570502286, "grad_norm": 6.6624972975047285, "learning_rate": 1.8603248594148393e-06, "loss": 0.6919, "step": 16468 }, { "epoch": 1.189806202250438, "grad_norm": 6.423310873444029, "learning_rate": 1.8600420984633317e-06, "loss": 0.6542, "step": 16469 }, { "epoch": 1.1898784474506474, "grad_norm": 6.919780198944832, "learning_rate": 1.8597593462723496e-06, "loss": 0.6891, "step": 16470 }, { "epoch": 1.189950692650857, "grad_norm": 7.316648692092309, "learning_rate": 1.8594766028457647e-06, "loss": 0.5775, "step": 16471 }, { "epoch": 1.1900229378510665, "grad_norm": 6.460355610227327, "learning_rate": 1.8591938681874485e-06, "loss": 0.7218, "step": 16472 }, { "epoch": 1.190095183051276, "grad_norm": 7.496800566761328, "learning_rate": 1.8589111423012702e-06, "loss": 0.6436, "step": 16473 }, { "epoch": 1.1901674282514856, "grad_norm": 7.145606509094417, "learning_rate": 1.8586284251911008e-06, "loss": 0.651, "step": 16474 }, { "epoch": 1.1902396734516951, "grad_norm": 7.588937749499036, "learning_rate": 1.8583457168608098e-06, "loss": 0.6814, "step": 16475 }, { "epoch": 1.1903119186519047, "grad_norm": 7.131100368685167, "learning_rate": 1.8580630173142685e-06, "loss": 0.6722, "step": 16476 }, { "epoch": 1.190384163852114, "grad_norm": 6.529430373205243, "learning_rate": 1.8577803265553457e-06, "loss": 0.6654, "step": 16477 }, { "epoch": 1.1904564090523235, "grad_norm": 6.342054619704596, "learning_rate": 1.857497644587911e-06, "loss": 0.6452, "step": 16478 }, { "epoch": 1.190528654252533, "grad_norm": 7.983732611985307, "learning_rate": 1.8572149714158366e-06, "loss": 0.7103, "step": 16479 }, { "epoch": 1.1906008994527426, "grad_norm": 8.328028350899084, "learning_rate": 1.8569323070429883e-06, "loss": 0.6459, "step": 16480 }, { "epoch": 1.1906731446529522, "grad_norm": 7.190911586310784, "learning_rate": 1.8566496514732375e-06, "loss": 0.6562, "step": 16481 }, { "epoch": 1.1907453898531617, "grad_norm": 7.643831606013685, "learning_rate": 1.8563670047104536e-06, "loss": 0.5965, "step": 16482 }, { "epoch": 1.1908176350533712, "grad_norm": 6.414385732853457, "learning_rate": 1.856084366758507e-06, "loss": 0.6047, "step": 16483 }, { "epoch": 1.1908898802535806, "grad_norm": 8.640171584085666, "learning_rate": 1.8558017376212639e-06, "loss": 0.704, "step": 16484 }, { "epoch": 1.19096212545379, "grad_norm": 7.960686407350303, "learning_rate": 1.8555191173025954e-06, "loss": 0.6088, "step": 16485 }, { "epoch": 1.1910343706539996, "grad_norm": 8.777071681642317, "learning_rate": 1.8552365058063692e-06, "loss": 0.6618, "step": 16486 }, { "epoch": 1.1911066158542092, "grad_norm": 6.381210891947528, "learning_rate": 1.8549539031364555e-06, "loss": 0.6176, "step": 16487 }, { "epoch": 1.1911788610544187, "grad_norm": 7.108349267092784, "learning_rate": 1.8546713092967216e-06, "loss": 0.6702, "step": 16488 }, { "epoch": 1.1912511062546283, "grad_norm": 7.411270178530782, "learning_rate": 1.8543887242910362e-06, "loss": 0.6845, "step": 16489 }, { "epoch": 1.1913233514548378, "grad_norm": 6.1469116644257955, "learning_rate": 1.8541061481232685e-06, "loss": 0.6728, "step": 16490 }, { "epoch": 1.1913955966550471, "grad_norm": 8.246696966923048, "learning_rate": 1.8538235807972854e-06, "loss": 0.6253, "step": 16491 }, { "epoch": 1.1914678418552567, "grad_norm": 6.294226456215958, "learning_rate": 1.8535410223169558e-06, "loss": 0.6964, "step": 16492 }, { "epoch": 1.1915400870554662, "grad_norm": 7.189654483478109, "learning_rate": 1.853258472686148e-06, "loss": 0.791, "step": 16493 }, { "epoch": 1.1916123322556758, "grad_norm": 6.988874040327419, "learning_rate": 1.8529759319087303e-06, "loss": 0.6745, "step": 16494 }, { "epoch": 1.1916845774558853, "grad_norm": 8.018009894357553, "learning_rate": 1.8526933999885683e-06, "loss": 0.6586, "step": 16495 }, { "epoch": 1.1917568226560948, "grad_norm": 6.688697778978777, "learning_rate": 1.8524108769295313e-06, "loss": 0.703, "step": 16496 }, { "epoch": 1.1918290678563044, "grad_norm": 6.110202899067546, "learning_rate": 1.852128362735488e-06, "loss": 0.6062, "step": 16497 }, { "epoch": 1.1919013130565137, "grad_norm": 5.80542184695005, "learning_rate": 1.8518458574103032e-06, "loss": 0.6032, "step": 16498 }, { "epoch": 1.1919735582567232, "grad_norm": 5.820299719470029, "learning_rate": 1.851563360957845e-06, "loss": 0.6773, "step": 16499 }, { "epoch": 1.1920458034569328, "grad_norm": 6.07356181134035, "learning_rate": 1.8512808733819815e-06, "loss": 0.6764, "step": 16500 }, { "epoch": 1.1921180486571423, "grad_norm": 6.027493428654295, "learning_rate": 1.8509983946865791e-06, "loss": 0.6306, "step": 16501 }, { "epoch": 1.1921902938573519, "grad_norm": 6.15795100379871, "learning_rate": 1.8507159248755048e-06, "loss": 0.6467, "step": 16502 }, { "epoch": 1.1922625390575614, "grad_norm": 8.028129713651788, "learning_rate": 1.850433463952625e-06, "loss": 0.7188, "step": 16503 }, { "epoch": 1.192334784257771, "grad_norm": 6.586125164141232, "learning_rate": 1.8501510119218073e-06, "loss": 0.6628, "step": 16504 }, { "epoch": 1.1924070294579803, "grad_norm": 7.303775636600887, "learning_rate": 1.8498685687869169e-06, "loss": 0.6768, "step": 16505 }, { "epoch": 1.1924792746581898, "grad_norm": 8.192671911340952, "learning_rate": 1.8495861345518211e-06, "loss": 0.7312, "step": 16506 }, { "epoch": 1.1925515198583994, "grad_norm": 8.539284077637525, "learning_rate": 1.8493037092203864e-06, "loss": 0.7052, "step": 16507 }, { "epoch": 1.192623765058609, "grad_norm": 7.035890124739876, "learning_rate": 1.8490212927964796e-06, "loss": 0.7053, "step": 16508 }, { "epoch": 1.1926960102588184, "grad_norm": 6.307135086783038, "learning_rate": 1.8487388852839643e-06, "loss": 0.573, "step": 16509 }, { "epoch": 1.192768255459028, "grad_norm": 9.29144857062274, "learning_rate": 1.8484564866867083e-06, "loss": 0.715, "step": 16510 }, { "epoch": 1.1928405006592375, "grad_norm": 8.06057598015402, "learning_rate": 1.8481740970085774e-06, "loss": 0.6757, "step": 16511 }, { "epoch": 1.1929127458594468, "grad_norm": 6.5288659814257155, "learning_rate": 1.847891716253438e-06, "loss": 0.6534, "step": 16512 }, { "epoch": 1.1929849910596564, "grad_norm": 7.115875022626978, "learning_rate": 1.8476093444251537e-06, "loss": 0.6455, "step": 16513 }, { "epoch": 1.193057236259866, "grad_norm": 7.8883539236339875, "learning_rate": 1.8473269815275908e-06, "loss": 0.6572, "step": 16514 }, { "epoch": 1.1931294814600755, "grad_norm": 8.241284014462265, "learning_rate": 1.847044627564615e-06, "loss": 0.6408, "step": 16515 }, { "epoch": 1.193201726660285, "grad_norm": 6.6805072482189445, "learning_rate": 1.846762282540091e-06, "loss": 0.6433, "step": 16516 }, { "epoch": 1.1932739718604946, "grad_norm": 7.294205171034637, "learning_rate": 1.8464799464578841e-06, "loss": 0.6356, "step": 16517 }, { "epoch": 1.193346217060704, "grad_norm": 7.543824900434657, "learning_rate": 1.8461976193218595e-06, "loss": 0.6696, "step": 16518 }, { "epoch": 1.1934184622609136, "grad_norm": 6.318787417881143, "learning_rate": 1.8459153011358822e-06, "loss": 0.6658, "step": 16519 }, { "epoch": 1.193490707461123, "grad_norm": 6.409844706494608, "learning_rate": 1.845632991903816e-06, "loss": 0.6092, "step": 16520 }, { "epoch": 1.1935629526613325, "grad_norm": 5.995066521842636, "learning_rate": 1.845350691629526e-06, "loss": 0.6811, "step": 16521 }, { "epoch": 1.193635197861542, "grad_norm": 6.320970046044994, "learning_rate": 1.8450684003168778e-06, "loss": 0.7549, "step": 16522 }, { "epoch": 1.1937074430617516, "grad_norm": 6.342379116519764, "learning_rate": 1.844786117969734e-06, "loss": 0.6385, "step": 16523 }, { "epoch": 1.1937796882619611, "grad_norm": 6.381558205151186, "learning_rate": 1.8445038445919586e-06, "loss": 0.5641, "step": 16524 }, { "epoch": 1.1938519334621707, "grad_norm": 6.402509388110552, "learning_rate": 1.8442215801874175e-06, "loss": 0.6683, "step": 16525 }, { "epoch": 1.1939241786623802, "grad_norm": 7.194082148727973, "learning_rate": 1.8439393247599744e-06, "loss": 0.6516, "step": 16526 }, { "epoch": 1.1939964238625898, "grad_norm": 6.218433774642867, "learning_rate": 1.8436570783134915e-06, "loss": 0.6325, "step": 16527 }, { "epoch": 1.194068669062799, "grad_norm": 6.657508695269416, "learning_rate": 1.8433748408518337e-06, "loss": 0.6907, "step": 16528 }, { "epoch": 1.1941409142630086, "grad_norm": 6.134151550669581, "learning_rate": 1.843092612378865e-06, "loss": 0.6316, "step": 16529 }, { "epoch": 1.1942131594632182, "grad_norm": 7.8218701132844055, "learning_rate": 1.842810392898448e-06, "loss": 0.6311, "step": 16530 }, { "epoch": 1.1942854046634277, "grad_norm": 5.3297429914222185, "learning_rate": 1.8425281824144463e-06, "loss": 0.6505, "step": 16531 }, { "epoch": 1.1943576498636372, "grad_norm": 6.634992153735146, "learning_rate": 1.8422459809307237e-06, "loss": 0.599, "step": 16532 }, { "epoch": 1.1944298950638468, "grad_norm": 7.848885485018468, "learning_rate": 1.841963788451143e-06, "loss": 0.6549, "step": 16533 }, { "epoch": 1.1945021402640563, "grad_norm": 6.1674312770495385, "learning_rate": 1.8416816049795666e-06, "loss": 0.7018, "step": 16534 }, { "epoch": 1.1945743854642656, "grad_norm": 6.024782814765018, "learning_rate": 1.841399430519858e-06, "loss": 0.6145, "step": 16535 }, { "epoch": 1.1946466306644752, "grad_norm": 7.946203071248036, "learning_rate": 1.8411172650758807e-06, "loss": 0.6579, "step": 16536 }, { "epoch": 1.1947188758646847, "grad_norm": 5.947637637174508, "learning_rate": 1.840835108651496e-06, "loss": 0.6426, "step": 16537 }, { "epoch": 1.1947911210648943, "grad_norm": 6.06412948260803, "learning_rate": 1.8405529612505656e-06, "loss": 0.6125, "step": 16538 }, { "epoch": 1.1948633662651038, "grad_norm": 6.705127905892251, "learning_rate": 1.840270822876954e-06, "loss": 0.686, "step": 16539 }, { "epoch": 1.1949356114653134, "grad_norm": 6.411874971918712, "learning_rate": 1.8399886935345234e-06, "loss": 0.7178, "step": 16540 }, { "epoch": 1.195007856665523, "grad_norm": 9.39755019060611, "learning_rate": 1.8397065732271342e-06, "loss": 0.6446, "step": 16541 }, { "epoch": 1.1950801018657322, "grad_norm": 7.038010259258211, "learning_rate": 1.8394244619586493e-06, "loss": 0.6851, "step": 16542 }, { "epoch": 1.1951523470659418, "grad_norm": 6.526847268016141, "learning_rate": 1.8391423597329305e-06, "loss": 0.6498, "step": 16543 }, { "epoch": 1.1952245922661513, "grad_norm": 6.279182966987835, "learning_rate": 1.8388602665538407e-06, "loss": 0.7005, "step": 16544 }, { "epoch": 1.1952968374663608, "grad_norm": 6.9443957450006835, "learning_rate": 1.8385781824252397e-06, "loss": 0.541, "step": 16545 }, { "epoch": 1.1953690826665704, "grad_norm": 6.559766218160069, "learning_rate": 1.8382961073509897e-06, "loss": 0.5938, "step": 16546 }, { "epoch": 1.19544132786678, "grad_norm": 6.325365006239418, "learning_rate": 1.8380140413349529e-06, "loss": 0.697, "step": 16547 }, { "epoch": 1.1955135730669895, "grad_norm": 8.46550618225957, "learning_rate": 1.837731984380989e-06, "loss": 0.7274, "step": 16548 }, { "epoch": 1.1955858182671988, "grad_norm": 5.9239466121104405, "learning_rate": 1.8374499364929605e-06, "loss": 0.623, "step": 16549 }, { "epoch": 1.1956580634674083, "grad_norm": 6.918247762230648, "learning_rate": 1.8371678976747277e-06, "loss": 0.598, "step": 16550 }, { "epoch": 1.1957303086676179, "grad_norm": 6.392410140914612, "learning_rate": 1.8368858679301527e-06, "loss": 0.623, "step": 16551 }, { "epoch": 1.1958025538678274, "grad_norm": 6.872090781841228, "learning_rate": 1.8366038472630937e-06, "loss": 0.6146, "step": 16552 }, { "epoch": 1.195874799068037, "grad_norm": 7.140011950998429, "learning_rate": 1.8363218356774135e-06, "loss": 0.6449, "step": 16553 }, { "epoch": 1.1959470442682465, "grad_norm": 7.710487793212395, "learning_rate": 1.836039833176973e-06, "loss": 0.7487, "step": 16554 }, { "epoch": 1.196019289468456, "grad_norm": 6.84109292205303, "learning_rate": 1.8357578397656309e-06, "loss": 0.6019, "step": 16555 }, { "epoch": 1.1960915346686654, "grad_norm": 7.246745464290763, "learning_rate": 1.835475855447248e-06, "loss": 0.6466, "step": 16556 }, { "epoch": 1.196163779868875, "grad_norm": 7.620215243606096, "learning_rate": 1.8351938802256846e-06, "loss": 0.6053, "step": 16557 }, { "epoch": 1.1962360250690844, "grad_norm": 6.344398202336033, "learning_rate": 1.8349119141048017e-06, "loss": 0.6625, "step": 16558 }, { "epoch": 1.196308270269294, "grad_norm": 7.322505007840128, "learning_rate": 1.8346299570884575e-06, "loss": 0.6577, "step": 16559 }, { "epoch": 1.1963805154695035, "grad_norm": 6.922895300349386, "learning_rate": 1.8343480091805126e-06, "loss": 0.6844, "step": 16560 }, { "epoch": 1.196452760669713, "grad_norm": 6.380449247418002, "learning_rate": 1.8340660703848273e-06, "loss": 0.715, "step": 16561 }, { "epoch": 1.1965250058699226, "grad_norm": 7.587095256737881, "learning_rate": 1.8337841407052597e-06, "loss": 0.7096, "step": 16562 }, { "epoch": 1.196597251070132, "grad_norm": 7.906326956996386, "learning_rate": 1.8335022201456699e-06, "loss": 0.7375, "step": 16563 }, { "epoch": 1.1966694962703415, "grad_norm": 6.746922109138162, "learning_rate": 1.833220308709917e-06, "loss": 0.684, "step": 16564 }, { "epoch": 1.196741741470551, "grad_norm": 6.393626508517675, "learning_rate": 1.832938406401862e-06, "loss": 0.6678, "step": 16565 }, { "epoch": 1.1968139866707606, "grad_norm": 6.401920400032291, "learning_rate": 1.8326565132253605e-06, "loss": 0.5975, "step": 16566 }, { "epoch": 1.19688623187097, "grad_norm": 7.193273729720258, "learning_rate": 1.8323746291842733e-06, "loss": 0.6389, "step": 16567 }, { "epoch": 1.1969584770711796, "grad_norm": 6.180933078089449, "learning_rate": 1.8320927542824596e-06, "loss": 0.6542, "step": 16568 }, { "epoch": 1.1970307222713892, "grad_norm": 6.073921691892173, "learning_rate": 1.8318108885237782e-06, "loss": 0.6231, "step": 16569 }, { "epoch": 1.1971029674715985, "grad_norm": 6.978802328895752, "learning_rate": 1.831529031912086e-06, "loss": 0.7213, "step": 16570 }, { "epoch": 1.197175212671808, "grad_norm": 8.103516331118913, "learning_rate": 1.831247184451242e-06, "loss": 0.7041, "step": 16571 }, { "epoch": 1.1972474578720176, "grad_norm": 7.819556139669025, "learning_rate": 1.8309653461451054e-06, "loss": 0.693, "step": 16572 }, { "epoch": 1.1973197030722271, "grad_norm": 7.372614070492356, "learning_rate": 1.8306835169975338e-06, "loss": 0.6731, "step": 16573 }, { "epoch": 1.1973919482724367, "grad_norm": 6.256316693204862, "learning_rate": 1.8304016970123845e-06, "loss": 0.6247, "step": 16574 }, { "epoch": 1.1974641934726462, "grad_norm": 6.866543649602362, "learning_rate": 1.8301198861935165e-06, "loss": 0.6031, "step": 16575 }, { "epoch": 1.1975364386728558, "grad_norm": 6.5700503556689265, "learning_rate": 1.8298380845447872e-06, "loss": 0.5949, "step": 16576 }, { "epoch": 1.197608683873065, "grad_norm": 6.408551058731082, "learning_rate": 1.8295562920700542e-06, "loss": 0.6652, "step": 16577 }, { "epoch": 1.1976809290732746, "grad_norm": 6.352228521545915, "learning_rate": 1.8292745087731745e-06, "loss": 0.6386, "step": 16578 }, { "epoch": 1.1977531742734842, "grad_norm": 6.4935258755085, "learning_rate": 1.8289927346580073e-06, "loss": 0.6338, "step": 16579 }, { "epoch": 1.1978254194736937, "grad_norm": 6.874589110146847, "learning_rate": 1.8287109697284069e-06, "loss": 0.659, "step": 16580 }, { "epoch": 1.1978976646739032, "grad_norm": 7.479020084513783, "learning_rate": 1.8284292139882326e-06, "loss": 0.7318, "step": 16581 }, { "epoch": 1.1979699098741128, "grad_norm": 6.399433957340698, "learning_rate": 1.8281474674413413e-06, "loss": 0.5875, "step": 16582 }, { "epoch": 1.1980421550743223, "grad_norm": 6.927962887406882, "learning_rate": 1.8278657300915902e-06, "loss": 0.6669, "step": 16583 }, { "epoch": 1.1981144002745316, "grad_norm": 6.390022678484245, "learning_rate": 1.8275840019428342e-06, "loss": 0.578, "step": 16584 }, { "epoch": 1.1981866454747412, "grad_norm": 5.823184130627285, "learning_rate": 1.8273022829989312e-06, "loss": 0.6259, "step": 16585 }, { "epoch": 1.1982588906749507, "grad_norm": 5.2588890753314645, "learning_rate": 1.8270205732637383e-06, "loss": 0.6547, "step": 16586 }, { "epoch": 1.1983311358751603, "grad_norm": 7.895668880256466, "learning_rate": 1.8267388727411106e-06, "loss": 0.7156, "step": 16587 }, { "epoch": 1.1984033810753698, "grad_norm": 6.196135380900523, "learning_rate": 1.8264571814349053e-06, "loss": 0.6744, "step": 16588 }, { "epoch": 1.1984756262755794, "grad_norm": 8.557560965041976, "learning_rate": 1.8261754993489777e-06, "loss": 0.614, "step": 16589 }, { "epoch": 1.198547871475789, "grad_norm": 7.262524014825559, "learning_rate": 1.825893826487185e-06, "loss": 0.6942, "step": 16590 }, { "epoch": 1.1986201166759982, "grad_norm": 7.647263090935372, "learning_rate": 1.825612162853382e-06, "loss": 0.6781, "step": 16591 }, { "epoch": 1.1986923618762078, "grad_norm": 9.197675195608195, "learning_rate": 1.825330508451425e-06, "loss": 0.641, "step": 16592 }, { "epoch": 1.1987646070764173, "grad_norm": 7.090568467608761, "learning_rate": 1.8250488632851693e-06, "loss": 0.5833, "step": 16593 }, { "epoch": 1.1988368522766268, "grad_norm": 6.1350155900603225, "learning_rate": 1.824767227358471e-06, "loss": 0.6464, "step": 16594 }, { "epoch": 1.1989090974768364, "grad_norm": 6.981837825223715, "learning_rate": 1.824485600675185e-06, "loss": 0.6664, "step": 16595 }, { "epoch": 1.198981342677046, "grad_norm": 6.617010517298213, "learning_rate": 1.824203983239166e-06, "loss": 0.6614, "step": 16596 }, { "epoch": 1.1990535878772555, "grad_norm": 5.784188838129053, "learning_rate": 1.823922375054271e-06, "loss": 0.6528, "step": 16597 }, { "epoch": 1.199125833077465, "grad_norm": 6.548932974294869, "learning_rate": 1.823640776124353e-06, "loss": 0.6503, "step": 16598 }, { "epoch": 1.1991980782776746, "grad_norm": 6.549314495061648, "learning_rate": 1.8233591864532674e-06, "loss": 0.6472, "step": 16599 }, { "epoch": 1.1992703234778839, "grad_norm": 6.160720673551861, "learning_rate": 1.8230776060448684e-06, "loss": 0.6453, "step": 16600 }, { "epoch": 1.1993425686780934, "grad_norm": 7.079585112145161, "learning_rate": 1.8227960349030133e-06, "loss": 0.7066, "step": 16601 }, { "epoch": 1.199414813878303, "grad_norm": 5.963273816708161, "learning_rate": 1.8225144730315537e-06, "loss": 0.6581, "step": 16602 }, { "epoch": 1.1994870590785125, "grad_norm": 6.4352869470459755, "learning_rate": 1.8222329204343448e-06, "loss": 0.61, "step": 16603 }, { "epoch": 1.199559304278722, "grad_norm": 6.6058285263863405, "learning_rate": 1.8219513771152414e-06, "loss": 0.6515, "step": 16604 }, { "epoch": 1.1996315494789316, "grad_norm": 6.543899187575282, "learning_rate": 1.8216698430780965e-06, "loss": 0.6731, "step": 16605 }, { "epoch": 1.1997037946791411, "grad_norm": 6.937272626570281, "learning_rate": 1.821388318326765e-06, "loss": 0.66, "step": 16606 }, { "epoch": 1.1997760398793504, "grad_norm": 5.419635233129686, "learning_rate": 1.8211068028651005e-06, "loss": 0.6269, "step": 16607 }, { "epoch": 1.19984828507956, "grad_norm": 7.443336568861021, "learning_rate": 1.8208252966969572e-06, "loss": 0.6844, "step": 16608 }, { "epoch": 1.1999205302797695, "grad_norm": 6.159887178552074, "learning_rate": 1.8205437998261876e-06, "loss": 0.6796, "step": 16609 }, { "epoch": 1.199992775479979, "grad_norm": 5.407952365868611, "learning_rate": 1.820262312256646e-06, "loss": 0.6696, "step": 16610 }, { "epoch": 1.2000650206801886, "grad_norm": 6.341957778698694, "learning_rate": 1.8199808339921868e-06, "loss": 0.6129, "step": 16611 }, { "epoch": 1.2001372658803982, "grad_norm": 6.853688170206158, "learning_rate": 1.8196993650366607e-06, "loss": 0.6086, "step": 16612 }, { "epoch": 1.2002095110806077, "grad_norm": 7.3238330627616826, "learning_rate": 1.8194179053939221e-06, "loss": 0.6104, "step": 16613 }, { "epoch": 1.200281756280817, "grad_norm": 6.290172711110221, "learning_rate": 1.8191364550678237e-06, "loss": 0.6738, "step": 16614 }, { "epoch": 1.2003540014810266, "grad_norm": 8.059069472900886, "learning_rate": 1.8188550140622194e-06, "loss": 0.6294, "step": 16615 }, { "epoch": 1.200426246681236, "grad_norm": 6.349391289318453, "learning_rate": 1.8185735823809606e-06, "loss": 0.5913, "step": 16616 }, { "epoch": 1.2004984918814456, "grad_norm": 6.757462791258575, "learning_rate": 1.8182921600279002e-06, "loss": 0.7, "step": 16617 }, { "epoch": 1.2005707370816552, "grad_norm": 6.293331843769307, "learning_rate": 1.8180107470068907e-06, "loss": 0.6565, "step": 16618 }, { "epoch": 1.2006429822818647, "grad_norm": 6.003047804805267, "learning_rate": 1.8177293433217853e-06, "loss": 0.5605, "step": 16619 }, { "epoch": 1.2007152274820743, "grad_norm": 7.803345710954242, "learning_rate": 1.8174479489764348e-06, "loss": 0.6618, "step": 16620 }, { "epoch": 1.2007874726822836, "grad_norm": 7.7249496211412225, "learning_rate": 1.8171665639746921e-06, "loss": 0.6805, "step": 16621 }, { "epoch": 1.2008597178824931, "grad_norm": 7.571955546833503, "learning_rate": 1.8168851883204092e-06, "loss": 0.6014, "step": 16622 }, { "epoch": 1.2009319630827027, "grad_norm": 6.413673231088839, "learning_rate": 1.8166038220174372e-06, "loss": 0.699, "step": 16623 }, { "epoch": 1.2010042082829122, "grad_norm": 6.2366260107371145, "learning_rate": 1.8163224650696284e-06, "loss": 0.6891, "step": 16624 }, { "epoch": 1.2010764534831218, "grad_norm": 6.5340841614458425, "learning_rate": 1.816041117480834e-06, "loss": 0.6306, "step": 16625 }, { "epoch": 1.2011486986833313, "grad_norm": 6.133752291647664, "learning_rate": 1.8157597792549069e-06, "loss": 0.6286, "step": 16626 }, { "epoch": 1.2012209438835408, "grad_norm": 6.007176240284276, "learning_rate": 1.8154784503956963e-06, "loss": 0.6639, "step": 16627 }, { "epoch": 1.2012931890837502, "grad_norm": 7.000591525561513, "learning_rate": 1.8151971309070537e-06, "loss": 0.5907, "step": 16628 }, { "epoch": 1.2013654342839597, "grad_norm": 10.999228710530684, "learning_rate": 1.8149158207928313e-06, "loss": 0.7088, "step": 16629 }, { "epoch": 1.2014376794841692, "grad_norm": 6.038159459588975, "learning_rate": 1.8146345200568793e-06, "loss": 0.5767, "step": 16630 }, { "epoch": 1.2015099246843788, "grad_norm": 6.423830797259249, "learning_rate": 1.814353228703048e-06, "loss": 0.6724, "step": 16631 }, { "epoch": 1.2015821698845883, "grad_norm": 5.381471685612753, "learning_rate": 1.8140719467351892e-06, "loss": 0.5884, "step": 16632 }, { "epoch": 1.2016544150847979, "grad_norm": 6.403998746968325, "learning_rate": 1.813790674157153e-06, "loss": 0.7206, "step": 16633 }, { "epoch": 1.2017266602850074, "grad_norm": 6.438699258742503, "learning_rate": 1.813509410972789e-06, "loss": 0.7042, "step": 16634 }, { "epoch": 1.2017989054852167, "grad_norm": 7.038003755092026, "learning_rate": 1.8132281571859483e-06, "loss": 0.6549, "step": 16635 }, { "epoch": 1.2018711506854263, "grad_norm": 5.8770108941162045, "learning_rate": 1.8129469128004823e-06, "loss": 0.6367, "step": 16636 }, { "epoch": 1.2019433958856358, "grad_norm": 6.676563482733077, "learning_rate": 1.8126656778202376e-06, "loss": 0.6377, "step": 16637 }, { "epoch": 1.2020156410858454, "grad_norm": 6.011279947131326, "learning_rate": 1.8123844522490666e-06, "loss": 0.6337, "step": 16638 }, { "epoch": 1.202087886286055, "grad_norm": 7.365804597059343, "learning_rate": 1.8121032360908185e-06, "loss": 0.7007, "step": 16639 }, { "epoch": 1.2021601314862644, "grad_norm": 6.202295050842476, "learning_rate": 1.8118220293493443e-06, "loss": 0.6226, "step": 16640 }, { "epoch": 1.202232376686474, "grad_norm": 5.617850273834178, "learning_rate": 1.811540832028491e-06, "loss": 0.623, "step": 16641 }, { "epoch": 1.2023046218866833, "grad_norm": 7.805290130168979, "learning_rate": 1.8112596441321095e-06, "loss": 0.7192, "step": 16642 }, { "epoch": 1.2023768670868928, "grad_norm": 6.9274045329569445, "learning_rate": 1.8109784656640478e-06, "loss": 0.6634, "step": 16643 }, { "epoch": 1.2024491122871024, "grad_norm": 6.775774876949903, "learning_rate": 1.8106972966281575e-06, "loss": 0.7116, "step": 16644 }, { "epoch": 1.202521357487312, "grad_norm": 6.798753500739555, "learning_rate": 1.810416137028285e-06, "loss": 0.6355, "step": 16645 }, { "epoch": 1.2025936026875215, "grad_norm": 6.6832607059226765, "learning_rate": 1.81013498686828e-06, "loss": 0.7011, "step": 16646 }, { "epoch": 1.202665847887731, "grad_norm": 7.229468640736508, "learning_rate": 1.8098538461519921e-06, "loss": 0.6375, "step": 16647 }, { "epoch": 1.2027380930879406, "grad_norm": 6.497061431984198, "learning_rate": 1.8095727148832687e-06, "loss": 0.6835, "step": 16648 }, { "epoch": 1.2028103382881499, "grad_norm": 7.1400063411566945, "learning_rate": 1.8092915930659588e-06, "loss": 0.6839, "step": 16649 }, { "epoch": 1.2028825834883594, "grad_norm": 6.310234947606479, "learning_rate": 1.8090104807039105e-06, "loss": 0.7378, "step": 16650 }, { "epoch": 1.202954828688569, "grad_norm": 5.796620982894771, "learning_rate": 1.8087293778009729e-06, "loss": 0.6031, "step": 16651 }, { "epoch": 1.2030270738887785, "grad_norm": 6.517642506986475, "learning_rate": 1.8084482843609927e-06, "loss": 0.6214, "step": 16652 }, { "epoch": 1.203099319088988, "grad_norm": 6.2353524037049155, "learning_rate": 1.8081672003878186e-06, "loss": 0.6425, "step": 16653 }, { "epoch": 1.2031715642891976, "grad_norm": 7.921765448962065, "learning_rate": 1.8078861258852992e-06, "loss": 0.5797, "step": 16654 }, { "epoch": 1.2032438094894071, "grad_norm": 6.933913945071445, "learning_rate": 1.8076050608572804e-06, "loss": 0.6675, "step": 16655 }, { "epoch": 1.2033160546896164, "grad_norm": 7.964413409092206, "learning_rate": 1.8073240053076108e-06, "loss": 0.6699, "step": 16656 }, { "epoch": 1.203388299889826, "grad_norm": 6.281008587653246, "learning_rate": 1.8070429592401373e-06, "loss": 0.6584, "step": 16657 }, { "epoch": 1.2034605450900355, "grad_norm": 6.413212858140478, "learning_rate": 1.806761922658709e-06, "loss": 0.6001, "step": 16658 }, { "epoch": 1.203532790290245, "grad_norm": 6.629229077473571, "learning_rate": 1.806480895567171e-06, "loss": 0.6539, "step": 16659 }, { "epoch": 1.2036050354904546, "grad_norm": 6.051338854734087, "learning_rate": 1.8061998779693706e-06, "loss": 0.6204, "step": 16660 }, { "epoch": 1.2036772806906642, "grad_norm": 6.07604034683664, "learning_rate": 1.805918869869156e-06, "loss": 0.6434, "step": 16661 }, { "epoch": 1.2037495258908737, "grad_norm": 6.89489890352952, "learning_rate": 1.8056378712703727e-06, "loss": 0.7696, "step": 16662 }, { "epoch": 1.203821771091083, "grad_norm": 7.202287405968595, "learning_rate": 1.8053568821768674e-06, "loss": 0.7057, "step": 16663 }, { "epoch": 1.2038940162912926, "grad_norm": 6.2250218034845, "learning_rate": 1.8050759025924874e-06, "loss": 0.7108, "step": 16664 }, { "epoch": 1.203966261491502, "grad_norm": 7.287109113257165, "learning_rate": 1.8047949325210793e-06, "loss": 0.621, "step": 16665 }, { "epoch": 1.2040385066917116, "grad_norm": 6.955724474582687, "learning_rate": 1.8045139719664881e-06, "loss": 0.6676, "step": 16666 }, { "epoch": 1.2041107518919212, "grad_norm": 6.272380278970151, "learning_rate": 1.8042330209325604e-06, "loss": 0.6894, "step": 16667 }, { "epoch": 1.2041829970921307, "grad_norm": 6.593866283964271, "learning_rate": 1.803952079423144e-06, "loss": 0.6411, "step": 16668 }, { "epoch": 1.2042552422923403, "grad_norm": 6.825708585852066, "learning_rate": 1.8036711474420816e-06, "loss": 0.6269, "step": 16669 }, { "epoch": 1.2043274874925498, "grad_norm": 6.66903247181384, "learning_rate": 1.8033902249932205e-06, "loss": 0.7063, "step": 16670 }, { "epoch": 1.2043997326927591, "grad_norm": 6.555642459674111, "learning_rate": 1.8031093120804063e-06, "loss": 0.615, "step": 16671 }, { "epoch": 1.2044719778929687, "grad_norm": 6.714402890559801, "learning_rate": 1.802828408707486e-06, "loss": 0.6498, "step": 16672 }, { "epoch": 1.2045442230931782, "grad_norm": 5.328853971221809, "learning_rate": 1.8025475148783023e-06, "loss": 0.6002, "step": 16673 }, { "epoch": 1.2046164682933878, "grad_norm": 6.9949267940767035, "learning_rate": 1.8022666305967012e-06, "loss": 0.6776, "step": 16674 }, { "epoch": 1.2046887134935973, "grad_norm": 6.126219958823444, "learning_rate": 1.8019857558665289e-06, "loss": 0.6304, "step": 16675 }, { "epoch": 1.2047609586938068, "grad_norm": 7.926675699975866, "learning_rate": 1.8017048906916295e-06, "loss": 0.7045, "step": 16676 }, { "epoch": 1.2048332038940164, "grad_norm": 5.831760775815302, "learning_rate": 1.8014240350758476e-06, "loss": 0.6083, "step": 16677 }, { "epoch": 1.204905449094226, "grad_norm": 6.712771288399548, "learning_rate": 1.8011431890230286e-06, "loss": 0.719, "step": 16678 }, { "epoch": 1.2049776942944352, "grad_norm": 6.604409808026264, "learning_rate": 1.8008623525370168e-06, "loss": 0.666, "step": 16679 }, { "epoch": 1.2050499394946448, "grad_norm": 6.238706838175305, "learning_rate": 1.8005815256216563e-06, "loss": 0.675, "step": 16680 }, { "epoch": 1.2051221846948543, "grad_norm": 6.431855921844428, "learning_rate": 1.8003007082807916e-06, "loss": 0.5756, "step": 16681 }, { "epoch": 1.2051944298950639, "grad_norm": 6.618395410248355, "learning_rate": 1.8000199005182667e-06, "loss": 0.6201, "step": 16682 }, { "epoch": 1.2052666750952734, "grad_norm": 6.974571635994231, "learning_rate": 1.7997391023379275e-06, "loss": 0.5858, "step": 16683 }, { "epoch": 1.205338920295483, "grad_norm": 8.44729881677005, "learning_rate": 1.7994583137436144e-06, "loss": 0.6918, "step": 16684 }, { "epoch": 1.2054111654956925, "grad_norm": 5.954358711464646, "learning_rate": 1.7991775347391731e-06, "loss": 0.5916, "step": 16685 }, { "epoch": 1.2054834106959018, "grad_norm": 6.0571917074346615, "learning_rate": 1.7988967653284487e-06, "loss": 0.6454, "step": 16686 }, { "epoch": 1.2055556558961114, "grad_norm": 7.220491273603046, "learning_rate": 1.7986160055152819e-06, "loss": 0.71, "step": 16687 }, { "epoch": 1.205627901096321, "grad_norm": 6.6373966762166, "learning_rate": 1.7983352553035176e-06, "loss": 0.7372, "step": 16688 }, { "epoch": 1.2057001462965304, "grad_norm": 6.947332888608169, "learning_rate": 1.7980545146969988e-06, "loss": 0.68, "step": 16689 }, { "epoch": 1.20577239149674, "grad_norm": 6.665555511699007, "learning_rate": 1.7977737836995692e-06, "loss": 0.5591, "step": 16690 }, { "epoch": 1.2058446366969495, "grad_norm": 7.022656967022157, "learning_rate": 1.797493062315071e-06, "loss": 0.7301, "step": 16691 }, { "epoch": 1.205916881897159, "grad_norm": 6.369232860226042, "learning_rate": 1.7972123505473468e-06, "loss": 0.6762, "step": 16692 }, { "epoch": 1.2059891270973684, "grad_norm": 6.636427326775621, "learning_rate": 1.7969316484002408e-06, "loss": 0.6085, "step": 16693 }, { "epoch": 1.206061372297578, "grad_norm": 6.93822770984508, "learning_rate": 1.7966509558775939e-06, "loss": 0.6672, "step": 16694 }, { "epoch": 1.2061336174977875, "grad_norm": 7.048523974284578, "learning_rate": 1.7963702729832494e-06, "loss": 0.6847, "step": 16695 }, { "epoch": 1.206205862697997, "grad_norm": 7.209961929538229, "learning_rate": 1.7960895997210492e-06, "loss": 0.6736, "step": 16696 }, { "epoch": 1.2062781078982066, "grad_norm": 6.5124700212429305, "learning_rate": 1.7958089360948372e-06, "loss": 0.653, "step": 16697 }, { "epoch": 1.206350353098416, "grad_norm": 6.233087774869572, "learning_rate": 1.795528282108453e-06, "loss": 0.6459, "step": 16698 }, { "epoch": 1.2064225982986256, "grad_norm": 6.535484000205598, "learning_rate": 1.795247637765739e-06, "loss": 0.6168, "step": 16699 }, { "epoch": 1.206494843498835, "grad_norm": 7.422353757173721, "learning_rate": 1.7949670030705386e-06, "loss": 0.5726, "step": 16700 }, { "epoch": 1.2065670886990445, "grad_norm": 5.9766703676948785, "learning_rate": 1.7946863780266933e-06, "loss": 0.6287, "step": 16701 }, { "epoch": 1.206639333899254, "grad_norm": 6.8096338244788415, "learning_rate": 1.7944057626380422e-06, "loss": 0.6805, "step": 16702 }, { "epoch": 1.2067115790994636, "grad_norm": 5.674389158471363, "learning_rate": 1.794125156908429e-06, "loss": 0.6056, "step": 16703 }, { "epoch": 1.2067838242996731, "grad_norm": 7.4705478946290995, "learning_rate": 1.7938445608416949e-06, "loss": 0.6936, "step": 16704 }, { "epoch": 1.2068560694998827, "grad_norm": 5.993727266111409, "learning_rate": 1.7935639744416797e-06, "loss": 0.6699, "step": 16705 }, { "epoch": 1.2069283147000922, "grad_norm": 7.301144382187684, "learning_rate": 1.7932833977122252e-06, "loss": 0.6214, "step": 16706 }, { "epoch": 1.2070005599003015, "grad_norm": 6.3030061240610555, "learning_rate": 1.7930028306571723e-06, "loss": 0.6351, "step": 16707 }, { "epoch": 1.207072805100511, "grad_norm": 6.531279495391682, "learning_rate": 1.7927222732803622e-06, "loss": 0.6917, "step": 16708 }, { "epoch": 1.2071450503007206, "grad_norm": 6.236778312321276, "learning_rate": 1.7924417255856346e-06, "loss": 0.6481, "step": 16709 }, { "epoch": 1.2072172955009302, "grad_norm": 6.834677897344484, "learning_rate": 1.7921611875768302e-06, "loss": 0.7049, "step": 16710 }, { "epoch": 1.2072895407011397, "grad_norm": 7.136080775483733, "learning_rate": 1.7918806592577906e-06, "loss": 0.6541, "step": 16711 }, { "epoch": 1.2073617859013492, "grad_norm": 6.717937189660865, "learning_rate": 1.7916001406323541e-06, "loss": 0.5953, "step": 16712 }, { "epoch": 1.2074340311015588, "grad_norm": 5.971366268988045, "learning_rate": 1.7913196317043613e-06, "loss": 0.6624, "step": 16713 }, { "epoch": 1.207506276301768, "grad_norm": 6.203967135747541, "learning_rate": 1.7910391324776522e-06, "loss": 0.6118, "step": 16714 }, { "epoch": 1.2075785215019776, "grad_norm": 8.065631583564729, "learning_rate": 1.7907586429560685e-06, "loss": 0.6231, "step": 16715 }, { "epoch": 1.2076507667021872, "grad_norm": 7.600013692743113, "learning_rate": 1.7904781631434473e-06, "loss": 0.6983, "step": 16716 }, { "epoch": 1.2077230119023967, "grad_norm": 6.038330033705285, "learning_rate": 1.790197693043629e-06, "loss": 0.6619, "step": 16717 }, { "epoch": 1.2077952571026063, "grad_norm": 6.775967135497182, "learning_rate": 1.7899172326604537e-06, "loss": 0.7086, "step": 16718 }, { "epoch": 1.2078675023028158, "grad_norm": 6.6938372381030335, "learning_rate": 1.7896367819977598e-06, "loss": 0.6265, "step": 16719 }, { "epoch": 1.2079397475030254, "grad_norm": 6.144042328470174, "learning_rate": 1.7893563410593866e-06, "loss": 0.6354, "step": 16720 }, { "epoch": 1.2080119927032347, "grad_norm": 5.558151774417548, "learning_rate": 1.7890759098491733e-06, "loss": 0.6078, "step": 16721 }, { "epoch": 1.2080842379034442, "grad_norm": 5.683195728116535, "learning_rate": 1.7887954883709594e-06, "loss": 0.6215, "step": 16722 }, { "epoch": 1.2081564831036538, "grad_norm": 6.885239068384524, "learning_rate": 1.7885150766285825e-06, "loss": 0.6427, "step": 16723 }, { "epoch": 1.2082287283038633, "grad_norm": 6.089029684705166, "learning_rate": 1.7882346746258816e-06, "loss": 0.5992, "step": 16724 }, { "epoch": 1.2083009735040728, "grad_norm": 7.928944700431451, "learning_rate": 1.7879542823666956e-06, "loss": 0.7575, "step": 16725 }, { "epoch": 1.2083732187042824, "grad_norm": 6.936161496368935, "learning_rate": 1.7876738998548637e-06, "loss": 0.5836, "step": 16726 }, { "epoch": 1.208445463904492, "grad_norm": 7.155747575072383, "learning_rate": 1.7873935270942216e-06, "loss": 0.6776, "step": 16727 }, { "epoch": 1.2085177091047012, "grad_norm": 7.395495368506796, "learning_rate": 1.7871131640886087e-06, "loss": 0.6704, "step": 16728 }, { "epoch": 1.2085899543049108, "grad_norm": 7.034347733419778, "learning_rate": 1.7868328108418641e-06, "loss": 0.6738, "step": 16729 }, { "epoch": 1.2086621995051203, "grad_norm": 6.797682861023508, "learning_rate": 1.7865524673578244e-06, "loss": 0.6808, "step": 16730 }, { "epoch": 1.2087344447053299, "grad_norm": 7.611474219553126, "learning_rate": 1.7862721336403266e-06, "loss": 0.7053, "step": 16731 }, { "epoch": 1.2088066899055394, "grad_norm": 7.815417912121736, "learning_rate": 1.7859918096932096e-06, "loss": 0.6635, "step": 16732 }, { "epoch": 1.208878935105749, "grad_norm": 6.885414419784603, "learning_rate": 1.785711495520311e-06, "loss": 0.6692, "step": 16733 }, { "epoch": 1.2089511803059585, "grad_norm": 5.8816341521972415, "learning_rate": 1.7854311911254662e-06, "loss": 0.5526, "step": 16734 }, { "epoch": 1.2090234255061678, "grad_norm": 6.329799703650194, "learning_rate": 1.7851508965125143e-06, "loss": 0.685, "step": 16735 }, { "epoch": 1.2090956707063774, "grad_norm": 7.761544274114718, "learning_rate": 1.7848706116852914e-06, "loss": 0.7117, "step": 16736 }, { "epoch": 1.209167915906587, "grad_norm": 5.782397347086228, "learning_rate": 1.7845903366476347e-06, "loss": 0.6514, "step": 16737 }, { "epoch": 1.2092401611067964, "grad_norm": 7.410463575104409, "learning_rate": 1.7843100714033806e-06, "loss": 0.6003, "step": 16738 }, { "epoch": 1.209312406307006, "grad_norm": 6.759385542171333, "learning_rate": 1.7840298159563657e-06, "loss": 0.6793, "step": 16739 }, { "epoch": 1.2093846515072155, "grad_norm": 6.75796265187235, "learning_rate": 1.783749570310428e-06, "loss": 0.6547, "step": 16740 }, { "epoch": 1.209456896707425, "grad_norm": 8.538782392621044, "learning_rate": 1.7834693344694016e-06, "loss": 0.6217, "step": 16741 }, { "epoch": 1.2095291419076346, "grad_norm": 6.09431869591931, "learning_rate": 1.7831891084371228e-06, "loss": 0.6188, "step": 16742 }, { "epoch": 1.209601387107844, "grad_norm": 8.234514441567208, "learning_rate": 1.7829088922174304e-06, "loss": 0.6285, "step": 16743 }, { "epoch": 1.2096736323080535, "grad_norm": 7.91955123015261, "learning_rate": 1.7826286858141573e-06, "loss": 0.7203, "step": 16744 }, { "epoch": 1.209745877508263, "grad_norm": 7.118018226422273, "learning_rate": 1.7823484892311404e-06, "loss": 0.6831, "step": 16745 }, { "epoch": 1.2098181227084726, "grad_norm": 6.579075687964, "learning_rate": 1.7820683024722158e-06, "loss": 0.7039, "step": 16746 }, { "epoch": 1.209890367908682, "grad_norm": 6.985872180746827, "learning_rate": 1.781788125541219e-06, "loss": 0.6006, "step": 16747 }, { "epoch": 1.2099626131088916, "grad_norm": 6.35371675826733, "learning_rate": 1.7815079584419844e-06, "loss": 0.6225, "step": 16748 }, { "epoch": 1.2100348583091012, "grad_norm": 6.633552644768711, "learning_rate": 1.7812278011783482e-06, "loss": 0.6371, "step": 16749 }, { "epoch": 1.2101071035093107, "grad_norm": 6.215662817746472, "learning_rate": 1.7809476537541453e-06, "loss": 0.6304, "step": 16750 }, { "epoch": 1.21017934870952, "grad_norm": 8.171252721138591, "learning_rate": 1.780667516173211e-06, "loss": 0.6584, "step": 16751 }, { "epoch": 1.2102515939097296, "grad_norm": 6.558780742476521, "learning_rate": 1.7803873884393796e-06, "loss": 0.6941, "step": 16752 }, { "epoch": 1.2103238391099391, "grad_norm": 6.577163444790785, "learning_rate": 1.7801072705564863e-06, "loss": 0.6435, "step": 16753 }, { "epoch": 1.2103960843101487, "grad_norm": 5.647076159104684, "learning_rate": 1.7798271625283664e-06, "loss": 0.6612, "step": 16754 }, { "epoch": 1.2104683295103582, "grad_norm": 8.033331575301151, "learning_rate": 1.779547064358852e-06, "loss": 0.6664, "step": 16755 }, { "epoch": 1.2105405747105678, "grad_norm": 7.375182585961484, "learning_rate": 1.7792669760517788e-06, "loss": 0.644, "step": 16756 }, { "epoch": 1.2106128199107773, "grad_norm": 7.903380344379228, "learning_rate": 1.7789868976109814e-06, "loss": 0.7582, "step": 16757 }, { "epoch": 1.2106850651109866, "grad_norm": 6.444182547732186, "learning_rate": 1.7787068290402947e-06, "loss": 0.6438, "step": 16758 }, { "epoch": 1.2107573103111962, "grad_norm": 6.106468035925456, "learning_rate": 1.7784267703435503e-06, "loss": 0.5638, "step": 16759 }, { "epoch": 1.2108295555114057, "grad_norm": 7.096102555506895, "learning_rate": 1.7781467215245835e-06, "loss": 0.6235, "step": 16760 }, { "epoch": 1.2109018007116152, "grad_norm": 7.248390545274598, "learning_rate": 1.7778666825872278e-06, "loss": 0.6699, "step": 16761 }, { "epoch": 1.2109740459118248, "grad_norm": 6.374078422205195, "learning_rate": 1.7775866535353163e-06, "loss": 0.6698, "step": 16762 }, { "epoch": 1.2110462911120343, "grad_norm": 6.960142559038476, "learning_rate": 1.7773066343726823e-06, "loss": 0.6354, "step": 16763 }, { "epoch": 1.2111185363122439, "grad_norm": 6.2631496191497344, "learning_rate": 1.7770266251031593e-06, "loss": 0.7321, "step": 16764 }, { "epoch": 1.2111907815124532, "grad_norm": 6.8092649285504745, "learning_rate": 1.776746625730581e-06, "loss": 0.5865, "step": 16765 }, { "epoch": 1.2112630267126627, "grad_norm": 5.869951392107359, "learning_rate": 1.7764666362587795e-06, "loss": 0.604, "step": 16766 }, { "epoch": 1.2113352719128723, "grad_norm": 6.772158386469249, "learning_rate": 1.7761866566915881e-06, "loss": 0.6886, "step": 16767 }, { "epoch": 1.2114075171130818, "grad_norm": 6.937806818595372, "learning_rate": 1.7759066870328401e-06, "loss": 0.5613, "step": 16768 }, { "epoch": 1.2114797623132914, "grad_norm": 6.835333120604211, "learning_rate": 1.7756267272863662e-06, "loss": 0.5996, "step": 16769 }, { "epoch": 1.211552007513501, "grad_norm": 5.8311419412591325, "learning_rate": 1.7753467774559997e-06, "loss": 0.598, "step": 16770 }, { "epoch": 1.2116242527137104, "grad_norm": 6.264789934708537, "learning_rate": 1.7750668375455735e-06, "loss": 0.6456, "step": 16771 }, { "epoch": 1.2116964979139198, "grad_norm": 7.744972567229575, "learning_rate": 1.7747869075589206e-06, "loss": 0.5855, "step": 16772 }, { "epoch": 1.2117687431141293, "grad_norm": 7.137586485544189, "learning_rate": 1.7745069874998705e-06, "loss": 0.6973, "step": 16773 }, { "epoch": 1.2118409883143388, "grad_norm": 7.530616068771807, "learning_rate": 1.774227077372257e-06, "loss": 0.6802, "step": 16774 }, { "epoch": 1.2119132335145484, "grad_norm": 8.26514870437724, "learning_rate": 1.7739471771799109e-06, "loss": 0.6316, "step": 16775 }, { "epoch": 1.211985478714758, "grad_norm": 5.870582705531899, "learning_rate": 1.7736672869266646e-06, "loss": 0.6268, "step": 16776 }, { "epoch": 1.2120577239149675, "grad_norm": 8.661913619899114, "learning_rate": 1.7733874066163487e-06, "loss": 0.6985, "step": 16777 }, { "epoch": 1.212129969115177, "grad_norm": 7.864566733909083, "learning_rate": 1.7731075362527953e-06, "loss": 0.6422, "step": 16778 }, { "epoch": 1.2122022143153863, "grad_norm": 5.670861355919963, "learning_rate": 1.7728276758398354e-06, "loss": 0.6134, "step": 16779 }, { "epoch": 1.2122744595155959, "grad_norm": 7.613342629386362, "learning_rate": 1.7725478253813e-06, "loss": 0.6668, "step": 16780 }, { "epoch": 1.2123467047158054, "grad_norm": 7.538902352168856, "learning_rate": 1.7722679848810198e-06, "loss": 0.623, "step": 16781 }, { "epoch": 1.212418949916015, "grad_norm": 6.860900570225565, "learning_rate": 1.7719881543428257e-06, "loss": 0.5963, "step": 16782 }, { "epoch": 1.2124911951162245, "grad_norm": 5.676007407422168, "learning_rate": 1.7717083337705499e-06, "loss": 0.6718, "step": 16783 }, { "epoch": 1.212563440316434, "grad_norm": 5.8299557671414775, "learning_rate": 1.7714285231680201e-06, "loss": 0.6046, "step": 16784 }, { "epoch": 1.2126356855166436, "grad_norm": 8.042123046996194, "learning_rate": 1.771148722539068e-06, "loss": 0.6826, "step": 16785 }, { "epoch": 1.212707930716853, "grad_norm": 6.187849555111955, "learning_rate": 1.7708689318875255e-06, "loss": 0.613, "step": 16786 }, { "epoch": 1.2127801759170624, "grad_norm": 5.921385088058704, "learning_rate": 1.7705891512172199e-06, "loss": 0.6305, "step": 16787 }, { "epoch": 1.212852421117272, "grad_norm": 6.467604973482655, "learning_rate": 1.7703093805319826e-06, "loss": 0.7332, "step": 16788 }, { "epoch": 1.2129246663174815, "grad_norm": 6.572440910552569, "learning_rate": 1.770029619835643e-06, "loss": 0.7036, "step": 16789 }, { "epoch": 1.212996911517691, "grad_norm": 6.285069457332678, "learning_rate": 1.7697498691320322e-06, "loss": 0.6905, "step": 16790 }, { "epoch": 1.2130691567179006, "grad_norm": 6.677895752930838, "learning_rate": 1.7694701284249776e-06, "loss": 0.6409, "step": 16791 }, { "epoch": 1.2131414019181102, "grad_norm": 7.624125946477662, "learning_rate": 1.7691903977183103e-06, "loss": 0.5828, "step": 16792 }, { "epoch": 1.2132136471183195, "grad_norm": 5.595783567266469, "learning_rate": 1.7689106770158592e-06, "loss": 0.627, "step": 16793 }, { "epoch": 1.213285892318529, "grad_norm": 6.82445100805034, "learning_rate": 1.768630966321453e-06, "loss": 0.6611, "step": 16794 }, { "epoch": 1.2133581375187386, "grad_norm": 6.314367641220781, "learning_rate": 1.7683512656389208e-06, "loss": 0.6769, "step": 16795 }, { "epoch": 1.213430382718948, "grad_norm": 5.525535778459686, "learning_rate": 1.7680715749720915e-06, "loss": 0.6457, "step": 16796 }, { "epoch": 1.2135026279191576, "grad_norm": 8.008890933479803, "learning_rate": 1.767791894324795e-06, "loss": 0.6747, "step": 16797 }, { "epoch": 1.2135748731193672, "grad_norm": 6.386281223067727, "learning_rate": 1.767512223700858e-06, "loss": 0.6128, "step": 16798 }, { "epoch": 1.2136471183195767, "grad_norm": 7.308068652635977, "learning_rate": 1.76723256310411e-06, "loss": 0.5996, "step": 16799 }, { "epoch": 1.213719363519786, "grad_norm": 6.822996959262368, "learning_rate": 1.7669529125383803e-06, "loss": 0.5949, "step": 16800 }, { "epoch": 1.2137916087199956, "grad_norm": 6.076846108325237, "learning_rate": 1.766673272007495e-06, "loss": 0.6404, "step": 16801 }, { "epoch": 1.2138638539202051, "grad_norm": 7.045962526237461, "learning_rate": 1.7663936415152832e-06, "loss": 0.563, "step": 16802 }, { "epoch": 1.2139360991204147, "grad_norm": 6.771226638717027, "learning_rate": 1.7661140210655731e-06, "loss": 0.6301, "step": 16803 }, { "epoch": 1.2140083443206242, "grad_norm": 8.177784054288436, "learning_rate": 1.7658344106621928e-06, "loss": 0.7302, "step": 16804 }, { "epoch": 1.2140805895208338, "grad_norm": 8.601741667638995, "learning_rate": 1.7655548103089685e-06, "loss": 0.6254, "step": 16805 }, { "epoch": 1.2141528347210433, "grad_norm": 6.070370000029311, "learning_rate": 1.7652752200097288e-06, "loss": 0.6293, "step": 16806 }, { "epoch": 1.2142250799212526, "grad_norm": 6.431171454570013, "learning_rate": 1.7649956397683004e-06, "loss": 0.6093, "step": 16807 }, { "epoch": 1.2142973251214622, "grad_norm": 5.851361382036949, "learning_rate": 1.7647160695885121e-06, "loss": 0.68, "step": 16808 }, { "epoch": 1.2143695703216717, "grad_norm": 7.2365619163481645, "learning_rate": 1.7644365094741891e-06, "loss": 0.6722, "step": 16809 }, { "epoch": 1.2144418155218812, "grad_norm": 5.871580386726641, "learning_rate": 1.764156959429159e-06, "loss": 0.5605, "step": 16810 }, { "epoch": 1.2145140607220908, "grad_norm": 6.8386155468527585, "learning_rate": 1.7638774194572505e-06, "loss": 0.6442, "step": 16811 }, { "epoch": 1.2145863059223003, "grad_norm": 7.529119799717368, "learning_rate": 1.7635978895622867e-06, "loss": 0.6245, "step": 16812 }, { "epoch": 1.2146585511225099, "grad_norm": 6.440665938734519, "learning_rate": 1.763318369748096e-06, "loss": 0.6618, "step": 16813 }, { "epoch": 1.2147307963227192, "grad_norm": 5.775595709391356, "learning_rate": 1.7630388600185054e-06, "loss": 0.6015, "step": 16814 }, { "epoch": 1.2148030415229287, "grad_norm": 6.292395576880658, "learning_rate": 1.7627593603773414e-06, "loss": 0.5981, "step": 16815 }, { "epoch": 1.2148752867231383, "grad_norm": 6.131237046347651, "learning_rate": 1.7624798708284282e-06, "loss": 0.649, "step": 16816 }, { "epoch": 1.2149475319233478, "grad_norm": 7.108098110348357, "learning_rate": 1.7622003913755931e-06, "loss": 0.7114, "step": 16817 }, { "epoch": 1.2150197771235574, "grad_norm": 6.631672791603381, "learning_rate": 1.7619209220226625e-06, "loss": 0.7261, "step": 16818 }, { "epoch": 1.215092022323767, "grad_norm": 7.1340154529657624, "learning_rate": 1.7616414627734604e-06, "loss": 0.6681, "step": 16819 }, { "epoch": 1.2151642675239764, "grad_norm": 6.608615984971165, "learning_rate": 1.7613620136318139e-06, "loss": 0.587, "step": 16820 }, { "epoch": 1.215236512724186, "grad_norm": 6.380229226421998, "learning_rate": 1.7610825746015476e-06, "loss": 0.6403, "step": 16821 }, { "epoch": 1.2153087579243955, "grad_norm": 6.022689198918335, "learning_rate": 1.7608031456864874e-06, "loss": 0.6237, "step": 16822 }, { "epoch": 1.2153810031246048, "grad_norm": 6.680868408039997, "learning_rate": 1.760523726890458e-06, "loss": 0.6216, "step": 16823 }, { "epoch": 1.2154532483248144, "grad_norm": 8.822674011953032, "learning_rate": 1.7602443182172846e-06, "loss": 0.7101, "step": 16824 }, { "epoch": 1.215525493525024, "grad_norm": 6.077916629622409, "learning_rate": 1.759964919670793e-06, "loss": 0.6516, "step": 16825 }, { "epoch": 1.2155977387252335, "grad_norm": 7.164564341832095, "learning_rate": 1.7596855312548061e-06, "loss": 0.6712, "step": 16826 }, { "epoch": 1.215669983925443, "grad_norm": 7.711801714330598, "learning_rate": 1.7594061529731489e-06, "loss": 0.7055, "step": 16827 }, { "epoch": 1.2157422291256526, "grad_norm": 6.328718444878159, "learning_rate": 1.759126784829647e-06, "loss": 0.591, "step": 16828 }, { "epoch": 1.215814474325862, "grad_norm": 8.220580850762662, "learning_rate": 1.7588474268281254e-06, "loss": 0.6284, "step": 16829 }, { "epoch": 1.2158867195260714, "grad_norm": 6.726704559421079, "learning_rate": 1.7585680789724057e-06, "loss": 0.6244, "step": 16830 }, { "epoch": 1.215958964726281, "grad_norm": 7.0083775117126565, "learning_rate": 1.7582887412663132e-06, "loss": 0.7091, "step": 16831 }, { "epoch": 1.2160312099264905, "grad_norm": 5.956566964488975, "learning_rate": 1.7580094137136722e-06, "loss": 0.605, "step": 16832 }, { "epoch": 1.2161034551267, "grad_norm": 5.852745598646884, "learning_rate": 1.7577300963183068e-06, "loss": 0.6197, "step": 16833 }, { "epoch": 1.2161757003269096, "grad_norm": 5.719613432192835, "learning_rate": 1.7574507890840392e-06, "loss": 0.5827, "step": 16834 }, { "epoch": 1.2162479455271191, "grad_norm": 5.4543105970741586, "learning_rate": 1.7571714920146943e-06, "loss": 0.6248, "step": 16835 }, { "epoch": 1.2163201907273287, "grad_norm": 7.484163986130252, "learning_rate": 1.756892205114095e-06, "loss": 0.6599, "step": 16836 }, { "epoch": 1.216392435927538, "grad_norm": 6.756114309749817, "learning_rate": 1.756612928386064e-06, "loss": 0.6304, "step": 16837 }, { "epoch": 1.2164646811277475, "grad_norm": 6.273410136732043, "learning_rate": 1.7563336618344245e-06, "loss": 0.6691, "step": 16838 }, { "epoch": 1.216536926327957, "grad_norm": 6.013580212540301, "learning_rate": 1.7560544054630002e-06, "loss": 0.5611, "step": 16839 }, { "epoch": 1.2166091715281666, "grad_norm": 6.444384994817254, "learning_rate": 1.7557751592756145e-06, "loss": 0.7164, "step": 16840 }, { "epoch": 1.2166814167283762, "grad_norm": 7.176329581917942, "learning_rate": 1.755495923276087e-06, "loss": 0.593, "step": 16841 }, { "epoch": 1.2167536619285857, "grad_norm": 6.87559645406281, "learning_rate": 1.7552166974682433e-06, "loss": 0.5696, "step": 16842 }, { "epoch": 1.2168259071287952, "grad_norm": 6.22575865262871, "learning_rate": 1.7549374818559054e-06, "loss": 0.669, "step": 16843 }, { "epoch": 1.2168981523290046, "grad_norm": 6.211993817406775, "learning_rate": 1.7546582764428938e-06, "loss": 0.676, "step": 16844 }, { "epoch": 1.216970397529214, "grad_norm": 6.16393686995709, "learning_rate": 1.7543790812330318e-06, "loss": 0.5909, "step": 16845 }, { "epoch": 1.2170426427294236, "grad_norm": 6.519561090966757, "learning_rate": 1.7540998962301415e-06, "loss": 0.6426, "step": 16846 }, { "epoch": 1.2171148879296332, "grad_norm": 6.240639658200581, "learning_rate": 1.7538207214380445e-06, "loss": 0.5962, "step": 16847 }, { "epoch": 1.2171871331298427, "grad_norm": 7.701714253028525, "learning_rate": 1.7535415568605624e-06, "loss": 0.6391, "step": 16848 }, { "epoch": 1.2172593783300523, "grad_norm": 6.315658232513228, "learning_rate": 1.7532624025015166e-06, "loss": 0.6276, "step": 16849 }, { "epoch": 1.2173316235302618, "grad_norm": 7.072573069643004, "learning_rate": 1.752983258364729e-06, "loss": 0.7428, "step": 16850 }, { "epoch": 1.2174038687304711, "grad_norm": 6.17409566210899, "learning_rate": 1.7527041244540206e-06, "loss": 0.7225, "step": 16851 }, { "epoch": 1.2174761139306807, "grad_norm": 6.877444578257389, "learning_rate": 1.752425000773212e-06, "loss": 0.7327, "step": 16852 }, { "epoch": 1.2175483591308902, "grad_norm": 7.6693215403583315, "learning_rate": 1.7521458873261249e-06, "loss": 0.6192, "step": 16853 }, { "epoch": 1.2176206043310998, "grad_norm": 6.877512524731019, "learning_rate": 1.7518667841165806e-06, "loss": 0.5696, "step": 16854 }, { "epoch": 1.2176928495313093, "grad_norm": 6.698496681202876, "learning_rate": 1.751587691148398e-06, "loss": 0.6007, "step": 16855 }, { "epoch": 1.2177650947315188, "grad_norm": 6.3763192999520815, "learning_rate": 1.7513086084253994e-06, "loss": 0.6612, "step": 16856 }, { "epoch": 1.2178373399317284, "grad_norm": 6.676151236364448, "learning_rate": 1.7510295359514043e-06, "loss": 0.6579, "step": 16857 }, { "epoch": 1.2179095851319377, "grad_norm": 6.1447060106067815, "learning_rate": 1.7507504737302344e-06, "loss": 0.6319, "step": 16858 }, { "epoch": 1.2179818303321472, "grad_norm": 5.745386013943602, "learning_rate": 1.7504714217657078e-06, "loss": 0.6271, "step": 16859 }, { "epoch": 1.2180540755323568, "grad_norm": 6.021561663264934, "learning_rate": 1.7501923800616455e-06, "loss": 0.5927, "step": 16860 }, { "epoch": 1.2181263207325663, "grad_norm": 6.453013273662049, "learning_rate": 1.7499133486218678e-06, "loss": 0.6544, "step": 16861 }, { "epoch": 1.2181985659327759, "grad_norm": 5.621039797886493, "learning_rate": 1.7496343274501936e-06, "loss": 0.6174, "step": 16862 }, { "epoch": 1.2182708111329854, "grad_norm": 7.443725032428215, "learning_rate": 1.7493553165504427e-06, "loss": 0.6412, "step": 16863 }, { "epoch": 1.218343056333195, "grad_norm": 6.764624118554706, "learning_rate": 1.7490763159264345e-06, "loss": 0.6946, "step": 16864 }, { "epoch": 1.2184153015334043, "grad_norm": 6.985846788964579, "learning_rate": 1.7487973255819894e-06, "loss": 0.6334, "step": 16865 }, { "epoch": 1.2184875467336138, "grad_norm": 5.656200429794011, "learning_rate": 1.7485183455209249e-06, "loss": 0.5973, "step": 16866 }, { "epoch": 1.2185597919338234, "grad_norm": 6.403125726821732, "learning_rate": 1.7482393757470607e-06, "loss": 0.6288, "step": 16867 }, { "epoch": 1.218632037134033, "grad_norm": 8.492472737706233, "learning_rate": 1.7479604162642167e-06, "loss": 0.6548, "step": 16868 }, { "epoch": 1.2187042823342424, "grad_norm": 7.156262260326561, "learning_rate": 1.7476814670762093e-06, "loss": 0.6495, "step": 16869 }, { "epoch": 1.218776527534452, "grad_norm": 7.807685772545072, "learning_rate": 1.747402528186859e-06, "loss": 0.6175, "step": 16870 }, { "epoch": 1.2188487727346615, "grad_norm": 7.568703873207915, "learning_rate": 1.7471235995999836e-06, "loss": 0.7374, "step": 16871 }, { "epoch": 1.2189210179348708, "grad_norm": 5.924208048111955, "learning_rate": 1.746844681319403e-06, "loss": 0.6484, "step": 16872 }, { "epoch": 1.2189932631350804, "grad_norm": 6.983000413095155, "learning_rate": 1.7465657733489322e-06, "loss": 0.6284, "step": 16873 }, { "epoch": 1.21906550833529, "grad_norm": 7.675929348125989, "learning_rate": 1.7462868756923915e-06, "loss": 0.6408, "step": 16874 }, { "epoch": 1.2191377535354995, "grad_norm": 7.736193510568294, "learning_rate": 1.7460079883535986e-06, "loss": 0.721, "step": 16875 }, { "epoch": 1.219209998735709, "grad_norm": 7.020577838297482, "learning_rate": 1.7457291113363707e-06, "loss": 0.6609, "step": 16876 }, { "epoch": 1.2192822439359186, "grad_norm": 9.22084606070372, "learning_rate": 1.745450244644525e-06, "loss": 0.7157, "step": 16877 }, { "epoch": 1.219354489136128, "grad_norm": 7.111925946016388, "learning_rate": 1.7451713882818799e-06, "loss": 0.6659, "step": 16878 }, { "epoch": 1.2194267343363374, "grad_norm": 6.8525537450186516, "learning_rate": 1.7448925422522528e-06, "loss": 0.6599, "step": 16879 }, { "epoch": 1.219498979536547, "grad_norm": 7.4239478499290925, "learning_rate": 1.7446137065594601e-06, "loss": 0.7038, "step": 16880 }, { "epoch": 1.2195712247367565, "grad_norm": 6.951224558138373, "learning_rate": 1.7443348812073191e-06, "loss": 0.6602, "step": 16881 }, { "epoch": 1.219643469936966, "grad_norm": 8.508892568753117, "learning_rate": 1.744056066199647e-06, "loss": 0.6236, "step": 16882 }, { "epoch": 1.2197157151371756, "grad_norm": 6.830546702542574, "learning_rate": 1.743777261540261e-06, "loss": 0.6612, "step": 16883 }, { "epoch": 1.2197879603373851, "grad_norm": 6.758037444323385, "learning_rate": 1.7434984672329764e-06, "loss": 0.6739, "step": 16884 }, { "epoch": 1.2198602055375947, "grad_norm": 7.4583634814571225, "learning_rate": 1.7432196832816105e-06, "loss": 0.697, "step": 16885 }, { "epoch": 1.219932450737804, "grad_norm": 6.856638855804335, "learning_rate": 1.7429409096899807e-06, "loss": 0.6947, "step": 16886 }, { "epoch": 1.2200046959380135, "grad_norm": 8.649196574213962, "learning_rate": 1.742662146461901e-06, "loss": 0.6463, "step": 16887 }, { "epoch": 1.220076941138223, "grad_norm": 6.658831315243185, "learning_rate": 1.742383393601188e-06, "loss": 0.6795, "step": 16888 }, { "epoch": 1.2201491863384326, "grad_norm": 6.184561600069854, "learning_rate": 1.7421046511116578e-06, "loss": 0.5774, "step": 16889 }, { "epoch": 1.2202214315386422, "grad_norm": 6.4585860008118505, "learning_rate": 1.7418259189971282e-06, "loss": 0.69, "step": 16890 }, { "epoch": 1.2202936767388517, "grad_norm": 6.46864856880018, "learning_rate": 1.7415471972614121e-06, "loss": 0.6551, "step": 16891 }, { "epoch": 1.2203659219390612, "grad_norm": 7.965183552017742, "learning_rate": 1.7412684859083256e-06, "loss": 0.6459, "step": 16892 }, { "epoch": 1.2204381671392708, "grad_norm": 7.063608538295097, "learning_rate": 1.7409897849416852e-06, "loss": 0.7448, "step": 16893 }, { "epoch": 1.22051041233948, "grad_norm": 8.02496495699733, "learning_rate": 1.740711094365305e-06, "loss": 0.6801, "step": 16894 }, { "epoch": 1.2205826575396896, "grad_norm": 7.041184659032022, "learning_rate": 1.7404324141829997e-06, "loss": 0.6739, "step": 16895 }, { "epoch": 1.2206549027398992, "grad_norm": 6.844439467446195, "learning_rate": 1.7401537443985855e-06, "loss": 0.6992, "step": 16896 }, { "epoch": 1.2207271479401087, "grad_norm": 5.692062392471217, "learning_rate": 1.7398750850158768e-06, "loss": 0.6195, "step": 16897 }, { "epoch": 1.2207993931403183, "grad_norm": 7.950379982750333, "learning_rate": 1.7395964360386874e-06, "loss": 0.6799, "step": 16898 }, { "epoch": 1.2208716383405278, "grad_norm": 7.032861957151556, "learning_rate": 1.7393177974708325e-06, "loss": 0.6801, "step": 16899 }, { "epoch": 1.2209438835407374, "grad_norm": 8.390155800428783, "learning_rate": 1.7390391693161276e-06, "loss": 0.7232, "step": 16900 }, { "epoch": 1.221016128740947, "grad_norm": 5.61905932906303, "learning_rate": 1.7387605515783845e-06, "loss": 0.6175, "step": 16901 }, { "epoch": 1.2210883739411562, "grad_norm": 6.547720114848571, "learning_rate": 1.7384819442614182e-06, "loss": 0.6627, "step": 16902 }, { "epoch": 1.2211606191413658, "grad_norm": 5.609420053625214, "learning_rate": 1.7382033473690426e-06, "loss": 0.5698, "step": 16903 }, { "epoch": 1.2212328643415753, "grad_norm": 7.092213199532777, "learning_rate": 1.7379247609050726e-06, "loss": 0.6462, "step": 16904 }, { "epoch": 1.2213051095417848, "grad_norm": 5.955553095296728, "learning_rate": 1.7376461848733201e-06, "loss": 0.6212, "step": 16905 }, { "epoch": 1.2213773547419944, "grad_norm": 6.585228208673009, "learning_rate": 1.7373676192775996e-06, "loss": 0.5713, "step": 16906 }, { "epoch": 1.221449599942204, "grad_norm": 6.758560403881152, "learning_rate": 1.7370890641217248e-06, "loss": 0.6268, "step": 16907 }, { "epoch": 1.2215218451424135, "grad_norm": 6.570508738523054, "learning_rate": 1.7368105194095076e-06, "loss": 0.6474, "step": 16908 }, { "epoch": 1.2215940903426228, "grad_norm": 6.626875126180384, "learning_rate": 1.736531985144762e-06, "loss": 0.673, "step": 16909 }, { "epoch": 1.2216663355428323, "grad_norm": 7.840258060412133, "learning_rate": 1.7362534613313007e-06, "loss": 0.6773, "step": 16910 }, { "epoch": 1.2217385807430419, "grad_norm": 6.832142361696533, "learning_rate": 1.735974947972937e-06, "loss": 0.6675, "step": 16911 }, { "epoch": 1.2218108259432514, "grad_norm": 6.918598441485909, "learning_rate": 1.7356964450734825e-06, "loss": 0.6406, "step": 16912 }, { "epoch": 1.221883071143461, "grad_norm": 5.808841989904001, "learning_rate": 1.7354179526367503e-06, "loss": 0.6566, "step": 16913 }, { "epoch": 1.2219553163436705, "grad_norm": 6.9908084468449365, "learning_rate": 1.7351394706665526e-06, "loss": 0.6049, "step": 16914 }, { "epoch": 1.22202756154388, "grad_norm": 6.906962776513464, "learning_rate": 1.7348609991667026e-06, "loss": 0.6226, "step": 16915 }, { "epoch": 1.2220998067440894, "grad_norm": 6.618255348921299, "learning_rate": 1.7345825381410105e-06, "loss": 0.696, "step": 16916 }, { "epoch": 1.222172051944299, "grad_norm": 6.30819071613121, "learning_rate": 1.7343040875932894e-06, "loss": 0.6911, "step": 16917 }, { "epoch": 1.2222442971445084, "grad_norm": 5.460788359126714, "learning_rate": 1.7340256475273509e-06, "loss": 0.5865, "step": 16918 }, { "epoch": 1.222316542344718, "grad_norm": 7.0164103928594, "learning_rate": 1.733747217947006e-06, "loss": 0.63, "step": 16919 }, { "epoch": 1.2223887875449275, "grad_norm": 6.951013274391834, "learning_rate": 1.7334687988560673e-06, "loss": 0.7279, "step": 16920 }, { "epoch": 1.222461032745137, "grad_norm": 6.207905116663753, "learning_rate": 1.733190390258345e-06, "loss": 0.6084, "step": 16921 }, { "epoch": 1.2225332779453466, "grad_norm": 7.281881804334968, "learning_rate": 1.7329119921576515e-06, "loss": 0.6561, "step": 16922 }, { "epoch": 1.222605523145556, "grad_norm": 5.932272446902396, "learning_rate": 1.7326336045577969e-06, "loss": 0.5851, "step": 16923 }, { "epoch": 1.2226777683457655, "grad_norm": 8.671644932899707, "learning_rate": 1.732355227462592e-06, "loss": 0.7314, "step": 16924 }, { "epoch": 1.222750013545975, "grad_norm": 8.648583056871313, "learning_rate": 1.7320768608758499e-06, "loss": 0.6261, "step": 16925 }, { "epoch": 1.2228222587461846, "grad_norm": 7.746278423006643, "learning_rate": 1.731798504801377e-06, "loss": 0.6203, "step": 16926 }, { "epoch": 1.222894503946394, "grad_norm": 5.954236665193677, "learning_rate": 1.731520159242987e-06, "loss": 0.6136, "step": 16927 }, { "epoch": 1.2229667491466036, "grad_norm": 7.072544483248536, "learning_rate": 1.731241824204489e-06, "loss": 0.6223, "step": 16928 }, { "epoch": 1.2230389943468132, "grad_norm": 7.768149995842649, "learning_rate": 1.7309634996896945e-06, "loss": 0.6774, "step": 16929 }, { "epoch": 1.2231112395470225, "grad_norm": 7.6920415597508045, "learning_rate": 1.730685185702412e-06, "loss": 0.6797, "step": 16930 }, { "epoch": 1.223183484747232, "grad_norm": 8.8067933130892, "learning_rate": 1.7304068822464518e-06, "loss": 0.6068, "step": 16931 }, { "epoch": 1.2232557299474416, "grad_norm": 6.713623077948573, "learning_rate": 1.7301285893256243e-06, "loss": 0.6675, "step": 16932 }, { "epoch": 1.2233279751476511, "grad_norm": 7.655164244600922, "learning_rate": 1.7298503069437382e-06, "loss": 0.6549, "step": 16933 }, { "epoch": 1.2234002203478607, "grad_norm": 8.219624868033298, "learning_rate": 1.7295720351046033e-06, "loss": 0.7568, "step": 16934 }, { "epoch": 1.2234724655480702, "grad_norm": 5.972588867446094, "learning_rate": 1.7292937738120292e-06, "loss": 0.5817, "step": 16935 }, { "epoch": 1.2235447107482798, "grad_norm": 6.9494026421354755, "learning_rate": 1.7290155230698251e-06, "loss": 0.6401, "step": 16936 }, { "epoch": 1.223616955948489, "grad_norm": 7.633106112811701, "learning_rate": 1.7287372828817994e-06, "loss": 0.7013, "step": 16937 }, { "epoch": 1.2236892011486986, "grad_norm": 6.011210776439244, "learning_rate": 1.728459053251762e-06, "loss": 0.639, "step": 16938 }, { "epoch": 1.2237614463489082, "grad_norm": 6.4978736920865545, "learning_rate": 1.7281808341835204e-06, "loss": 0.6435, "step": 16939 }, { "epoch": 1.2238336915491177, "grad_norm": 5.773503903225202, "learning_rate": 1.727902625680885e-06, "loss": 0.5839, "step": 16940 }, { "epoch": 1.2239059367493272, "grad_norm": 6.9918841589228915, "learning_rate": 1.7276244277476622e-06, "loss": 0.6661, "step": 16941 }, { "epoch": 1.2239781819495368, "grad_norm": 6.072358914428933, "learning_rate": 1.7273462403876616e-06, "loss": 0.6565, "step": 16942 }, { "epoch": 1.2240504271497463, "grad_norm": 8.040939482652004, "learning_rate": 1.7270680636046918e-06, "loss": 0.6467, "step": 16943 }, { "epoch": 1.2241226723499556, "grad_norm": 6.61345660207573, "learning_rate": 1.7267898974025594e-06, "loss": 0.647, "step": 16944 }, { "epoch": 1.2241949175501652, "grad_norm": 8.29052445357542, "learning_rate": 1.7265117417850723e-06, "loss": 0.6501, "step": 16945 }, { "epoch": 1.2242671627503747, "grad_norm": 7.953423575022312, "learning_rate": 1.726233596756039e-06, "loss": 0.7039, "step": 16946 }, { "epoch": 1.2243394079505843, "grad_norm": 5.89541779077059, "learning_rate": 1.7259554623192682e-06, "loss": 0.6019, "step": 16947 }, { "epoch": 1.2244116531507938, "grad_norm": 7.245130284666859, "learning_rate": 1.7256773384785652e-06, "loss": 0.706, "step": 16948 }, { "epoch": 1.2244838983510034, "grad_norm": 6.399622619946771, "learning_rate": 1.7253992252377383e-06, "loss": 0.5852, "step": 16949 }, { "epoch": 1.224556143551213, "grad_norm": 7.466576679005366, "learning_rate": 1.7251211226005948e-06, "loss": 0.6621, "step": 16950 }, { "epoch": 1.2246283887514222, "grad_norm": 6.8864215650390985, "learning_rate": 1.7248430305709413e-06, "loss": 0.709, "step": 16951 }, { "epoch": 1.2247006339516318, "grad_norm": 8.207945942007298, "learning_rate": 1.7245649491525845e-06, "loss": 0.6435, "step": 16952 }, { "epoch": 1.2247728791518413, "grad_norm": 6.159438183913779, "learning_rate": 1.7242868783493316e-06, "loss": 0.6901, "step": 16953 }, { "epoch": 1.2248451243520508, "grad_norm": 6.077209560620826, "learning_rate": 1.7240088181649895e-06, "loss": 0.6084, "step": 16954 }, { "epoch": 1.2249173695522604, "grad_norm": 6.7472734949369375, "learning_rate": 1.723730768603364e-06, "loss": 0.6523, "step": 16955 }, { "epoch": 1.22498961475247, "grad_norm": 6.693745201529413, "learning_rate": 1.7234527296682612e-06, "loss": 0.7064, "step": 16956 }, { "epoch": 1.2250618599526795, "grad_norm": 6.672557644014423, "learning_rate": 1.7231747013634886e-06, "loss": 0.5935, "step": 16957 }, { "epoch": 1.2251341051528888, "grad_norm": 7.1873237919976605, "learning_rate": 1.72289668369285e-06, "loss": 0.6415, "step": 16958 }, { "epoch": 1.2252063503530983, "grad_norm": 6.120435298323707, "learning_rate": 1.7226186766601527e-06, "loss": 0.5958, "step": 16959 }, { "epoch": 1.2252785955533079, "grad_norm": 6.236108218699546, "learning_rate": 1.7223406802692017e-06, "loss": 0.6427, "step": 16960 }, { "epoch": 1.2253508407535174, "grad_norm": 7.491046392995555, "learning_rate": 1.7220626945238045e-06, "loss": 0.6726, "step": 16961 }, { "epoch": 1.225423085953727, "grad_norm": 5.703273823507706, "learning_rate": 1.7217847194277636e-06, "loss": 0.5548, "step": 16962 }, { "epoch": 1.2254953311539365, "grad_norm": 6.369793130578668, "learning_rate": 1.7215067549848858e-06, "loss": 0.5964, "step": 16963 }, { "epoch": 1.225567576354146, "grad_norm": 5.818614082101147, "learning_rate": 1.7212288011989762e-06, "loss": 0.6101, "step": 16964 }, { "epoch": 1.2256398215543556, "grad_norm": 8.565284888700981, "learning_rate": 1.7209508580738398e-06, "loss": 0.7416, "step": 16965 }, { "epoch": 1.225712066754565, "grad_norm": 7.058840993784823, "learning_rate": 1.7206729256132811e-06, "loss": 0.7106, "step": 16966 }, { "epoch": 1.2257843119547744, "grad_norm": 7.215961132257841, "learning_rate": 1.720395003821105e-06, "loss": 0.6758, "step": 16967 }, { "epoch": 1.225856557154984, "grad_norm": 6.879104498758104, "learning_rate": 1.7201170927011163e-06, "loss": 0.5649, "step": 16968 }, { "epoch": 1.2259288023551935, "grad_norm": 6.257985011472031, "learning_rate": 1.7198391922571187e-06, "loss": 0.6466, "step": 16969 }, { "epoch": 1.226001047555403, "grad_norm": 5.862915596942788, "learning_rate": 1.7195613024929165e-06, "loss": 0.6424, "step": 16970 }, { "epoch": 1.2260732927556126, "grad_norm": 6.492782400259471, "learning_rate": 1.7192834234123146e-06, "loss": 0.5846, "step": 16971 }, { "epoch": 1.2261455379558222, "grad_norm": 6.271221905030064, "learning_rate": 1.719005555019117e-06, "loss": 0.6843, "step": 16972 }, { "epoch": 1.2262177831560317, "grad_norm": 6.813863075450396, "learning_rate": 1.7187276973171261e-06, "loss": 0.5989, "step": 16973 }, { "epoch": 1.226290028356241, "grad_norm": 6.883196574308074, "learning_rate": 1.718449850310146e-06, "loss": 0.6929, "step": 16974 }, { "epoch": 1.2263622735564506, "grad_norm": 6.590070665180329, "learning_rate": 1.7181720140019819e-06, "loss": 0.6693, "step": 16975 }, { "epoch": 1.22643451875666, "grad_norm": 6.113166432627842, "learning_rate": 1.7178941883964351e-06, "loss": 0.6616, "step": 16976 }, { "epoch": 1.2265067639568696, "grad_norm": 7.213808953099354, "learning_rate": 1.7176163734973094e-06, "loss": 0.7019, "step": 16977 }, { "epoch": 1.2265790091570792, "grad_norm": 6.704525105543952, "learning_rate": 1.7173385693084082e-06, "loss": 0.6461, "step": 16978 }, { "epoch": 1.2266512543572887, "grad_norm": 7.023732960971057, "learning_rate": 1.7170607758335346e-06, "loss": 0.649, "step": 16979 }, { "epoch": 1.2267234995574983, "grad_norm": 6.587152298902429, "learning_rate": 1.7167829930764907e-06, "loss": 0.6352, "step": 16980 }, { "epoch": 1.2267957447577076, "grad_norm": 5.8503576446498124, "learning_rate": 1.7165052210410794e-06, "loss": 0.6627, "step": 16981 }, { "epoch": 1.2268679899579171, "grad_norm": 8.283544143460585, "learning_rate": 1.716227459731104e-06, "loss": 0.7281, "step": 16982 }, { "epoch": 1.2269402351581267, "grad_norm": 8.021570688679155, "learning_rate": 1.7159497091503655e-06, "loss": 0.6497, "step": 16983 }, { "epoch": 1.2270124803583362, "grad_norm": 8.187669592112849, "learning_rate": 1.7156719693026667e-06, "loss": 0.6637, "step": 16984 }, { "epoch": 1.2270847255585458, "grad_norm": 7.1654249786395585, "learning_rate": 1.7153942401918095e-06, "loss": 0.6609, "step": 16985 }, { "epoch": 1.2271569707587553, "grad_norm": 7.590133245250973, "learning_rate": 1.715116521821597e-06, "loss": 0.5896, "step": 16986 }, { "epoch": 1.2272292159589648, "grad_norm": 6.54602103479201, "learning_rate": 1.7148388141958292e-06, "loss": 0.6706, "step": 16987 }, { "epoch": 1.2273014611591742, "grad_norm": 7.799825901142587, "learning_rate": 1.7145611173183075e-06, "loss": 0.7177, "step": 16988 }, { "epoch": 1.2273737063593837, "grad_norm": 6.541597046046309, "learning_rate": 1.7142834311928353e-06, "loss": 0.6561, "step": 16989 }, { "epoch": 1.2274459515595932, "grad_norm": 6.737509013720199, "learning_rate": 1.7140057558232132e-06, "loss": 0.6894, "step": 16990 }, { "epoch": 1.2275181967598028, "grad_norm": 8.019183047725603, "learning_rate": 1.7137280912132415e-06, "loss": 0.6913, "step": 16991 }, { "epoch": 1.2275904419600123, "grad_norm": 7.386976184121307, "learning_rate": 1.7134504373667215e-06, "loss": 0.5951, "step": 16992 }, { "epoch": 1.2276626871602219, "grad_norm": 6.765424835556627, "learning_rate": 1.7131727942874548e-06, "loss": 0.7469, "step": 16993 }, { "epoch": 1.2277349323604314, "grad_norm": 6.327341591650607, "learning_rate": 1.712895161979241e-06, "loss": 0.6353, "step": 16994 }, { "epoch": 1.2278071775606407, "grad_norm": 6.7253862121181145, "learning_rate": 1.7126175404458818e-06, "loss": 0.6826, "step": 16995 }, { "epoch": 1.2278794227608503, "grad_norm": 7.712408140058956, "learning_rate": 1.7123399296911763e-06, "loss": 0.6609, "step": 16996 }, { "epoch": 1.2279516679610598, "grad_norm": 6.453493564970468, "learning_rate": 1.7120623297189265e-06, "loss": 0.6734, "step": 16997 }, { "epoch": 1.2280239131612694, "grad_norm": 6.472525411698136, "learning_rate": 1.7117847405329313e-06, "loss": 0.6807, "step": 16998 }, { "epoch": 1.228096158361479, "grad_norm": 6.919464587187353, "learning_rate": 1.7115071621369908e-06, "loss": 0.5548, "step": 16999 }, { "epoch": 1.2281684035616884, "grad_norm": 9.674807003964721, "learning_rate": 1.7112295945349062e-06, "loss": 0.6577, "step": 17000 }, { "epoch": 1.228240648761898, "grad_norm": 6.482157647522466, "learning_rate": 1.7109520377304748e-06, "loss": 0.6276, "step": 17001 }, { "epoch": 1.2283128939621073, "grad_norm": 6.974446931740331, "learning_rate": 1.7106744917274976e-06, "loss": 0.6576, "step": 17002 }, { "epoch": 1.2283851391623168, "grad_norm": 6.3443464928923134, "learning_rate": 1.7103969565297729e-06, "loss": 0.6524, "step": 17003 }, { "epoch": 1.2284573843625264, "grad_norm": 5.99185263283676, "learning_rate": 1.7101194321411025e-06, "loss": 0.6839, "step": 17004 }, { "epoch": 1.228529629562736, "grad_norm": 8.295893321897333, "learning_rate": 1.7098419185652826e-06, "loss": 0.6504, "step": 17005 }, { "epoch": 1.2286018747629455, "grad_norm": 8.123691394768846, "learning_rate": 1.7095644158061133e-06, "loss": 0.7303, "step": 17006 }, { "epoch": 1.228674119963155, "grad_norm": 7.0164182762465135, "learning_rate": 1.7092869238673943e-06, "loss": 0.6228, "step": 17007 }, { "epoch": 1.2287463651633646, "grad_norm": 6.301565236214692, "learning_rate": 1.7090094427529225e-06, "loss": 0.6574, "step": 17008 }, { "epoch": 1.2288186103635739, "grad_norm": 6.426524166305408, "learning_rate": 1.7087319724664972e-06, "loss": 0.6312, "step": 17009 }, { "epoch": 1.2288908555637834, "grad_norm": 6.290666344890988, "learning_rate": 1.708454513011917e-06, "loss": 0.6488, "step": 17010 }, { "epoch": 1.228963100763993, "grad_norm": 5.955969423808668, "learning_rate": 1.7081770643929803e-06, "loss": 0.6834, "step": 17011 }, { "epoch": 1.2290353459642025, "grad_norm": 8.026802939507578, "learning_rate": 1.7078996266134845e-06, "loss": 0.6701, "step": 17012 }, { "epoch": 1.229107591164412, "grad_norm": 6.811416119786308, "learning_rate": 1.7076221996772273e-06, "loss": 0.613, "step": 17013 }, { "epoch": 1.2291798363646216, "grad_norm": 6.503846717539101, "learning_rate": 1.7073447835880074e-06, "loss": 0.7023, "step": 17014 }, { "epoch": 1.2292520815648311, "grad_norm": 5.416199101539707, "learning_rate": 1.707067378349623e-06, "loss": 0.5986, "step": 17015 }, { "epoch": 1.2293243267650404, "grad_norm": 6.102612433173311, "learning_rate": 1.7067899839658694e-06, "loss": 0.5596, "step": 17016 }, { "epoch": 1.22939657196525, "grad_norm": 6.150980131620476, "learning_rate": 1.7065126004405446e-06, "loss": 0.6641, "step": 17017 }, { "epoch": 1.2294688171654595, "grad_norm": 6.651362814402652, "learning_rate": 1.7062352277774473e-06, "loss": 0.6255, "step": 17018 }, { "epoch": 1.229541062365669, "grad_norm": 6.736646370961784, "learning_rate": 1.705957865980373e-06, "loss": 0.6867, "step": 17019 }, { "epoch": 1.2296133075658786, "grad_norm": 6.621567034614728, "learning_rate": 1.7056805150531187e-06, "loss": 0.6652, "step": 17020 }, { "epoch": 1.2296855527660882, "grad_norm": 7.787673198800676, "learning_rate": 1.7054031749994814e-06, "loss": 0.5959, "step": 17021 }, { "epoch": 1.2297577979662977, "grad_norm": 6.349706099309434, "learning_rate": 1.7051258458232583e-06, "loss": 0.6219, "step": 17022 }, { "epoch": 1.229830043166507, "grad_norm": 7.809615678501627, "learning_rate": 1.7048485275282444e-06, "loss": 0.6506, "step": 17023 }, { "epoch": 1.2299022883667166, "grad_norm": 6.421916283987586, "learning_rate": 1.7045712201182369e-06, "loss": 0.5594, "step": 17024 }, { "epoch": 1.229974533566926, "grad_norm": 7.041537883717041, "learning_rate": 1.7042939235970325e-06, "loss": 0.6475, "step": 17025 }, { "epoch": 1.2300467787671356, "grad_norm": 7.220364476776102, "learning_rate": 1.7040166379684255e-06, "loss": 0.7024, "step": 17026 }, { "epoch": 1.2301190239673452, "grad_norm": 7.387138076410982, "learning_rate": 1.7037393632362132e-06, "loss": 0.6888, "step": 17027 }, { "epoch": 1.2301912691675547, "grad_norm": 7.19547684903238, "learning_rate": 1.7034620994041903e-06, "loss": 0.6565, "step": 17028 }, { "epoch": 1.2302635143677643, "grad_norm": 6.069622457159995, "learning_rate": 1.7031848464761542e-06, "loss": 0.6244, "step": 17029 }, { "epoch": 1.2303357595679736, "grad_norm": 7.1652194787195915, "learning_rate": 1.7029076044558976e-06, "loss": 0.5792, "step": 17030 }, { "epoch": 1.2304080047681831, "grad_norm": 7.325299399444633, "learning_rate": 1.7026303733472166e-06, "loss": 0.682, "step": 17031 }, { "epoch": 1.2304802499683927, "grad_norm": 5.834662077205138, "learning_rate": 1.702353153153908e-06, "loss": 0.6088, "step": 17032 }, { "epoch": 1.2305524951686022, "grad_norm": 6.601398967094316, "learning_rate": 1.7020759438797647e-06, "loss": 0.6548, "step": 17033 }, { "epoch": 1.2306247403688118, "grad_norm": 6.84316888719701, "learning_rate": 1.7017987455285818e-06, "loss": 0.6436, "step": 17034 }, { "epoch": 1.2306969855690213, "grad_norm": 6.166921902434992, "learning_rate": 1.7015215581041543e-06, "loss": 0.6396, "step": 17035 }, { "epoch": 1.2307692307692308, "grad_norm": 6.708514414489549, "learning_rate": 1.7012443816102778e-06, "loss": 0.6501, "step": 17036 }, { "epoch": 1.2308414759694402, "grad_norm": 8.939947046724727, "learning_rate": 1.7009672160507445e-06, "loss": 0.6107, "step": 17037 }, { "epoch": 1.2309137211696497, "grad_norm": 5.979671053817696, "learning_rate": 1.7006900614293497e-06, "loss": 0.5865, "step": 17038 }, { "epoch": 1.2309859663698592, "grad_norm": 6.699472423677497, "learning_rate": 1.700412917749888e-06, "loss": 0.5985, "step": 17039 }, { "epoch": 1.2310582115700688, "grad_norm": 7.406341423403953, "learning_rate": 1.700135785016152e-06, "loss": 0.7252, "step": 17040 }, { "epoch": 1.2311304567702783, "grad_norm": 6.9505942550293405, "learning_rate": 1.6998586632319357e-06, "loss": 0.664, "step": 17041 }, { "epoch": 1.2312027019704879, "grad_norm": 7.262624338369253, "learning_rate": 1.6995815524010337e-06, "loss": 0.7226, "step": 17042 }, { "epoch": 1.2312749471706974, "grad_norm": 6.921857640091803, "learning_rate": 1.6993044525272395e-06, "loss": 0.643, "step": 17043 }, { "epoch": 1.231347192370907, "grad_norm": 7.13832105199859, "learning_rate": 1.6990273636143445e-06, "loss": 0.678, "step": 17044 }, { "epoch": 1.2314194375711165, "grad_norm": 5.577677219914726, "learning_rate": 1.6987502856661426e-06, "loss": 0.6084, "step": 17045 }, { "epoch": 1.2314916827713258, "grad_norm": 7.8810188664933065, "learning_rate": 1.6984732186864274e-06, "loss": 0.614, "step": 17046 }, { "epoch": 1.2315639279715354, "grad_norm": 7.860057156575814, "learning_rate": 1.6981961626789928e-06, "loss": 0.6517, "step": 17047 }, { "epoch": 1.231636173171745, "grad_norm": 6.595796818811873, "learning_rate": 1.697919117647629e-06, "loss": 0.6969, "step": 17048 }, { "epoch": 1.2317084183719544, "grad_norm": 6.5924678477956, "learning_rate": 1.6976420835961299e-06, "loss": 0.6468, "step": 17049 }, { "epoch": 1.231780663572164, "grad_norm": 6.46559958289319, "learning_rate": 1.6973650605282882e-06, "loss": 0.6493, "step": 17050 }, { "epoch": 1.2318529087723735, "grad_norm": 8.680137093863307, "learning_rate": 1.6970880484478946e-06, "loss": 0.683, "step": 17051 }, { "epoch": 1.231925153972583, "grad_norm": 7.707517783521539, "learning_rate": 1.696811047358743e-06, "loss": 0.664, "step": 17052 }, { "epoch": 1.2319973991727924, "grad_norm": 7.157316303354196, "learning_rate": 1.6965340572646239e-06, "loss": 0.6864, "step": 17053 }, { "epoch": 1.232069644373002, "grad_norm": 6.711175099475525, "learning_rate": 1.6962570781693305e-06, "loss": 0.5378, "step": 17054 }, { "epoch": 1.2321418895732115, "grad_norm": 6.798714505029494, "learning_rate": 1.6959801100766529e-06, "loss": 0.6303, "step": 17055 }, { "epoch": 1.232214134773421, "grad_norm": 7.19209103118689, "learning_rate": 1.6957031529903834e-06, "loss": 0.6294, "step": 17056 }, { "epoch": 1.2322863799736306, "grad_norm": 7.961184991016202, "learning_rate": 1.6954262069143146e-06, "loss": 0.7207, "step": 17057 }, { "epoch": 1.23235862517384, "grad_norm": 6.74137129103159, "learning_rate": 1.695149271852235e-06, "loss": 0.7273, "step": 17058 }, { "epoch": 1.2324308703740496, "grad_norm": 7.6110938165596576, "learning_rate": 1.6948723478079363e-06, "loss": 0.6165, "step": 17059 }, { "epoch": 1.232503115574259, "grad_norm": 6.411650079689687, "learning_rate": 1.6945954347852106e-06, "loss": 0.5874, "step": 17060 }, { "epoch": 1.2325753607744685, "grad_norm": 6.92266496753133, "learning_rate": 1.6943185327878492e-06, "loss": 0.7039, "step": 17061 }, { "epoch": 1.232647605974678, "grad_norm": 7.766038112282593, "learning_rate": 1.6940416418196403e-06, "loss": 0.6428, "step": 17062 }, { "epoch": 1.2327198511748876, "grad_norm": 6.3317535170044055, "learning_rate": 1.6937647618843756e-06, "loss": 0.6655, "step": 17063 }, { "epoch": 1.2327920963750971, "grad_norm": 7.321971530253566, "learning_rate": 1.693487892985846e-06, "loss": 0.6379, "step": 17064 }, { "epoch": 1.2328643415753067, "grad_norm": 6.42595548538476, "learning_rate": 1.6932110351278402e-06, "loss": 0.632, "step": 17065 }, { "epoch": 1.2329365867755162, "grad_norm": 5.987737841252438, "learning_rate": 1.6929341883141493e-06, "loss": 0.6498, "step": 17066 }, { "epoch": 1.2330088319757255, "grad_norm": 7.433599651452597, "learning_rate": 1.6926573525485622e-06, "loss": 0.6776, "step": 17067 }, { "epoch": 1.233081077175935, "grad_norm": 7.142886548662964, "learning_rate": 1.6923805278348698e-06, "loss": 0.6534, "step": 17068 }, { "epoch": 1.2331533223761446, "grad_norm": 6.814239279927828, "learning_rate": 1.6921037141768605e-06, "loss": 0.6373, "step": 17069 }, { "epoch": 1.2332255675763542, "grad_norm": 7.231273010797577, "learning_rate": 1.6918269115783238e-06, "loss": 0.6315, "step": 17070 }, { "epoch": 1.2332978127765637, "grad_norm": 6.058705509023223, "learning_rate": 1.6915501200430493e-06, "loss": 0.5779, "step": 17071 }, { "epoch": 1.2333700579767732, "grad_norm": 6.611335048925473, "learning_rate": 1.691273339574827e-06, "loss": 0.6677, "step": 17072 }, { "epoch": 1.2334423031769828, "grad_norm": 7.3364294047672, "learning_rate": 1.6909965701774433e-06, "loss": 0.6652, "step": 17073 }, { "epoch": 1.233514548377192, "grad_norm": 8.077916273596935, "learning_rate": 1.6907198118546886e-06, "loss": 0.6799, "step": 17074 }, { "epoch": 1.2335867935774016, "grad_norm": 6.191623431226364, "learning_rate": 1.6904430646103526e-06, "loss": 0.6172, "step": 17075 }, { "epoch": 1.2336590387776112, "grad_norm": 6.946492733135471, "learning_rate": 1.6901663284482214e-06, "loss": 0.6551, "step": 17076 }, { "epoch": 1.2337312839778207, "grad_norm": 5.587090988038244, "learning_rate": 1.6898896033720844e-06, "loss": 0.691, "step": 17077 }, { "epoch": 1.2338035291780303, "grad_norm": 5.838031112394581, "learning_rate": 1.6896128893857296e-06, "loss": 0.5924, "step": 17078 }, { "epoch": 1.2338757743782398, "grad_norm": 8.700487706337213, "learning_rate": 1.689336186492946e-06, "loss": 0.6454, "step": 17079 }, { "epoch": 1.2339480195784494, "grad_norm": 6.650042150837103, "learning_rate": 1.6890594946975194e-06, "loss": 0.6041, "step": 17080 }, { "epoch": 1.2340202647786587, "grad_norm": 6.987142198167438, "learning_rate": 1.6887828140032392e-06, "loss": 0.626, "step": 17081 }, { "epoch": 1.2340925099788682, "grad_norm": 7.0978491950590765, "learning_rate": 1.6885061444138928e-06, "loss": 0.6615, "step": 17082 }, { "epoch": 1.2341647551790778, "grad_norm": 7.9244503372140995, "learning_rate": 1.6882294859332665e-06, "loss": 0.5947, "step": 17083 }, { "epoch": 1.2342370003792873, "grad_norm": 6.048996821691025, "learning_rate": 1.6879528385651484e-06, "loss": 0.7309, "step": 17084 }, { "epoch": 1.2343092455794968, "grad_norm": 7.925847669448163, "learning_rate": 1.6876762023133253e-06, "loss": 0.6501, "step": 17085 }, { "epoch": 1.2343814907797064, "grad_norm": 6.934015997266072, "learning_rate": 1.6873995771815854e-06, "loss": 0.6627, "step": 17086 }, { "epoch": 1.234453735979916, "grad_norm": 6.177606861729381, "learning_rate": 1.687122963173713e-06, "loss": 0.6202, "step": 17087 }, { "epoch": 1.2345259811801252, "grad_norm": 7.017206842042898, "learning_rate": 1.6868463602934962e-06, "loss": 0.6825, "step": 17088 }, { "epoch": 1.2345982263803348, "grad_norm": 6.896557388632191, "learning_rate": 1.6865697685447223e-06, "loss": 0.7316, "step": 17089 }, { "epoch": 1.2346704715805443, "grad_norm": 6.265412819330492, "learning_rate": 1.6862931879311762e-06, "loss": 0.6542, "step": 17090 }, { "epoch": 1.2347427167807539, "grad_norm": 6.0510488688598505, "learning_rate": 1.6860166184566443e-06, "loss": 0.6117, "step": 17091 }, { "epoch": 1.2348149619809634, "grad_norm": 6.325088488295292, "learning_rate": 1.685740060124913e-06, "loss": 0.6076, "step": 17092 }, { "epoch": 1.234887207181173, "grad_norm": 5.463401402314542, "learning_rate": 1.6854635129397682e-06, "loss": 0.667, "step": 17093 }, { "epoch": 1.2349594523813825, "grad_norm": 6.391069392116902, "learning_rate": 1.685186976904995e-06, "loss": 0.6479, "step": 17094 }, { "epoch": 1.2350316975815918, "grad_norm": 6.36349076686611, "learning_rate": 1.6849104520243797e-06, "loss": 0.6368, "step": 17095 }, { "epoch": 1.2351039427818014, "grad_norm": 7.352169323968984, "learning_rate": 1.6846339383017074e-06, "loss": 0.6542, "step": 17096 }, { "epoch": 1.235176187982011, "grad_norm": 7.018610329737173, "learning_rate": 1.6843574357407638e-06, "loss": 0.6447, "step": 17097 }, { "epoch": 1.2352484331822204, "grad_norm": 6.682856293330322, "learning_rate": 1.6840809443453332e-06, "loss": 0.7204, "step": 17098 }, { "epoch": 1.23532067838243, "grad_norm": 7.011129523466545, "learning_rate": 1.6838044641192008e-06, "loss": 0.677, "step": 17099 }, { "epoch": 1.2353929235826395, "grad_norm": 6.761742162636388, "learning_rate": 1.6835279950661527e-06, "loss": 0.6221, "step": 17100 }, { "epoch": 1.235465168782849, "grad_norm": 5.477825372904839, "learning_rate": 1.683251537189971e-06, "loss": 0.6294, "step": 17101 }, { "epoch": 1.2355374139830584, "grad_norm": 7.32804561482521, "learning_rate": 1.6829750904944414e-06, "loss": 0.6649, "step": 17102 }, { "epoch": 1.235609659183268, "grad_norm": 6.471537261358114, "learning_rate": 1.6826986549833491e-06, "loss": 0.6976, "step": 17103 }, { "epoch": 1.2356819043834775, "grad_norm": 7.086265346088571, "learning_rate": 1.6824222306604785e-06, "loss": 0.6895, "step": 17104 }, { "epoch": 1.235754149583687, "grad_norm": 7.236486007471314, "learning_rate": 1.6821458175296116e-06, "loss": 0.6738, "step": 17105 }, { "epoch": 1.2358263947838966, "grad_norm": 6.130896085426888, "learning_rate": 1.6818694155945335e-06, "loss": 0.5399, "step": 17106 }, { "epoch": 1.235898639984106, "grad_norm": 6.299084599914102, "learning_rate": 1.6815930248590284e-06, "loss": 0.6966, "step": 17107 }, { "epoch": 1.2359708851843156, "grad_norm": 7.391309645960633, "learning_rate": 1.6813166453268787e-06, "loss": 0.6744, "step": 17108 }, { "epoch": 1.236043130384525, "grad_norm": 6.722722923750576, "learning_rate": 1.6810402770018685e-06, "loss": 0.5842, "step": 17109 }, { "epoch": 1.2361153755847345, "grad_norm": 8.011353542535499, "learning_rate": 1.6807639198877811e-06, "loss": 0.6514, "step": 17110 }, { "epoch": 1.236187620784944, "grad_norm": 7.873600668937718, "learning_rate": 1.6804875739884002e-06, "loss": 0.6736, "step": 17111 }, { "epoch": 1.2362598659851536, "grad_norm": 7.143319120453784, "learning_rate": 1.6802112393075077e-06, "loss": 0.6452, "step": 17112 }, { "epoch": 1.2363321111853631, "grad_norm": 7.161630531783897, "learning_rate": 1.6799349158488864e-06, "loss": 0.6891, "step": 17113 }, { "epoch": 1.2364043563855727, "grad_norm": 7.123596053085763, "learning_rate": 1.6796586036163207e-06, "loss": 0.678, "step": 17114 }, { "epoch": 1.2364766015857822, "grad_norm": 6.441627142399598, "learning_rate": 1.6793823026135906e-06, "loss": 0.7031, "step": 17115 }, { "epoch": 1.2365488467859918, "grad_norm": 6.301773778285408, "learning_rate": 1.679106012844479e-06, "loss": 0.6405, "step": 17116 }, { "epoch": 1.236621091986201, "grad_norm": 5.780616849303132, "learning_rate": 1.6788297343127695e-06, "loss": 0.6572, "step": 17117 }, { "epoch": 1.2366933371864106, "grad_norm": 6.774154114791548, "learning_rate": 1.6785534670222441e-06, "loss": 0.6698, "step": 17118 }, { "epoch": 1.2367655823866202, "grad_norm": 7.053843366406215, "learning_rate": 1.678277210976683e-06, "loss": 0.6548, "step": 17119 }, { "epoch": 1.2368378275868297, "grad_norm": 6.985065059154083, "learning_rate": 1.6780009661798685e-06, "loss": 0.6019, "step": 17120 }, { "epoch": 1.2369100727870392, "grad_norm": 6.443555335759152, "learning_rate": 1.677724732635583e-06, "loss": 0.6173, "step": 17121 }, { "epoch": 1.2369823179872488, "grad_norm": 6.911498686024187, "learning_rate": 1.677448510347608e-06, "loss": 0.6171, "step": 17122 }, { "epoch": 1.2370545631874583, "grad_norm": 6.1998611434646005, "learning_rate": 1.6771722993197234e-06, "loss": 0.631, "step": 17123 }, { "epoch": 1.2371268083876679, "grad_norm": 7.199389368230518, "learning_rate": 1.6768960995557113e-06, "loss": 0.5651, "step": 17124 }, { "epoch": 1.2371990535878772, "grad_norm": 6.004715020550712, "learning_rate": 1.6766199110593527e-06, "loss": 0.686, "step": 17125 }, { "epoch": 1.2372712987880867, "grad_norm": 5.780265765564473, "learning_rate": 1.6763437338344281e-06, "loss": 0.6681, "step": 17126 }, { "epoch": 1.2373435439882963, "grad_norm": 6.5260611186507615, "learning_rate": 1.6760675678847182e-06, "loss": 0.6565, "step": 17127 }, { "epoch": 1.2374157891885058, "grad_norm": 5.79856629903182, "learning_rate": 1.6757914132140035e-06, "loss": 0.642, "step": 17128 }, { "epoch": 1.2374880343887154, "grad_norm": 7.342093552797942, "learning_rate": 1.6755152698260658e-06, "loss": 0.7371, "step": 17129 }, { "epoch": 1.237560279588925, "grad_norm": 6.729349265755742, "learning_rate": 1.6752391377246818e-06, "loss": 0.8148, "step": 17130 }, { "epoch": 1.2376325247891344, "grad_norm": 6.240467278517535, "learning_rate": 1.674963016913634e-06, "loss": 0.6457, "step": 17131 }, { "epoch": 1.2377047699893438, "grad_norm": 5.495868865335211, "learning_rate": 1.6746869073967035e-06, "loss": 0.676, "step": 17132 }, { "epoch": 1.2377770151895533, "grad_norm": 5.814889396212584, "learning_rate": 1.674410809177667e-06, "loss": 0.6526, "step": 17133 }, { "epoch": 1.2378492603897628, "grad_norm": 6.9641812662529485, "learning_rate": 1.6741347222603055e-06, "loss": 0.6337, "step": 17134 }, { "epoch": 1.2379215055899724, "grad_norm": 5.980744298844757, "learning_rate": 1.6738586466483985e-06, "loss": 0.6299, "step": 17135 }, { "epoch": 1.237993750790182, "grad_norm": 5.878456559836001, "learning_rate": 1.673582582345726e-06, "loss": 0.6204, "step": 17136 }, { "epoch": 1.2380659959903915, "grad_norm": 7.035085759294267, "learning_rate": 1.6733065293560651e-06, "loss": 0.6735, "step": 17137 }, { "epoch": 1.238138241190601, "grad_norm": 8.158176706100987, "learning_rate": 1.6730304876831965e-06, "loss": 0.6569, "step": 17138 }, { "epoch": 1.2382104863908103, "grad_norm": 7.18092908283293, "learning_rate": 1.6727544573308985e-06, "loss": 0.6615, "step": 17139 }, { "epoch": 1.2382827315910199, "grad_norm": 8.847126346576337, "learning_rate": 1.6724784383029496e-06, "loss": 0.6303, "step": 17140 }, { "epoch": 1.2383549767912294, "grad_norm": 6.392062522171816, "learning_rate": 1.672202430603128e-06, "loss": 0.6301, "step": 17141 }, { "epoch": 1.238427221991439, "grad_norm": 8.148181885671931, "learning_rate": 1.6719264342352126e-06, "loss": 0.6945, "step": 17142 }, { "epoch": 1.2384994671916485, "grad_norm": 7.596131504698291, "learning_rate": 1.6716504492029823e-06, "loss": 0.6327, "step": 17143 }, { "epoch": 1.238571712391858, "grad_norm": 6.802528853574304, "learning_rate": 1.6713744755102125e-06, "loss": 0.6502, "step": 17144 }, { "epoch": 1.2386439575920676, "grad_norm": 6.448588182756072, "learning_rate": 1.6710985131606832e-06, "loss": 0.6109, "step": 17145 }, { "epoch": 1.238716202792277, "grad_norm": 7.829287140064671, "learning_rate": 1.670822562158172e-06, "loss": 0.6914, "step": 17146 }, { "epoch": 1.2387884479924864, "grad_norm": 8.316214513113824, "learning_rate": 1.670546622506457e-06, "loss": 0.7089, "step": 17147 }, { "epoch": 1.238860693192696, "grad_norm": 7.523270174346455, "learning_rate": 1.6702706942093138e-06, "loss": 0.6553, "step": 17148 }, { "epoch": 1.2389329383929055, "grad_norm": 6.264746092988856, "learning_rate": 1.6699947772705204e-06, "loss": 0.6815, "step": 17149 }, { "epoch": 1.239005183593115, "grad_norm": 7.943819669092356, "learning_rate": 1.6697188716938545e-06, "loss": 0.6959, "step": 17150 }, { "epoch": 1.2390774287933246, "grad_norm": 6.382243211588324, "learning_rate": 1.6694429774830923e-06, "loss": 0.6201, "step": 17151 }, { "epoch": 1.2391496739935342, "grad_norm": 5.643422962400031, "learning_rate": 1.6691670946420104e-06, "loss": 0.6355, "step": 17152 }, { "epoch": 1.2392219191937435, "grad_norm": 7.720405184929248, "learning_rate": 1.6688912231743863e-06, "loss": 0.5891, "step": 17153 }, { "epoch": 1.239294164393953, "grad_norm": 5.963603572163224, "learning_rate": 1.6686153630839964e-06, "loss": 0.6525, "step": 17154 }, { "epoch": 1.2393664095941626, "grad_norm": 6.05688877575325, "learning_rate": 1.6683395143746162e-06, "loss": 0.6059, "step": 17155 }, { "epoch": 1.239438654794372, "grad_norm": 6.823334643735451, "learning_rate": 1.6680636770500222e-06, "loss": 0.6033, "step": 17156 }, { "epoch": 1.2395108999945816, "grad_norm": 5.448312991759629, "learning_rate": 1.6677878511139916e-06, "loss": 0.7245, "step": 17157 }, { "epoch": 1.2395831451947912, "grad_norm": 6.306888011948815, "learning_rate": 1.6675120365702973e-06, "loss": 0.6074, "step": 17158 }, { "epoch": 1.2396553903950007, "grad_norm": 7.8481775683147506, "learning_rate": 1.6672362334227177e-06, "loss": 0.683, "step": 17159 }, { "epoch": 1.23972763559521, "grad_norm": 6.448502998213519, "learning_rate": 1.6669604416750277e-06, "loss": 0.637, "step": 17160 }, { "epoch": 1.2397998807954196, "grad_norm": 6.665722048911851, "learning_rate": 1.6666846613310031e-06, "loss": 0.6439, "step": 17161 }, { "epoch": 1.2398721259956291, "grad_norm": 7.911888554527773, "learning_rate": 1.6664088923944174e-06, "loss": 0.6092, "step": 17162 }, { "epoch": 1.2399443711958387, "grad_norm": 6.937771078776231, "learning_rate": 1.6661331348690468e-06, "loss": 0.7002, "step": 17163 }, { "epoch": 1.2400166163960482, "grad_norm": 5.663817680592671, "learning_rate": 1.6658573887586669e-06, "loss": 0.6137, "step": 17164 }, { "epoch": 1.2400888615962578, "grad_norm": 6.3878758866704555, "learning_rate": 1.6655816540670508e-06, "loss": 0.6132, "step": 17165 }, { "epoch": 1.2401611067964673, "grad_norm": 6.398428855210125, "learning_rate": 1.6653059307979742e-06, "loss": 0.7003, "step": 17166 }, { "epoch": 1.2402333519966766, "grad_norm": 5.951552936909815, "learning_rate": 1.665030218955211e-06, "loss": 0.5775, "step": 17167 }, { "epoch": 1.2403055971968862, "grad_norm": 7.564358569558139, "learning_rate": 1.6647545185425368e-06, "loss": 0.6592, "step": 17168 }, { "epoch": 1.2403778423970957, "grad_norm": 5.794487392079034, "learning_rate": 1.6644788295637238e-06, "loss": 0.558, "step": 17169 }, { "epoch": 1.2404500875973052, "grad_norm": 6.3670506000428375, "learning_rate": 1.664203152022547e-06, "loss": 0.6285, "step": 17170 }, { "epoch": 1.2405223327975148, "grad_norm": 7.362897090857419, "learning_rate": 1.6639274859227811e-06, "loss": 0.6763, "step": 17171 }, { "epoch": 1.2405945779977243, "grad_norm": 7.683379270646579, "learning_rate": 1.663651831268197e-06, "loss": 0.6334, "step": 17172 }, { "epoch": 1.2406668231979339, "grad_norm": 6.37661303888328, "learning_rate": 1.663376188062571e-06, "loss": 0.7154, "step": 17173 }, { "epoch": 1.2407390683981432, "grad_norm": 6.552315502167261, "learning_rate": 1.663100556309675e-06, "loss": 0.6258, "step": 17174 }, { "epoch": 1.2408113135983527, "grad_norm": 7.319546947529597, "learning_rate": 1.6628249360132836e-06, "loss": 0.6598, "step": 17175 }, { "epoch": 1.2408835587985623, "grad_norm": 7.117711136918475, "learning_rate": 1.6625493271771678e-06, "loss": 0.6365, "step": 17176 }, { "epoch": 1.2409558039987718, "grad_norm": 7.019360335895749, "learning_rate": 1.6622737298051017e-06, "loss": 0.6141, "step": 17177 }, { "epoch": 1.2410280491989814, "grad_norm": 7.033448007139928, "learning_rate": 1.661998143900857e-06, "loss": 0.6216, "step": 17178 }, { "epoch": 1.241100294399191, "grad_norm": 6.998741309216256, "learning_rate": 1.6617225694682088e-06, "loss": 0.6445, "step": 17179 }, { "epoch": 1.2411725395994004, "grad_norm": 7.268078855196972, "learning_rate": 1.6614470065109264e-06, "loss": 0.6388, "step": 17180 }, { "epoch": 1.2412447847996098, "grad_norm": 6.12508158726752, "learning_rate": 1.6611714550327838e-06, "loss": 0.5695, "step": 17181 }, { "epoch": 1.2413170299998193, "grad_norm": 7.217106355424438, "learning_rate": 1.6608959150375527e-06, "loss": 0.6653, "step": 17182 }, { "epoch": 1.2413892752000288, "grad_norm": 6.098226145493625, "learning_rate": 1.660620386529005e-06, "loss": 0.6027, "step": 17183 }, { "epoch": 1.2414615204002384, "grad_norm": 6.768052720552185, "learning_rate": 1.660344869510912e-06, "loss": 0.6308, "step": 17184 }, { "epoch": 1.241533765600448, "grad_norm": 7.0380015870352945, "learning_rate": 1.6600693639870464e-06, "loss": 0.6099, "step": 17185 }, { "epoch": 1.2416060108006575, "grad_norm": 8.575859840813477, "learning_rate": 1.6597938699611788e-06, "loss": 0.694, "step": 17186 }, { "epoch": 1.241678256000867, "grad_norm": 6.189061748039433, "learning_rate": 1.6595183874370807e-06, "loss": 0.6288, "step": 17187 }, { "epoch": 1.2417505012010763, "grad_norm": 6.508394542736771, "learning_rate": 1.6592429164185232e-06, "loss": 0.6356, "step": 17188 }, { "epoch": 1.2418227464012859, "grad_norm": 6.24177284439019, "learning_rate": 1.6589674569092785e-06, "loss": 0.6864, "step": 17189 }, { "epoch": 1.2418949916014954, "grad_norm": 7.147295951372373, "learning_rate": 1.658692008913115e-06, "loss": 0.6507, "step": 17190 }, { "epoch": 1.241967236801705, "grad_norm": 7.390502412517287, "learning_rate": 1.6584165724338048e-06, "loss": 0.6456, "step": 17191 }, { "epoch": 1.2420394820019145, "grad_norm": 6.487118501416173, "learning_rate": 1.6581411474751185e-06, "loss": 0.6207, "step": 17192 }, { "epoch": 1.242111727202124, "grad_norm": 6.9168917454632055, "learning_rate": 1.6578657340408264e-06, "loss": 0.6448, "step": 17193 }, { "epoch": 1.2421839724023336, "grad_norm": 6.8339542323927995, "learning_rate": 1.657590332134698e-06, "loss": 0.6902, "step": 17194 }, { "epoch": 1.2422562176025431, "grad_norm": 7.4051288770218395, "learning_rate": 1.6573149417605042e-06, "loss": 0.6648, "step": 17195 }, { "epoch": 1.2423284628027527, "grad_norm": 7.533198820730557, "learning_rate": 1.6570395629220148e-06, "loss": 0.7101, "step": 17196 }, { "epoch": 1.242400708002962, "grad_norm": 6.853903845605414, "learning_rate": 1.6567641956229989e-06, "loss": 0.653, "step": 17197 }, { "epoch": 1.2424729532031715, "grad_norm": 6.696659553540511, "learning_rate": 1.6564888398672262e-06, "loss": 0.6754, "step": 17198 }, { "epoch": 1.242545198403381, "grad_norm": 6.15590794576387, "learning_rate": 1.6562134956584665e-06, "loss": 0.6533, "step": 17199 }, { "epoch": 1.2426174436035906, "grad_norm": 6.957438093452126, "learning_rate": 1.6559381630004895e-06, "loss": 0.6582, "step": 17200 }, { "epoch": 1.2426896888038002, "grad_norm": 6.0008623774988905, "learning_rate": 1.655662841897063e-06, "loss": 0.6628, "step": 17201 }, { "epoch": 1.2427619340040097, "grad_norm": 6.31633740106262, "learning_rate": 1.6553875323519567e-06, "loss": 0.6997, "step": 17202 }, { "epoch": 1.2428341792042192, "grad_norm": 6.315750040801239, "learning_rate": 1.6551122343689391e-06, "loss": 0.588, "step": 17203 }, { "epoch": 1.2429064244044286, "grad_norm": 6.3230861618057315, "learning_rate": 1.6548369479517803e-06, "loss": 0.554, "step": 17204 }, { "epoch": 1.242978669604638, "grad_norm": 7.125514497083318, "learning_rate": 1.6545616731042462e-06, "loss": 0.6538, "step": 17205 }, { "epoch": 1.2430509148048476, "grad_norm": 6.318513027015027, "learning_rate": 1.6542864098301064e-06, "loss": 0.6956, "step": 17206 }, { "epoch": 1.2431231600050572, "grad_norm": 5.923196046376364, "learning_rate": 1.6540111581331296e-06, "loss": 0.6574, "step": 17207 }, { "epoch": 1.2431954052052667, "grad_norm": 6.321768424417561, "learning_rate": 1.6537359180170827e-06, "loss": 0.6174, "step": 17208 }, { "epoch": 1.2432676504054763, "grad_norm": 6.923883495459135, "learning_rate": 1.6534606894857342e-06, "loss": 0.6796, "step": 17209 }, { "epoch": 1.2433398956056858, "grad_norm": 6.527816234863124, "learning_rate": 1.653185472542851e-06, "loss": 0.7307, "step": 17210 }, { "epoch": 1.2434121408058951, "grad_norm": 6.650043871742032, "learning_rate": 1.6529102671922021e-06, "loss": 0.6287, "step": 17211 }, { "epoch": 1.2434843860061047, "grad_norm": 7.003024401574149, "learning_rate": 1.6526350734375534e-06, "loss": 0.6166, "step": 17212 }, { "epoch": 1.2435566312063142, "grad_norm": 6.32284755344315, "learning_rate": 1.6523598912826724e-06, "loss": 0.6341, "step": 17213 }, { "epoch": 1.2436288764065238, "grad_norm": 7.159178821634956, "learning_rate": 1.6520847207313273e-06, "loss": 0.666, "step": 17214 }, { "epoch": 1.2437011216067333, "grad_norm": 7.58820382236507, "learning_rate": 1.6518095617872827e-06, "loss": 0.762, "step": 17215 }, { "epoch": 1.2437733668069428, "grad_norm": 8.345652616934105, "learning_rate": 1.6515344144543073e-06, "loss": 0.7377, "step": 17216 }, { "epoch": 1.2438456120071524, "grad_norm": 6.327778069675979, "learning_rate": 1.6512592787361663e-06, "loss": 0.6704, "step": 17217 }, { "epoch": 1.2439178572073617, "grad_norm": 8.050615407983766, "learning_rate": 1.6509841546366283e-06, "loss": 0.6864, "step": 17218 }, { "epoch": 1.2439901024075712, "grad_norm": 7.271451062155092, "learning_rate": 1.650709042159457e-06, "loss": 0.6565, "step": 17219 }, { "epoch": 1.2440623476077808, "grad_norm": 6.275087284529738, "learning_rate": 1.6504339413084196e-06, "loss": 0.6729, "step": 17220 }, { "epoch": 1.2441345928079903, "grad_norm": 5.257245831043793, "learning_rate": 1.6501588520872822e-06, "loss": 0.6349, "step": 17221 }, { "epoch": 1.2442068380081999, "grad_norm": 7.536872759226437, "learning_rate": 1.6498837744998097e-06, "loss": 0.714, "step": 17222 }, { "epoch": 1.2442790832084094, "grad_norm": 6.795848970689525, "learning_rate": 1.6496087085497683e-06, "loss": 0.6637, "step": 17223 }, { "epoch": 1.244351328408619, "grad_norm": 6.690158208735437, "learning_rate": 1.6493336542409232e-06, "loss": 0.644, "step": 17224 }, { "epoch": 1.2444235736088283, "grad_norm": 6.845149763654937, "learning_rate": 1.6490586115770407e-06, "loss": 0.6505, "step": 17225 }, { "epoch": 1.2444958188090378, "grad_norm": 6.238657310009765, "learning_rate": 1.6487835805618845e-06, "loss": 0.5782, "step": 17226 }, { "epoch": 1.2445680640092474, "grad_norm": 6.482279169960569, "learning_rate": 1.6485085611992202e-06, "loss": 0.5977, "step": 17227 }, { "epoch": 1.244640309209457, "grad_norm": 7.090596443344049, "learning_rate": 1.6482335534928122e-06, "loss": 0.7099, "step": 17228 }, { "epoch": 1.2447125544096664, "grad_norm": 7.1222226267692355, "learning_rate": 1.6479585574464267e-06, "loss": 0.7054, "step": 17229 }, { "epoch": 1.244784799609876, "grad_norm": 6.050971011590245, "learning_rate": 1.6476835730638258e-06, "loss": 0.6467, "step": 17230 }, { "epoch": 1.2448570448100855, "grad_norm": 6.102331135064896, "learning_rate": 1.6474086003487755e-06, "loss": 0.6273, "step": 17231 }, { "epoch": 1.2449292900102948, "grad_norm": 6.7586430915686755, "learning_rate": 1.6471336393050403e-06, "loss": 0.6985, "step": 17232 }, { "epoch": 1.2450015352105044, "grad_norm": 5.949276137998816, "learning_rate": 1.646858689936382e-06, "loss": 0.6167, "step": 17233 }, { "epoch": 1.245073780410714, "grad_norm": 6.1391835935499195, "learning_rate": 1.6465837522465662e-06, "loss": 0.6356, "step": 17234 }, { "epoch": 1.2451460256109235, "grad_norm": 7.303657859000372, "learning_rate": 1.6463088262393557e-06, "loss": 0.6784, "step": 17235 }, { "epoch": 1.245218270811133, "grad_norm": 6.084667146680143, "learning_rate": 1.6460339119185158e-06, "loss": 0.6123, "step": 17236 }, { "epoch": 1.2452905160113426, "grad_norm": 7.228190800507078, "learning_rate": 1.6457590092878079e-06, "loss": 0.636, "step": 17237 }, { "epoch": 1.245362761211552, "grad_norm": 5.569179853124712, "learning_rate": 1.6454841183509956e-06, "loss": 0.6486, "step": 17238 }, { "epoch": 1.2454350064117614, "grad_norm": 6.8017440018031, "learning_rate": 1.645209239111843e-06, "loss": 0.6575, "step": 17239 }, { "epoch": 1.245507251611971, "grad_norm": 7.375952966188425, "learning_rate": 1.6449343715741115e-06, "loss": 0.65, "step": 17240 }, { "epoch": 1.2455794968121805, "grad_norm": 7.942777786368132, "learning_rate": 1.6446595157415645e-06, "loss": 0.6629, "step": 17241 }, { "epoch": 1.24565174201239, "grad_norm": 6.3466757055159455, "learning_rate": 1.6443846716179646e-06, "loss": 0.6668, "step": 17242 }, { "epoch": 1.2457239872125996, "grad_norm": 6.531119459389965, "learning_rate": 1.6441098392070749e-06, "loss": 0.6017, "step": 17243 }, { "epoch": 1.2457962324128091, "grad_norm": 7.102301060845472, "learning_rate": 1.6438350185126561e-06, "loss": 0.6665, "step": 17244 }, { "epoch": 1.2458684776130187, "grad_norm": 6.60535122650109, "learning_rate": 1.6435602095384718e-06, "loss": 0.6943, "step": 17245 }, { "epoch": 1.245940722813228, "grad_norm": 7.456018298147121, "learning_rate": 1.6432854122882835e-06, "loss": 0.602, "step": 17246 }, { "epoch": 1.2460129680134375, "grad_norm": 7.08913752293548, "learning_rate": 1.6430106267658522e-06, "loss": 0.7516, "step": 17247 }, { "epoch": 1.246085213213647, "grad_norm": 5.66513762694918, "learning_rate": 1.6427358529749398e-06, "loss": 0.5787, "step": 17248 }, { "epoch": 1.2461574584138566, "grad_norm": 7.144208213548591, "learning_rate": 1.6424610909193073e-06, "loss": 0.7288, "step": 17249 }, { "epoch": 1.2462297036140662, "grad_norm": 5.698330805464514, "learning_rate": 1.6421863406027183e-06, "loss": 0.5909, "step": 17250 }, { "epoch": 1.2463019488142757, "grad_norm": 6.942064587872077, "learning_rate": 1.6419116020289313e-06, "loss": 0.588, "step": 17251 }, { "epoch": 1.2463741940144852, "grad_norm": 7.529225690730297, "learning_rate": 1.6416368752017081e-06, "loss": 0.7742, "step": 17252 }, { "epoch": 1.2464464392146946, "grad_norm": 6.73049878929019, "learning_rate": 1.6413621601248097e-06, "loss": 0.6078, "step": 17253 }, { "epoch": 1.246518684414904, "grad_norm": 7.068879073876139, "learning_rate": 1.6410874568019973e-06, "loss": 0.6161, "step": 17254 }, { "epoch": 1.2465909296151136, "grad_norm": 6.391099236014105, "learning_rate": 1.6408127652370297e-06, "loss": 0.6163, "step": 17255 }, { "epoch": 1.2466631748153232, "grad_norm": 6.42648231832882, "learning_rate": 1.6405380854336688e-06, "loss": 0.6391, "step": 17256 }, { "epoch": 1.2467354200155327, "grad_norm": 6.006295080740192, "learning_rate": 1.6402634173956745e-06, "loss": 0.6692, "step": 17257 }, { "epoch": 1.2468076652157423, "grad_norm": 8.00403826834144, "learning_rate": 1.6399887611268059e-06, "loss": 0.6758, "step": 17258 }, { "epoch": 1.2468799104159518, "grad_norm": 7.499198107766022, "learning_rate": 1.6397141166308235e-06, "loss": 0.6254, "step": 17259 }, { "epoch": 1.2469521556161611, "grad_norm": 5.691448475950084, "learning_rate": 1.6394394839114865e-06, "loss": 0.693, "step": 17260 }, { "epoch": 1.2470244008163707, "grad_norm": 8.583529522576445, "learning_rate": 1.6391648629725562e-06, "loss": 0.594, "step": 17261 }, { "epoch": 1.2470966460165802, "grad_norm": 6.46934970895362, "learning_rate": 1.6388902538177896e-06, "loss": 0.6391, "step": 17262 }, { "epoch": 1.2471688912167898, "grad_norm": 7.004650206235765, "learning_rate": 1.6386156564509458e-06, "loss": 0.6826, "step": 17263 }, { "epoch": 1.2472411364169993, "grad_norm": 6.861250288663258, "learning_rate": 1.638341070875787e-06, "loss": 0.6077, "step": 17264 }, { "epoch": 1.2473133816172088, "grad_norm": 7.348148408557062, "learning_rate": 1.6380664970960685e-06, "loss": 0.6076, "step": 17265 }, { "epoch": 1.2473856268174184, "grad_norm": 6.437195705888412, "learning_rate": 1.6377919351155502e-06, "loss": 0.5446, "step": 17266 }, { "epoch": 1.247457872017628, "grad_norm": 6.4002898985055445, "learning_rate": 1.637517384937991e-06, "loss": 0.574, "step": 17267 }, { "epoch": 1.2475301172178375, "grad_norm": 6.588492227200459, "learning_rate": 1.63724284656715e-06, "loss": 0.594, "step": 17268 }, { "epoch": 1.2476023624180468, "grad_norm": 7.225420623467893, "learning_rate": 1.6369683200067837e-06, "loss": 0.6946, "step": 17269 }, { "epoch": 1.2476746076182563, "grad_norm": 7.648880631075849, "learning_rate": 1.6366938052606513e-06, "loss": 0.7039, "step": 17270 }, { "epoch": 1.2477468528184659, "grad_norm": 6.438607129937472, "learning_rate": 1.6364193023325102e-06, "loss": 0.6914, "step": 17271 }, { "epoch": 1.2478190980186754, "grad_norm": 7.191729553348475, "learning_rate": 1.6361448112261185e-06, "loss": 0.6581, "step": 17272 }, { "epoch": 1.247891343218885, "grad_norm": 7.298038888122536, "learning_rate": 1.6358703319452334e-06, "loss": 0.7058, "step": 17273 }, { "epoch": 1.2479635884190945, "grad_norm": 6.941724436116192, "learning_rate": 1.6355958644936126e-06, "loss": 0.5903, "step": 17274 }, { "epoch": 1.248035833619304, "grad_norm": 6.627124949419231, "learning_rate": 1.635321408875014e-06, "loss": 0.6639, "step": 17275 }, { "epoch": 1.2481080788195134, "grad_norm": 5.811652008875413, "learning_rate": 1.6350469650931933e-06, "loss": 0.6141, "step": 17276 }, { "epoch": 1.248180324019723, "grad_norm": 7.012981095020243, "learning_rate": 1.634772533151907e-06, "loss": 0.6472, "step": 17277 }, { "epoch": 1.2482525692199324, "grad_norm": 7.633713043408271, "learning_rate": 1.634498113054915e-06, "loss": 0.6584, "step": 17278 }, { "epoch": 1.248324814420142, "grad_norm": 5.733193091222398, "learning_rate": 1.6342237048059707e-06, "loss": 0.6333, "step": 17279 }, { "epoch": 1.2483970596203515, "grad_norm": 7.2588200028980365, "learning_rate": 1.6339493084088315e-06, "loss": 0.6049, "step": 17280 }, { "epoch": 1.248469304820561, "grad_norm": 6.362978502428189, "learning_rate": 1.6336749238672539e-06, "loss": 0.5925, "step": 17281 }, { "epoch": 1.2485415500207706, "grad_norm": 7.96039361565201, "learning_rate": 1.6334005511849942e-06, "loss": 0.7064, "step": 17282 }, { "epoch": 1.24861379522098, "grad_norm": 6.932304292510181, "learning_rate": 1.6331261903658077e-06, "loss": 0.6965, "step": 17283 }, { "epoch": 1.2486860404211895, "grad_norm": 7.824070777198487, "learning_rate": 1.6328518414134505e-06, "loss": 0.693, "step": 17284 }, { "epoch": 1.248758285621399, "grad_norm": 6.954732027684769, "learning_rate": 1.6325775043316786e-06, "loss": 0.6469, "step": 17285 }, { "epoch": 1.2488305308216086, "grad_norm": 6.52538390217777, "learning_rate": 1.6323031791242475e-06, "loss": 0.6194, "step": 17286 }, { "epoch": 1.248902776021818, "grad_norm": 7.178718841926168, "learning_rate": 1.6320288657949116e-06, "loss": 0.629, "step": 17287 }, { "epoch": 1.2489750212220276, "grad_norm": 6.631370216883472, "learning_rate": 1.6317545643474264e-06, "loss": 0.6667, "step": 17288 }, { "epoch": 1.2490472664222372, "grad_norm": 5.98045790638305, "learning_rate": 1.6314802747855485e-06, "loss": 0.663, "step": 17289 }, { "epoch": 1.2491195116224465, "grad_norm": 6.135175388257269, "learning_rate": 1.63120599711303e-06, "loss": 0.6544, "step": 17290 }, { "epoch": 1.249191756822656, "grad_norm": 8.994867344758603, "learning_rate": 1.630931731333627e-06, "loss": 0.6683, "step": 17291 }, { "epoch": 1.2492640020228656, "grad_norm": 6.122764646605699, "learning_rate": 1.630657477451093e-06, "loss": 0.7172, "step": 17292 }, { "epoch": 1.2493362472230751, "grad_norm": 6.886229897478572, "learning_rate": 1.630383235469185e-06, "loss": 0.6175, "step": 17293 }, { "epoch": 1.2494084924232847, "grad_norm": 8.373966437943766, "learning_rate": 1.630109005391654e-06, "loss": 0.6249, "step": 17294 }, { "epoch": 1.2494807376234942, "grad_norm": 7.1744574258593765, "learning_rate": 1.6298347872222552e-06, "loss": 0.6615, "step": 17295 }, { "epoch": 1.2495529828237038, "grad_norm": 6.800238818294535, "learning_rate": 1.6295605809647436e-06, "loss": 0.6707, "step": 17296 }, { "epoch": 1.249625228023913, "grad_norm": 6.389786569675536, "learning_rate": 1.629286386622871e-06, "loss": 0.6021, "step": 17297 }, { "epoch": 1.2496974732241226, "grad_norm": 6.321906003249022, "learning_rate": 1.6290122042003915e-06, "loss": 0.5758, "step": 17298 }, { "epoch": 1.2497697184243322, "grad_norm": 6.047680231935559, "learning_rate": 1.6287380337010586e-06, "loss": 0.6369, "step": 17299 }, { "epoch": 1.2498419636245417, "grad_norm": 6.924595558667889, "learning_rate": 1.6284638751286263e-06, "loss": 0.6271, "step": 17300 }, { "epoch": 1.2499142088247512, "grad_norm": 6.815413105927135, "learning_rate": 1.6281897284868465e-06, "loss": 0.6486, "step": 17301 }, { "epoch": 1.2499864540249608, "grad_norm": 7.048045262099383, "learning_rate": 1.627915593779472e-06, "loss": 0.6777, "step": 17302 }, { "epoch": 1.2500586992251703, "grad_norm": 6.774305312235263, "learning_rate": 1.627641471010257e-06, "loss": 0.6356, "step": 17303 }, { "epoch": 1.2501309444253796, "grad_norm": 9.70943404303439, "learning_rate": 1.627367360182952e-06, "loss": 0.7046, "step": 17304 }, { "epoch": 1.2502031896255892, "grad_norm": 6.297429895680906, "learning_rate": 1.6270932613013102e-06, "loss": 0.6897, "step": 17305 }, { "epoch": 1.2502754348257987, "grad_norm": 6.702434370835818, "learning_rate": 1.626819174369083e-06, "loss": 0.6225, "step": 17306 }, { "epoch": 1.2503476800260083, "grad_norm": 7.162803614174716, "learning_rate": 1.6265450993900257e-06, "loss": 0.6711, "step": 17307 }, { "epoch": 1.2504199252262178, "grad_norm": 6.528123609251438, "learning_rate": 1.6262710363678863e-06, "loss": 0.6702, "step": 17308 }, { "epoch": 1.2504921704264274, "grad_norm": 7.310172207704743, "learning_rate": 1.6259969853064177e-06, "loss": 0.6106, "step": 17309 }, { "epoch": 1.250564415626637, "grad_norm": 7.627265468556344, "learning_rate": 1.6257229462093723e-06, "loss": 0.6315, "step": 17310 }, { "epoch": 1.2506366608268462, "grad_norm": 6.248045653915167, "learning_rate": 1.6254489190805012e-06, "loss": 0.6287, "step": 17311 }, { "epoch": 1.2507089060270558, "grad_norm": 7.014098547224425, "learning_rate": 1.6251749039235548e-06, "loss": 0.6152, "step": 17312 }, { "epoch": 1.2507811512272653, "grad_norm": 7.023730516951505, "learning_rate": 1.624900900742285e-06, "loss": 0.6395, "step": 17313 }, { "epoch": 1.2508533964274748, "grad_norm": 6.7818777611216845, "learning_rate": 1.6246269095404426e-06, "loss": 0.6157, "step": 17314 }, { "epoch": 1.2509256416276844, "grad_norm": 8.605366793256659, "learning_rate": 1.6243529303217774e-06, "loss": 0.5962, "step": 17315 }, { "epoch": 1.250997886827894, "grad_norm": 6.463998422358103, "learning_rate": 1.6240789630900414e-06, "loss": 0.5661, "step": 17316 }, { "epoch": 1.2510701320281035, "grad_norm": 5.956629725197418, "learning_rate": 1.6238050078489837e-06, "loss": 0.5716, "step": 17317 }, { "epoch": 1.2511423772283128, "grad_norm": 7.90880293207255, "learning_rate": 1.6235310646023567e-06, "loss": 0.7175, "step": 17318 }, { "epoch": 1.2512146224285223, "grad_norm": 7.983182157334112, "learning_rate": 1.6232571333539071e-06, "loss": 0.5819, "step": 17319 }, { "epoch": 1.2512868676287319, "grad_norm": 6.87407775241807, "learning_rate": 1.6229832141073865e-06, "loss": 0.6723, "step": 17320 }, { "epoch": 1.2513591128289414, "grad_norm": 6.318606303134708, "learning_rate": 1.6227093068665465e-06, "loss": 0.6097, "step": 17321 }, { "epoch": 1.251431358029151, "grad_norm": 5.937410695257669, "learning_rate": 1.6224354116351338e-06, "loss": 0.6492, "step": 17322 }, { "epoch": 1.2515036032293605, "grad_norm": 6.0696859343221865, "learning_rate": 1.6221615284168985e-06, "loss": 0.6197, "step": 17323 }, { "epoch": 1.25157584842957, "grad_norm": 6.632715015571165, "learning_rate": 1.6218876572155905e-06, "loss": 0.7417, "step": 17324 }, { "epoch": 1.2516480936297794, "grad_norm": 7.353272063944508, "learning_rate": 1.6216137980349595e-06, "loss": 0.6958, "step": 17325 }, { "epoch": 1.2517203388299891, "grad_norm": 6.271552803951658, "learning_rate": 1.6213399508787525e-06, "loss": 0.6648, "step": 17326 }, { "epoch": 1.2517925840301984, "grad_norm": 8.356289131797633, "learning_rate": 1.6210661157507196e-06, "loss": 0.5988, "step": 17327 }, { "epoch": 1.251864829230408, "grad_norm": 6.44311426765539, "learning_rate": 1.62079229265461e-06, "loss": 0.7039, "step": 17328 }, { "epoch": 1.2519370744306175, "grad_norm": 7.021830170118633, "learning_rate": 1.6205184815941705e-06, "loss": 0.6088, "step": 17329 }, { "epoch": 1.252009319630827, "grad_norm": 7.426811937364411, "learning_rate": 1.62024468257315e-06, "loss": 0.6108, "step": 17330 }, { "epoch": 1.2520815648310366, "grad_norm": 5.224008069953702, "learning_rate": 1.6199708955952964e-06, "loss": 0.6649, "step": 17331 }, { "epoch": 1.252153810031246, "grad_norm": 7.405894596891954, "learning_rate": 1.6196971206643593e-06, "loss": 0.5967, "step": 17332 }, { "epoch": 1.2522260552314557, "grad_norm": 6.059208556511224, "learning_rate": 1.6194233577840842e-06, "loss": 0.6322, "step": 17333 }, { "epoch": 1.252298300431665, "grad_norm": 6.708126594090466, "learning_rate": 1.6191496069582192e-06, "loss": 0.6784, "step": 17334 }, { "epoch": 1.2523705456318746, "grad_norm": 7.516052868375451, "learning_rate": 1.6188758681905123e-06, "loss": 0.6965, "step": 17335 }, { "epoch": 1.252442790832084, "grad_norm": 6.438645936765233, "learning_rate": 1.618602141484712e-06, "loss": 0.6578, "step": 17336 }, { "epoch": 1.2525150360322936, "grad_norm": 7.501241199785107, "learning_rate": 1.6183284268445626e-06, "loss": 0.6326, "step": 17337 }, { "epoch": 1.2525872812325032, "grad_norm": 8.170563630887818, "learning_rate": 1.6180547242738126e-06, "loss": 0.6718, "step": 17338 }, { "epoch": 1.2526595264327125, "grad_norm": 7.924289553594014, "learning_rate": 1.6177810337762093e-06, "loss": 0.62, "step": 17339 }, { "epoch": 1.2527317716329223, "grad_norm": 6.86281574127425, "learning_rate": 1.617507355355498e-06, "loss": 0.6095, "step": 17340 }, { "epoch": 1.2528040168331316, "grad_norm": 6.853280177275144, "learning_rate": 1.6172336890154258e-06, "loss": 0.585, "step": 17341 }, { "epoch": 1.2528762620333411, "grad_norm": 7.998640898651842, "learning_rate": 1.6169600347597388e-06, "loss": 0.6588, "step": 17342 }, { "epoch": 1.2529485072335507, "grad_norm": 7.526087087386368, "learning_rate": 1.6166863925921841e-06, "loss": 0.6246, "step": 17343 }, { "epoch": 1.2530207524337602, "grad_norm": 7.490148813885502, "learning_rate": 1.6164127625165063e-06, "loss": 0.6327, "step": 17344 }, { "epoch": 1.2530929976339698, "grad_norm": 8.053785242112818, "learning_rate": 1.6161391445364516e-06, "loss": 0.6953, "step": 17345 }, { "epoch": 1.253165242834179, "grad_norm": 6.782517275199445, "learning_rate": 1.615865538655767e-06, "loss": 0.5918, "step": 17346 }, { "epoch": 1.2532374880343888, "grad_norm": 7.218367661428492, "learning_rate": 1.6155919448781953e-06, "loss": 0.6949, "step": 17347 }, { "epoch": 1.2533097332345982, "grad_norm": 6.605547001587798, "learning_rate": 1.6153183632074829e-06, "loss": 0.6585, "step": 17348 }, { "epoch": 1.2533819784348077, "grad_norm": 7.405164164205148, "learning_rate": 1.6150447936473756e-06, "loss": 0.617, "step": 17349 }, { "epoch": 1.2534542236350172, "grad_norm": 5.799960484863372, "learning_rate": 1.614771236201619e-06, "loss": 0.6358, "step": 17350 }, { "epoch": 1.2535264688352268, "grad_norm": 6.52659477534589, "learning_rate": 1.6144976908739557e-06, "loss": 0.6828, "step": 17351 }, { "epoch": 1.2535987140354363, "grad_norm": 6.278805992671179, "learning_rate": 1.6142241576681318e-06, "loss": 0.6426, "step": 17352 }, { "epoch": 1.2536709592356456, "grad_norm": 6.900261815950327, "learning_rate": 1.6139506365878915e-06, "loss": 0.6344, "step": 17353 }, { "epoch": 1.2537432044358554, "grad_norm": 8.028391050902474, "learning_rate": 1.613677127636979e-06, "loss": 0.6192, "step": 17354 }, { "epoch": 1.2538154496360647, "grad_norm": 6.584160217634772, "learning_rate": 1.6134036308191382e-06, "loss": 0.6519, "step": 17355 }, { "epoch": 1.2538876948362743, "grad_norm": 6.819979699963201, "learning_rate": 1.6131301461381133e-06, "loss": 0.6356, "step": 17356 }, { "epoch": 1.2539599400364838, "grad_norm": 5.0773468829930515, "learning_rate": 1.6128566735976486e-06, "loss": 0.5931, "step": 17357 }, { "epoch": 1.2540321852366934, "grad_norm": 7.200957065127426, "learning_rate": 1.6125832132014865e-06, "loss": 0.6501, "step": 17358 }, { "epoch": 1.254104430436903, "grad_norm": 6.305992473565137, "learning_rate": 1.6123097649533714e-06, "loss": 0.6577, "step": 17359 }, { "epoch": 1.2541766756371124, "grad_norm": 6.426364786383962, "learning_rate": 1.6120363288570462e-06, "loss": 0.6892, "step": 17360 }, { "epoch": 1.254248920837322, "grad_norm": 6.716327461152388, "learning_rate": 1.6117629049162553e-06, "loss": 0.7248, "step": 17361 }, { "epoch": 1.2543211660375313, "grad_norm": 6.501640699631853, "learning_rate": 1.611489493134739e-06, "loss": 0.6059, "step": 17362 }, { "epoch": 1.2543934112377408, "grad_norm": 6.564916256721305, "learning_rate": 1.6112160935162424e-06, "loss": 0.6495, "step": 17363 }, { "epoch": 1.2544656564379504, "grad_norm": 6.407976020339503, "learning_rate": 1.6109427060645084e-06, "loss": 0.686, "step": 17364 }, { "epoch": 1.25453790163816, "grad_norm": 7.055202258351634, "learning_rate": 1.6106693307832772e-06, "loss": 0.6522, "step": 17365 }, { "epoch": 1.2546101468383695, "grad_norm": 6.961327401660924, "learning_rate": 1.6103959676762922e-06, "loss": 0.6498, "step": 17366 }, { "epoch": 1.254682392038579, "grad_norm": 6.930647481420387, "learning_rate": 1.610122616747296e-06, "loss": 0.6283, "step": 17367 }, { "epoch": 1.2547546372387886, "grad_norm": 6.544568367896413, "learning_rate": 1.6098492780000308e-06, "loss": 0.6693, "step": 17368 }, { "epoch": 1.2548268824389979, "grad_norm": 5.849745994335326, "learning_rate": 1.609575951438237e-06, "loss": 0.5172, "step": 17369 }, { "epoch": 1.2548991276392074, "grad_norm": 6.181859692010891, "learning_rate": 1.6093026370656573e-06, "loss": 0.5537, "step": 17370 }, { "epoch": 1.254971372839417, "grad_norm": 8.138042135388787, "learning_rate": 1.6090293348860332e-06, "loss": 0.678, "step": 17371 }, { "epoch": 1.2550436180396265, "grad_norm": 7.195320452458961, "learning_rate": 1.6087560449031054e-06, "loss": 0.6323, "step": 17372 }, { "epoch": 1.255115863239836, "grad_norm": 6.477749521329455, "learning_rate": 1.6084827671206152e-06, "loss": 0.6314, "step": 17373 }, { "epoch": 1.2551881084400456, "grad_norm": 5.766716913140338, "learning_rate": 1.608209501542304e-06, "loss": 0.6362, "step": 17374 }, { "epoch": 1.2552603536402551, "grad_norm": 5.8093548540953135, "learning_rate": 1.607936248171913e-06, "loss": 0.6989, "step": 17375 }, { "epoch": 1.2553325988404644, "grad_norm": 7.10203062220519, "learning_rate": 1.6076630070131805e-06, "loss": 0.7208, "step": 17376 }, { "epoch": 1.255404844040674, "grad_norm": 7.49709925186954, "learning_rate": 1.6073897780698491e-06, "loss": 0.5791, "step": 17377 }, { "epoch": 1.2554770892408835, "grad_norm": 6.8967363240759125, "learning_rate": 1.6071165613456596e-06, "loss": 0.5967, "step": 17378 }, { "epoch": 1.255549334441093, "grad_norm": 6.472657134033585, "learning_rate": 1.6068433568443503e-06, "loss": 0.6187, "step": 17379 }, { "epoch": 1.2556215796413026, "grad_norm": 6.4604952567432745, "learning_rate": 1.6065701645696618e-06, "loss": 0.6066, "step": 17380 }, { "epoch": 1.2556938248415122, "grad_norm": 8.465226795382721, "learning_rate": 1.6062969845253343e-06, "loss": 0.6212, "step": 17381 }, { "epoch": 1.2557660700417217, "grad_norm": 6.786930078566997, "learning_rate": 1.6060238167151077e-06, "loss": 0.6078, "step": 17382 }, { "epoch": 1.255838315241931, "grad_norm": 7.241307802896453, "learning_rate": 1.6057506611427198e-06, "loss": 0.6569, "step": 17383 }, { "epoch": 1.2559105604421406, "grad_norm": 7.331695402620505, "learning_rate": 1.6054775178119114e-06, "loss": 0.6565, "step": 17384 }, { "epoch": 1.25598280564235, "grad_norm": 6.777275643569898, "learning_rate": 1.6052043867264217e-06, "loss": 0.7011, "step": 17385 }, { "epoch": 1.2560550508425596, "grad_norm": 6.0320655389964095, "learning_rate": 1.6049312678899888e-06, "loss": 0.6432, "step": 17386 }, { "epoch": 1.2561272960427692, "grad_norm": 8.491512324384614, "learning_rate": 1.604658161306352e-06, "loss": 0.7126, "step": 17387 }, { "epoch": 1.2561995412429787, "grad_norm": 8.086875794744774, "learning_rate": 1.6043850669792494e-06, "loss": 0.6345, "step": 17388 }, { "epoch": 1.2562717864431883, "grad_norm": 6.682634526706462, "learning_rate": 1.6041119849124212e-06, "loss": 0.6138, "step": 17389 }, { "epoch": 1.2563440316433976, "grad_norm": 6.645321753571913, "learning_rate": 1.6038389151096031e-06, "loss": 0.718, "step": 17390 }, { "epoch": 1.2564162768436071, "grad_norm": 6.3406532884834395, "learning_rate": 1.6035658575745339e-06, "loss": 0.6084, "step": 17391 }, { "epoch": 1.2564885220438167, "grad_norm": 9.283165936242048, "learning_rate": 1.6032928123109525e-06, "loss": 0.6218, "step": 17392 }, { "epoch": 1.2565607672440262, "grad_norm": 6.854976277717262, "learning_rate": 1.6030197793225976e-06, "loss": 0.6834, "step": 17393 }, { "epoch": 1.2566330124442358, "grad_norm": 7.238410104509446, "learning_rate": 1.6027467586132041e-06, "loss": 0.6231, "step": 17394 }, { "epoch": 1.2567052576444453, "grad_norm": 6.615378825094729, "learning_rate": 1.6024737501865111e-06, "loss": 0.6347, "step": 17395 }, { "epoch": 1.2567775028446548, "grad_norm": 5.989482244108436, "learning_rate": 1.602200754046256e-06, "loss": 0.6433, "step": 17396 }, { "epoch": 1.2568497480448642, "grad_norm": 7.933574036010711, "learning_rate": 1.6019277701961747e-06, "loss": 0.6562, "step": 17397 }, { "epoch": 1.2569219932450737, "grad_norm": 6.023693351407994, "learning_rate": 1.601654798640005e-06, "loss": 0.6673, "step": 17398 }, { "epoch": 1.2569942384452832, "grad_norm": 9.000002543131151, "learning_rate": 1.6013818393814839e-06, "loss": 0.6608, "step": 17399 }, { "epoch": 1.2570664836454928, "grad_norm": 6.266645315147441, "learning_rate": 1.601108892424348e-06, "loss": 0.5871, "step": 17400 }, { "epoch": 1.2571387288457023, "grad_norm": 6.407467014735376, "learning_rate": 1.6008359577723328e-06, "loss": 0.6391, "step": 17401 }, { "epoch": 1.2572109740459119, "grad_norm": 8.174865292247128, "learning_rate": 1.6005630354291751e-06, "loss": 0.5952, "step": 17402 }, { "epoch": 1.2572832192461214, "grad_norm": 6.7569276087012, "learning_rate": 1.6002901253986125e-06, "loss": 0.6184, "step": 17403 }, { "epoch": 1.2573554644463307, "grad_norm": 7.063275319980781, "learning_rate": 1.6000172276843783e-06, "loss": 0.7081, "step": 17404 }, { "epoch": 1.2574277096465405, "grad_norm": 6.064877702528787, "learning_rate": 1.5997443422902088e-06, "loss": 0.684, "step": 17405 }, { "epoch": 1.2574999548467498, "grad_norm": 7.900126019209109, "learning_rate": 1.5994714692198409e-06, "loss": 0.628, "step": 17406 }, { "epoch": 1.2575722000469594, "grad_norm": 5.822803512138014, "learning_rate": 1.5991986084770105e-06, "loss": 0.6517, "step": 17407 }, { "epoch": 1.257644445247169, "grad_norm": 6.52927671857867, "learning_rate": 1.5989257600654506e-06, "loss": 0.5919, "step": 17408 }, { "epoch": 1.2577166904473784, "grad_norm": 6.515884943106107, "learning_rate": 1.5986529239888973e-06, "loss": 0.6286, "step": 17409 }, { "epoch": 1.257788935647588, "grad_norm": 6.7170779964433, "learning_rate": 1.5983801002510862e-06, "loss": 0.5895, "step": 17410 }, { "epoch": 1.2578611808477973, "grad_norm": 6.4323754514933755, "learning_rate": 1.5981072888557508e-06, "loss": 0.6497, "step": 17411 }, { "epoch": 1.257933426048007, "grad_norm": 7.433910882291997, "learning_rate": 1.5978344898066268e-06, "loss": 0.7114, "step": 17412 }, { "epoch": 1.2580056712482164, "grad_norm": 7.122569422492325, "learning_rate": 1.5975617031074475e-06, "loss": 0.601, "step": 17413 }, { "epoch": 1.258077916448426, "grad_norm": 5.942924511181802, "learning_rate": 1.5972889287619487e-06, "loss": 0.5537, "step": 17414 }, { "epoch": 1.2581501616486355, "grad_norm": 5.854992827452608, "learning_rate": 1.5970161667738632e-06, "loss": 0.6081, "step": 17415 }, { "epoch": 1.258222406848845, "grad_norm": 7.449550579263679, "learning_rate": 1.5967434171469248e-06, "loss": 0.6371, "step": 17416 }, { "epoch": 1.2582946520490546, "grad_norm": 6.988269786708038, "learning_rate": 1.5964706798848681e-06, "loss": 0.6328, "step": 17417 }, { "epoch": 1.2583668972492639, "grad_norm": 7.631009844626024, "learning_rate": 1.596197954991427e-06, "loss": 0.6859, "step": 17418 }, { "epoch": 1.2584391424494736, "grad_norm": 5.57034913809392, "learning_rate": 1.5959252424703327e-06, "loss": 0.5958, "step": 17419 }, { "epoch": 1.258511387649683, "grad_norm": 7.195530659270785, "learning_rate": 1.5956525423253206e-06, "loss": 0.6678, "step": 17420 }, { "epoch": 1.2585836328498925, "grad_norm": 6.604512619760831, "learning_rate": 1.5953798545601238e-06, "loss": 0.6453, "step": 17421 }, { "epoch": 1.258655878050102, "grad_norm": 5.934333679794601, "learning_rate": 1.5951071791784733e-06, "loss": 0.6162, "step": 17422 }, { "epoch": 1.2587281232503116, "grad_norm": 6.9749129204458695, "learning_rate": 1.594834516184103e-06, "loss": 0.6282, "step": 17423 }, { "epoch": 1.2588003684505211, "grad_norm": 7.1518538519244865, "learning_rate": 1.5945618655807455e-06, "loss": 0.5808, "step": 17424 }, { "epoch": 1.2588726136507304, "grad_norm": 7.0878039743945624, "learning_rate": 1.5942892273721333e-06, "loss": 0.6404, "step": 17425 }, { "epoch": 1.2589448588509402, "grad_norm": 6.806371049181117, "learning_rate": 1.594016601561998e-06, "loss": 0.6236, "step": 17426 }, { "epoch": 1.2590171040511495, "grad_norm": 7.066520424295286, "learning_rate": 1.5937439881540717e-06, "loss": 0.6359, "step": 17427 }, { "epoch": 1.259089349251359, "grad_norm": 7.10395005717206, "learning_rate": 1.5934713871520875e-06, "loss": 0.6891, "step": 17428 }, { "epoch": 1.2591615944515686, "grad_norm": 5.965917744155488, "learning_rate": 1.5931987985597752e-06, "loss": 0.5961, "step": 17429 }, { "epoch": 1.2592338396517782, "grad_norm": 8.062991105137314, "learning_rate": 1.5929262223808676e-06, "loss": 0.6436, "step": 17430 }, { "epoch": 1.2593060848519877, "grad_norm": 8.229706282166008, "learning_rate": 1.5926536586190956e-06, "loss": 0.5821, "step": 17431 }, { "epoch": 1.2593783300521972, "grad_norm": 6.148361496413185, "learning_rate": 1.5923811072781914e-06, "loss": 0.577, "step": 17432 }, { "epoch": 1.2594505752524068, "grad_norm": 6.924814258935178, "learning_rate": 1.5921085683618837e-06, "loss": 0.7321, "step": 17433 }, { "epoch": 1.259522820452616, "grad_norm": 7.176023924601637, "learning_rate": 1.5918360418739054e-06, "loss": 0.669, "step": 17434 }, { "epoch": 1.2595950656528256, "grad_norm": 6.073471680847507, "learning_rate": 1.5915635278179872e-06, "loss": 0.6193, "step": 17435 }, { "epoch": 1.2596673108530352, "grad_norm": 7.938297982541291, "learning_rate": 1.5912910261978582e-06, "loss": 0.6316, "step": 17436 }, { "epoch": 1.2597395560532447, "grad_norm": 7.144438612114698, "learning_rate": 1.5910185370172493e-06, "loss": 0.6522, "step": 17437 }, { "epoch": 1.2598118012534543, "grad_norm": 6.70422781028707, "learning_rate": 1.5907460602798913e-06, "loss": 0.6362, "step": 17438 }, { "epoch": 1.2598840464536638, "grad_norm": 6.64722844929591, "learning_rate": 1.590473595989514e-06, "loss": 0.6671, "step": 17439 }, { "epoch": 1.2599562916538734, "grad_norm": 8.014640762550394, "learning_rate": 1.590201144149846e-06, "loss": 0.7188, "step": 17440 }, { "epoch": 1.2600285368540827, "grad_norm": 6.299488519159897, "learning_rate": 1.5899287047646183e-06, "loss": 0.6867, "step": 17441 }, { "epoch": 1.2601007820542922, "grad_norm": 5.303706711901051, "learning_rate": 1.5896562778375603e-06, "loss": 0.5977, "step": 17442 }, { "epoch": 1.2601730272545018, "grad_norm": 6.371883153980345, "learning_rate": 1.5893838633724012e-06, "loss": 0.685, "step": 17443 }, { "epoch": 1.2602452724547113, "grad_norm": 9.04629267002662, "learning_rate": 1.5891114613728695e-06, "loss": 0.6747, "step": 17444 }, { "epoch": 1.2603175176549208, "grad_norm": 7.829175562614136, "learning_rate": 1.5888390718426945e-06, "loss": 0.6992, "step": 17445 }, { "epoch": 1.2603897628551304, "grad_norm": 6.5655254837621095, "learning_rate": 1.5885666947856066e-06, "loss": 0.675, "step": 17446 }, { "epoch": 1.26046200805534, "grad_norm": 6.207177210268588, "learning_rate": 1.588294330205331e-06, "loss": 0.5381, "step": 17447 }, { "epoch": 1.2605342532555492, "grad_norm": 8.278165422731856, "learning_rate": 1.588021978105599e-06, "loss": 0.6735, "step": 17448 }, { "epoch": 1.2606064984557588, "grad_norm": 7.099819729087977, "learning_rate": 1.5877496384901374e-06, "loss": 0.6856, "step": 17449 }, { "epoch": 1.2606787436559683, "grad_norm": 7.042395950865001, "learning_rate": 1.5874773113626764e-06, "loss": 0.7032, "step": 17450 }, { "epoch": 1.2607509888561779, "grad_norm": 6.332830040994705, "learning_rate": 1.5872049967269414e-06, "loss": 0.7138, "step": 17451 }, { "epoch": 1.2608232340563874, "grad_norm": 5.7955376658199285, "learning_rate": 1.5869326945866614e-06, "loss": 0.6685, "step": 17452 }, { "epoch": 1.260895479256597, "grad_norm": 6.822130308384623, "learning_rate": 1.5866604049455642e-06, "loss": 0.6074, "step": 17453 }, { "epoch": 1.2609677244568065, "grad_norm": 6.256569423390667, "learning_rate": 1.5863881278073765e-06, "loss": 0.6147, "step": 17454 }, { "epoch": 1.2610399696570158, "grad_norm": 6.082821794018233, "learning_rate": 1.5861158631758258e-06, "loss": 0.6011, "step": 17455 }, { "epoch": 1.2611122148572254, "grad_norm": 6.133116035482086, "learning_rate": 1.5858436110546394e-06, "loss": 0.6553, "step": 17456 }, { "epoch": 1.261184460057435, "grad_norm": 6.885238791364562, "learning_rate": 1.5855713714475446e-06, "loss": 0.7171, "step": 17457 }, { "epoch": 1.2612567052576444, "grad_norm": 7.23704449895356, "learning_rate": 1.5852991443582674e-06, "loss": 0.6869, "step": 17458 }, { "epoch": 1.261328950457854, "grad_norm": 7.290256153101346, "learning_rate": 1.5850269297905346e-06, "loss": 0.6464, "step": 17459 }, { "epoch": 1.2614011956580635, "grad_norm": 6.6882455000285645, "learning_rate": 1.5847547277480737e-06, "loss": 0.6234, "step": 17460 }, { "epoch": 1.261473440858273, "grad_norm": 5.469212278173666, "learning_rate": 1.5844825382346084e-06, "loss": 0.6041, "step": 17461 }, { "epoch": 1.2615456860584824, "grad_norm": 6.522944521979043, "learning_rate": 1.5842103612538668e-06, "loss": 0.5849, "step": 17462 }, { "epoch": 1.261617931258692, "grad_norm": 7.549026214250899, "learning_rate": 1.5839381968095747e-06, "loss": 0.6349, "step": 17463 }, { "epoch": 1.2616901764589015, "grad_norm": 5.826333923848469, "learning_rate": 1.5836660449054577e-06, "loss": 0.6469, "step": 17464 }, { "epoch": 1.261762421659111, "grad_norm": 6.379046782553323, "learning_rate": 1.5833939055452404e-06, "loss": 0.6917, "step": 17465 }, { "epoch": 1.2618346668593206, "grad_norm": 6.571029496958779, "learning_rate": 1.583121778732649e-06, "loss": 0.5924, "step": 17466 }, { "epoch": 1.26190691205953, "grad_norm": 6.959144164335152, "learning_rate": 1.582849664471408e-06, "loss": 0.6543, "step": 17467 }, { "epoch": 1.2619791572597396, "grad_norm": 7.382464236164114, "learning_rate": 1.5825775627652439e-06, "loss": 0.6012, "step": 17468 }, { "epoch": 1.262051402459949, "grad_norm": 6.242289860999747, "learning_rate": 1.5823054736178804e-06, "loss": 0.656, "step": 17469 }, { "epoch": 1.2621236476601585, "grad_norm": 6.82101999569993, "learning_rate": 1.582033397033042e-06, "loss": 0.611, "step": 17470 }, { "epoch": 1.262195892860368, "grad_norm": 7.764719121585106, "learning_rate": 1.5817613330144543e-06, "loss": 0.6392, "step": 17471 }, { "epoch": 1.2622681380605776, "grad_norm": 5.971754985343963, "learning_rate": 1.5814892815658402e-06, "loss": 0.579, "step": 17472 }, { "epoch": 1.2623403832607871, "grad_norm": 6.574502196522569, "learning_rate": 1.581217242690925e-06, "loss": 0.6518, "step": 17473 }, { "epoch": 1.2624126284609967, "grad_norm": 7.597816163564519, "learning_rate": 1.580945216393432e-06, "loss": 0.6328, "step": 17474 }, { "epoch": 1.2624848736612062, "grad_norm": 5.6424753635935145, "learning_rate": 1.5806732026770863e-06, "loss": 0.589, "step": 17475 }, { "epoch": 1.2625571188614155, "grad_norm": 5.5500412191973885, "learning_rate": 1.5804012015456099e-06, "loss": 0.5906, "step": 17476 }, { "epoch": 1.2626293640616253, "grad_norm": 7.311630066860546, "learning_rate": 1.5801292130027271e-06, "loss": 0.6527, "step": 17477 }, { "epoch": 1.2627016092618346, "grad_norm": 7.169292442066584, "learning_rate": 1.5798572370521623e-06, "loss": 0.6264, "step": 17478 }, { "epoch": 1.2627738544620442, "grad_norm": 6.226158751221346, "learning_rate": 1.5795852736976364e-06, "loss": 0.6168, "step": 17479 }, { "epoch": 1.2628460996622537, "grad_norm": 6.624524081428032, "learning_rate": 1.5793133229428731e-06, "loss": 0.6137, "step": 17480 }, { "epoch": 1.2629183448624632, "grad_norm": 6.218275502567418, "learning_rate": 1.579041384791596e-06, "loss": 0.6904, "step": 17481 }, { "epoch": 1.2629905900626728, "grad_norm": 6.247250371726048, "learning_rate": 1.5787694592475275e-06, "loss": 0.6554, "step": 17482 }, { "epoch": 1.263062835262882, "grad_norm": 7.887455853370413, "learning_rate": 1.5784975463143892e-06, "loss": 0.6567, "step": 17483 }, { "epoch": 1.2631350804630919, "grad_norm": 5.898363125565868, "learning_rate": 1.5782256459959044e-06, "loss": 0.5455, "step": 17484 }, { "epoch": 1.2632073256633012, "grad_norm": 7.230878673043639, "learning_rate": 1.5779537582957949e-06, "loss": 0.6671, "step": 17485 }, { "epoch": 1.2632795708635107, "grad_norm": 6.78742467792792, "learning_rate": 1.5776818832177823e-06, "loss": 0.7247, "step": 17486 }, { "epoch": 1.2633518160637203, "grad_norm": 5.9701664227623095, "learning_rate": 1.5774100207655885e-06, "loss": 0.6645, "step": 17487 }, { "epoch": 1.2634240612639298, "grad_norm": 6.562212256526531, "learning_rate": 1.5771381709429352e-06, "loss": 0.5865, "step": 17488 }, { "epoch": 1.2634963064641394, "grad_norm": 6.850562202048154, "learning_rate": 1.5768663337535439e-06, "loss": 0.6056, "step": 17489 }, { "epoch": 1.2635685516643487, "grad_norm": 7.206318509387455, "learning_rate": 1.576594509201136e-06, "loss": 0.6945, "step": 17490 }, { "epoch": 1.2636407968645584, "grad_norm": 6.340510701690516, "learning_rate": 1.5763226972894317e-06, "loss": 0.6379, "step": 17491 }, { "epoch": 1.2637130420647678, "grad_norm": 6.339161683985737, "learning_rate": 1.5760508980221525e-06, "loss": 0.6838, "step": 17492 }, { "epoch": 1.2637852872649773, "grad_norm": 7.338917542346376, "learning_rate": 1.5757791114030201e-06, "loss": 0.6268, "step": 17493 }, { "epoch": 1.2638575324651868, "grad_norm": 5.905713576727354, "learning_rate": 1.5755073374357532e-06, "loss": 0.6804, "step": 17494 }, { "epoch": 1.2639297776653964, "grad_norm": 6.120068803288278, "learning_rate": 1.575235576124073e-06, "loss": 0.6649, "step": 17495 }, { "epoch": 1.264002022865606, "grad_norm": 6.565369187700017, "learning_rate": 1.5749638274717005e-06, "loss": 0.587, "step": 17496 }, { "epoch": 1.2640742680658152, "grad_norm": 7.0247930421583415, "learning_rate": 1.574692091482354e-06, "loss": 0.6533, "step": 17497 }, { "epoch": 1.264146513266025, "grad_norm": 7.205255222393181, "learning_rate": 1.5744203681597548e-06, "loss": 0.6833, "step": 17498 }, { "epoch": 1.2642187584662343, "grad_norm": 7.381340017262411, "learning_rate": 1.5741486575076214e-06, "loss": 0.6286, "step": 17499 }, { "epoch": 1.2642910036664439, "grad_norm": 6.978700111698699, "learning_rate": 1.5738769595296752e-06, "loss": 0.6421, "step": 17500 }, { "epoch": 1.2643632488666534, "grad_norm": 7.364965303938821, "learning_rate": 1.5736052742296337e-06, "loss": 0.6644, "step": 17501 }, { "epoch": 1.264435494066863, "grad_norm": 5.889216004089361, "learning_rate": 1.5733336016112163e-06, "loss": 0.6209, "step": 17502 }, { "epoch": 1.2645077392670725, "grad_norm": 6.972902984955237, "learning_rate": 1.5730619416781436e-06, "loss": 0.6664, "step": 17503 }, { "epoch": 1.264579984467282, "grad_norm": 6.804604250620925, "learning_rate": 1.5727902944341316e-06, "loss": 0.6297, "step": 17504 }, { "epoch": 1.2646522296674916, "grad_norm": 7.044101211470588, "learning_rate": 1.5725186598829014e-06, "loss": 0.6383, "step": 17505 }, { "epoch": 1.264724474867701, "grad_norm": 6.786181085348705, "learning_rate": 1.5722470380281703e-06, "loss": 0.6533, "step": 17506 }, { "epoch": 1.2647967200679104, "grad_norm": 5.572472366257645, "learning_rate": 1.571975428873658e-06, "loss": 0.6646, "step": 17507 }, { "epoch": 1.26486896526812, "grad_norm": 5.768600233785191, "learning_rate": 1.5717038324230805e-06, "loss": 0.6666, "step": 17508 }, { "epoch": 1.2649412104683295, "grad_norm": 7.495541073620244, "learning_rate": 1.5714322486801566e-06, "loss": 0.6474, "step": 17509 }, { "epoch": 1.265013455668539, "grad_norm": 5.339948941961786, "learning_rate": 1.571160677648605e-06, "loss": 0.5658, "step": 17510 }, { "epoch": 1.2650857008687486, "grad_norm": 7.982056043962912, "learning_rate": 1.5708891193321418e-06, "loss": 0.6346, "step": 17511 }, { "epoch": 1.2651579460689582, "grad_norm": 7.592358622611665, "learning_rate": 1.5706175737344853e-06, "loss": 0.6525, "step": 17512 }, { "epoch": 1.2652301912691675, "grad_norm": 7.756117344535669, "learning_rate": 1.5703460408593526e-06, "loss": 0.677, "step": 17513 }, { "epoch": 1.265302436469377, "grad_norm": 7.245750234023902, "learning_rate": 1.570074520710461e-06, "loss": 0.6065, "step": 17514 }, { "epoch": 1.2653746816695866, "grad_norm": 6.276470738434134, "learning_rate": 1.5698030132915272e-06, "loss": 0.6407, "step": 17515 }, { "epoch": 1.265446926869796, "grad_norm": 5.5522158417864285, "learning_rate": 1.5695315186062676e-06, "loss": 0.6151, "step": 17516 }, { "epoch": 1.2655191720700056, "grad_norm": 6.300124927069279, "learning_rate": 1.5692600366583987e-06, "loss": 0.5735, "step": 17517 }, { "epoch": 1.2655914172702152, "grad_norm": 5.796682513966468, "learning_rate": 1.568988567451638e-06, "loss": 0.5676, "step": 17518 }, { "epoch": 1.2656636624704247, "grad_norm": 6.835790456007795, "learning_rate": 1.568717110989701e-06, "loss": 0.6758, "step": 17519 }, { "epoch": 1.265735907670634, "grad_norm": 6.924567187724059, "learning_rate": 1.5684456672763026e-06, "loss": 0.658, "step": 17520 }, { "epoch": 1.2658081528708436, "grad_norm": 9.288749567636833, "learning_rate": 1.5681742363151615e-06, "loss": 0.6378, "step": 17521 }, { "epoch": 1.2658803980710531, "grad_norm": 6.518243277322754, "learning_rate": 1.5679028181099903e-06, "loss": 0.6518, "step": 17522 }, { "epoch": 1.2659526432712627, "grad_norm": 6.47193071239148, "learning_rate": 1.5676314126645059e-06, "loss": 0.6178, "step": 17523 }, { "epoch": 1.2660248884714722, "grad_norm": 6.807321522614886, "learning_rate": 1.5673600199824228e-06, "loss": 0.5806, "step": 17524 }, { "epoch": 1.2660971336716818, "grad_norm": 6.489311526889241, "learning_rate": 1.5670886400674586e-06, "loss": 0.6184, "step": 17525 }, { "epoch": 1.2661693788718913, "grad_norm": 6.130201349980977, "learning_rate": 1.5668172729233256e-06, "loss": 0.5787, "step": 17526 }, { "epoch": 1.2662416240721006, "grad_norm": 6.277884877209373, "learning_rate": 1.5665459185537394e-06, "loss": 0.6113, "step": 17527 }, { "epoch": 1.2663138692723102, "grad_norm": 6.6475773578803805, "learning_rate": 1.5662745769624154e-06, "loss": 0.6516, "step": 17528 }, { "epoch": 1.2663861144725197, "grad_norm": 9.049904535116742, "learning_rate": 1.5660032481530667e-06, "loss": 0.7581, "step": 17529 }, { "epoch": 1.2664583596727292, "grad_norm": 6.410233017707593, "learning_rate": 1.5657319321294087e-06, "loss": 0.5838, "step": 17530 }, { "epoch": 1.2665306048729388, "grad_norm": 6.274400914991089, "learning_rate": 1.5654606288951548e-06, "loss": 0.7056, "step": 17531 }, { "epoch": 1.2666028500731483, "grad_norm": 6.128890067298585, "learning_rate": 1.5651893384540202e-06, "loss": 0.6705, "step": 17532 }, { "epoch": 1.2666750952733579, "grad_norm": 7.557263287298828, "learning_rate": 1.564918060809717e-06, "loss": 0.5873, "step": 17533 }, { "epoch": 1.2667473404735672, "grad_norm": 6.997334926630242, "learning_rate": 1.5646467959659592e-06, "loss": 0.637, "step": 17534 }, { "epoch": 1.2668195856737767, "grad_norm": 7.536906417324409, "learning_rate": 1.5643755439264619e-06, "loss": 0.6627, "step": 17535 }, { "epoch": 1.2668918308739863, "grad_norm": 6.9276027704584555, "learning_rate": 1.5641043046949356e-06, "loss": 0.6346, "step": 17536 }, { "epoch": 1.2669640760741958, "grad_norm": 6.007477233818181, "learning_rate": 1.5638330782750948e-06, "loss": 0.6305, "step": 17537 }, { "epoch": 1.2670363212744054, "grad_norm": 6.990487856451179, "learning_rate": 1.5635618646706522e-06, "loss": 0.6242, "step": 17538 }, { "epoch": 1.267108566474615, "grad_norm": 5.882406881870461, "learning_rate": 1.5632906638853218e-06, "loss": 0.6191, "step": 17539 }, { "epoch": 1.2671808116748244, "grad_norm": 7.154361658951155, "learning_rate": 1.5630194759228137e-06, "loss": 0.6133, "step": 17540 }, { "epoch": 1.2672530568750338, "grad_norm": 7.233561591606208, "learning_rate": 1.5627483007868417e-06, "loss": 0.6592, "step": 17541 }, { "epoch": 1.2673253020752433, "grad_norm": 6.212624451524782, "learning_rate": 1.562477138481118e-06, "loss": 0.5909, "step": 17542 }, { "epoch": 1.2673975472754528, "grad_norm": 7.338451015832127, "learning_rate": 1.5622059890093539e-06, "loss": 0.6479, "step": 17543 }, { "epoch": 1.2674697924756624, "grad_norm": 7.320831247878352, "learning_rate": 1.5619348523752619e-06, "loss": 0.7124, "step": 17544 }, { "epoch": 1.267542037675872, "grad_norm": 7.5023858408137984, "learning_rate": 1.5616637285825532e-06, "loss": 0.6828, "step": 17545 }, { "epoch": 1.2676142828760815, "grad_norm": 6.937301770376685, "learning_rate": 1.5613926176349401e-06, "loss": 0.6974, "step": 17546 }, { "epoch": 1.267686528076291, "grad_norm": 6.051813834181761, "learning_rate": 1.5611215195361327e-06, "loss": 0.5721, "step": 17547 }, { "epoch": 1.2677587732765003, "grad_norm": 6.359188442633681, "learning_rate": 1.5608504342898425e-06, "loss": 0.6484, "step": 17548 }, { "epoch": 1.26783101847671, "grad_norm": 6.641611542068116, "learning_rate": 1.560579361899781e-06, "loss": 0.747, "step": 17549 }, { "epoch": 1.2679032636769194, "grad_norm": 8.996796779683416, "learning_rate": 1.5603083023696597e-06, "loss": 0.6908, "step": 17550 }, { "epoch": 1.267975508877129, "grad_norm": 6.4218114399607025, "learning_rate": 1.5600372557031867e-06, "loss": 0.5582, "step": 17551 }, { "epoch": 1.2680477540773385, "grad_norm": 5.553935283048939, "learning_rate": 1.5597662219040735e-06, "loss": 0.639, "step": 17552 }, { "epoch": 1.268119999277548, "grad_norm": 6.883222067659509, "learning_rate": 1.5594952009760323e-06, "loss": 0.7237, "step": 17553 }, { "epoch": 1.2681922444777576, "grad_norm": 6.701728871033193, "learning_rate": 1.5592241929227703e-06, "loss": 0.6941, "step": 17554 }, { "epoch": 1.268264489677967, "grad_norm": 6.669284719750365, "learning_rate": 1.5589531977479989e-06, "loss": 0.6126, "step": 17555 }, { "epoch": 1.2683367348781767, "grad_norm": 6.463405004179057, "learning_rate": 1.5586822154554276e-06, "loss": 0.5632, "step": 17556 }, { "epoch": 1.268408980078386, "grad_norm": 8.70537588711352, "learning_rate": 1.5584112460487661e-06, "loss": 0.6309, "step": 17557 }, { "epoch": 1.2684812252785955, "grad_norm": 6.971656090733459, "learning_rate": 1.5581402895317233e-06, "loss": 0.5861, "step": 17558 }, { "epoch": 1.268553470478805, "grad_norm": 5.977448361635224, "learning_rate": 1.5578693459080085e-06, "loss": 0.581, "step": 17559 }, { "epoch": 1.2686257156790146, "grad_norm": 6.410815589186347, "learning_rate": 1.5575984151813311e-06, "loss": 0.6032, "step": 17560 }, { "epoch": 1.2686979608792242, "grad_norm": 7.040481678350111, "learning_rate": 1.5573274973553996e-06, "loss": 0.6208, "step": 17561 }, { "epoch": 1.2687702060794335, "grad_norm": 7.358320610176512, "learning_rate": 1.5570565924339227e-06, "loss": 0.6842, "step": 17562 }, { "epoch": 1.2688424512796432, "grad_norm": 6.6266258691853315, "learning_rate": 1.5567857004206085e-06, "loss": 0.5833, "step": 17563 }, { "epoch": 1.2689146964798526, "grad_norm": 6.450228948743675, "learning_rate": 1.556514821319167e-06, "loss": 0.6771, "step": 17564 }, { "epoch": 1.268986941680062, "grad_norm": 8.698852147641288, "learning_rate": 1.5562439551333038e-06, "loss": 0.7078, "step": 17565 }, { "epoch": 1.2690591868802716, "grad_norm": 5.713322633238439, "learning_rate": 1.555973101866728e-06, "loss": 0.6453, "step": 17566 }, { "epoch": 1.2691314320804812, "grad_norm": 7.352665071576778, "learning_rate": 1.5557022615231488e-06, "loss": 0.694, "step": 17567 }, { "epoch": 1.2692036772806907, "grad_norm": 7.167082072202571, "learning_rate": 1.555431434106271e-06, "loss": 0.6669, "step": 17568 }, { "epoch": 1.2692759224809, "grad_norm": 5.721240225832131, "learning_rate": 1.5551606196198038e-06, "loss": 0.6022, "step": 17569 }, { "epoch": 1.2693481676811098, "grad_norm": 8.180350640203615, "learning_rate": 1.5548898180674544e-06, "loss": 0.6392, "step": 17570 }, { "epoch": 1.2694204128813191, "grad_norm": 10.260617223544862, "learning_rate": 1.5546190294529295e-06, "loss": 0.7057, "step": 17571 }, { "epoch": 1.2694926580815287, "grad_norm": 6.557622641123012, "learning_rate": 1.554348253779936e-06, "loss": 0.6023, "step": 17572 }, { "epoch": 1.2695649032817382, "grad_norm": 6.3978585713749885, "learning_rate": 1.55407749105218e-06, "loss": 0.6094, "step": 17573 }, { "epoch": 1.2696371484819478, "grad_norm": 6.659534199323037, "learning_rate": 1.5538067412733693e-06, "loss": 0.6302, "step": 17574 }, { "epoch": 1.2697093936821573, "grad_norm": 6.4207946908205935, "learning_rate": 1.55353600444721e-06, "loss": 0.5877, "step": 17575 }, { "epoch": 1.2697816388823666, "grad_norm": 6.758926705473358, "learning_rate": 1.5532652805774074e-06, "loss": 0.7489, "step": 17576 }, { "epoch": 1.2698538840825764, "grad_norm": 9.523313726508022, "learning_rate": 1.5529945696676679e-06, "loss": 0.6055, "step": 17577 }, { "epoch": 1.2699261292827857, "grad_norm": 6.326708524436212, "learning_rate": 1.5527238717216986e-06, "loss": 0.6462, "step": 17578 }, { "epoch": 1.2699983744829952, "grad_norm": 5.812080942450113, "learning_rate": 1.552453186743203e-06, "loss": 0.7507, "step": 17579 }, { "epoch": 1.2700706196832048, "grad_norm": 5.653769439184796, "learning_rate": 1.5521825147358874e-06, "loss": 0.594, "step": 17580 }, { "epoch": 1.2701428648834143, "grad_norm": 6.0075058083698645, "learning_rate": 1.5519118557034569e-06, "loss": 0.6453, "step": 17581 }, { "epoch": 1.2702151100836239, "grad_norm": 7.0828103564309846, "learning_rate": 1.5516412096496183e-06, "loss": 0.6719, "step": 17582 }, { "epoch": 1.2702873552838334, "grad_norm": 6.261910647987474, "learning_rate": 1.5513705765780742e-06, "loss": 0.6567, "step": 17583 }, { "epoch": 1.270359600484043, "grad_norm": 7.4605867473180565, "learning_rate": 1.5510999564925305e-06, "loss": 0.6226, "step": 17584 }, { "epoch": 1.2704318456842523, "grad_norm": 6.377500455071984, "learning_rate": 1.5508293493966919e-06, "loss": 0.5883, "step": 17585 }, { "epoch": 1.2705040908844618, "grad_norm": 6.5406767790704565, "learning_rate": 1.5505587552942622e-06, "loss": 0.6019, "step": 17586 }, { "epoch": 1.2705763360846714, "grad_norm": 7.952211698542582, "learning_rate": 1.5502881741889458e-06, "loss": 0.6016, "step": 17587 }, { "epoch": 1.270648581284881, "grad_norm": 7.783349424205324, "learning_rate": 1.5500176060844472e-06, "loss": 0.6354, "step": 17588 }, { "epoch": 1.2707208264850904, "grad_norm": 5.68385684528519, "learning_rate": 1.5497470509844704e-06, "loss": 0.6219, "step": 17589 }, { "epoch": 1.2707930716853, "grad_norm": 6.828224443283302, "learning_rate": 1.5494765088927177e-06, "loss": 0.6757, "step": 17590 }, { "epoch": 1.2708653168855095, "grad_norm": 9.008948222255974, "learning_rate": 1.5492059798128938e-06, "loss": 0.6863, "step": 17591 }, { "epoch": 1.2709375620857188, "grad_norm": 6.967565872105474, "learning_rate": 1.5489354637487031e-06, "loss": 0.647, "step": 17592 }, { "epoch": 1.2710098072859284, "grad_norm": 6.45843952050641, "learning_rate": 1.5486649607038459e-06, "loss": 0.6396, "step": 17593 }, { "epoch": 1.271082052486138, "grad_norm": 6.848899262805706, "learning_rate": 1.548394470682027e-06, "loss": 0.7048, "step": 17594 }, { "epoch": 1.2711542976863475, "grad_norm": 6.952157696935996, "learning_rate": 1.5481239936869485e-06, "loss": 0.6643, "step": 17595 }, { "epoch": 1.271226542886557, "grad_norm": 7.6278723637784935, "learning_rate": 1.5478535297223149e-06, "loss": 0.6831, "step": 17596 }, { "epoch": 1.2712987880867666, "grad_norm": 6.488830653383629, "learning_rate": 1.5475830787918261e-06, "loss": 0.6229, "step": 17597 }, { "epoch": 1.271371033286976, "grad_norm": 6.163282685939115, "learning_rate": 1.5473126408991857e-06, "loss": 0.6334, "step": 17598 }, { "epoch": 1.2714432784871854, "grad_norm": 7.00077461316433, "learning_rate": 1.5470422160480952e-06, "loss": 0.7198, "step": 17599 }, { "epoch": 1.271515523687395, "grad_norm": 6.514897221232477, "learning_rate": 1.5467718042422575e-06, "loss": 0.628, "step": 17600 }, { "epoch": 1.2715877688876045, "grad_norm": 6.760447293894495, "learning_rate": 1.5465014054853732e-06, "loss": 0.6931, "step": 17601 }, { "epoch": 1.271660014087814, "grad_norm": 9.477528949714465, "learning_rate": 1.5462310197811443e-06, "loss": 0.672, "step": 17602 }, { "epoch": 1.2717322592880236, "grad_norm": 6.417348486936268, "learning_rate": 1.5459606471332726e-06, "loss": 0.6952, "step": 17603 }, { "epoch": 1.2718045044882331, "grad_norm": 5.4970969428039, "learning_rate": 1.5456902875454582e-06, "loss": 0.5931, "step": 17604 }, { "epoch": 1.2718767496884427, "grad_norm": 8.95457012769876, "learning_rate": 1.5454199410214033e-06, "loss": 0.6912, "step": 17605 }, { "epoch": 1.271948994888652, "grad_norm": 5.7714433955820965, "learning_rate": 1.5451496075648078e-06, "loss": 0.5804, "step": 17606 }, { "epoch": 1.2720212400888615, "grad_norm": 6.554887116112715, "learning_rate": 1.5448792871793738e-06, "loss": 0.5735, "step": 17607 }, { "epoch": 1.272093485289071, "grad_norm": 6.75062049556893, "learning_rate": 1.5446089798688003e-06, "loss": 0.6884, "step": 17608 }, { "epoch": 1.2721657304892806, "grad_norm": 6.3963319996163746, "learning_rate": 1.544338685636787e-06, "loss": 0.727, "step": 17609 }, { "epoch": 1.2722379756894902, "grad_norm": 6.510405794261755, "learning_rate": 1.5440684044870367e-06, "loss": 0.6282, "step": 17610 }, { "epoch": 1.2723102208896997, "grad_norm": 6.965585580323211, "learning_rate": 1.5437981364232466e-06, "loss": 0.6701, "step": 17611 }, { "epoch": 1.2723824660899092, "grad_norm": 8.231613014459816, "learning_rate": 1.5435278814491178e-06, "loss": 0.6815, "step": 17612 }, { "epoch": 1.2724547112901186, "grad_norm": 6.486799480859788, "learning_rate": 1.5432576395683492e-06, "loss": 0.6786, "step": 17613 }, { "epoch": 1.272526956490328, "grad_norm": 7.8699559603359255, "learning_rate": 1.5429874107846415e-06, "loss": 0.6027, "step": 17614 }, { "epoch": 1.2725992016905376, "grad_norm": 7.128045485277542, "learning_rate": 1.5427171951016927e-06, "loss": 0.5946, "step": 17615 }, { "epoch": 1.2726714468907472, "grad_norm": 6.83947564686237, "learning_rate": 1.5424469925232021e-06, "loss": 0.6438, "step": 17616 }, { "epoch": 1.2727436920909567, "grad_norm": 6.317835903740441, "learning_rate": 1.5421768030528689e-06, "loss": 0.6067, "step": 17617 }, { "epoch": 1.2728159372911663, "grad_norm": 6.172991147097851, "learning_rate": 1.5419066266943916e-06, "loss": 0.6439, "step": 17618 }, { "epoch": 1.2728881824913758, "grad_norm": 6.3358369530015155, "learning_rate": 1.5416364634514684e-06, "loss": 0.6531, "step": 17619 }, { "epoch": 1.2729604276915851, "grad_norm": 5.932776891497531, "learning_rate": 1.5413663133277978e-06, "loss": 0.5832, "step": 17620 }, { "epoch": 1.2730326728917947, "grad_norm": 7.190784002649079, "learning_rate": 1.541096176327079e-06, "loss": 0.6726, "step": 17621 }, { "epoch": 1.2731049180920042, "grad_norm": 6.2027049655098185, "learning_rate": 1.5408260524530082e-06, "loss": 0.6515, "step": 17622 }, { "epoch": 1.2731771632922138, "grad_norm": 7.450420024731617, "learning_rate": 1.5405559417092832e-06, "loss": 0.6084, "step": 17623 }, { "epoch": 1.2732494084924233, "grad_norm": 8.1030954733361, "learning_rate": 1.5402858440996032e-06, "loss": 0.5925, "step": 17624 }, { "epoch": 1.2733216536926328, "grad_norm": 6.960932019849134, "learning_rate": 1.5400157596276658e-06, "loss": 0.651, "step": 17625 }, { "epoch": 1.2733938988928424, "grad_norm": 6.450829196828897, "learning_rate": 1.5397456882971662e-06, "loss": 0.6454, "step": 17626 }, { "epoch": 1.2734661440930517, "grad_norm": 6.532782415919728, "learning_rate": 1.5394756301118024e-06, "loss": 0.6205, "step": 17627 }, { "epoch": 1.2735383892932615, "grad_norm": 8.386171547850692, "learning_rate": 1.5392055850752725e-06, "loss": 0.5979, "step": 17628 }, { "epoch": 1.2736106344934708, "grad_norm": 7.860624968553088, "learning_rate": 1.538935553191271e-06, "loss": 0.6581, "step": 17629 }, { "epoch": 1.2736828796936803, "grad_norm": 6.537757077824365, "learning_rate": 1.5386655344634957e-06, "loss": 0.6422, "step": 17630 }, { "epoch": 1.2737551248938899, "grad_norm": 6.876925112215738, "learning_rate": 1.538395528895643e-06, "loss": 0.5981, "step": 17631 }, { "epoch": 1.2738273700940994, "grad_norm": 8.14273764348124, "learning_rate": 1.5381255364914092e-06, "loss": 0.6487, "step": 17632 }, { "epoch": 1.273899615294309, "grad_norm": 6.709797701337283, "learning_rate": 1.5378555572544895e-06, "loss": 0.6903, "step": 17633 }, { "epoch": 1.2739718604945183, "grad_norm": 7.32817211013975, "learning_rate": 1.53758559118858e-06, "loss": 0.5873, "step": 17634 }, { "epoch": 1.274044105694728, "grad_norm": 7.1199100450126664, "learning_rate": 1.5373156382973774e-06, "loss": 0.5757, "step": 17635 }, { "epoch": 1.2741163508949374, "grad_norm": 6.648208273534382, "learning_rate": 1.5370456985845758e-06, "loss": 0.7004, "step": 17636 }, { "epoch": 1.274188596095147, "grad_norm": 5.73759050152445, "learning_rate": 1.53677577205387e-06, "loss": 0.6003, "step": 17637 }, { "epoch": 1.2742608412953564, "grad_norm": 7.195557166629449, "learning_rate": 1.5365058587089565e-06, "loss": 0.6864, "step": 17638 }, { "epoch": 1.274333086495566, "grad_norm": 7.679659683316865, "learning_rate": 1.5362359585535307e-06, "loss": 0.6556, "step": 17639 }, { "epoch": 1.2744053316957755, "grad_norm": 6.391454666116583, "learning_rate": 1.5359660715912856e-06, "loss": 0.5907, "step": 17640 }, { "epoch": 1.2744775768959848, "grad_norm": 6.73492698442943, "learning_rate": 1.535696197825916e-06, "loss": 0.615, "step": 17641 }, { "epoch": 1.2745498220961946, "grad_norm": 7.19859585315637, "learning_rate": 1.5354263372611177e-06, "loss": 0.6887, "step": 17642 }, { "epoch": 1.274622067296404, "grad_norm": 6.992536653293728, "learning_rate": 1.535156489900583e-06, "loss": 0.7085, "step": 17643 }, { "epoch": 1.2746943124966135, "grad_norm": 9.082891844306586, "learning_rate": 1.5348866557480068e-06, "loss": 0.6251, "step": 17644 }, { "epoch": 1.274766557696823, "grad_norm": 7.018618754164426, "learning_rate": 1.5346168348070834e-06, "loss": 0.5991, "step": 17645 }, { "epoch": 1.2748388028970326, "grad_norm": 10.02306757160311, "learning_rate": 1.5343470270815058e-06, "loss": 0.7075, "step": 17646 }, { "epoch": 1.274911048097242, "grad_norm": 6.731402739170645, "learning_rate": 1.5340772325749675e-06, "loss": 0.7025, "step": 17647 }, { "epoch": 1.2749832932974514, "grad_norm": 7.1343346733728055, "learning_rate": 1.5338074512911621e-06, "loss": 0.6703, "step": 17648 }, { "epoch": 1.2750555384976612, "grad_norm": 7.630748145545245, "learning_rate": 1.533537683233783e-06, "loss": 0.6962, "step": 17649 }, { "epoch": 1.2751277836978705, "grad_norm": 7.125804119577379, "learning_rate": 1.533267928406522e-06, "loss": 0.646, "step": 17650 }, { "epoch": 1.27520002889808, "grad_norm": 8.309549596816492, "learning_rate": 1.5329981868130718e-06, "loss": 0.5999, "step": 17651 }, { "epoch": 1.2752722740982896, "grad_norm": 9.723091750240442, "learning_rate": 1.5327284584571262e-06, "loss": 0.6761, "step": 17652 }, { "epoch": 1.2753445192984991, "grad_norm": 8.851379338765822, "learning_rate": 1.5324587433423776e-06, "loss": 0.6796, "step": 17653 }, { "epoch": 1.2754167644987087, "grad_norm": 6.480553514364268, "learning_rate": 1.5321890414725168e-06, "loss": 0.6316, "step": 17654 }, { "epoch": 1.2754890096989182, "grad_norm": 7.15497001295117, "learning_rate": 1.5319193528512366e-06, "loss": 0.6495, "step": 17655 }, { "epoch": 1.2755612548991277, "grad_norm": 5.849480252122466, "learning_rate": 1.5316496774822287e-06, "loss": 0.6196, "step": 17656 }, { "epoch": 1.275633500099337, "grad_norm": 6.869339433254669, "learning_rate": 1.5313800153691856e-06, "loss": 0.7625, "step": 17657 }, { "epoch": 1.2757057452995466, "grad_norm": 6.024233200220155, "learning_rate": 1.5311103665157973e-06, "loss": 0.6309, "step": 17658 }, { "epoch": 1.2757779904997562, "grad_norm": 6.527263392028547, "learning_rate": 1.5308407309257555e-06, "loss": 0.705, "step": 17659 }, { "epoch": 1.2758502356999657, "grad_norm": 6.642437425002087, "learning_rate": 1.5305711086027525e-06, "loss": 0.6303, "step": 17660 }, { "epoch": 1.2759224809001752, "grad_norm": 6.887283172240626, "learning_rate": 1.5303014995504778e-06, "loss": 0.6614, "step": 17661 }, { "epoch": 1.2759947261003848, "grad_norm": 7.34502071993134, "learning_rate": 1.5300319037726225e-06, "loss": 0.6433, "step": 17662 }, { "epoch": 1.2760669713005943, "grad_norm": 6.552642103020234, "learning_rate": 1.5297623212728773e-06, "loss": 0.6495, "step": 17663 }, { "epoch": 1.2761392165008036, "grad_norm": 7.3350453834715905, "learning_rate": 1.5294927520549336e-06, "loss": 0.6576, "step": 17664 }, { "epoch": 1.2762114617010132, "grad_norm": 6.559544651696309, "learning_rate": 1.529223196122479e-06, "loss": 0.6158, "step": 17665 }, { "epoch": 1.2762837069012227, "grad_norm": 6.967549721027488, "learning_rate": 1.5289536534792057e-06, "loss": 0.6876, "step": 17666 }, { "epoch": 1.2763559521014323, "grad_norm": 7.62747827497431, "learning_rate": 1.528684124128804e-06, "loss": 0.6308, "step": 17667 }, { "epoch": 1.2764281973016418, "grad_norm": 6.714738366774131, "learning_rate": 1.5284146080749613e-06, "loss": 0.604, "step": 17668 }, { "epoch": 1.2765004425018514, "grad_norm": 7.1994516269816025, "learning_rate": 1.5281451053213684e-06, "loss": 0.66, "step": 17669 }, { "epoch": 1.276572687702061, "grad_norm": 6.507071243329943, "learning_rate": 1.5278756158717142e-06, "loss": 0.592, "step": 17670 }, { "epoch": 1.2766449329022702, "grad_norm": 6.806673410520037, "learning_rate": 1.5276061397296887e-06, "loss": 0.6476, "step": 17671 }, { "epoch": 1.2767171781024798, "grad_norm": 6.602504324458423, "learning_rate": 1.5273366768989794e-06, "loss": 0.6923, "step": 17672 }, { "epoch": 1.2767894233026893, "grad_norm": 6.85381117558802, "learning_rate": 1.527067227383276e-06, "loss": 0.7139, "step": 17673 }, { "epoch": 1.2768616685028988, "grad_norm": 7.361782060990762, "learning_rate": 1.5267977911862673e-06, "loss": 0.6408, "step": 17674 }, { "epoch": 1.2769339137031084, "grad_norm": 6.478325726885984, "learning_rate": 1.5265283683116405e-06, "loss": 0.5914, "step": 17675 }, { "epoch": 1.277006158903318, "grad_norm": 8.609786174391905, "learning_rate": 1.5262589587630847e-06, "loss": 0.6534, "step": 17676 }, { "epoch": 1.2770784041035275, "grad_norm": 7.860933122840289, "learning_rate": 1.5259895625442878e-06, "loss": 0.6306, "step": 17677 }, { "epoch": 1.2771506493037368, "grad_norm": 6.476902638742383, "learning_rate": 1.5257201796589382e-06, "loss": 0.5851, "step": 17678 }, { "epoch": 1.2772228945039463, "grad_norm": 9.418513114193056, "learning_rate": 1.5254508101107217e-06, "loss": 0.6161, "step": 17679 }, { "epoch": 1.2772951397041559, "grad_norm": 6.728486710750646, "learning_rate": 1.525181453903327e-06, "loss": 0.5991, "step": 17680 }, { "epoch": 1.2773673849043654, "grad_norm": 7.271770806707483, "learning_rate": 1.5249121110404414e-06, "loss": 0.6506, "step": 17681 }, { "epoch": 1.277439630104575, "grad_norm": 9.42907905037821, "learning_rate": 1.524642781525753e-06, "loss": 0.6189, "step": 17682 }, { "epoch": 1.2775118753047845, "grad_norm": 6.474321260557906, "learning_rate": 1.5243734653629466e-06, "loss": 0.6883, "step": 17683 }, { "epoch": 1.277584120504994, "grad_norm": 8.129726033116686, "learning_rate": 1.5241041625557102e-06, "loss": 0.6354, "step": 17684 }, { "epoch": 1.2776563657052034, "grad_norm": 6.8801483297992245, "learning_rate": 1.5238348731077302e-06, "loss": 0.6306, "step": 17685 }, { "epoch": 1.277728610905413, "grad_norm": 6.325719606980899, "learning_rate": 1.5235655970226926e-06, "loss": 0.5788, "step": 17686 }, { "epoch": 1.2778008561056224, "grad_norm": 7.0099101079599935, "learning_rate": 1.5232963343042834e-06, "loss": 0.5734, "step": 17687 }, { "epoch": 1.277873101305832, "grad_norm": 6.659755589579805, "learning_rate": 1.5230270849561895e-06, "loss": 0.595, "step": 17688 }, { "epoch": 1.2779453465060415, "grad_norm": 7.515066653506183, "learning_rate": 1.5227578489820966e-06, "loss": 0.6935, "step": 17689 }, { "epoch": 1.278017591706251, "grad_norm": 6.546805370024262, "learning_rate": 1.5224886263856891e-06, "loss": 0.6263, "step": 17690 }, { "epoch": 1.2780898369064606, "grad_norm": 6.228967295574439, "learning_rate": 1.5222194171706539e-06, "loss": 0.6698, "step": 17691 }, { "epoch": 1.27816208210667, "grad_norm": 6.873747555485506, "learning_rate": 1.5219502213406762e-06, "loss": 0.6996, "step": 17692 }, { "epoch": 1.2782343273068795, "grad_norm": 7.934332508455663, "learning_rate": 1.52168103889944e-06, "loss": 0.7008, "step": 17693 }, { "epoch": 1.278306572507089, "grad_norm": 6.128987785268678, "learning_rate": 1.52141186985063e-06, "loss": 0.6515, "step": 17694 }, { "epoch": 1.2783788177072986, "grad_norm": 7.017688473248749, "learning_rate": 1.521142714197932e-06, "loss": 0.5781, "step": 17695 }, { "epoch": 1.278451062907508, "grad_norm": 6.431529711691694, "learning_rate": 1.5208735719450316e-06, "loss": 0.6048, "step": 17696 }, { "epoch": 1.2785233081077176, "grad_norm": 8.349091495269812, "learning_rate": 1.5206044430956108e-06, "loss": 0.6092, "step": 17697 }, { "epoch": 1.2785955533079272, "grad_norm": 6.602241147162318, "learning_rate": 1.5203353276533544e-06, "loss": 0.6179, "step": 17698 }, { "epoch": 1.2786677985081365, "grad_norm": 6.729039461648001, "learning_rate": 1.5200662256219473e-06, "loss": 0.6226, "step": 17699 }, { "epoch": 1.2787400437083463, "grad_norm": 6.02431963486237, "learning_rate": 1.5197971370050722e-06, "loss": 0.6695, "step": 17700 }, { "epoch": 1.2788122889085556, "grad_norm": 7.007609999164524, "learning_rate": 1.5195280618064131e-06, "loss": 0.6639, "step": 17701 }, { "epoch": 1.2788845341087651, "grad_norm": 7.701652834968664, "learning_rate": 1.5192590000296537e-06, "loss": 0.6656, "step": 17702 }, { "epoch": 1.2789567793089747, "grad_norm": 7.040628240036844, "learning_rate": 1.5189899516784775e-06, "loss": 0.7064, "step": 17703 }, { "epoch": 1.2790290245091842, "grad_norm": 5.807454205848493, "learning_rate": 1.5187209167565669e-06, "loss": 0.5653, "step": 17704 }, { "epoch": 1.2791012697093938, "grad_norm": 6.240665025730744, "learning_rate": 1.5184518952676046e-06, "loss": 0.6633, "step": 17705 }, { "epoch": 1.279173514909603, "grad_norm": 7.7270075686242325, "learning_rate": 1.5181828872152743e-06, "loss": 0.5782, "step": 17706 }, { "epoch": 1.2792457601098128, "grad_norm": 6.3728170210235335, "learning_rate": 1.5179138926032585e-06, "loss": 0.6267, "step": 17707 }, { "epoch": 1.2793180053100222, "grad_norm": 8.58595127316343, "learning_rate": 1.5176449114352376e-06, "loss": 0.6679, "step": 17708 }, { "epoch": 1.2793902505102317, "grad_norm": 7.048406802402117, "learning_rate": 1.5173759437148955e-06, "loss": 0.7122, "step": 17709 }, { "epoch": 1.2794624957104412, "grad_norm": 5.605709142547866, "learning_rate": 1.517106989445915e-06, "loss": 0.6027, "step": 17710 }, { "epoch": 1.2795347409106508, "grad_norm": 6.906850106934289, "learning_rate": 1.5168380486319755e-06, "loss": 0.687, "step": 17711 }, { "epoch": 1.2796069861108603, "grad_norm": 5.8763260663726005, "learning_rate": 1.5165691212767597e-06, "loss": 0.6743, "step": 17712 }, { "epoch": 1.2796792313110696, "grad_norm": 7.269937658659186, "learning_rate": 1.5163002073839492e-06, "loss": 0.7357, "step": 17713 }, { "epoch": 1.2797514765112794, "grad_norm": 6.081592815965174, "learning_rate": 1.5160313069572254e-06, "loss": 0.6727, "step": 17714 }, { "epoch": 1.2798237217114887, "grad_norm": 6.720555262558993, "learning_rate": 1.5157624200002685e-06, "loss": 0.7035, "step": 17715 }, { "epoch": 1.2798959669116983, "grad_norm": 7.025974312005345, "learning_rate": 1.5154935465167602e-06, "loss": 0.6049, "step": 17716 }, { "epoch": 1.2799682121119078, "grad_norm": 9.010991060915682, "learning_rate": 1.5152246865103809e-06, "loss": 0.7433, "step": 17717 }, { "epoch": 1.2800404573121174, "grad_norm": 6.857244417028829, "learning_rate": 1.5149558399848108e-06, "loss": 0.6407, "step": 17718 }, { "epoch": 1.280112702512327, "grad_norm": 6.243270608179889, "learning_rate": 1.5146870069437302e-06, "loss": 0.5652, "step": 17719 }, { "epoch": 1.2801849477125362, "grad_norm": 7.0236876106919715, "learning_rate": 1.5144181873908198e-06, "loss": 0.6463, "step": 17720 }, { "epoch": 1.280257192912746, "grad_norm": 5.987612971313216, "learning_rate": 1.5141493813297598e-06, "loss": 0.609, "step": 17721 }, { "epoch": 1.2803294381129553, "grad_norm": 6.918240594073067, "learning_rate": 1.5138805887642278e-06, "loss": 0.6393, "step": 17722 }, { "epoch": 1.2804016833131648, "grad_norm": 6.526823889475533, "learning_rate": 1.5136118096979056e-06, "loss": 0.63, "step": 17723 }, { "epoch": 1.2804739285133744, "grad_norm": 6.462320126773235, "learning_rate": 1.5133430441344727e-06, "loss": 0.5436, "step": 17724 }, { "epoch": 1.280546173713584, "grad_norm": 8.066305519195508, "learning_rate": 1.5130742920776065e-06, "loss": 0.686, "step": 17725 }, { "epoch": 1.2806184189137935, "grad_norm": 8.526766203099756, "learning_rate": 1.5128055535309868e-06, "loss": 0.6809, "step": 17726 }, { "epoch": 1.280690664114003, "grad_norm": 7.694979123092796, "learning_rate": 1.5125368284982929e-06, "loss": 0.599, "step": 17727 }, { "epoch": 1.2807629093142125, "grad_norm": 7.859210943553649, "learning_rate": 1.512268116983203e-06, "loss": 0.6483, "step": 17728 }, { "epoch": 1.2808351545144219, "grad_norm": 6.828062148863339, "learning_rate": 1.5119994189893956e-06, "loss": 0.6082, "step": 17729 }, { "epoch": 1.2809073997146314, "grad_norm": 9.135409335949406, "learning_rate": 1.511730734520549e-06, "loss": 0.6615, "step": 17730 }, { "epoch": 1.280979644914841, "grad_norm": 7.010959220701254, "learning_rate": 1.5114620635803412e-06, "loss": 0.6798, "step": 17731 }, { "epoch": 1.2810518901150505, "grad_norm": 6.61393850792929, "learning_rate": 1.5111934061724509e-06, "loss": 0.6199, "step": 17732 }, { "epoch": 1.28112413531526, "grad_norm": 7.1136933678152605, "learning_rate": 1.5109247623005544e-06, "loss": 0.6208, "step": 17733 }, { "epoch": 1.2811963805154696, "grad_norm": 7.1625053685352915, "learning_rate": 1.51065613196833e-06, "loss": 0.6301, "step": 17734 }, { "epoch": 1.2812686257156791, "grad_norm": 8.048874808391068, "learning_rate": 1.5103875151794559e-06, "loss": 0.7505, "step": 17735 }, { "epoch": 1.2813408709158884, "grad_norm": 6.408920996631528, "learning_rate": 1.5101189119376069e-06, "loss": 0.5375, "step": 17736 }, { "epoch": 1.281413116116098, "grad_norm": 7.052882575118361, "learning_rate": 1.5098503222464617e-06, "loss": 0.6674, "step": 17737 }, { "epoch": 1.2814853613163075, "grad_norm": 6.64297551461272, "learning_rate": 1.5095817461096973e-06, "loss": 0.7058, "step": 17738 }, { "epoch": 1.281557606516517, "grad_norm": 7.034102611445176, "learning_rate": 1.5093131835309905e-06, "loss": 0.6747, "step": 17739 }, { "epoch": 1.2816298517167266, "grad_norm": 8.066097905728933, "learning_rate": 1.5090446345140158e-06, "loss": 0.6214, "step": 17740 }, { "epoch": 1.2817020969169362, "grad_norm": 6.305794355190405, "learning_rate": 1.5087760990624505e-06, "loss": 0.6604, "step": 17741 }, { "epoch": 1.2817743421171457, "grad_norm": 7.130720285037639, "learning_rate": 1.5085075771799717e-06, "loss": 0.6747, "step": 17742 }, { "epoch": 1.281846587317355, "grad_norm": 6.575785241177356, "learning_rate": 1.5082390688702538e-06, "loss": 0.6375, "step": 17743 }, { "epoch": 1.2819188325175646, "grad_norm": 6.346848806818335, "learning_rate": 1.507970574136973e-06, "loss": 0.6431, "step": 17744 }, { "epoch": 1.281991077717774, "grad_norm": 5.975837693488337, "learning_rate": 1.5077020929838046e-06, "loss": 0.5723, "step": 17745 }, { "epoch": 1.2820633229179836, "grad_norm": 8.054630194237223, "learning_rate": 1.5074336254144245e-06, "loss": 0.5747, "step": 17746 }, { "epoch": 1.2821355681181932, "grad_norm": 6.411046758395237, "learning_rate": 1.507165171432507e-06, "loss": 0.6358, "step": 17747 }, { "epoch": 1.2822078133184027, "grad_norm": 7.727240090122759, "learning_rate": 1.5068967310417276e-06, "loss": 0.6893, "step": 17748 }, { "epoch": 1.2822800585186123, "grad_norm": 6.964414607940773, "learning_rate": 1.5066283042457618e-06, "loss": 0.631, "step": 17749 }, { "epoch": 1.2823523037188216, "grad_norm": 6.317910170469278, "learning_rate": 1.5063598910482815e-06, "loss": 0.6335, "step": 17750 }, { "epoch": 1.2824245489190311, "grad_norm": 5.910457343722471, "learning_rate": 1.5060914914529634e-06, "loss": 0.6889, "step": 17751 }, { "epoch": 1.2824967941192407, "grad_norm": 6.781641232503861, "learning_rate": 1.5058231054634809e-06, "loss": 0.5955, "step": 17752 }, { "epoch": 1.2825690393194502, "grad_norm": 6.479398504527157, "learning_rate": 1.5055547330835097e-06, "loss": 0.8278, "step": 17753 }, { "epoch": 1.2826412845196598, "grad_norm": 6.365324269655186, "learning_rate": 1.5052863743167206e-06, "loss": 0.625, "step": 17754 }, { "epoch": 1.2827135297198693, "grad_norm": 5.5582250391044035, "learning_rate": 1.5050180291667887e-06, "loss": 0.566, "step": 17755 }, { "epoch": 1.2827857749200788, "grad_norm": 6.676280655325379, "learning_rate": 1.5047496976373874e-06, "loss": 0.6383, "step": 17756 }, { "epoch": 1.2828580201202882, "grad_norm": 7.413823243472239, "learning_rate": 1.5044813797321906e-06, "loss": 0.6573, "step": 17757 }, { "epoch": 1.2829302653204977, "grad_norm": 6.1354110363695336, "learning_rate": 1.50421307545487e-06, "loss": 0.6445, "step": 17758 }, { "epoch": 1.2830025105207072, "grad_norm": 6.870476639161768, "learning_rate": 1.5039447848090996e-06, "loss": 0.5785, "step": 17759 }, { "epoch": 1.2830747557209168, "grad_norm": 7.231726406694951, "learning_rate": 1.503676507798552e-06, "loss": 0.6882, "step": 17760 }, { "epoch": 1.2831470009211263, "grad_norm": 6.108129818359274, "learning_rate": 1.503408244426899e-06, "loss": 0.6288, "step": 17761 }, { "epoch": 1.2832192461213359, "grad_norm": 6.424472701109772, "learning_rate": 1.5031399946978134e-06, "loss": 0.6756, "step": 17762 }, { "epoch": 1.2832914913215454, "grad_norm": 5.919957315961216, "learning_rate": 1.5028717586149672e-06, "loss": 0.6181, "step": 17763 }, { "epoch": 1.2833637365217547, "grad_norm": 5.96351050045304, "learning_rate": 1.5026035361820328e-06, "loss": 0.5912, "step": 17764 }, { "epoch": 1.2834359817219643, "grad_norm": 6.258428459951434, "learning_rate": 1.502335327402681e-06, "loss": 0.5984, "step": 17765 }, { "epoch": 1.2835082269221738, "grad_norm": 6.68031110027702, "learning_rate": 1.502067132280584e-06, "loss": 0.6275, "step": 17766 }, { "epoch": 1.2835804721223834, "grad_norm": 6.700512926196105, "learning_rate": 1.5017989508194141e-06, "loss": 0.6161, "step": 17767 }, { "epoch": 1.283652717322593, "grad_norm": 6.962846522037745, "learning_rate": 1.501530783022841e-06, "loss": 0.6629, "step": 17768 }, { "epoch": 1.2837249625228024, "grad_norm": 6.234178888969881, "learning_rate": 1.5012626288945353e-06, "loss": 0.6042, "step": 17769 }, { "epoch": 1.283797207723012, "grad_norm": 5.486515160126228, "learning_rate": 1.5009944884381693e-06, "loss": 0.613, "step": 17770 }, { "epoch": 1.2838694529232213, "grad_norm": 7.235875808232076, "learning_rate": 1.5007263616574135e-06, "loss": 0.6035, "step": 17771 }, { "epoch": 1.283941698123431, "grad_norm": 6.667542527202266, "learning_rate": 1.5004582485559376e-06, "loss": 0.5818, "step": 17772 }, { "epoch": 1.2840139433236404, "grad_norm": 6.0862800394015375, "learning_rate": 1.500190149137412e-06, "loss": 0.6716, "step": 17773 }, { "epoch": 1.28408618852385, "grad_norm": 6.667498187024471, "learning_rate": 1.4999220634055072e-06, "loss": 0.5848, "step": 17774 }, { "epoch": 1.2841584337240595, "grad_norm": 7.031508513042855, "learning_rate": 1.499653991363893e-06, "loss": 0.6357, "step": 17775 }, { "epoch": 1.284230678924269, "grad_norm": 6.135145232106813, "learning_rate": 1.4993859330162386e-06, "loss": 0.6058, "step": 17776 }, { "epoch": 1.2843029241244786, "grad_norm": 7.220061740347089, "learning_rate": 1.4991178883662135e-06, "loss": 0.6408, "step": 17777 }, { "epoch": 1.2843751693246879, "grad_norm": 6.692179531081089, "learning_rate": 1.4988498574174882e-06, "loss": 0.6019, "step": 17778 }, { "epoch": 1.2844474145248976, "grad_norm": 7.2713574181436424, "learning_rate": 1.4985818401737306e-06, "loss": 0.6468, "step": 17779 }, { "epoch": 1.284519659725107, "grad_norm": 7.688851090462644, "learning_rate": 1.4983138366386098e-06, "loss": 0.5914, "step": 17780 }, { "epoch": 1.2845919049253165, "grad_norm": 7.940919635085026, "learning_rate": 1.498045846815796e-06, "loss": 0.672, "step": 17781 }, { "epoch": 1.284664150125526, "grad_norm": 9.010444938012608, "learning_rate": 1.4977778707089558e-06, "loss": 0.6919, "step": 17782 }, { "epoch": 1.2847363953257356, "grad_norm": 7.2519407634388315, "learning_rate": 1.4975099083217582e-06, "loss": 0.6204, "step": 17783 }, { "epoch": 1.2848086405259451, "grad_norm": 6.627068826355137, "learning_rate": 1.4972419596578713e-06, "loss": 0.6906, "step": 17784 }, { "epoch": 1.2848808857261544, "grad_norm": 6.388202534540874, "learning_rate": 1.4969740247209642e-06, "loss": 0.6196, "step": 17785 }, { "epoch": 1.2849531309263642, "grad_norm": 5.90140589111605, "learning_rate": 1.496706103514703e-06, "loss": 0.6412, "step": 17786 }, { "epoch": 1.2850253761265735, "grad_norm": 6.452567826941403, "learning_rate": 1.4964381960427566e-06, "loss": 0.6067, "step": 17787 }, { "epoch": 1.285097621326783, "grad_norm": 6.955641936042398, "learning_rate": 1.496170302308792e-06, "loss": 0.6314, "step": 17788 }, { "epoch": 1.2851698665269926, "grad_norm": 7.523242032848236, "learning_rate": 1.495902422316477e-06, "loss": 0.7007, "step": 17789 }, { "epoch": 1.2852421117272022, "grad_norm": 6.327586059226225, "learning_rate": 1.495634556069478e-06, "loss": 0.654, "step": 17790 }, { "epoch": 1.2853143569274117, "grad_norm": 6.4938328169534065, "learning_rate": 1.4953667035714615e-06, "loss": 0.6067, "step": 17791 }, { "epoch": 1.285386602127621, "grad_norm": 6.353282662690289, "learning_rate": 1.4950988648260962e-06, "loss": 0.6247, "step": 17792 }, { "epoch": 1.2854588473278308, "grad_norm": 5.396988537626197, "learning_rate": 1.494831039837046e-06, "loss": 0.6131, "step": 17793 }, { "epoch": 1.28553109252804, "grad_norm": 5.8308010962954775, "learning_rate": 1.494563228607978e-06, "loss": 0.6076, "step": 17794 }, { "epoch": 1.2856033377282496, "grad_norm": 6.944723956099049, "learning_rate": 1.4942954311425595e-06, "loss": 0.618, "step": 17795 }, { "epoch": 1.2856755829284592, "grad_norm": 7.609328878838667, "learning_rate": 1.4940276474444562e-06, "loss": 0.6617, "step": 17796 }, { "epoch": 1.2857478281286687, "grad_norm": 6.222152929071569, "learning_rate": 1.4937598775173326e-06, "loss": 0.6654, "step": 17797 }, { "epoch": 1.2858200733288783, "grad_norm": 7.865795993575528, "learning_rate": 1.4934921213648549e-06, "loss": 0.6623, "step": 17798 }, { "epoch": 1.2858923185290876, "grad_norm": 6.605635358354927, "learning_rate": 1.4932243789906892e-06, "loss": 0.6235, "step": 17799 }, { "epoch": 1.2859645637292973, "grad_norm": 6.002864471771379, "learning_rate": 1.4929566503984994e-06, "loss": 0.616, "step": 17800 }, { "epoch": 1.2860368089295067, "grad_norm": 6.698384491604172, "learning_rate": 1.492688935591951e-06, "loss": 0.7133, "step": 17801 }, { "epoch": 1.2861090541297162, "grad_norm": 6.112206312741863, "learning_rate": 1.4924212345747092e-06, "loss": 0.625, "step": 17802 }, { "epoch": 1.2861812993299258, "grad_norm": 6.696633634788021, "learning_rate": 1.4921535473504383e-06, "loss": 0.6065, "step": 17803 }, { "epoch": 1.2862535445301353, "grad_norm": 6.827713244700268, "learning_rate": 1.491885873922803e-06, "loss": 0.5919, "step": 17804 }, { "epoch": 1.2863257897303448, "grad_norm": 5.650854076547509, "learning_rate": 1.4916182142954672e-06, "loss": 0.6062, "step": 17805 }, { "epoch": 1.2863980349305544, "grad_norm": 8.535609881394725, "learning_rate": 1.4913505684720958e-06, "loss": 0.6221, "step": 17806 }, { "epoch": 1.286470280130764, "grad_norm": 10.285390281115408, "learning_rate": 1.4910829364563506e-06, "loss": 0.5907, "step": 17807 }, { "epoch": 1.2865425253309732, "grad_norm": 6.182709968725126, "learning_rate": 1.4908153182518969e-06, "loss": 0.6158, "step": 17808 }, { "epoch": 1.2866147705311828, "grad_norm": 6.929975123064932, "learning_rate": 1.4905477138623983e-06, "loss": 0.6411, "step": 17809 }, { "epoch": 1.2866870157313923, "grad_norm": 8.926630412971356, "learning_rate": 1.4902801232915186e-06, "loss": 0.7297, "step": 17810 }, { "epoch": 1.2867592609316019, "grad_norm": 7.605835892525123, "learning_rate": 1.4900125465429188e-06, "loss": 0.7367, "step": 17811 }, { "epoch": 1.2868315061318114, "grad_norm": 6.047830038033916, "learning_rate": 1.4897449836202633e-06, "loss": 0.6335, "step": 17812 }, { "epoch": 1.286903751332021, "grad_norm": 6.428381229796169, "learning_rate": 1.489477434527214e-06, "loss": 0.6573, "step": 17813 }, { "epoch": 1.2869759965322305, "grad_norm": 7.543998596595803, "learning_rate": 1.4892098992674353e-06, "loss": 0.6609, "step": 17814 }, { "epoch": 1.2870482417324398, "grad_norm": 7.5742371329231535, "learning_rate": 1.4889423778445877e-06, "loss": 0.6696, "step": 17815 }, { "epoch": 1.2871204869326494, "grad_norm": 7.241762150821598, "learning_rate": 1.4886748702623334e-06, "loss": 0.6622, "step": 17816 }, { "epoch": 1.287192732132859, "grad_norm": 5.9224779129099545, "learning_rate": 1.4884073765243357e-06, "loss": 0.6519, "step": 17817 }, { "epoch": 1.2872649773330684, "grad_norm": 8.592579820898706, "learning_rate": 1.488139896634255e-06, "loss": 0.6965, "step": 17818 }, { "epoch": 1.287337222533278, "grad_norm": 6.142361383298536, "learning_rate": 1.4878724305957533e-06, "loss": 0.6255, "step": 17819 }, { "epoch": 1.2874094677334875, "grad_norm": 7.8958988505709256, "learning_rate": 1.4876049784124924e-06, "loss": 0.6422, "step": 17820 }, { "epoch": 1.287481712933697, "grad_norm": 6.951688536660873, "learning_rate": 1.4873375400881337e-06, "loss": 0.6882, "step": 17821 }, { "epoch": 1.2875539581339064, "grad_norm": 6.36497277433052, "learning_rate": 1.4870701156263373e-06, "loss": 0.653, "step": 17822 }, { "epoch": 1.287626203334116, "grad_norm": 7.155742510662765, "learning_rate": 1.4868027050307643e-06, "loss": 0.6187, "step": 17823 }, { "epoch": 1.2876984485343255, "grad_norm": 7.702853617810861, "learning_rate": 1.4865353083050772e-06, "loss": 0.6348, "step": 17824 }, { "epoch": 1.287770693734535, "grad_norm": 6.749632083614643, "learning_rate": 1.486267925452933e-06, "loss": 0.5525, "step": 17825 }, { "epoch": 1.2878429389347446, "grad_norm": 8.525176069393533, "learning_rate": 1.4860005564779944e-06, "loss": 0.6405, "step": 17826 }, { "epoch": 1.287915184134954, "grad_norm": 8.811417316997154, "learning_rate": 1.4857332013839199e-06, "loss": 0.6794, "step": 17827 }, { "epoch": 1.2879874293351636, "grad_norm": 7.008727763443273, "learning_rate": 1.4854658601743722e-06, "loss": 0.5214, "step": 17828 }, { "epoch": 1.288059674535373, "grad_norm": 8.000461565053996, "learning_rate": 1.485198532853008e-06, "loss": 0.6629, "step": 17829 }, { "epoch": 1.2881319197355825, "grad_norm": 6.059103417372253, "learning_rate": 1.484931219423488e-06, "loss": 0.6468, "step": 17830 }, { "epoch": 1.288204164935792, "grad_norm": 7.1452328028301935, "learning_rate": 1.4846639198894719e-06, "loss": 0.6376, "step": 17831 }, { "epoch": 1.2882764101360016, "grad_norm": 7.064024634070986, "learning_rate": 1.4843966342546179e-06, "loss": 0.5861, "step": 17832 }, { "epoch": 1.2883486553362111, "grad_norm": 6.685172843667322, "learning_rate": 1.4841293625225856e-06, "loss": 0.6352, "step": 17833 }, { "epoch": 1.2884209005364207, "grad_norm": 8.045227948854118, "learning_rate": 1.483862104697033e-06, "loss": 0.6847, "step": 17834 }, { "epoch": 1.2884931457366302, "grad_norm": 7.645854509897728, "learning_rate": 1.4835948607816203e-06, "loss": 0.7052, "step": 17835 }, { "epoch": 1.2885653909368395, "grad_norm": 6.531642774243113, "learning_rate": 1.4833276307800038e-06, "loss": 0.5743, "step": 17836 }, { "epoch": 1.288637636137049, "grad_norm": 6.3332805965971835, "learning_rate": 1.483060414695843e-06, "loss": 0.6354, "step": 17837 }, { "epoch": 1.2887098813372586, "grad_norm": 6.877971215199245, "learning_rate": 1.4827932125327953e-06, "loss": 0.5711, "step": 17838 }, { "epoch": 1.2887821265374682, "grad_norm": 8.93152295837373, "learning_rate": 1.4825260242945199e-06, "loss": 0.6673, "step": 17839 }, { "epoch": 1.2888543717376777, "grad_norm": 7.746430590255646, "learning_rate": 1.482258849984672e-06, "loss": 0.6199, "step": 17840 }, { "epoch": 1.2889266169378872, "grad_norm": 6.1453087879192, "learning_rate": 1.4819916896069097e-06, "loss": 0.6576, "step": 17841 }, { "epoch": 1.2889988621380968, "grad_norm": 6.703886117828267, "learning_rate": 1.4817245431648925e-06, "loss": 0.6437, "step": 17842 }, { "epoch": 1.289071107338306, "grad_norm": 7.273116779734146, "learning_rate": 1.4814574106622747e-06, "loss": 0.7343, "step": 17843 }, { "epoch": 1.2891433525385156, "grad_norm": 6.055494212890762, "learning_rate": 1.4811902921027138e-06, "loss": 0.6191, "step": 17844 }, { "epoch": 1.2892155977387252, "grad_norm": 5.886516241021556, "learning_rate": 1.480923187489867e-06, "loss": 0.655, "step": 17845 }, { "epoch": 1.2892878429389347, "grad_norm": 6.413441264353978, "learning_rate": 1.480656096827391e-06, "loss": 0.6467, "step": 17846 }, { "epoch": 1.2893600881391443, "grad_norm": 6.122959653485632, "learning_rate": 1.4803890201189408e-06, "loss": 0.6764, "step": 17847 }, { "epoch": 1.2894323333393538, "grad_norm": 8.680801114148304, "learning_rate": 1.4801219573681736e-06, "loss": 0.6695, "step": 17848 }, { "epoch": 1.2895045785395634, "grad_norm": 6.374388123113606, "learning_rate": 1.4798549085787451e-06, "loss": 0.5887, "step": 17849 }, { "epoch": 1.2895768237397727, "grad_norm": 6.4540837570916825, "learning_rate": 1.47958787375431e-06, "loss": 0.6263, "step": 17850 }, { "epoch": 1.2896490689399824, "grad_norm": 7.147769617450635, "learning_rate": 1.4793208528985254e-06, "loss": 0.6273, "step": 17851 }, { "epoch": 1.2897213141401918, "grad_norm": 5.961196033875826, "learning_rate": 1.4790538460150455e-06, "loss": 0.6589, "step": 17852 }, { "epoch": 1.2897935593404013, "grad_norm": 7.399006452103653, "learning_rate": 1.4787868531075266e-06, "loss": 0.6137, "step": 17853 }, { "epoch": 1.2898658045406108, "grad_norm": 6.51519905728931, "learning_rate": 1.478519874179622e-06, "loss": 0.6367, "step": 17854 }, { "epoch": 1.2899380497408204, "grad_norm": 6.181167907478283, "learning_rate": 1.4782529092349863e-06, "loss": 0.5911, "step": 17855 }, { "epoch": 1.29001029494103, "grad_norm": 7.278897801896192, "learning_rate": 1.4779859582772766e-06, "loss": 0.6248, "step": 17856 }, { "epoch": 1.2900825401412392, "grad_norm": 6.451916597663296, "learning_rate": 1.4777190213101446e-06, "loss": 0.6307, "step": 17857 }, { "epoch": 1.290154785341449, "grad_norm": 6.942959398641306, "learning_rate": 1.4774520983372457e-06, "loss": 0.6498, "step": 17858 }, { "epoch": 1.2902270305416583, "grad_norm": 6.359078665162632, "learning_rate": 1.4771851893622336e-06, "loss": 0.6353, "step": 17859 }, { "epoch": 1.2902992757418679, "grad_norm": 6.395503264451925, "learning_rate": 1.4769182943887622e-06, "loss": 0.5937, "step": 17860 }, { "epoch": 1.2903715209420774, "grad_norm": 7.047624163337334, "learning_rate": 1.4766514134204845e-06, "loss": 0.5968, "step": 17861 }, { "epoch": 1.290443766142287, "grad_norm": 7.4239622373442655, "learning_rate": 1.4763845464610549e-06, "loss": 0.6958, "step": 17862 }, { "epoch": 1.2905160113424965, "grad_norm": 5.957339899790898, "learning_rate": 1.4761176935141256e-06, "loss": 0.6454, "step": 17863 }, { "epoch": 1.2905882565427058, "grad_norm": 6.387709869102241, "learning_rate": 1.4758508545833505e-06, "loss": 0.6462, "step": 17864 }, { "epoch": 1.2906605017429156, "grad_norm": 6.414966440566447, "learning_rate": 1.475584029672382e-06, "loss": 0.6191, "step": 17865 }, { "epoch": 1.290732746943125, "grad_norm": 6.710982121595688, "learning_rate": 1.4753172187848725e-06, "loss": 0.6108, "step": 17866 }, { "epoch": 1.2908049921433344, "grad_norm": 7.737367240000996, "learning_rate": 1.4750504219244754e-06, "loss": 0.685, "step": 17867 }, { "epoch": 1.290877237343544, "grad_norm": 7.207891579323811, "learning_rate": 1.4747836390948417e-06, "loss": 0.6551, "step": 17868 }, { "epoch": 1.2909494825437535, "grad_norm": 5.8215429042384175, "learning_rate": 1.4745168702996235e-06, "loss": 0.6033, "step": 17869 }, { "epoch": 1.291021727743963, "grad_norm": 7.46624305181916, "learning_rate": 1.4742501155424727e-06, "loss": 0.6438, "step": 17870 }, { "epoch": 1.2910939729441724, "grad_norm": 7.279008381139241, "learning_rate": 1.473983374827043e-06, "loss": 0.7489, "step": 17871 }, { "epoch": 1.2911662181443821, "grad_norm": 7.194221074168744, "learning_rate": 1.4737166481569832e-06, "loss": 0.6748, "step": 17872 }, { "epoch": 1.2912384633445915, "grad_norm": 7.30462352704273, "learning_rate": 1.4734499355359454e-06, "loss": 0.6667, "step": 17873 }, { "epoch": 1.291310708544801, "grad_norm": 6.264756140076716, "learning_rate": 1.473183236967582e-06, "loss": 0.6678, "step": 17874 }, { "epoch": 1.2913829537450106, "grad_norm": 7.416411434739576, "learning_rate": 1.4729165524555418e-06, "loss": 0.7038, "step": 17875 }, { "epoch": 1.29145519894522, "grad_norm": 7.487555033333463, "learning_rate": 1.4726498820034768e-06, "loss": 0.6433, "step": 17876 }, { "epoch": 1.2915274441454296, "grad_norm": 7.634960235512138, "learning_rate": 1.4723832256150369e-06, "loss": 0.7061, "step": 17877 }, { "epoch": 1.2915996893456392, "grad_norm": 6.683626853731578, "learning_rate": 1.4721165832938736e-06, "loss": 0.5683, "step": 17878 }, { "epoch": 1.2916719345458487, "grad_norm": 9.009580387416731, "learning_rate": 1.4718499550436354e-06, "loss": 0.6598, "step": 17879 }, { "epoch": 1.291744179746058, "grad_norm": 5.945079262528309, "learning_rate": 1.471583340867973e-06, "loss": 0.5991, "step": 17880 }, { "epoch": 1.2918164249462676, "grad_norm": 7.787313649502788, "learning_rate": 1.4713167407705376e-06, "loss": 0.5659, "step": 17881 }, { "epoch": 1.2918886701464771, "grad_norm": 6.2236992392353745, "learning_rate": 1.471050154754976e-06, "loss": 0.6418, "step": 17882 }, { "epoch": 1.2919609153466867, "grad_norm": 7.108935801595143, "learning_rate": 1.4707835828249386e-06, "loss": 0.6698, "step": 17883 }, { "epoch": 1.2920331605468962, "grad_norm": 7.092498534467651, "learning_rate": 1.4705170249840745e-06, "loss": 0.6219, "step": 17884 }, { "epoch": 1.2921054057471058, "grad_norm": 6.4162810899229115, "learning_rate": 1.4702504812360346e-06, "loss": 0.6499, "step": 17885 }, { "epoch": 1.2921776509473153, "grad_norm": 6.93934455612721, "learning_rate": 1.469983951584465e-06, "loss": 0.6627, "step": 17886 }, { "epoch": 1.2922498961475246, "grad_norm": 7.076438989317385, "learning_rate": 1.4697174360330155e-06, "loss": 0.6109, "step": 17887 }, { "epoch": 1.2923221413477342, "grad_norm": 6.712621546378311, "learning_rate": 1.4694509345853342e-06, "loss": 0.6137, "step": 17888 }, { "epoch": 1.2923943865479437, "grad_norm": 7.063375773186106, "learning_rate": 1.46918444724507e-06, "loss": 0.666, "step": 17889 }, { "epoch": 1.2924666317481532, "grad_norm": 6.956814933382517, "learning_rate": 1.4689179740158696e-06, "loss": 0.6785, "step": 17890 }, { "epoch": 1.2925388769483628, "grad_norm": 6.353213012505987, "learning_rate": 1.4686515149013819e-06, "loss": 0.5935, "step": 17891 }, { "epoch": 1.2926111221485723, "grad_norm": 7.811197401172748, "learning_rate": 1.4683850699052543e-06, "loss": 0.7188, "step": 17892 }, { "epoch": 1.2926833673487819, "grad_norm": 7.877384127651773, "learning_rate": 1.4681186390311337e-06, "loss": 0.5951, "step": 17893 }, { "epoch": 1.2927556125489912, "grad_norm": 8.603472837932765, "learning_rate": 1.467852222282668e-06, "loss": 0.6316, "step": 17894 }, { "epoch": 1.2928278577492007, "grad_norm": 7.603926256812527, "learning_rate": 1.4675858196635036e-06, "loss": 0.6355, "step": 17895 }, { "epoch": 1.2929001029494103, "grad_norm": 7.865866556804351, "learning_rate": 1.467319431177289e-06, "loss": 0.6526, "step": 17896 }, { "epoch": 1.2929723481496198, "grad_norm": 7.131224740708305, "learning_rate": 1.4670530568276684e-06, "loss": 0.6123, "step": 17897 }, { "epoch": 1.2930445933498294, "grad_norm": 7.235436643804828, "learning_rate": 1.4667866966182888e-06, "loss": 0.6084, "step": 17898 }, { "epoch": 1.293116838550039, "grad_norm": 6.882961587665963, "learning_rate": 1.4665203505527989e-06, "loss": 0.5896, "step": 17899 }, { "epoch": 1.2931890837502484, "grad_norm": 7.662779970017179, "learning_rate": 1.4662540186348418e-06, "loss": 0.6158, "step": 17900 }, { "epoch": 1.2932613289504578, "grad_norm": 6.883889848997077, "learning_rate": 1.4659877008680644e-06, "loss": 0.7984, "step": 17901 }, { "epoch": 1.2933335741506673, "grad_norm": 6.254623533975312, "learning_rate": 1.465721397256113e-06, "loss": 0.6071, "step": 17902 }, { "epoch": 1.2934058193508768, "grad_norm": 8.422679402678966, "learning_rate": 1.4654551078026324e-06, "loss": 0.697, "step": 17903 }, { "epoch": 1.2934780645510864, "grad_norm": 6.93106330365602, "learning_rate": 1.4651888325112681e-06, "loss": 0.6408, "step": 17904 }, { "epoch": 1.293550309751296, "grad_norm": 8.089814985915787, "learning_rate": 1.464922571385665e-06, "loss": 0.6068, "step": 17905 }, { "epoch": 1.2936225549515055, "grad_norm": 6.326086248069779, "learning_rate": 1.4646563244294689e-06, "loss": 0.595, "step": 17906 }, { "epoch": 1.293694800151715, "grad_norm": 5.812481951941906, "learning_rate": 1.4643900916463233e-06, "loss": 0.7286, "step": 17907 }, { "epoch": 1.2937670453519243, "grad_norm": 6.246262310108317, "learning_rate": 1.4641238730398732e-06, "loss": 0.5959, "step": 17908 }, { "epoch": 1.2938392905521339, "grad_norm": 6.676328651123497, "learning_rate": 1.4638576686137629e-06, "loss": 0.6302, "step": 17909 }, { "epoch": 1.2939115357523434, "grad_norm": 6.157005922023149, "learning_rate": 1.4635914783716377e-06, "loss": 0.5513, "step": 17910 }, { "epoch": 1.293983780952553, "grad_norm": 7.138763786173023, "learning_rate": 1.4633253023171392e-06, "loss": 0.6187, "step": 17911 }, { "epoch": 1.2940560261527625, "grad_norm": 5.684983252078762, "learning_rate": 1.463059140453912e-06, "loss": 0.7063, "step": 17912 }, { "epoch": 1.294128271352972, "grad_norm": 8.173832558615882, "learning_rate": 1.4627929927856017e-06, "loss": 0.6679, "step": 17913 }, { "epoch": 1.2942005165531816, "grad_norm": 9.873353458704281, "learning_rate": 1.4625268593158487e-06, "loss": 0.6558, "step": 17914 }, { "epoch": 1.294272761753391, "grad_norm": 6.513114610251345, "learning_rate": 1.462260740048298e-06, "loss": 0.6397, "step": 17915 }, { "epoch": 1.2943450069536004, "grad_norm": 7.198527227877122, "learning_rate": 1.4619946349865915e-06, "loss": 0.6748, "step": 17916 }, { "epoch": 1.29441725215381, "grad_norm": 7.100204959113912, "learning_rate": 1.4617285441343732e-06, "loss": 0.7126, "step": 17917 }, { "epoch": 1.2944894973540195, "grad_norm": 7.585827371915598, "learning_rate": 1.4614624674952843e-06, "loss": 0.6369, "step": 17918 }, { "epoch": 1.294561742554229, "grad_norm": 6.603844314875043, "learning_rate": 1.461196405072968e-06, "loss": 0.6831, "step": 17919 }, { "epoch": 1.2946339877544386, "grad_norm": 7.269147384159091, "learning_rate": 1.460930356871066e-06, "loss": 0.6754, "step": 17920 }, { "epoch": 1.2947062329546482, "grad_norm": 5.967919097106576, "learning_rate": 1.4606643228932217e-06, "loss": 0.5376, "step": 17921 }, { "epoch": 1.2947784781548575, "grad_norm": 5.913819321907626, "learning_rate": 1.460398303143075e-06, "loss": 0.7143, "step": 17922 }, { "epoch": 1.2948507233550672, "grad_norm": 6.281046242516828, "learning_rate": 1.4601322976242683e-06, "loss": 0.6262, "step": 17923 }, { "epoch": 1.2949229685552766, "grad_norm": 6.421984595038828, "learning_rate": 1.459866306340444e-06, "loss": 0.6775, "step": 17924 }, { "epoch": 1.294995213755486, "grad_norm": 7.046071888565848, "learning_rate": 1.4596003292952416e-06, "loss": 0.7134, "step": 17925 }, { "epoch": 1.2950674589556956, "grad_norm": 5.805827587189269, "learning_rate": 1.4593343664923026e-06, "loss": 0.5868, "step": 17926 }, { "epoch": 1.2951397041559052, "grad_norm": 6.328341105974333, "learning_rate": 1.4590684179352682e-06, "loss": 0.6168, "step": 17927 }, { "epoch": 1.2952119493561147, "grad_norm": 8.117072830638556, "learning_rate": 1.4588024836277792e-06, "loss": 0.673, "step": 17928 }, { "epoch": 1.295284194556324, "grad_norm": 7.158164755183658, "learning_rate": 1.4585365635734755e-06, "loss": 0.6071, "step": 17929 }, { "epoch": 1.2953564397565338, "grad_norm": 8.035168593512639, "learning_rate": 1.4582706577759974e-06, "loss": 0.6893, "step": 17930 }, { "epoch": 1.2954286849567431, "grad_norm": 7.166054218944178, "learning_rate": 1.4580047662389867e-06, "loss": 0.6725, "step": 17931 }, { "epoch": 1.2955009301569527, "grad_norm": 6.109867320017096, "learning_rate": 1.4577388889660803e-06, "loss": 0.6601, "step": 17932 }, { "epoch": 1.2955731753571622, "grad_norm": 8.427944169245658, "learning_rate": 1.4574730259609194e-06, "loss": 0.6251, "step": 17933 }, { "epoch": 1.2956454205573718, "grad_norm": 7.1229866261676005, "learning_rate": 1.4572071772271432e-06, "loss": 0.6483, "step": 17934 }, { "epoch": 1.2957176657575813, "grad_norm": 6.6578244062136855, "learning_rate": 1.456941342768392e-06, "loss": 0.6227, "step": 17935 }, { "epoch": 1.2957899109577906, "grad_norm": 6.790483086863446, "learning_rate": 1.456675522588303e-06, "loss": 0.669, "step": 17936 }, { "epoch": 1.2958621561580004, "grad_norm": 6.689733684810005, "learning_rate": 1.4564097166905161e-06, "loss": 0.718, "step": 17937 }, { "epoch": 1.2959344013582097, "grad_norm": 10.330795058372125, "learning_rate": 1.4561439250786696e-06, "loss": 0.6655, "step": 17938 }, { "epoch": 1.2960066465584192, "grad_norm": 6.5856449881289505, "learning_rate": 1.4558781477564027e-06, "loss": 0.6718, "step": 17939 }, { "epoch": 1.2960788917586288, "grad_norm": 4.964271878637811, "learning_rate": 1.4556123847273528e-06, "loss": 0.5824, "step": 17940 }, { "epoch": 1.2961511369588383, "grad_norm": 7.513463460928717, "learning_rate": 1.455346635995159e-06, "loss": 0.6173, "step": 17941 }, { "epoch": 1.2962233821590479, "grad_norm": 5.408930119688445, "learning_rate": 1.4550809015634594e-06, "loss": 0.6172, "step": 17942 }, { "epoch": 1.2962956273592572, "grad_norm": 7.716921199723007, "learning_rate": 1.4548151814358897e-06, "loss": 0.6923, "step": 17943 }, { "epoch": 1.296367872559467, "grad_norm": 8.042510098779164, "learning_rate": 1.4545494756160886e-06, "loss": 0.6497, "step": 17944 }, { "epoch": 1.2964401177596763, "grad_norm": 5.667770876542591, "learning_rate": 1.4542837841076935e-06, "loss": 0.5964, "step": 17945 }, { "epoch": 1.2965123629598858, "grad_norm": 6.7789907603388695, "learning_rate": 1.4540181069143428e-06, "loss": 0.6555, "step": 17946 }, { "epoch": 1.2965846081600954, "grad_norm": 6.686021017367335, "learning_rate": 1.4537524440396698e-06, "loss": 0.6206, "step": 17947 }, { "epoch": 1.296656853360305, "grad_norm": 6.71116486809248, "learning_rate": 1.4534867954873144e-06, "loss": 0.6263, "step": 17948 }, { "epoch": 1.2967290985605144, "grad_norm": 6.131787956816779, "learning_rate": 1.4532211612609132e-06, "loss": 0.5811, "step": 17949 }, { "epoch": 1.296801343760724, "grad_norm": 7.265179232797218, "learning_rate": 1.4529555413641006e-06, "loss": 0.6845, "step": 17950 }, { "epoch": 1.2968735889609335, "grad_norm": 5.965410666514588, "learning_rate": 1.4526899358005136e-06, "loss": 0.6836, "step": 17951 }, { "epoch": 1.2969458341611428, "grad_norm": 7.298814274889856, "learning_rate": 1.4524243445737884e-06, "loss": 0.6008, "step": 17952 }, { "epoch": 1.2970180793613524, "grad_norm": 7.28194492936782, "learning_rate": 1.4521587676875615e-06, "loss": 0.6125, "step": 17953 }, { "epoch": 1.297090324561562, "grad_norm": 6.763204586532694, "learning_rate": 1.4518932051454665e-06, "loss": 0.6067, "step": 17954 }, { "epoch": 1.2971625697617715, "grad_norm": 5.6524006090026315, "learning_rate": 1.4516276569511396e-06, "loss": 0.6101, "step": 17955 }, { "epoch": 1.297234814961981, "grad_norm": 6.035073131001248, "learning_rate": 1.451362123108216e-06, "loss": 0.6182, "step": 17956 }, { "epoch": 1.2973070601621906, "grad_norm": 6.218349424716425, "learning_rate": 1.4510966036203305e-06, "loss": 0.5994, "step": 17957 }, { "epoch": 1.2973793053624, "grad_norm": 6.517142651956244, "learning_rate": 1.4508310984911187e-06, "loss": 0.644, "step": 17958 }, { "epoch": 1.2974515505626094, "grad_norm": 6.294112212318729, "learning_rate": 1.4505656077242142e-06, "loss": 0.6642, "step": 17959 }, { "epoch": 1.297523795762819, "grad_norm": 7.390283556545214, "learning_rate": 1.4503001313232526e-06, "loss": 0.7653, "step": 17960 }, { "epoch": 1.2975960409630285, "grad_norm": 8.597823075959857, "learning_rate": 1.4500346692918663e-06, "loss": 0.6062, "step": 17961 }, { "epoch": 1.297668286163238, "grad_norm": 8.114355068562809, "learning_rate": 1.44976922163369e-06, "loss": 0.6737, "step": 17962 }, { "epoch": 1.2977405313634476, "grad_norm": 8.491529395354407, "learning_rate": 1.4495037883523588e-06, "loss": 0.6163, "step": 17963 }, { "epoch": 1.2978127765636571, "grad_norm": 6.441025444512347, "learning_rate": 1.449238369451504e-06, "loss": 0.654, "step": 17964 }, { "epoch": 1.2978850217638667, "grad_norm": 6.552520430095308, "learning_rate": 1.44897296493476e-06, "loss": 0.6023, "step": 17965 }, { "epoch": 1.297957266964076, "grad_norm": 5.77130393118939, "learning_rate": 1.44870757480576e-06, "loss": 0.5926, "step": 17966 }, { "epoch": 1.2980295121642855, "grad_norm": 7.67277543325237, "learning_rate": 1.448442199068137e-06, "loss": 0.619, "step": 17967 }, { "epoch": 1.298101757364495, "grad_norm": 8.47757005460655, "learning_rate": 1.448176837725524e-06, "loss": 0.7059, "step": 17968 }, { "epoch": 1.2981740025647046, "grad_norm": 6.326798363545261, "learning_rate": 1.4479114907815534e-06, "loss": 0.6193, "step": 17969 }, { "epoch": 1.2982462477649142, "grad_norm": 6.2074872492191835, "learning_rate": 1.4476461582398576e-06, "loss": 0.6078, "step": 17970 }, { "epoch": 1.2983184929651237, "grad_norm": 7.337942351178479, "learning_rate": 1.4473808401040695e-06, "loss": 0.6089, "step": 17971 }, { "epoch": 1.2983907381653332, "grad_norm": 7.343630006499734, "learning_rate": 1.44711553637782e-06, "loss": 0.6305, "step": 17972 }, { "epoch": 1.2984629833655426, "grad_norm": 8.492638485610088, "learning_rate": 1.4468502470647405e-06, "loss": 0.6609, "step": 17973 }, { "epoch": 1.298535228565752, "grad_norm": 7.559282611536244, "learning_rate": 1.4465849721684649e-06, "loss": 0.6038, "step": 17974 }, { "epoch": 1.2986074737659616, "grad_norm": 6.100655579906038, "learning_rate": 1.446319711692622e-06, "loss": 0.6337, "step": 17975 }, { "epoch": 1.2986797189661712, "grad_norm": 7.083826863168223, "learning_rate": 1.4460544656408432e-06, "loss": 0.6244, "step": 17976 }, { "epoch": 1.2987519641663807, "grad_norm": 7.452489541912451, "learning_rate": 1.4457892340167612e-06, "loss": 0.6478, "step": 17977 }, { "epoch": 1.2988242093665903, "grad_norm": 6.506723594468606, "learning_rate": 1.4455240168240075e-06, "loss": 0.6806, "step": 17978 }, { "epoch": 1.2988964545667998, "grad_norm": 6.891102746157937, "learning_rate": 1.4452588140662099e-06, "loss": 0.5598, "step": 17979 }, { "epoch": 1.2989686997670091, "grad_norm": 6.414754442256021, "learning_rate": 1.444993625747e-06, "loss": 0.5845, "step": 17980 }, { "epoch": 1.2990409449672187, "grad_norm": 7.101753996060173, "learning_rate": 1.4447284518700097e-06, "loss": 0.7067, "step": 17981 }, { "epoch": 1.2991131901674282, "grad_norm": 7.857729426695988, "learning_rate": 1.4444632924388663e-06, "loss": 0.6177, "step": 17982 }, { "epoch": 1.2991854353676378, "grad_norm": 7.449897499147403, "learning_rate": 1.4441981474572009e-06, "loss": 0.6048, "step": 17983 }, { "epoch": 1.2992576805678473, "grad_norm": 6.404699817207295, "learning_rate": 1.4439330169286428e-06, "loss": 0.6665, "step": 17984 }, { "epoch": 1.2993299257680568, "grad_norm": 6.469913396476815, "learning_rate": 1.4436679008568222e-06, "loss": 0.5962, "step": 17985 }, { "epoch": 1.2994021709682664, "grad_norm": 6.943213781877208, "learning_rate": 1.4434027992453676e-06, "loss": 0.6158, "step": 17986 }, { "epoch": 1.2994744161684757, "grad_norm": 7.608433972710142, "learning_rate": 1.443137712097908e-06, "loss": 0.61, "step": 17987 }, { "epoch": 1.2995466613686852, "grad_norm": 6.804039977981659, "learning_rate": 1.4428726394180736e-06, "loss": 0.6366, "step": 17988 }, { "epoch": 1.2996189065688948, "grad_norm": 6.574151440534812, "learning_rate": 1.4426075812094914e-06, "loss": 0.6964, "step": 17989 }, { "epoch": 1.2996911517691043, "grad_norm": 6.373131571705815, "learning_rate": 1.4423425374757902e-06, "loss": 0.5533, "step": 17990 }, { "epoch": 1.2997633969693139, "grad_norm": 6.350726721322215, "learning_rate": 1.4420775082205984e-06, "loss": 0.6084, "step": 17991 }, { "epoch": 1.2998356421695234, "grad_norm": 6.503761083684008, "learning_rate": 1.441812493447545e-06, "loss": 0.7379, "step": 17992 }, { "epoch": 1.299907887369733, "grad_norm": 8.496868678739833, "learning_rate": 1.441547493160256e-06, "loss": 0.6252, "step": 17993 }, { "epoch": 1.2999801325699423, "grad_norm": 6.9284832068151205, "learning_rate": 1.4412825073623599e-06, "loss": 0.6101, "step": 17994 }, { "epoch": 1.3000523777701518, "grad_norm": 5.849926952981944, "learning_rate": 1.4410175360574847e-06, "loss": 0.5636, "step": 17995 }, { "epoch": 1.3001246229703614, "grad_norm": 6.4951564642522035, "learning_rate": 1.4407525792492567e-06, "loss": 0.6131, "step": 17996 }, { "epoch": 1.300196868170571, "grad_norm": 6.787894513939727, "learning_rate": 1.4404876369413035e-06, "loss": 0.6182, "step": 17997 }, { "epoch": 1.3002691133707804, "grad_norm": 7.734033935425073, "learning_rate": 1.4402227091372516e-06, "loss": 0.6845, "step": 17998 }, { "epoch": 1.30034135857099, "grad_norm": 5.751902058347166, "learning_rate": 1.4399577958407296e-06, "loss": 0.721, "step": 17999 }, { "epoch": 1.3004136037711995, "grad_norm": 7.596825749917486, "learning_rate": 1.439692897055361e-06, "loss": 0.6205, "step": 18000 }, { "epoch": 1.3004858489714088, "grad_norm": 5.638420540705319, "learning_rate": 1.4394280127847732e-06, "loss": 0.6478, "step": 18001 }, { "epoch": 1.3005580941716186, "grad_norm": 7.660813073414648, "learning_rate": 1.4391631430325925e-06, "loss": 0.7312, "step": 18002 }, { "epoch": 1.300630339371828, "grad_norm": 6.865675272560026, "learning_rate": 1.438898287802446e-06, "loss": 0.6648, "step": 18003 }, { "epoch": 1.3007025845720375, "grad_norm": 6.760551964611391, "learning_rate": 1.4386334470979557e-06, "loss": 0.6066, "step": 18004 }, { "epoch": 1.300774829772247, "grad_norm": 7.645892677457948, "learning_rate": 1.4383686209227504e-06, "loss": 0.6311, "step": 18005 }, { "epoch": 1.3008470749724566, "grad_norm": 6.219925505753195, "learning_rate": 1.4381038092804556e-06, "loss": 0.6545, "step": 18006 }, { "epoch": 1.300919320172666, "grad_norm": 6.8856146972003005, "learning_rate": 1.4378390121746943e-06, "loss": 0.7095, "step": 18007 }, { "epoch": 1.3009915653728754, "grad_norm": 7.382581789963322, "learning_rate": 1.4375742296090917e-06, "loss": 0.6812, "step": 18008 }, { "epoch": 1.3010638105730852, "grad_norm": 7.617699802483799, "learning_rate": 1.4373094615872735e-06, "loss": 0.6225, "step": 18009 }, { "epoch": 1.3011360557732945, "grad_norm": 6.479043777461584, "learning_rate": 1.4370447081128641e-06, "loss": 0.7337, "step": 18010 }, { "epoch": 1.301208300973504, "grad_norm": 6.07352004371728, "learning_rate": 1.4367799691894868e-06, "loss": 0.6095, "step": 18011 }, { "epoch": 1.3012805461737136, "grad_norm": 6.274597896747579, "learning_rate": 1.4365152448207658e-06, "loss": 0.6436, "step": 18012 }, { "epoch": 1.3013527913739231, "grad_norm": 6.0854714524901246, "learning_rate": 1.4362505350103257e-06, "loss": 0.6407, "step": 18013 }, { "epoch": 1.3014250365741327, "grad_norm": 7.877828181000474, "learning_rate": 1.4359858397617895e-06, "loss": 0.6221, "step": 18014 }, { "epoch": 1.301497281774342, "grad_norm": 7.329889471409916, "learning_rate": 1.4357211590787811e-06, "loss": 0.5941, "step": 18015 }, { "epoch": 1.3015695269745517, "grad_norm": 6.876078573065374, "learning_rate": 1.435456492964924e-06, "loss": 0.6341, "step": 18016 }, { "epoch": 1.301641772174761, "grad_norm": 7.1986594435647415, "learning_rate": 1.4351918414238413e-06, "loss": 0.639, "step": 18017 }, { "epoch": 1.3017140173749706, "grad_norm": 6.687209934111599, "learning_rate": 1.434927204459155e-06, "loss": 0.6243, "step": 18018 }, { "epoch": 1.3017862625751802, "grad_norm": 8.240901669547359, "learning_rate": 1.4346625820744883e-06, "loss": 0.6691, "step": 18019 }, { "epoch": 1.3018585077753897, "grad_norm": 6.672348398749639, "learning_rate": 1.4343979742734646e-06, "loss": 0.5938, "step": 18020 }, { "epoch": 1.3019307529755992, "grad_norm": 6.567302871841533, "learning_rate": 1.4341333810597042e-06, "loss": 0.6879, "step": 18021 }, { "epoch": 1.3020029981758086, "grad_norm": 6.185755319940632, "learning_rate": 1.4338688024368302e-06, "loss": 0.6215, "step": 18022 }, { "epoch": 1.3020752433760183, "grad_norm": 7.132077098761327, "learning_rate": 1.4336042384084647e-06, "loss": 0.7186, "step": 18023 }, { "epoch": 1.3021474885762276, "grad_norm": 7.685117771137278, "learning_rate": 1.433339688978229e-06, "loss": 0.5707, "step": 18024 }, { "epoch": 1.3022197337764372, "grad_norm": 7.390753263042003, "learning_rate": 1.4330751541497446e-06, "loss": 0.5798, "step": 18025 }, { "epoch": 1.3022919789766467, "grad_norm": 6.127895994055778, "learning_rate": 1.432810633926633e-06, "loss": 0.6186, "step": 18026 }, { "epoch": 1.3023642241768563, "grad_norm": 5.851559240444686, "learning_rate": 1.4325461283125152e-06, "loss": 0.6316, "step": 18027 }, { "epoch": 1.3024364693770658, "grad_norm": 7.454735539924595, "learning_rate": 1.4322816373110132e-06, "loss": 0.5502, "step": 18028 }, { "epoch": 1.3025087145772754, "grad_norm": 6.600272618067567, "learning_rate": 1.4320171609257454e-06, "loss": 0.7079, "step": 18029 }, { "epoch": 1.302580959777485, "grad_norm": 6.7031537391028735, "learning_rate": 1.4317526991603336e-06, "loss": 0.6702, "step": 18030 }, { "epoch": 1.3026532049776942, "grad_norm": 8.055326834168396, "learning_rate": 1.4314882520183992e-06, "loss": 0.5682, "step": 18031 }, { "epoch": 1.3027254501779038, "grad_norm": 6.694089406132467, "learning_rate": 1.4312238195035586e-06, "loss": 0.6314, "step": 18032 }, { "epoch": 1.3027976953781133, "grad_norm": 6.713328458499496, "learning_rate": 1.430959401619435e-06, "loss": 0.6564, "step": 18033 }, { "epoch": 1.3028699405783228, "grad_norm": 8.209178848740322, "learning_rate": 1.4306949983696473e-06, "loss": 0.6964, "step": 18034 }, { "epoch": 1.3029421857785324, "grad_norm": 6.8435214627224035, "learning_rate": 1.4304306097578159e-06, "loss": 0.6984, "step": 18035 }, { "epoch": 1.303014430978742, "grad_norm": 6.16700355365639, "learning_rate": 1.4301662357875576e-06, "loss": 0.6813, "step": 18036 }, { "epoch": 1.3030866761789515, "grad_norm": 6.842824934358279, "learning_rate": 1.429901876462493e-06, "loss": 0.6815, "step": 18037 }, { "epoch": 1.3031589213791608, "grad_norm": 7.647228422053476, "learning_rate": 1.4296375317862422e-06, "loss": 0.5738, "step": 18038 }, { "epoch": 1.3032311665793703, "grad_norm": 6.553775764864331, "learning_rate": 1.4293732017624212e-06, "loss": 0.6357, "step": 18039 }, { "epoch": 1.3033034117795799, "grad_norm": 6.9100905566761455, "learning_rate": 1.4291088863946494e-06, "loss": 0.6511, "step": 18040 }, { "epoch": 1.3033756569797894, "grad_norm": 7.045718349764557, "learning_rate": 1.4288445856865463e-06, "loss": 0.7196, "step": 18041 }, { "epoch": 1.303447902179999, "grad_norm": 6.383746859682475, "learning_rate": 1.4285802996417286e-06, "loss": 0.5939, "step": 18042 }, { "epoch": 1.3035201473802085, "grad_norm": 7.523588596938071, "learning_rate": 1.4283160282638148e-06, "loss": 0.5485, "step": 18043 }, { "epoch": 1.303592392580418, "grad_norm": 6.2844655171253425, "learning_rate": 1.4280517715564223e-06, "loss": 0.6228, "step": 18044 }, { "epoch": 1.3036646377806274, "grad_norm": 6.738315500365852, "learning_rate": 1.42778752952317e-06, "loss": 0.6309, "step": 18045 }, { "epoch": 1.303736882980837, "grad_norm": 6.767550504994143, "learning_rate": 1.4275233021676726e-06, "loss": 0.6503, "step": 18046 }, { "epoch": 1.3038091281810464, "grad_norm": 6.091363840712637, "learning_rate": 1.427259089493549e-06, "loss": 0.649, "step": 18047 }, { "epoch": 1.303881373381256, "grad_norm": 6.370158432647594, "learning_rate": 1.4269948915044152e-06, "loss": 0.6831, "step": 18048 }, { "epoch": 1.3039536185814655, "grad_norm": 7.200814826210777, "learning_rate": 1.426730708203889e-06, "loss": 0.6472, "step": 18049 }, { "epoch": 1.304025863781675, "grad_norm": 9.5936283265391, "learning_rate": 1.4264665395955852e-06, "loss": 0.6805, "step": 18050 }, { "epoch": 1.3040981089818846, "grad_norm": 5.9570949663600645, "learning_rate": 1.426202385683121e-06, "loss": 0.5911, "step": 18051 }, { "epoch": 1.304170354182094, "grad_norm": 7.88685587385437, "learning_rate": 1.4259382464701124e-06, "loss": 0.6458, "step": 18052 }, { "epoch": 1.3042425993823035, "grad_norm": 6.183358087344459, "learning_rate": 1.4256741219601752e-06, "loss": 0.6039, "step": 18053 }, { "epoch": 1.304314844582513, "grad_norm": 6.275393968350339, "learning_rate": 1.4254100121569254e-06, "loss": 0.5855, "step": 18054 }, { "epoch": 1.3043870897827226, "grad_norm": 6.185218467462113, "learning_rate": 1.4251459170639776e-06, "loss": 0.6243, "step": 18055 }, { "epoch": 1.304459334982932, "grad_norm": 6.50547120108111, "learning_rate": 1.424881836684949e-06, "loss": 0.6166, "step": 18056 }, { "epoch": 1.3045315801831416, "grad_norm": 7.559277817478742, "learning_rate": 1.424617771023452e-06, "loss": 0.6337, "step": 18057 }, { "epoch": 1.3046038253833512, "grad_norm": 7.906325992023012, "learning_rate": 1.4243537200831032e-06, "loss": 0.6507, "step": 18058 }, { "epoch": 1.3046760705835605, "grad_norm": 8.755237428766968, "learning_rate": 1.4240896838675163e-06, "loss": 0.6559, "step": 18059 }, { "epoch": 1.30474831578377, "grad_norm": 8.200797195760671, "learning_rate": 1.4238256623803065e-06, "loss": 0.6483, "step": 18060 }, { "epoch": 1.3048205609839796, "grad_norm": 7.682684654055009, "learning_rate": 1.4235616556250878e-06, "loss": 0.6683, "step": 18061 }, { "epoch": 1.3048928061841891, "grad_norm": 6.163076885218554, "learning_rate": 1.423297663605474e-06, "loss": 0.68, "step": 18062 }, { "epoch": 1.3049650513843987, "grad_norm": 7.2738421170855165, "learning_rate": 1.4230336863250804e-06, "loss": 0.6325, "step": 18063 }, { "epoch": 1.3050372965846082, "grad_norm": 5.948936290385076, "learning_rate": 1.4227697237875189e-06, "loss": 0.6428, "step": 18064 }, { "epoch": 1.3051095417848178, "grad_norm": 7.7357240675560055, "learning_rate": 1.4225057759964026e-06, "loss": 0.6327, "step": 18065 }, { "epoch": 1.305181786985027, "grad_norm": 7.580092216071675, "learning_rate": 1.4222418429553459e-06, "loss": 0.6696, "step": 18066 }, { "epoch": 1.3052540321852366, "grad_norm": 6.071832140164126, "learning_rate": 1.4219779246679626e-06, "loss": 0.6284, "step": 18067 }, { "epoch": 1.3053262773854462, "grad_norm": 5.861767415227647, "learning_rate": 1.4217140211378635e-06, "loss": 0.6406, "step": 18068 }, { "epoch": 1.3053985225856557, "grad_norm": 8.08716495061629, "learning_rate": 1.4214501323686624e-06, "loss": 0.6502, "step": 18069 }, { "epoch": 1.3054707677858652, "grad_norm": 6.294629778167216, "learning_rate": 1.4211862583639713e-06, "loss": 0.6133, "step": 18070 }, { "epoch": 1.3055430129860748, "grad_norm": 6.459398079338607, "learning_rate": 1.4209223991274027e-06, "loss": 0.6267, "step": 18071 }, { "epoch": 1.3056152581862843, "grad_norm": 7.1381925283660905, "learning_rate": 1.4206585546625685e-06, "loss": 0.5607, "step": 18072 }, { "epoch": 1.3056875033864936, "grad_norm": 6.506647378913767, "learning_rate": 1.4203947249730809e-06, "loss": 0.6051, "step": 18073 }, { "epoch": 1.3057597485867034, "grad_norm": 7.046892051929896, "learning_rate": 1.4201309100625522e-06, "loss": 0.6332, "step": 18074 }, { "epoch": 1.3058319937869127, "grad_norm": 7.37130929442737, "learning_rate": 1.4198671099345918e-06, "loss": 0.5792, "step": 18075 }, { "epoch": 1.3059042389871223, "grad_norm": 6.9846043901088715, "learning_rate": 1.419603324592812e-06, "loss": 0.7051, "step": 18076 }, { "epoch": 1.3059764841873318, "grad_norm": 6.777866626984009, "learning_rate": 1.4193395540408236e-06, "loss": 0.6524, "step": 18077 }, { "epoch": 1.3060487293875414, "grad_norm": 8.278481074530955, "learning_rate": 1.419075798282239e-06, "loss": 0.6407, "step": 18078 }, { "epoch": 1.306120974587751, "grad_norm": 6.493116401845123, "learning_rate": 1.4188120573206662e-06, "loss": 0.6834, "step": 18079 }, { "epoch": 1.3061932197879602, "grad_norm": 8.061273525899924, "learning_rate": 1.418548331159717e-06, "loss": 0.622, "step": 18080 }, { "epoch": 1.30626546498817, "grad_norm": 6.693135391641918, "learning_rate": 1.4182846198030013e-06, "loss": 0.6211, "step": 18081 }, { "epoch": 1.3063377101883793, "grad_norm": 8.591171932190452, "learning_rate": 1.4180209232541292e-06, "loss": 0.6138, "step": 18082 }, { "epoch": 1.3064099553885888, "grad_norm": 5.946461870974957, "learning_rate": 1.4177572415167106e-06, "loss": 0.7131, "step": 18083 }, { "epoch": 1.3064822005887984, "grad_norm": 7.1347065444617925, "learning_rate": 1.4174935745943551e-06, "loss": 0.6611, "step": 18084 }, { "epoch": 1.306554445789008, "grad_norm": 5.976899819422776, "learning_rate": 1.4172299224906733e-06, "loss": 0.5879, "step": 18085 }, { "epoch": 1.3066266909892175, "grad_norm": 7.183198653490505, "learning_rate": 1.416966285209272e-06, "loss": 0.6157, "step": 18086 }, { "epoch": 1.3066989361894268, "grad_norm": 6.70059860743785, "learning_rate": 1.4167026627537611e-06, "loss": 0.631, "step": 18087 }, { "epoch": 1.3067711813896365, "grad_norm": 7.1470261478161925, "learning_rate": 1.416439055127751e-06, "loss": 0.6412, "step": 18088 }, { "epoch": 1.3068434265898459, "grad_norm": 5.688621326618274, "learning_rate": 1.4161754623348468e-06, "loss": 0.5976, "step": 18089 }, { "epoch": 1.3069156717900554, "grad_norm": 6.789195385368791, "learning_rate": 1.4159118843786595e-06, "loss": 0.6189, "step": 18090 }, { "epoch": 1.306987916990265, "grad_norm": 6.751407335295366, "learning_rate": 1.4156483212627976e-06, "loss": 0.602, "step": 18091 }, { "epoch": 1.3070601621904745, "grad_norm": 7.965686882559867, "learning_rate": 1.4153847729908687e-06, "loss": 0.6776, "step": 18092 }, { "epoch": 1.307132407390684, "grad_norm": 8.728351515505242, "learning_rate": 1.415121239566479e-06, "loss": 0.6484, "step": 18093 }, { "epoch": 1.3072046525908934, "grad_norm": 6.768568706814402, "learning_rate": 1.4148577209932373e-06, "loss": 0.6587, "step": 18094 }, { "epoch": 1.3072768977911031, "grad_norm": 6.7451159857073275, "learning_rate": 1.4145942172747521e-06, "loss": 0.6053, "step": 18095 }, { "epoch": 1.3073491429913124, "grad_norm": 8.711241411458584, "learning_rate": 1.414330728414628e-06, "loss": 0.6837, "step": 18096 }, { "epoch": 1.307421388191522, "grad_norm": 6.978514531730952, "learning_rate": 1.4140672544164736e-06, "loss": 0.6101, "step": 18097 }, { "epoch": 1.3074936333917315, "grad_norm": 8.406651597197314, "learning_rate": 1.4138037952838948e-06, "loss": 0.6717, "step": 18098 }, { "epoch": 1.307565878591941, "grad_norm": 8.193426651615926, "learning_rate": 1.4135403510204993e-06, "loss": 0.7038, "step": 18099 }, { "epoch": 1.3076381237921506, "grad_norm": 6.167265201424151, "learning_rate": 1.4132769216298921e-06, "loss": 0.5066, "step": 18100 }, { "epoch": 1.3077103689923602, "grad_norm": 7.143096963398693, "learning_rate": 1.4130135071156806e-06, "loss": 0.6388, "step": 18101 }, { "epoch": 1.3077826141925697, "grad_norm": 6.454505163206198, "learning_rate": 1.4127501074814698e-06, "loss": 0.6861, "step": 18102 }, { "epoch": 1.307854859392779, "grad_norm": 7.497389784228919, "learning_rate": 1.4124867227308672e-06, "loss": 0.5667, "step": 18103 }, { "epoch": 1.3079271045929886, "grad_norm": 6.528034203364956, "learning_rate": 1.4122233528674759e-06, "loss": 0.6429, "step": 18104 }, { "epoch": 1.307999349793198, "grad_norm": 7.123572758726969, "learning_rate": 1.4119599978949022e-06, "loss": 0.6822, "step": 18105 }, { "epoch": 1.3080715949934076, "grad_norm": 6.23573200496103, "learning_rate": 1.4116966578167524e-06, "loss": 0.5828, "step": 18106 }, { "epoch": 1.3081438401936172, "grad_norm": 6.357825688020084, "learning_rate": 1.4114333326366293e-06, "loss": 0.7251, "step": 18107 }, { "epoch": 1.3082160853938267, "grad_norm": 6.302643890251473, "learning_rate": 1.4111700223581387e-06, "loss": 0.6078, "step": 18108 }, { "epoch": 1.3082883305940363, "grad_norm": 6.32765780007605, "learning_rate": 1.4109067269848837e-06, "loss": 0.6688, "step": 18109 }, { "epoch": 1.3083605757942456, "grad_norm": 6.217285597248521, "learning_rate": 1.4106434465204725e-06, "loss": 0.6295, "step": 18110 }, { "epoch": 1.3084328209944551, "grad_norm": 8.161444518940394, "learning_rate": 1.4103801809685053e-06, "loss": 0.7072, "step": 18111 }, { "epoch": 1.3085050661946647, "grad_norm": 6.218185322490315, "learning_rate": 1.4101169303325876e-06, "loss": 0.6804, "step": 18112 }, { "epoch": 1.3085773113948742, "grad_norm": 7.622410318829833, "learning_rate": 1.4098536946163238e-06, "loss": 0.6364, "step": 18113 }, { "epoch": 1.3086495565950838, "grad_norm": 5.791225096338, "learning_rate": 1.4095904738233157e-06, "loss": 0.576, "step": 18114 }, { "epoch": 1.3087218017952933, "grad_norm": 6.681751097541137, "learning_rate": 1.409327267957167e-06, "loss": 0.7234, "step": 18115 }, { "epoch": 1.3087940469955028, "grad_norm": 7.292698843425419, "learning_rate": 1.4090640770214814e-06, "loss": 0.755, "step": 18116 }, { "epoch": 1.3088662921957122, "grad_norm": 5.973087033291528, "learning_rate": 1.4088009010198616e-06, "loss": 0.6873, "step": 18117 }, { "epoch": 1.3089385373959217, "grad_norm": 7.662246037766062, "learning_rate": 1.40853773995591e-06, "loss": 0.6541, "step": 18118 }, { "epoch": 1.3090107825961312, "grad_norm": 7.634375639582454, "learning_rate": 1.4082745938332293e-06, "loss": 0.708, "step": 18119 }, { "epoch": 1.3090830277963408, "grad_norm": 6.988890960860169, "learning_rate": 1.4080114626554228e-06, "loss": 0.6576, "step": 18120 }, { "epoch": 1.3091552729965503, "grad_norm": 7.2843325397263685, "learning_rate": 1.4077483464260905e-06, "loss": 0.6571, "step": 18121 }, { "epoch": 1.3092275181967599, "grad_norm": 6.947287314152958, "learning_rate": 1.4074852451488351e-06, "loss": 0.6265, "step": 18122 }, { "epoch": 1.3092997633969694, "grad_norm": 6.1093618875738445, "learning_rate": 1.407222158827259e-06, "loss": 0.6858, "step": 18123 }, { "epoch": 1.3093720085971787, "grad_norm": 6.783378583427547, "learning_rate": 1.4069590874649632e-06, "loss": 0.67, "step": 18124 }, { "epoch": 1.3094442537973883, "grad_norm": 6.488810665204907, "learning_rate": 1.4066960310655484e-06, "loss": 0.6541, "step": 18125 }, { "epoch": 1.3095164989975978, "grad_norm": 7.876023725544623, "learning_rate": 1.4064329896326156e-06, "loss": 0.6393, "step": 18126 }, { "epoch": 1.3095887441978074, "grad_norm": 7.348854141704333, "learning_rate": 1.4061699631697668e-06, "loss": 0.6555, "step": 18127 }, { "epoch": 1.309660989398017, "grad_norm": 6.783027099645223, "learning_rate": 1.4059069516806012e-06, "loss": 0.6548, "step": 18128 }, { "epoch": 1.3097332345982264, "grad_norm": 5.3805394570845, "learning_rate": 1.40564395516872e-06, "loss": 0.6175, "step": 18129 }, { "epoch": 1.309805479798436, "grad_norm": 6.219663926640375, "learning_rate": 1.4053809736377235e-06, "loss": 0.5646, "step": 18130 }, { "epoch": 1.3098777249986453, "grad_norm": 6.64824499620321, "learning_rate": 1.4051180070912126e-06, "loss": 0.6782, "step": 18131 }, { "epoch": 1.3099499701988548, "grad_norm": 6.1547158745418855, "learning_rate": 1.404855055532785e-06, "loss": 0.637, "step": 18132 }, { "epoch": 1.3100222153990644, "grad_norm": 7.9117684988900745, "learning_rate": 1.4045921189660416e-06, "loss": 0.6402, "step": 18133 }, { "epoch": 1.310094460599274, "grad_norm": 7.169996360448023, "learning_rate": 1.4043291973945812e-06, "loss": 0.7363, "step": 18134 }, { "epoch": 1.3101667057994835, "grad_norm": 6.3381539453195925, "learning_rate": 1.4040662908220043e-06, "loss": 0.6684, "step": 18135 }, { "epoch": 1.310238950999693, "grad_norm": 5.47640333610992, "learning_rate": 1.4038033992519084e-06, "loss": 0.6198, "step": 18136 }, { "epoch": 1.3103111961999026, "grad_norm": 6.806702553071942, "learning_rate": 1.4035405226878916e-06, "loss": 0.564, "step": 18137 }, { "epoch": 1.3103834414001119, "grad_norm": 8.01810980461525, "learning_rate": 1.4032776611335554e-06, "loss": 0.6196, "step": 18138 }, { "epoch": 1.3104556866003214, "grad_norm": 6.570960122939102, "learning_rate": 1.403014814592496e-06, "loss": 0.6315, "step": 18139 }, { "epoch": 1.310527931800531, "grad_norm": 7.90733794314926, "learning_rate": 1.4027519830683116e-06, "loss": 0.6626, "step": 18140 }, { "epoch": 1.3106001770007405, "grad_norm": 7.817041648649363, "learning_rate": 1.4024891665646006e-06, "loss": 0.6428, "step": 18141 }, { "epoch": 1.31067242220095, "grad_norm": 6.688185612124305, "learning_rate": 1.4022263650849622e-06, "loss": 0.6372, "step": 18142 }, { "epoch": 1.3107446674011596, "grad_norm": 5.984817369012164, "learning_rate": 1.401963578632991e-06, "loss": 0.6692, "step": 18143 }, { "epoch": 1.3108169126013691, "grad_norm": 7.0048745757697635, "learning_rate": 1.4017008072122863e-06, "loss": 0.6674, "step": 18144 }, { "epoch": 1.3108891578015784, "grad_norm": 6.076448106265269, "learning_rate": 1.4014380508264441e-06, "loss": 0.5924, "step": 18145 }, { "epoch": 1.3109614030017882, "grad_norm": 7.156051433090536, "learning_rate": 1.4011753094790625e-06, "loss": 0.6162, "step": 18146 }, { "epoch": 1.3110336482019975, "grad_norm": 5.6305904158797135, "learning_rate": 1.4009125831737377e-06, "loss": 0.6514, "step": 18147 }, { "epoch": 1.311105893402207, "grad_norm": 6.567671708910654, "learning_rate": 1.4006498719140662e-06, "loss": 0.6, "step": 18148 }, { "epoch": 1.3111781386024166, "grad_norm": 7.997974139244966, "learning_rate": 1.4003871757036452e-06, "loss": 0.6689, "step": 18149 }, { "epoch": 1.3112503838026262, "grad_norm": 8.041752579251519, "learning_rate": 1.4001244945460693e-06, "loss": 0.6717, "step": 18150 }, { "epoch": 1.3113226290028357, "grad_norm": 6.866839750509668, "learning_rate": 1.399861828444935e-06, "loss": 0.7301, "step": 18151 }, { "epoch": 1.311394874203045, "grad_norm": 6.904793287935642, "learning_rate": 1.3995991774038386e-06, "loss": 0.5774, "step": 18152 }, { "epoch": 1.3114671194032548, "grad_norm": 6.271030291973685, "learning_rate": 1.3993365414263748e-06, "loss": 0.6009, "step": 18153 }, { "epoch": 1.311539364603464, "grad_norm": 7.596971119092541, "learning_rate": 1.3990739205161386e-06, "loss": 0.6472, "step": 18154 }, { "epoch": 1.3116116098036736, "grad_norm": 6.522521104233836, "learning_rate": 1.3988113146767258e-06, "loss": 0.5768, "step": 18155 }, { "epoch": 1.3116838550038832, "grad_norm": 5.7400712246081245, "learning_rate": 1.3985487239117313e-06, "loss": 0.6462, "step": 18156 }, { "epoch": 1.3117561002040927, "grad_norm": 6.27090650074329, "learning_rate": 1.398286148224749e-06, "loss": 0.5936, "step": 18157 }, { "epoch": 1.3118283454043023, "grad_norm": 9.247899822286511, "learning_rate": 1.3980235876193743e-06, "loss": 0.6102, "step": 18158 }, { "epoch": 1.3119005906045116, "grad_norm": 6.702878863032677, "learning_rate": 1.397761042099201e-06, "loss": 0.6507, "step": 18159 }, { "epoch": 1.3119728358047213, "grad_norm": 6.716033244338692, "learning_rate": 1.397498511667824e-06, "loss": 0.5891, "step": 18160 }, { "epoch": 1.3120450810049307, "grad_norm": 5.602603831522729, "learning_rate": 1.3972359963288356e-06, "loss": 0.571, "step": 18161 }, { "epoch": 1.3121173262051402, "grad_norm": 6.610538749958074, "learning_rate": 1.39697349608583e-06, "loss": 0.6339, "step": 18162 }, { "epoch": 1.3121895714053498, "grad_norm": 5.719239927891048, "learning_rate": 1.3967110109424021e-06, "loss": 0.6002, "step": 18163 }, { "epoch": 1.3122618166055593, "grad_norm": 6.465295431256621, "learning_rate": 1.3964485409021426e-06, "loss": 0.5548, "step": 18164 }, { "epoch": 1.3123340618057688, "grad_norm": 7.553583863366329, "learning_rate": 1.3961860859686454e-06, "loss": 0.5769, "step": 18165 }, { "epoch": 1.3124063070059782, "grad_norm": 6.472261369593175, "learning_rate": 1.3959236461455032e-06, "loss": 0.6276, "step": 18166 }, { "epoch": 1.312478552206188, "grad_norm": 7.942100813514725, "learning_rate": 1.3956612214363108e-06, "loss": 0.6846, "step": 18167 }, { "epoch": 1.3125507974063972, "grad_norm": 6.304552268240665, "learning_rate": 1.3953988118446577e-06, "loss": 0.6526, "step": 18168 }, { "epoch": 1.3126230426066068, "grad_norm": 6.476994517312402, "learning_rate": 1.3951364173741373e-06, "loss": 0.6495, "step": 18169 }, { "epoch": 1.3126952878068163, "grad_norm": 6.284360201005853, "learning_rate": 1.3948740380283422e-06, "loss": 0.5967, "step": 18170 }, { "epoch": 1.3127675330070259, "grad_norm": 10.817387000159995, "learning_rate": 1.3946116738108628e-06, "loss": 0.6547, "step": 18171 }, { "epoch": 1.3128397782072354, "grad_norm": 7.917840837105057, "learning_rate": 1.3943493247252911e-06, "loss": 0.698, "step": 18172 }, { "epoch": 1.312912023407445, "grad_norm": 6.5394956383765335, "learning_rate": 1.394086990775219e-06, "loss": 0.6198, "step": 18173 }, { "epoch": 1.3129842686076545, "grad_norm": 6.931646128308607, "learning_rate": 1.3938246719642368e-06, "loss": 0.6074, "step": 18174 }, { "epoch": 1.3130565138078638, "grad_norm": 6.9061853742380706, "learning_rate": 1.3935623682959365e-06, "loss": 0.6432, "step": 18175 }, { "epoch": 1.3131287590080734, "grad_norm": 6.9279471946272055, "learning_rate": 1.3933000797739078e-06, "loss": 0.612, "step": 18176 }, { "epoch": 1.313201004208283, "grad_norm": 7.2715921816645706, "learning_rate": 1.393037806401743e-06, "loss": 0.62, "step": 18177 }, { "epoch": 1.3132732494084924, "grad_norm": 6.953060261285696, "learning_rate": 1.39277554818303e-06, "loss": 0.6687, "step": 18178 }, { "epoch": 1.313345494608702, "grad_norm": 7.093163285380254, "learning_rate": 1.3925133051213602e-06, "loss": 0.5862, "step": 18179 }, { "epoch": 1.3134177398089115, "grad_norm": 6.6357142343276205, "learning_rate": 1.3922510772203229e-06, "loss": 0.628, "step": 18180 }, { "epoch": 1.313489985009121, "grad_norm": 7.482450424932481, "learning_rate": 1.3919888644835097e-06, "loss": 0.5667, "step": 18181 }, { "epoch": 1.3135622302093304, "grad_norm": 6.672926094111241, "learning_rate": 1.3917266669145075e-06, "loss": 0.6631, "step": 18182 }, { "epoch": 1.31363447540954, "grad_norm": 6.456303070234296, "learning_rate": 1.3914644845169068e-06, "loss": 0.589, "step": 18183 }, { "epoch": 1.3137067206097495, "grad_norm": 6.249682609128543, "learning_rate": 1.3912023172942965e-06, "loss": 0.5806, "step": 18184 }, { "epoch": 1.313778965809959, "grad_norm": 7.796587554586807, "learning_rate": 1.3909401652502654e-06, "loss": 0.5903, "step": 18185 }, { "epoch": 1.3138512110101686, "grad_norm": 7.034427721596216, "learning_rate": 1.3906780283884025e-06, "loss": 0.6886, "step": 18186 }, { "epoch": 1.313923456210378, "grad_norm": 7.997194036490261, "learning_rate": 1.390415906712296e-06, "loss": 0.6113, "step": 18187 }, { "epoch": 1.3139957014105876, "grad_norm": 7.0140811436389, "learning_rate": 1.3901538002255352e-06, "loss": 0.6813, "step": 18188 }, { "epoch": 1.314067946610797, "grad_norm": 7.3099118078419885, "learning_rate": 1.3898917089317062e-06, "loss": 0.6045, "step": 18189 }, { "epoch": 1.3141401918110065, "grad_norm": 7.107824403791005, "learning_rate": 1.3896296328343977e-06, "loss": 0.627, "step": 18190 }, { "epoch": 1.314212437011216, "grad_norm": 7.654911593793168, "learning_rate": 1.3893675719371975e-06, "loss": 0.6033, "step": 18191 }, { "epoch": 1.3142846822114256, "grad_norm": 7.292858382433494, "learning_rate": 1.3891055262436936e-06, "loss": 0.6081, "step": 18192 }, { "epoch": 1.3143569274116351, "grad_norm": 7.310643408726796, "learning_rate": 1.3888434957574718e-06, "loss": 0.695, "step": 18193 }, { "epoch": 1.3144291726118447, "grad_norm": 6.3417677013688385, "learning_rate": 1.3885814804821185e-06, "loss": 0.619, "step": 18194 }, { "epoch": 1.3145014178120542, "grad_norm": 6.658009757660728, "learning_rate": 1.388319480421224e-06, "loss": 0.5999, "step": 18195 }, { "epoch": 1.3145736630122635, "grad_norm": 6.34654196950616, "learning_rate": 1.3880574955783716e-06, "loss": 0.6295, "step": 18196 }, { "epoch": 1.314645908212473, "grad_norm": 6.691650813722498, "learning_rate": 1.3877955259571488e-06, "loss": 0.5854, "step": 18197 }, { "epoch": 1.3147181534126826, "grad_norm": 6.437919528957171, "learning_rate": 1.3875335715611415e-06, "loss": 0.6327, "step": 18198 }, { "epoch": 1.3147903986128922, "grad_norm": 9.318148340135476, "learning_rate": 1.3872716323939367e-06, "loss": 0.6403, "step": 18199 }, { "epoch": 1.3148626438131017, "grad_norm": 6.471113132667458, "learning_rate": 1.3870097084591188e-06, "loss": 0.5789, "step": 18200 }, { "epoch": 1.3149348890133112, "grad_norm": 6.85494817512449, "learning_rate": 1.3867477997602735e-06, "loss": 0.6569, "step": 18201 }, { "epoch": 1.3150071342135208, "grad_norm": 6.369370911104381, "learning_rate": 1.386485906300986e-06, "loss": 0.6088, "step": 18202 }, { "epoch": 1.31507937941373, "grad_norm": 6.765089617330844, "learning_rate": 1.3862240280848427e-06, "loss": 0.6479, "step": 18203 }, { "epoch": 1.3151516246139396, "grad_norm": 6.8055659010758776, "learning_rate": 1.3859621651154271e-06, "loss": 0.6388, "step": 18204 }, { "epoch": 1.3152238698141492, "grad_norm": 9.210783435416372, "learning_rate": 1.3857003173963245e-06, "loss": 0.7218, "step": 18205 }, { "epoch": 1.3152961150143587, "grad_norm": 6.967035330671729, "learning_rate": 1.3854384849311206e-06, "loss": 0.6074, "step": 18206 }, { "epoch": 1.3153683602145683, "grad_norm": 6.3367156316452995, "learning_rate": 1.3851766677233975e-06, "loss": 0.6488, "step": 18207 }, { "epoch": 1.3154406054147778, "grad_norm": 6.313443859278216, "learning_rate": 1.3849148657767397e-06, "loss": 0.6284, "step": 18208 }, { "epoch": 1.3155128506149874, "grad_norm": 7.046031554637356, "learning_rate": 1.3846530790947322e-06, "loss": 0.6667, "step": 18209 }, { "epoch": 1.3155850958151967, "grad_norm": 5.90090232026928, "learning_rate": 1.3843913076809583e-06, "loss": 0.6111, "step": 18210 }, { "epoch": 1.3156573410154062, "grad_norm": 6.188317447186145, "learning_rate": 1.3841295515390007e-06, "loss": 0.6529, "step": 18211 }, { "epoch": 1.3157295862156158, "grad_norm": 7.39674894727499, "learning_rate": 1.3838678106724433e-06, "loss": 0.7067, "step": 18212 }, { "epoch": 1.3158018314158253, "grad_norm": 6.1675187974140036, "learning_rate": 1.3836060850848688e-06, "loss": 0.7335, "step": 18213 }, { "epoch": 1.3158740766160348, "grad_norm": 7.380826297392142, "learning_rate": 1.3833443747798596e-06, "loss": 0.5868, "step": 18214 }, { "epoch": 1.3159463218162444, "grad_norm": 5.9949163517274355, "learning_rate": 1.3830826797609997e-06, "loss": 0.5958, "step": 18215 }, { "epoch": 1.316018567016454, "grad_norm": 6.636358348991597, "learning_rate": 1.3828210000318703e-06, "loss": 0.6243, "step": 18216 }, { "epoch": 1.3160908122166632, "grad_norm": 7.282210519984509, "learning_rate": 1.3825593355960548e-06, "loss": 0.6301, "step": 18217 }, { "epoch": 1.3161630574168728, "grad_norm": 5.893400555280497, "learning_rate": 1.3822976864571337e-06, "loss": 0.6135, "step": 18218 }, { "epoch": 1.3162353026170823, "grad_norm": 6.300937752337972, "learning_rate": 1.3820360526186894e-06, "loss": 0.6785, "step": 18219 }, { "epoch": 1.3163075478172919, "grad_norm": 7.203197553356133, "learning_rate": 1.3817744340843042e-06, "loss": 0.6272, "step": 18220 }, { "epoch": 1.3163797930175014, "grad_norm": 6.323357941146102, "learning_rate": 1.3815128308575582e-06, "loss": 0.5985, "step": 18221 }, { "epoch": 1.316452038217711, "grad_norm": 7.188856444694237, "learning_rate": 1.3812512429420317e-06, "loss": 0.6111, "step": 18222 }, { "epoch": 1.3165242834179205, "grad_norm": 6.3505077729744865, "learning_rate": 1.3809896703413083e-06, "loss": 0.6486, "step": 18223 }, { "epoch": 1.3165965286181298, "grad_norm": 8.614660727827092, "learning_rate": 1.3807281130589683e-06, "loss": 0.6894, "step": 18224 }, { "epoch": 1.3166687738183396, "grad_norm": 7.128526584496206, "learning_rate": 1.3804665710985903e-06, "loss": 0.6395, "step": 18225 }, { "epoch": 1.316741019018549, "grad_norm": 6.470192863502445, "learning_rate": 1.380205044463756e-06, "loss": 0.6225, "step": 18226 }, { "epoch": 1.3168132642187584, "grad_norm": 6.188216659195327, "learning_rate": 1.3799435331580457e-06, "loss": 0.6018, "step": 18227 }, { "epoch": 1.316885509418968, "grad_norm": 7.814009619772081, "learning_rate": 1.3796820371850384e-06, "loss": 0.6758, "step": 18228 }, { "epoch": 1.3169577546191775, "grad_norm": 6.990849917890019, "learning_rate": 1.3794205565483135e-06, "loss": 0.6468, "step": 18229 }, { "epoch": 1.317029999819387, "grad_norm": 6.650567579786013, "learning_rate": 1.3791590912514513e-06, "loss": 0.5549, "step": 18230 }, { "epoch": 1.3171022450195964, "grad_norm": 6.561234125026191, "learning_rate": 1.378897641298031e-06, "loss": 0.6191, "step": 18231 }, { "epoch": 1.3171744902198061, "grad_norm": 5.242779443921652, "learning_rate": 1.3786362066916315e-06, "loss": 0.5513, "step": 18232 }, { "epoch": 1.3172467354200155, "grad_norm": 6.524144153810312, "learning_rate": 1.3783747874358316e-06, "loss": 0.6297, "step": 18233 }, { "epoch": 1.317318980620225, "grad_norm": 7.559250566988865, "learning_rate": 1.3781133835342098e-06, "loss": 0.682, "step": 18234 }, { "epoch": 1.3173912258204346, "grad_norm": 6.178940526520177, "learning_rate": 1.377851994990346e-06, "loss": 0.6379, "step": 18235 }, { "epoch": 1.317463471020644, "grad_norm": 6.1144271215428025, "learning_rate": 1.377590621807816e-06, "loss": 0.5933, "step": 18236 }, { "epoch": 1.3175357162208536, "grad_norm": 7.1059798297504555, "learning_rate": 1.377329263990199e-06, "loss": 0.6458, "step": 18237 }, { "epoch": 1.317607961421063, "grad_norm": 7.028281437144987, "learning_rate": 1.3770679215410731e-06, "loss": 0.6373, "step": 18238 }, { "epoch": 1.3176802066212727, "grad_norm": 7.538434033066711, "learning_rate": 1.3768065944640151e-06, "loss": 0.6179, "step": 18239 }, { "epoch": 1.317752451821482, "grad_norm": 8.435600010324084, "learning_rate": 1.3765452827626024e-06, "loss": 0.6754, "step": 18240 }, { "epoch": 1.3178246970216916, "grad_norm": 6.9886718094991895, "learning_rate": 1.3762839864404127e-06, "loss": 0.655, "step": 18241 }, { "epoch": 1.3178969422219011, "grad_norm": 6.878701618016806, "learning_rate": 1.3760227055010229e-06, "loss": 0.6111, "step": 18242 }, { "epoch": 1.3179691874221107, "grad_norm": 6.304535628785748, "learning_rate": 1.375761439948009e-06, "loss": 0.6471, "step": 18243 }, { "epoch": 1.3180414326223202, "grad_norm": 7.142945828568441, "learning_rate": 1.3755001897849486e-06, "loss": 0.6508, "step": 18244 }, { "epoch": 1.3181136778225295, "grad_norm": 6.806778491250841, "learning_rate": 1.3752389550154176e-06, "loss": 0.6033, "step": 18245 }, { "epoch": 1.3181859230227393, "grad_norm": 8.096567495698553, "learning_rate": 1.374977735642992e-06, "loss": 0.6042, "step": 18246 }, { "epoch": 1.3182581682229486, "grad_norm": 8.717220267907583, "learning_rate": 1.3747165316712472e-06, "loss": 0.6742, "step": 18247 }, { "epoch": 1.3183304134231582, "grad_norm": 6.835753066708428, "learning_rate": 1.3744553431037595e-06, "loss": 0.6238, "step": 18248 }, { "epoch": 1.3184026586233677, "grad_norm": 6.886060383572103, "learning_rate": 1.374194169944105e-06, "loss": 0.6806, "step": 18249 }, { "epoch": 1.3184749038235772, "grad_norm": 7.083470631185064, "learning_rate": 1.3739330121958562e-06, "loss": 0.5823, "step": 18250 }, { "epoch": 1.3185471490237868, "grad_norm": 7.424761979626119, "learning_rate": 1.3736718698625912e-06, "loss": 0.5532, "step": 18251 }, { "epoch": 1.3186193942239963, "grad_norm": 5.891219779426155, "learning_rate": 1.373410742947885e-06, "loss": 0.6774, "step": 18252 }, { "epoch": 1.3186916394242059, "grad_norm": 7.127858977013463, "learning_rate": 1.3731496314553096e-06, "loss": 0.671, "step": 18253 }, { "epoch": 1.3187638846244152, "grad_norm": 6.924126459105126, "learning_rate": 1.3728885353884413e-06, "loss": 0.5965, "step": 18254 }, { "epoch": 1.3188361298246247, "grad_norm": 7.1509963828531005, "learning_rate": 1.3726274547508533e-06, "loss": 0.6372, "step": 18255 }, { "epoch": 1.3189083750248343, "grad_norm": 6.95174149025368, "learning_rate": 1.3723663895461215e-06, "loss": 0.6169, "step": 18256 }, { "epoch": 1.3189806202250438, "grad_norm": 6.45676007540831, "learning_rate": 1.372105339777817e-06, "loss": 0.6049, "step": 18257 }, { "epoch": 1.3190528654252534, "grad_norm": 7.555726099271137, "learning_rate": 1.3718443054495145e-06, "loss": 0.6237, "step": 18258 }, { "epoch": 1.319125110625463, "grad_norm": 6.353544144283518, "learning_rate": 1.3715832865647879e-06, "loss": 0.6398, "step": 18259 }, { "epoch": 1.3191973558256724, "grad_norm": 6.950994615258843, "learning_rate": 1.3713222831272099e-06, "loss": 0.6687, "step": 18260 }, { "epoch": 1.3192696010258818, "grad_norm": 7.6849388260103595, "learning_rate": 1.3710612951403536e-06, "loss": 0.6396, "step": 18261 }, { "epoch": 1.3193418462260913, "grad_norm": 7.330918290096693, "learning_rate": 1.370800322607791e-06, "loss": 0.6559, "step": 18262 }, { "epoch": 1.3194140914263008, "grad_norm": 5.716485580972013, "learning_rate": 1.3705393655330967e-06, "loss": 0.6546, "step": 18263 }, { "epoch": 1.3194863366265104, "grad_norm": 6.6483594663458145, "learning_rate": 1.3702784239198403e-06, "loss": 0.6171, "step": 18264 }, { "epoch": 1.31955858182672, "grad_norm": 6.972967539444906, "learning_rate": 1.3700174977715952e-06, "loss": 0.6804, "step": 18265 }, { "epoch": 1.3196308270269295, "grad_norm": 6.753430660308415, "learning_rate": 1.3697565870919333e-06, "loss": 0.6415, "step": 18266 }, { "epoch": 1.319703072227139, "grad_norm": 5.63386396734201, "learning_rate": 1.369495691884427e-06, "loss": 0.588, "step": 18267 }, { "epoch": 1.3197753174273483, "grad_norm": 6.602570767122703, "learning_rate": 1.3692348121526457e-06, "loss": 0.6453, "step": 18268 }, { "epoch": 1.3198475626275579, "grad_norm": 6.6578393032749315, "learning_rate": 1.3689739479001624e-06, "loss": 0.6215, "step": 18269 }, { "epoch": 1.3199198078277674, "grad_norm": 6.951107941280162, "learning_rate": 1.368713099130547e-06, "loss": 0.6403, "step": 18270 }, { "epoch": 1.319992053027977, "grad_norm": 6.509658093827598, "learning_rate": 1.3684522658473712e-06, "loss": 0.6455, "step": 18271 }, { "epoch": 1.3200642982281865, "grad_norm": 6.165242245409346, "learning_rate": 1.3681914480542052e-06, "loss": 0.6162, "step": 18272 }, { "epoch": 1.320136543428396, "grad_norm": 6.7890984607793285, "learning_rate": 1.3679306457546193e-06, "loss": 0.5705, "step": 18273 }, { "epoch": 1.3202087886286056, "grad_norm": 6.083426000159291, "learning_rate": 1.3676698589521853e-06, "loss": 0.588, "step": 18274 }, { "epoch": 1.320281033828815, "grad_norm": 7.042794883981219, "learning_rate": 1.3674090876504703e-06, "loss": 0.6778, "step": 18275 }, { "epoch": 1.3203532790290244, "grad_norm": 5.424318845677787, "learning_rate": 1.367148331853046e-06, "loss": 0.6133, "step": 18276 }, { "epoch": 1.320425524229234, "grad_norm": 8.274897143136716, "learning_rate": 1.3668875915634819e-06, "loss": 0.6947, "step": 18277 }, { "epoch": 1.3204977694294435, "grad_norm": 6.646383360421037, "learning_rate": 1.366626866785346e-06, "loss": 0.6231, "step": 18278 }, { "epoch": 1.320570014629653, "grad_norm": 6.1388412097826555, "learning_rate": 1.3663661575222076e-06, "loss": 0.63, "step": 18279 }, { "epoch": 1.3206422598298626, "grad_norm": 6.408925163147575, "learning_rate": 1.366105463777637e-06, "loss": 0.5984, "step": 18280 }, { "epoch": 1.3207145050300722, "grad_norm": 5.126857886020176, "learning_rate": 1.3658447855552033e-06, "loss": 0.6209, "step": 18281 }, { "epoch": 1.3207867502302815, "grad_norm": 5.961825043484679, "learning_rate": 1.3655841228584727e-06, "loss": 0.6369, "step": 18282 }, { "epoch": 1.320858995430491, "grad_norm": 6.308802362069392, "learning_rate": 1.3653234756910148e-06, "loss": 0.6154, "step": 18283 }, { "epoch": 1.3209312406307006, "grad_norm": 6.48247483689573, "learning_rate": 1.3650628440563984e-06, "loss": 0.635, "step": 18284 }, { "epoch": 1.32100348583091, "grad_norm": 6.142281888723206, "learning_rate": 1.3648022279581897e-06, "loss": 0.659, "step": 18285 }, { "epoch": 1.3210757310311196, "grad_norm": 6.6817665121691086, "learning_rate": 1.3645416273999568e-06, "loss": 0.6762, "step": 18286 }, { "epoch": 1.3211479762313292, "grad_norm": 6.197860213164204, "learning_rate": 1.3642810423852673e-06, "loss": 0.6938, "step": 18287 }, { "epoch": 1.3212202214315387, "grad_norm": 8.166453559845742, "learning_rate": 1.3640204729176888e-06, "loss": 0.6644, "step": 18288 }, { "epoch": 1.321292466631748, "grad_norm": 6.764616787609119, "learning_rate": 1.3637599190007877e-06, "loss": 0.6225, "step": 18289 }, { "epoch": 1.3213647118319576, "grad_norm": 6.15634170696288, "learning_rate": 1.3634993806381314e-06, "loss": 0.606, "step": 18290 }, { "epoch": 1.3214369570321671, "grad_norm": 6.828139804850226, "learning_rate": 1.3632388578332862e-06, "loss": 0.7147, "step": 18291 }, { "epoch": 1.3215092022323767, "grad_norm": 7.264859460677711, "learning_rate": 1.3629783505898188e-06, "loss": 0.6343, "step": 18292 }, { "epoch": 1.3215814474325862, "grad_norm": 7.42362129949004, "learning_rate": 1.3627178589112945e-06, "loss": 0.6367, "step": 18293 }, { "epoch": 1.3216536926327958, "grad_norm": 6.302471390643738, "learning_rate": 1.3624573828012795e-06, "loss": 0.5733, "step": 18294 }, { "epoch": 1.3217259378330053, "grad_norm": 6.163602050292913, "learning_rate": 1.3621969222633408e-06, "loss": 0.6099, "step": 18295 }, { "epoch": 1.3217981830332146, "grad_norm": 6.725144009661324, "learning_rate": 1.3619364773010416e-06, "loss": 0.6189, "step": 18296 }, { "epoch": 1.3218704282334244, "grad_norm": 6.953203728047973, "learning_rate": 1.3616760479179483e-06, "loss": 0.6202, "step": 18297 }, { "epoch": 1.3219426734336337, "grad_norm": 7.374821288966225, "learning_rate": 1.3614156341176264e-06, "loss": 0.6077, "step": 18298 }, { "epoch": 1.3220149186338432, "grad_norm": 6.814137673176549, "learning_rate": 1.36115523590364e-06, "loss": 0.5869, "step": 18299 }, { "epoch": 1.3220871638340528, "grad_norm": 7.460198394755419, "learning_rate": 1.3608948532795546e-06, "loss": 0.6215, "step": 18300 }, { "epoch": 1.3221594090342623, "grad_norm": 7.425669003540791, "learning_rate": 1.360634486248934e-06, "loss": 0.7022, "step": 18301 }, { "epoch": 1.3222316542344719, "grad_norm": 7.089576602663187, "learning_rate": 1.3603741348153434e-06, "loss": 0.625, "step": 18302 }, { "epoch": 1.3223038994346812, "grad_norm": 7.803695233782379, "learning_rate": 1.3601137989823454e-06, "loss": 0.6709, "step": 18303 }, { "epoch": 1.322376144634891, "grad_norm": 6.002023037951096, "learning_rate": 1.359853478753504e-06, "loss": 0.618, "step": 18304 }, { "epoch": 1.3224483898351003, "grad_norm": 7.503587500550518, "learning_rate": 1.3595931741323837e-06, "loss": 0.6266, "step": 18305 }, { "epoch": 1.3225206350353098, "grad_norm": 6.712985240368278, "learning_rate": 1.359332885122548e-06, "loss": 0.6409, "step": 18306 }, { "epoch": 1.3225928802355194, "grad_norm": 6.606258153414139, "learning_rate": 1.3590726117275576e-06, "loss": 0.6945, "step": 18307 }, { "epoch": 1.322665125435729, "grad_norm": 6.117243000426706, "learning_rate": 1.3588123539509783e-06, "loss": 0.6022, "step": 18308 }, { "epoch": 1.3227373706359384, "grad_norm": 7.837006485533674, "learning_rate": 1.3585521117963726e-06, "loss": 0.6226, "step": 18309 }, { "epoch": 1.3228096158361478, "grad_norm": 6.50517976223668, "learning_rate": 1.3582918852673017e-06, "loss": 0.6241, "step": 18310 }, { "epoch": 1.3228818610363575, "grad_norm": 6.791534921815413, "learning_rate": 1.3580316743673278e-06, "loss": 0.6086, "step": 18311 }, { "epoch": 1.3229541062365668, "grad_norm": 6.634275169537851, "learning_rate": 1.357771479100014e-06, "loss": 0.6656, "step": 18312 }, { "epoch": 1.3230263514367764, "grad_norm": 7.881989783008674, "learning_rate": 1.3575112994689227e-06, "loss": 0.715, "step": 18313 }, { "epoch": 1.323098596636986, "grad_norm": 6.251306931225556, "learning_rate": 1.3572511354776135e-06, "loss": 0.5777, "step": 18314 }, { "epoch": 1.3231708418371955, "grad_norm": 5.975097795907135, "learning_rate": 1.3569909871296489e-06, "loss": 0.6495, "step": 18315 }, { "epoch": 1.323243087037405, "grad_norm": 8.045514334293383, "learning_rate": 1.3567308544285904e-06, "loss": 0.6283, "step": 18316 }, { "epoch": 1.3233153322376143, "grad_norm": 6.415135023282312, "learning_rate": 1.3564707373779987e-06, "loss": 0.6158, "step": 18317 }, { "epoch": 1.323387577437824, "grad_norm": 6.762035800986869, "learning_rate": 1.3562106359814348e-06, "loss": 0.6401, "step": 18318 }, { "epoch": 1.3234598226380334, "grad_norm": 8.023757467603678, "learning_rate": 1.3559505502424592e-06, "loss": 0.646, "step": 18319 }, { "epoch": 1.323532067838243, "grad_norm": 7.1672154002107105, "learning_rate": 1.3556904801646331e-06, "loss": 0.6271, "step": 18320 }, { "epoch": 1.3236043130384525, "grad_norm": 7.68338125659398, "learning_rate": 1.3554304257515149e-06, "loss": 0.6662, "step": 18321 }, { "epoch": 1.323676558238662, "grad_norm": 7.8506169750949155, "learning_rate": 1.3551703870066657e-06, "loss": 0.7356, "step": 18322 }, { "epoch": 1.3237488034388716, "grad_norm": 7.454350464717236, "learning_rate": 1.3549103639336448e-06, "loss": 0.6585, "step": 18323 }, { "epoch": 1.3238210486390811, "grad_norm": 7.578355797468048, "learning_rate": 1.354650356536013e-06, "loss": 0.5594, "step": 18324 }, { "epoch": 1.3238932938392907, "grad_norm": 6.259115057359908, "learning_rate": 1.3543903648173274e-06, "loss": 0.5706, "step": 18325 }, { "epoch": 1.3239655390395, "grad_norm": 8.023154605680995, "learning_rate": 1.354130388781148e-06, "loss": 0.6844, "step": 18326 }, { "epoch": 1.3240377842397095, "grad_norm": 6.763835150444074, "learning_rate": 1.3538704284310345e-06, "loss": 0.6764, "step": 18327 }, { "epoch": 1.324110029439919, "grad_norm": 6.676094668348552, "learning_rate": 1.3536104837705445e-06, "loss": 0.577, "step": 18328 }, { "epoch": 1.3241822746401286, "grad_norm": 7.777645394545883, "learning_rate": 1.3533505548032367e-06, "loss": 0.7032, "step": 18329 }, { "epoch": 1.3242545198403382, "grad_norm": 6.378591087016873, "learning_rate": 1.3530906415326696e-06, "loss": 0.6299, "step": 18330 }, { "epoch": 1.3243267650405477, "grad_norm": 6.858705944199963, "learning_rate": 1.3528307439624022e-06, "loss": 0.6457, "step": 18331 }, { "epoch": 1.3243990102407572, "grad_norm": 7.184227505265413, "learning_rate": 1.3525708620959904e-06, "loss": 0.6029, "step": 18332 }, { "epoch": 1.3244712554409666, "grad_norm": 6.693277020667128, "learning_rate": 1.3523109959369924e-06, "loss": 0.5759, "step": 18333 }, { "epoch": 1.324543500641176, "grad_norm": 7.725794492685713, "learning_rate": 1.3520511454889666e-06, "loss": 0.611, "step": 18334 }, { "epoch": 1.3246157458413856, "grad_norm": 6.252268265155667, "learning_rate": 1.3517913107554674e-06, "loss": 0.6514, "step": 18335 }, { "epoch": 1.3246879910415952, "grad_norm": 6.663818641127318, "learning_rate": 1.3515314917400547e-06, "loss": 0.6716, "step": 18336 }, { "epoch": 1.3247602362418047, "grad_norm": 5.105617996754573, "learning_rate": 1.351271688446284e-06, "loss": 0.6131, "step": 18337 }, { "epoch": 1.3248324814420143, "grad_norm": 6.9554752106879185, "learning_rate": 1.351011900877713e-06, "loss": 0.6518, "step": 18338 }, { "epoch": 1.3249047266422238, "grad_norm": 6.587114656531417, "learning_rate": 1.3507521290378958e-06, "loss": 0.6467, "step": 18339 }, { "epoch": 1.3249769718424331, "grad_norm": 6.479541273010195, "learning_rate": 1.3504923729303898e-06, "loss": 0.5925, "step": 18340 }, { "epoch": 1.3250492170426427, "grad_norm": 6.59960879698133, "learning_rate": 1.3502326325587506e-06, "loss": 0.6253, "step": 18341 }, { "epoch": 1.3251214622428522, "grad_norm": 6.636347427443381, "learning_rate": 1.3499729079265351e-06, "loss": 0.6108, "step": 18342 }, { "epoch": 1.3251937074430618, "grad_norm": 6.340699312649789, "learning_rate": 1.3497131990372964e-06, "loss": 0.6078, "step": 18343 }, { "epoch": 1.3252659526432713, "grad_norm": 7.346542801317623, "learning_rate": 1.349453505894591e-06, "loss": 0.6469, "step": 18344 }, { "epoch": 1.3253381978434808, "grad_norm": 5.829427046978538, "learning_rate": 1.3491938285019737e-06, "loss": 0.6002, "step": 18345 }, { "epoch": 1.3254104430436904, "grad_norm": 6.496055873592976, "learning_rate": 1.3489341668629993e-06, "loss": 0.6167, "step": 18346 }, { "epoch": 1.3254826882438997, "grad_norm": 6.682068802964306, "learning_rate": 1.3486745209812225e-06, "loss": 0.6333, "step": 18347 }, { "epoch": 1.3255549334441092, "grad_norm": 5.9801711812604275, "learning_rate": 1.3484148908601974e-06, "loss": 0.6541, "step": 18348 }, { "epoch": 1.3256271786443188, "grad_norm": 7.343893885724736, "learning_rate": 1.3481552765034794e-06, "loss": 0.6807, "step": 18349 }, { "epoch": 1.3256994238445283, "grad_norm": 8.169000824023133, "learning_rate": 1.3478956779146202e-06, "loss": 0.6483, "step": 18350 }, { "epoch": 1.3257716690447379, "grad_norm": 6.458172835129779, "learning_rate": 1.3476360950971748e-06, "loss": 0.6558, "step": 18351 }, { "epoch": 1.3258439142449474, "grad_norm": 8.221118194187028, "learning_rate": 1.3473765280546974e-06, "loss": 0.5763, "step": 18352 }, { "epoch": 1.325916159445157, "grad_norm": 7.306938208154166, "learning_rate": 1.3471169767907393e-06, "loss": 0.67, "step": 18353 }, { "epoch": 1.3259884046453663, "grad_norm": 6.407806058637186, "learning_rate": 1.3468574413088547e-06, "loss": 0.6265, "step": 18354 }, { "epoch": 1.3260606498455758, "grad_norm": 7.128455411686074, "learning_rate": 1.3465979216125966e-06, "loss": 0.6251, "step": 18355 }, { "epoch": 1.3261328950457854, "grad_norm": 12.910348310787722, "learning_rate": 1.3463384177055173e-06, "loss": 0.6025, "step": 18356 }, { "epoch": 1.326205140245995, "grad_norm": 6.020868250691714, "learning_rate": 1.346078929591169e-06, "loss": 0.5695, "step": 18357 }, { "epoch": 1.3262773854462044, "grad_norm": 6.8758522199069985, "learning_rate": 1.3458194572731044e-06, "loss": 0.6175, "step": 18358 }, { "epoch": 1.326349630646414, "grad_norm": 5.510542216129649, "learning_rate": 1.3455600007548764e-06, "loss": 0.6838, "step": 18359 }, { "epoch": 1.3264218758466235, "grad_norm": 8.176561342979566, "learning_rate": 1.3453005600400344e-06, "loss": 0.7139, "step": 18360 }, { "epoch": 1.3264941210468328, "grad_norm": 6.892246323747218, "learning_rate": 1.3450411351321313e-06, "loss": 0.7386, "step": 18361 }, { "epoch": 1.3265663662470424, "grad_norm": 6.793452521499303, "learning_rate": 1.3447817260347182e-06, "loss": 0.5909, "step": 18362 }, { "epoch": 1.326638611447252, "grad_norm": 7.217666874995562, "learning_rate": 1.3445223327513474e-06, "loss": 0.6548, "step": 18363 }, { "epoch": 1.3267108566474615, "grad_norm": 7.114315119128321, "learning_rate": 1.3442629552855667e-06, "loss": 0.6624, "step": 18364 }, { "epoch": 1.326783101847671, "grad_norm": 7.955102580983768, "learning_rate": 1.3440035936409295e-06, "loss": 0.6328, "step": 18365 }, { "epoch": 1.3268553470478806, "grad_norm": 6.700853652091504, "learning_rate": 1.3437442478209859e-06, "loss": 0.6222, "step": 18366 }, { "epoch": 1.32692759224809, "grad_norm": 7.891617212137217, "learning_rate": 1.3434849178292867e-06, "loss": 0.6412, "step": 18367 }, { "epoch": 1.3269998374482994, "grad_norm": 9.27922431597868, "learning_rate": 1.3432256036693798e-06, "loss": 0.64, "step": 18368 }, { "epoch": 1.3270720826485092, "grad_norm": 7.302794969158882, "learning_rate": 1.3429663053448163e-06, "loss": 0.6017, "step": 18369 }, { "epoch": 1.3271443278487185, "grad_norm": 6.697398623005031, "learning_rate": 1.3427070228591465e-06, "loss": 0.6163, "step": 18370 }, { "epoch": 1.327216573048928, "grad_norm": 10.377853185074311, "learning_rate": 1.3424477562159183e-06, "loss": 0.6576, "step": 18371 }, { "epoch": 1.3272888182491376, "grad_norm": 6.907631300107343, "learning_rate": 1.3421885054186818e-06, "loss": 0.6583, "step": 18372 }, { "epoch": 1.3273610634493471, "grad_norm": 7.279190754609484, "learning_rate": 1.3419292704709852e-06, "loss": 0.7032, "step": 18373 }, { "epoch": 1.3274333086495567, "grad_norm": 8.411618018430715, "learning_rate": 1.3416700513763775e-06, "loss": 0.6362, "step": 18374 }, { "epoch": 1.327505553849766, "grad_norm": 6.834156855139798, "learning_rate": 1.341410848138408e-06, "loss": 0.6682, "step": 18375 }, { "epoch": 1.3275777990499757, "grad_norm": 7.467741958074916, "learning_rate": 1.3411516607606239e-06, "loss": 0.7042, "step": 18376 }, { "epoch": 1.327650044250185, "grad_norm": 7.305221456503865, "learning_rate": 1.3408924892465752e-06, "loss": 0.662, "step": 18377 }, { "epoch": 1.3277222894503946, "grad_norm": 6.441766898825865, "learning_rate": 1.3406333335998074e-06, "loss": 0.6314, "step": 18378 }, { "epoch": 1.3277945346506042, "grad_norm": 6.246492240747658, "learning_rate": 1.3403741938238685e-06, "loss": 0.6399, "step": 18379 }, { "epoch": 1.3278667798508137, "grad_norm": 6.7275092226303315, "learning_rate": 1.340115069922307e-06, "loss": 0.6224, "step": 18380 }, { "epoch": 1.3279390250510232, "grad_norm": 6.618361115670697, "learning_rate": 1.33985596189867e-06, "loss": 0.5631, "step": 18381 }, { "epoch": 1.3280112702512326, "grad_norm": 7.149070636664431, "learning_rate": 1.3395968697565038e-06, "loss": 0.5738, "step": 18382 }, { "epoch": 1.3280835154514423, "grad_norm": 6.801933843850091, "learning_rate": 1.3393377934993552e-06, "loss": 0.7012, "step": 18383 }, { "epoch": 1.3281557606516516, "grad_norm": 6.428125759601317, "learning_rate": 1.339078733130771e-06, "loss": 0.6259, "step": 18384 }, { "epoch": 1.3282280058518612, "grad_norm": 8.01861790037576, "learning_rate": 1.3388196886542976e-06, "loss": 0.5269, "step": 18385 }, { "epoch": 1.3283002510520707, "grad_norm": 6.795320179562802, "learning_rate": 1.3385606600734807e-06, "loss": 0.6425, "step": 18386 }, { "epoch": 1.3283724962522803, "grad_norm": 7.448522270141177, "learning_rate": 1.338301647391867e-06, "loss": 0.6846, "step": 18387 }, { "epoch": 1.3284447414524898, "grad_norm": 6.358214176339692, "learning_rate": 1.3380426506130023e-06, "loss": 0.5136, "step": 18388 }, { "epoch": 1.3285169866526991, "grad_norm": 7.288682279656543, "learning_rate": 1.3377836697404312e-06, "loss": 0.6707, "step": 18389 }, { "epoch": 1.328589231852909, "grad_norm": 6.6021706566212295, "learning_rate": 1.3375247047776984e-06, "loss": 0.6464, "step": 18390 }, { "epoch": 1.3286614770531182, "grad_norm": 6.827636701290182, "learning_rate": 1.3372657557283514e-06, "loss": 0.6789, "step": 18391 }, { "epoch": 1.3287337222533278, "grad_norm": 6.204516814397918, "learning_rate": 1.3370068225959316e-06, "loss": 0.6111, "step": 18392 }, { "epoch": 1.3288059674535373, "grad_norm": 6.999662118659059, "learning_rate": 1.3367479053839857e-06, "loss": 0.7316, "step": 18393 }, { "epoch": 1.3288782126537468, "grad_norm": 6.524681767562978, "learning_rate": 1.3364890040960582e-06, "loss": 0.6053, "step": 18394 }, { "epoch": 1.3289504578539564, "grad_norm": 8.977570878006416, "learning_rate": 1.3362301187356934e-06, "loss": 0.6859, "step": 18395 }, { "epoch": 1.329022703054166, "grad_norm": 6.046586061814525, "learning_rate": 1.3359712493064343e-06, "loss": 0.649, "step": 18396 }, { "epoch": 1.3290949482543755, "grad_norm": 5.952331009428221, "learning_rate": 1.3357123958118245e-06, "loss": 0.5604, "step": 18397 }, { "epoch": 1.3291671934545848, "grad_norm": 7.6107900819420005, "learning_rate": 1.3354535582554084e-06, "loss": 0.746, "step": 18398 }, { "epoch": 1.3292394386547943, "grad_norm": 6.326826701791504, "learning_rate": 1.3351947366407297e-06, "loss": 0.6246, "step": 18399 }, { "epoch": 1.3293116838550039, "grad_norm": 5.8466097928060785, "learning_rate": 1.3349359309713295e-06, "loss": 0.5775, "step": 18400 }, { "epoch": 1.3293839290552134, "grad_norm": 6.727689535645734, "learning_rate": 1.334677141250752e-06, "loss": 0.6172, "step": 18401 }, { "epoch": 1.329456174255423, "grad_norm": 6.489395587913835, "learning_rate": 1.3344183674825395e-06, "loss": 0.5905, "step": 18402 }, { "epoch": 1.3295284194556325, "grad_norm": 6.53784664248888, "learning_rate": 1.3341596096702345e-06, "loss": 0.6348, "step": 18403 }, { "epoch": 1.329600664655842, "grad_norm": 7.275000776048337, "learning_rate": 1.3339008678173793e-06, "loss": 0.6281, "step": 18404 }, { "epoch": 1.3296729098560514, "grad_norm": 6.5839443969418125, "learning_rate": 1.3336421419275158e-06, "loss": 0.6442, "step": 18405 }, { "epoch": 1.329745155056261, "grad_norm": 7.541333045394727, "learning_rate": 1.3333834320041867e-06, "loss": 0.6584, "step": 18406 }, { "epoch": 1.3298174002564704, "grad_norm": 5.9714394147543715, "learning_rate": 1.3331247380509316e-06, "loss": 0.6131, "step": 18407 }, { "epoch": 1.32988964545668, "grad_norm": 8.341530036633015, "learning_rate": 1.3328660600712927e-06, "loss": 0.6585, "step": 18408 }, { "epoch": 1.3299618906568895, "grad_norm": 6.3220100905070264, "learning_rate": 1.3326073980688126e-06, "loss": 0.6856, "step": 18409 }, { "epoch": 1.330034135857099, "grad_norm": 7.126288581908308, "learning_rate": 1.3323487520470293e-06, "loss": 0.5571, "step": 18410 }, { "epoch": 1.3301063810573086, "grad_norm": 7.137133255871054, "learning_rate": 1.3320901220094854e-06, "loss": 0.6085, "step": 18411 }, { "epoch": 1.330178626257518, "grad_norm": 7.680934428350951, "learning_rate": 1.3318315079597196e-06, "loss": 0.6851, "step": 18412 }, { "epoch": 1.3302508714577275, "grad_norm": 5.769231727795644, "learning_rate": 1.3315729099012756e-06, "loss": 0.6587, "step": 18413 }, { "epoch": 1.330323116657937, "grad_norm": 7.202168233553302, "learning_rate": 1.33131432783769e-06, "loss": 0.6161, "step": 18414 }, { "epoch": 1.3303953618581466, "grad_norm": 6.868467244236974, "learning_rate": 1.3310557617725042e-06, "loss": 0.7091, "step": 18415 }, { "epoch": 1.330467607058356, "grad_norm": 7.455213977754197, "learning_rate": 1.3307972117092581e-06, "loss": 0.6116, "step": 18416 }, { "epoch": 1.3305398522585656, "grad_norm": 5.408074749063208, "learning_rate": 1.3305386776514895e-06, "loss": 0.6723, "step": 18417 }, { "epoch": 1.3306120974587752, "grad_norm": 8.205750137991556, "learning_rate": 1.3302801596027384e-06, "loss": 0.6593, "step": 18418 }, { "epoch": 1.3306843426589845, "grad_norm": 7.92268250091179, "learning_rate": 1.3300216575665436e-06, "loss": 0.7223, "step": 18419 }, { "epoch": 1.330756587859194, "grad_norm": 6.7629397660015025, "learning_rate": 1.3297631715464437e-06, "loss": 0.6395, "step": 18420 }, { "epoch": 1.3308288330594036, "grad_norm": 7.104592259282323, "learning_rate": 1.3295047015459777e-06, "loss": 0.632, "step": 18421 }, { "epoch": 1.3309010782596131, "grad_norm": 5.644530067311727, "learning_rate": 1.3292462475686834e-06, "loss": 0.5866, "step": 18422 }, { "epoch": 1.3309733234598227, "grad_norm": 6.47934257374838, "learning_rate": 1.3289878096180988e-06, "loss": 0.6953, "step": 18423 }, { "epoch": 1.3310455686600322, "grad_norm": 7.686669529663153, "learning_rate": 1.3287293876977632e-06, "loss": 0.6494, "step": 18424 }, { "epoch": 1.3311178138602417, "grad_norm": 6.698637057927373, "learning_rate": 1.3284709818112112e-06, "loss": 0.6538, "step": 18425 }, { "epoch": 1.331190059060451, "grad_norm": 6.499381402730883, "learning_rate": 1.3282125919619826e-06, "loss": 0.5729, "step": 18426 }, { "epoch": 1.3312623042606606, "grad_norm": 6.852641143625613, "learning_rate": 1.3279542181536143e-06, "loss": 0.5338, "step": 18427 }, { "epoch": 1.3313345494608702, "grad_norm": 6.944108169890968, "learning_rate": 1.3276958603896417e-06, "loss": 0.6433, "step": 18428 }, { "epoch": 1.3314067946610797, "grad_norm": 6.79806837808327, "learning_rate": 1.3274375186736027e-06, "loss": 0.6527, "step": 18429 }, { "epoch": 1.3314790398612892, "grad_norm": 7.100220002540249, "learning_rate": 1.3271791930090336e-06, "loss": 0.6187, "step": 18430 }, { "epoch": 1.3315512850614988, "grad_norm": 7.53614565700036, "learning_rate": 1.3269208833994707e-06, "loss": 0.6265, "step": 18431 }, { "epoch": 1.3316235302617083, "grad_norm": 7.009941942788098, "learning_rate": 1.3266625898484503e-06, "loss": 0.6414, "step": 18432 }, { "epoch": 1.3316957754619176, "grad_norm": 6.733786714353544, "learning_rate": 1.3264043123595078e-06, "loss": 0.5774, "step": 18433 }, { "epoch": 1.3317680206621272, "grad_norm": 6.592146402770242, "learning_rate": 1.3261460509361798e-06, "loss": 0.5891, "step": 18434 }, { "epoch": 1.3318402658623367, "grad_norm": 7.111480468191057, "learning_rate": 1.3258878055820002e-06, "loss": 0.6342, "step": 18435 }, { "epoch": 1.3319125110625463, "grad_norm": 7.604999764848112, "learning_rate": 1.3256295763005048e-06, "loss": 0.5912, "step": 18436 }, { "epoch": 1.3319847562627558, "grad_norm": 6.473008085828352, "learning_rate": 1.3253713630952286e-06, "loss": 0.5685, "step": 18437 }, { "epoch": 1.3320570014629654, "grad_norm": 7.55592325038916, "learning_rate": 1.3251131659697075e-06, "loss": 0.5801, "step": 18438 }, { "epoch": 1.332129246663175, "grad_norm": 5.996594416190339, "learning_rate": 1.324854984927474e-06, "loss": 0.6145, "step": 18439 }, { "epoch": 1.3322014918633842, "grad_norm": 6.2776837449734835, "learning_rate": 1.3245968199720633e-06, "loss": 0.5866, "step": 18440 }, { "epoch": 1.3322737370635938, "grad_norm": 6.451406328524243, "learning_rate": 1.3243386711070094e-06, "loss": 0.6219, "step": 18441 }, { "epoch": 1.3323459822638033, "grad_norm": 7.31578524877216, "learning_rate": 1.3240805383358463e-06, "loss": 0.6203, "step": 18442 }, { "epoch": 1.3324182274640128, "grad_norm": 7.531491604170545, "learning_rate": 1.3238224216621077e-06, "loss": 0.6456, "step": 18443 }, { "epoch": 1.3324904726642224, "grad_norm": 5.710436505470296, "learning_rate": 1.3235643210893264e-06, "loss": 0.6338, "step": 18444 }, { "epoch": 1.332562717864432, "grad_norm": 7.857126206219357, "learning_rate": 1.3233062366210374e-06, "loss": 0.7055, "step": 18445 }, { "epoch": 1.3326349630646415, "grad_norm": 6.151488655854408, "learning_rate": 1.3230481682607715e-06, "loss": 0.584, "step": 18446 }, { "epoch": 1.3327072082648508, "grad_norm": 6.231578993458157, "learning_rate": 1.3227901160120624e-06, "loss": 0.7433, "step": 18447 }, { "epoch": 1.3327794534650605, "grad_norm": 6.284397228776419, "learning_rate": 1.3225320798784425e-06, "loss": 0.6425, "step": 18448 }, { "epoch": 1.3328516986652699, "grad_norm": 8.878121270431063, "learning_rate": 1.322274059863444e-06, "loss": 0.677, "step": 18449 }, { "epoch": 1.3329239438654794, "grad_norm": 7.906061826132706, "learning_rate": 1.322016055970599e-06, "loss": 0.6313, "step": 18450 }, { "epoch": 1.332996189065689, "grad_norm": 6.0518270712966435, "learning_rate": 1.3217580682034403e-06, "loss": 0.6045, "step": 18451 }, { "epoch": 1.3330684342658985, "grad_norm": 8.040691837132336, "learning_rate": 1.3215000965654992e-06, "loss": 0.7514, "step": 18452 }, { "epoch": 1.333140679466108, "grad_norm": 8.082333794633273, "learning_rate": 1.321242141060306e-06, "loss": 0.7038, "step": 18453 }, { "epoch": 1.3332129246663174, "grad_norm": 6.857610453829716, "learning_rate": 1.3209842016913924e-06, "loss": 0.5631, "step": 18454 }, { "epoch": 1.3332851698665271, "grad_norm": 8.641319426350506, "learning_rate": 1.3207262784622898e-06, "loss": 0.6868, "step": 18455 }, { "epoch": 1.3333574150667364, "grad_norm": 5.402717627043668, "learning_rate": 1.32046837137653e-06, "loss": 0.6061, "step": 18456 }, { "epoch": 1.333429660266946, "grad_norm": 7.718769027130521, "learning_rate": 1.320210480437641e-06, "loss": 0.669, "step": 18457 }, { "epoch": 1.3335019054671555, "grad_norm": 6.538223267438414, "learning_rate": 1.3199526056491544e-06, "loss": 0.5249, "step": 18458 }, { "epoch": 1.333574150667365, "grad_norm": 6.288087984959482, "learning_rate": 1.3196947470146005e-06, "loss": 0.6454, "step": 18459 }, { "epoch": 1.3336463958675746, "grad_norm": 7.347481027366438, "learning_rate": 1.3194369045375092e-06, "loss": 0.6479, "step": 18460 }, { "epoch": 1.333718641067784, "grad_norm": 9.001599911368837, "learning_rate": 1.3191790782214097e-06, "loss": 0.5938, "step": 18461 }, { "epoch": 1.3337908862679937, "grad_norm": 6.925465360904485, "learning_rate": 1.3189212680698322e-06, "loss": 0.6749, "step": 18462 }, { "epoch": 1.333863131468203, "grad_norm": 6.803868977033628, "learning_rate": 1.3186634740863063e-06, "loss": 0.6809, "step": 18463 }, { "epoch": 1.3339353766684126, "grad_norm": 6.581120984442182, "learning_rate": 1.3184056962743591e-06, "loss": 0.6549, "step": 18464 }, { "epoch": 1.334007621868622, "grad_norm": 7.136713404828321, "learning_rate": 1.318147934637521e-06, "loss": 0.6396, "step": 18465 }, { "epoch": 1.3340798670688316, "grad_norm": 7.625855945009909, "learning_rate": 1.3178901891793203e-06, "loss": 0.5892, "step": 18466 }, { "epoch": 1.3341521122690412, "grad_norm": 7.323502308709531, "learning_rate": 1.3176324599032844e-06, "loss": 0.5603, "step": 18467 }, { "epoch": 1.3342243574692505, "grad_norm": 7.071050009014253, "learning_rate": 1.3173747468129422e-06, "loss": 0.6483, "step": 18468 }, { "epoch": 1.3342966026694603, "grad_norm": 7.29629886811468, "learning_rate": 1.3171170499118201e-06, "loss": 0.6545, "step": 18469 }, { "epoch": 1.3343688478696696, "grad_norm": 6.413989049233062, "learning_rate": 1.3168593692034492e-06, "loss": 0.5815, "step": 18470 }, { "epoch": 1.3344410930698791, "grad_norm": 6.405383539918358, "learning_rate": 1.3166017046913543e-06, "loss": 0.612, "step": 18471 }, { "epoch": 1.3345133382700887, "grad_norm": 8.005868190026252, "learning_rate": 1.3163440563790629e-06, "loss": 0.7089, "step": 18472 }, { "epoch": 1.3345855834702982, "grad_norm": 7.516916398032492, "learning_rate": 1.3160864242701022e-06, "loss": 0.5987, "step": 18473 }, { "epoch": 1.3346578286705078, "grad_norm": 6.614049246144144, "learning_rate": 1.3158288083680005e-06, "loss": 0.6259, "step": 18474 }, { "epoch": 1.3347300738707173, "grad_norm": 6.689631612582306, "learning_rate": 1.3155712086762818e-06, "loss": 0.632, "step": 18475 }, { "epoch": 1.3348023190709268, "grad_norm": 6.442272010117529, "learning_rate": 1.3153136251984733e-06, "loss": 0.5976, "step": 18476 }, { "epoch": 1.3348745642711362, "grad_norm": 6.633352808069111, "learning_rate": 1.3150560579381016e-06, "loss": 0.66, "step": 18477 }, { "epoch": 1.3349468094713457, "grad_norm": 6.343150744767904, "learning_rate": 1.3147985068986924e-06, "loss": 0.6152, "step": 18478 }, { "epoch": 1.3350190546715552, "grad_norm": 7.006208935905669, "learning_rate": 1.3145409720837716e-06, "loss": 0.6015, "step": 18479 }, { "epoch": 1.3350912998717648, "grad_norm": 8.29122473786645, "learning_rate": 1.3142834534968643e-06, "loss": 0.7143, "step": 18480 }, { "epoch": 1.3351635450719743, "grad_norm": 6.054006703156616, "learning_rate": 1.314025951141497e-06, "loss": 0.5946, "step": 18481 }, { "epoch": 1.3352357902721839, "grad_norm": 8.150511144043648, "learning_rate": 1.3137684650211924e-06, "loss": 0.6947, "step": 18482 }, { "epoch": 1.3353080354723934, "grad_norm": 7.557231991350653, "learning_rate": 1.3135109951394764e-06, "loss": 0.7017, "step": 18483 }, { "epoch": 1.3353802806726027, "grad_norm": 6.1583483842486, "learning_rate": 1.313253541499875e-06, "loss": 0.5888, "step": 18484 }, { "epoch": 1.3354525258728123, "grad_norm": 5.577152797915692, "learning_rate": 1.3129961041059096e-06, "loss": 0.6028, "step": 18485 }, { "epoch": 1.3355247710730218, "grad_norm": 7.340390739746638, "learning_rate": 1.3127386829611061e-06, "loss": 0.6627, "step": 18486 }, { "epoch": 1.3355970162732314, "grad_norm": 5.810539325100137, "learning_rate": 1.3124812780689883e-06, "loss": 0.6298, "step": 18487 }, { "epoch": 1.335669261473441, "grad_norm": 5.445750849633518, "learning_rate": 1.31222388943308e-06, "loss": 0.6066, "step": 18488 }, { "epoch": 1.3357415066736504, "grad_norm": 6.834052195380109, "learning_rate": 1.311966517056904e-06, "loss": 0.6165, "step": 18489 }, { "epoch": 1.33581375187386, "grad_norm": 6.320441962614893, "learning_rate": 1.3117091609439842e-06, "loss": 0.6115, "step": 18490 }, { "epoch": 1.3358859970740693, "grad_norm": 6.525252951911196, "learning_rate": 1.311451821097844e-06, "loss": 0.64, "step": 18491 }, { "epoch": 1.3359582422742788, "grad_norm": 5.790596000816992, "learning_rate": 1.3111944975220047e-06, "loss": 0.571, "step": 18492 }, { "epoch": 1.3360304874744884, "grad_norm": 5.91872835729362, "learning_rate": 1.3109371902199897e-06, "loss": 0.5788, "step": 18493 }, { "epoch": 1.336102732674698, "grad_norm": 6.598082362090791, "learning_rate": 1.3106798991953216e-06, "loss": 0.608, "step": 18494 }, { "epoch": 1.3361749778749075, "grad_norm": 5.674520584895979, "learning_rate": 1.3104226244515229e-06, "loss": 0.6154, "step": 18495 }, { "epoch": 1.336247223075117, "grad_norm": 7.425092589960237, "learning_rate": 1.3101653659921138e-06, "loss": 0.5988, "step": 18496 }, { "epoch": 1.3363194682753265, "grad_norm": 7.550206051471558, "learning_rate": 1.3099081238206162e-06, "loss": 0.6408, "step": 18497 }, { "epoch": 1.3363917134755359, "grad_norm": 7.380596042123679, "learning_rate": 1.309650897940553e-06, "loss": 0.6686, "step": 18498 }, { "epoch": 1.3364639586757454, "grad_norm": 8.230725516580957, "learning_rate": 1.3093936883554462e-06, "loss": 0.6221, "step": 18499 }, { "epoch": 1.336536203875955, "grad_norm": 6.152051084308819, "learning_rate": 1.309136495068814e-06, "loss": 0.57, "step": 18500 }, { "epoch": 1.3366084490761645, "grad_norm": 6.826859206275932, "learning_rate": 1.3088793180841786e-06, "loss": 0.6791, "step": 18501 }, { "epoch": 1.336680694276374, "grad_norm": 5.773919814511981, "learning_rate": 1.3086221574050618e-06, "loss": 0.635, "step": 18502 }, { "epoch": 1.3367529394765836, "grad_norm": 6.454594110068612, "learning_rate": 1.3083650130349813e-06, "loss": 0.667, "step": 18503 }, { "epoch": 1.3368251846767931, "grad_norm": 7.43175124137453, "learning_rate": 1.3081078849774587e-06, "loss": 0.6646, "step": 18504 }, { "epoch": 1.3368974298770024, "grad_norm": 6.478848301301651, "learning_rate": 1.3078507732360135e-06, "loss": 0.535, "step": 18505 }, { "epoch": 1.336969675077212, "grad_norm": 6.290933157834209, "learning_rate": 1.3075936778141658e-06, "loss": 0.6577, "step": 18506 }, { "epoch": 1.3370419202774215, "grad_norm": 7.912418416884778, "learning_rate": 1.3073365987154347e-06, "loss": 0.6421, "step": 18507 }, { "epoch": 1.337114165477631, "grad_norm": 8.937899920712683, "learning_rate": 1.3070795359433395e-06, "loss": 0.6744, "step": 18508 }, { "epoch": 1.3371864106778406, "grad_norm": 5.74663943066674, "learning_rate": 1.3068224895014004e-06, "loss": 0.6102, "step": 18509 }, { "epoch": 1.3372586558780502, "grad_norm": 6.83922772381168, "learning_rate": 1.3065654593931337e-06, "loss": 0.6111, "step": 18510 }, { "epoch": 1.3373309010782597, "grad_norm": 7.5542572751075685, "learning_rate": 1.3063084456220592e-06, "loss": 0.633, "step": 18511 }, { "epoch": 1.337403146278469, "grad_norm": 7.159296045488227, "learning_rate": 1.3060514481916953e-06, "loss": 0.6465, "step": 18512 }, { "epoch": 1.3374753914786786, "grad_norm": 9.174226683579342, "learning_rate": 1.305794467105561e-06, "loss": 0.5897, "step": 18513 }, { "epoch": 1.337547636678888, "grad_norm": 6.701379081793567, "learning_rate": 1.3055375023671724e-06, "loss": 0.5748, "step": 18514 }, { "epoch": 1.3376198818790976, "grad_norm": 7.066249695957892, "learning_rate": 1.305280553980048e-06, "loss": 0.633, "step": 18515 }, { "epoch": 1.3376921270793072, "grad_norm": 6.065456023655793, "learning_rate": 1.305023621947705e-06, "loss": 0.6457, "step": 18516 }, { "epoch": 1.3377643722795167, "grad_norm": 6.007298163532916, "learning_rate": 1.3047667062736608e-06, "loss": 0.6485, "step": 18517 }, { "epoch": 1.3378366174797263, "grad_norm": 6.652664293326925, "learning_rate": 1.3045098069614323e-06, "loss": 0.5928, "step": 18518 }, { "epoch": 1.3379088626799356, "grad_norm": 6.789910337055557, "learning_rate": 1.3042529240145362e-06, "loss": 0.6068, "step": 18519 }, { "epoch": 1.3379811078801453, "grad_norm": 6.471251662815788, "learning_rate": 1.3039960574364904e-06, "loss": 0.616, "step": 18520 }, { "epoch": 1.3380533530803547, "grad_norm": 6.601138346211228, "learning_rate": 1.3037392072308088e-06, "loss": 0.5904, "step": 18521 }, { "epoch": 1.3381255982805642, "grad_norm": 6.515934120365054, "learning_rate": 1.303482373401009e-06, "loss": 0.6165, "step": 18522 }, { "epoch": 1.3381978434807738, "grad_norm": 8.175584815336082, "learning_rate": 1.3032255559506068e-06, "loss": 0.6432, "step": 18523 }, { "epoch": 1.3382700886809833, "grad_norm": 6.295881396484034, "learning_rate": 1.3029687548831168e-06, "loss": 0.5967, "step": 18524 }, { "epoch": 1.3383423338811928, "grad_norm": 8.398433412062959, "learning_rate": 1.302711970202054e-06, "loss": 0.6728, "step": 18525 }, { "epoch": 1.3384145790814022, "grad_norm": 5.92321633318726, "learning_rate": 1.302455201910936e-06, "loss": 0.5562, "step": 18526 }, { "epoch": 1.338486824281612, "grad_norm": 8.419427349705717, "learning_rate": 1.3021984500132773e-06, "loss": 0.6676, "step": 18527 }, { "epoch": 1.3385590694818212, "grad_norm": 7.561749886126361, "learning_rate": 1.3019417145125906e-06, "loss": 0.6327, "step": 18528 }, { "epoch": 1.3386313146820308, "grad_norm": 6.841704598560569, "learning_rate": 1.301684995412392e-06, "loss": 0.6061, "step": 18529 }, { "epoch": 1.3387035598822403, "grad_norm": 6.1182543919159835, "learning_rate": 1.3014282927161952e-06, "loss": 0.7084, "step": 18530 }, { "epoch": 1.3387758050824499, "grad_norm": 6.980960292832743, "learning_rate": 1.3011716064275154e-06, "loss": 0.6677, "step": 18531 }, { "epoch": 1.3388480502826594, "grad_norm": 7.110552948000483, "learning_rate": 1.3009149365498644e-06, "loss": 0.5938, "step": 18532 }, { "epoch": 1.3389202954828687, "grad_norm": 6.416990032552713, "learning_rate": 1.300658283086757e-06, "loss": 0.6392, "step": 18533 }, { "epoch": 1.3389925406830785, "grad_norm": 7.392029578393325, "learning_rate": 1.3004016460417063e-06, "loss": 0.6636, "step": 18534 }, { "epoch": 1.3390647858832878, "grad_norm": 6.960528394876977, "learning_rate": 1.3001450254182257e-06, "loss": 0.6443, "step": 18535 }, { "epoch": 1.3391370310834974, "grad_norm": 7.372250577860445, "learning_rate": 1.2998884212198282e-06, "loss": 0.6844, "step": 18536 }, { "epoch": 1.339209276283707, "grad_norm": 6.120785255667453, "learning_rate": 1.2996318334500263e-06, "loss": 0.6222, "step": 18537 }, { "epoch": 1.3392815214839164, "grad_norm": 7.90952471437288, "learning_rate": 1.2993752621123338e-06, "loss": 0.7206, "step": 18538 }, { "epoch": 1.339353766684126, "grad_norm": 6.5130536977088, "learning_rate": 1.2991187072102604e-06, "loss": 0.5939, "step": 18539 }, { "epoch": 1.3394260118843353, "grad_norm": 6.297104899940099, "learning_rate": 1.2988621687473195e-06, "loss": 0.6058, "step": 18540 }, { "epoch": 1.339498257084545, "grad_norm": 7.273556553637625, "learning_rate": 1.2986056467270241e-06, "loss": 0.6292, "step": 18541 }, { "epoch": 1.3395705022847544, "grad_norm": 8.296272672639748, "learning_rate": 1.2983491411528837e-06, "loss": 0.738, "step": 18542 }, { "epoch": 1.339642747484964, "grad_norm": 6.953625333122105, "learning_rate": 1.29809265202841e-06, "loss": 0.5735, "step": 18543 }, { "epoch": 1.3397149926851735, "grad_norm": 5.860948681903323, "learning_rate": 1.2978361793571152e-06, "loss": 0.5922, "step": 18544 }, { "epoch": 1.339787237885383, "grad_norm": 6.995785670736269, "learning_rate": 1.2975797231425094e-06, "loss": 0.5836, "step": 18545 }, { "epoch": 1.3398594830855926, "grad_norm": 6.472930883959416, "learning_rate": 1.2973232833881036e-06, "loss": 0.6671, "step": 18546 }, { "epoch": 1.339931728285802, "grad_norm": 8.016537263054504, "learning_rate": 1.2970668600974085e-06, "loss": 0.6073, "step": 18547 }, { "epoch": 1.3400039734860116, "grad_norm": 6.414054173639541, "learning_rate": 1.2968104532739346e-06, "loss": 0.6573, "step": 18548 }, { "epoch": 1.340076218686221, "grad_norm": 7.959061061747117, "learning_rate": 1.2965540629211907e-06, "loss": 0.5595, "step": 18549 }, { "epoch": 1.3401484638864305, "grad_norm": 8.255195600016467, "learning_rate": 1.296297689042687e-06, "loss": 0.7061, "step": 18550 }, { "epoch": 1.34022070908664, "grad_norm": 6.582813322880764, "learning_rate": 1.2960413316419337e-06, "loss": 0.624, "step": 18551 }, { "epoch": 1.3402929542868496, "grad_norm": 6.704062229605119, "learning_rate": 1.2957849907224407e-06, "loss": 0.6022, "step": 18552 }, { "epoch": 1.3403651994870591, "grad_norm": 7.734022591008667, "learning_rate": 1.2955286662877148e-06, "loss": 0.6457, "step": 18553 }, { "epoch": 1.3404374446872687, "grad_norm": 6.395592435427515, "learning_rate": 1.2952723583412658e-06, "loss": 0.5756, "step": 18554 }, { "epoch": 1.3405096898874782, "grad_norm": 6.924183479950085, "learning_rate": 1.2950160668866033e-06, "loss": 0.617, "step": 18555 }, { "epoch": 1.3405819350876875, "grad_norm": 7.932012628634697, "learning_rate": 1.2947597919272364e-06, "loss": 0.6404, "step": 18556 }, { "epoch": 1.340654180287897, "grad_norm": 6.465725841445667, "learning_rate": 1.2945035334666712e-06, "loss": 0.626, "step": 18557 }, { "epoch": 1.3407264254881066, "grad_norm": 7.000086647587334, "learning_rate": 1.2942472915084164e-06, "loss": 0.6745, "step": 18558 }, { "epoch": 1.3407986706883162, "grad_norm": 6.529800181113771, "learning_rate": 1.2939910660559813e-06, "loss": 0.6726, "step": 18559 }, { "epoch": 1.3408709158885257, "grad_norm": 7.270750932490705, "learning_rate": 1.2937348571128709e-06, "loss": 0.6792, "step": 18560 }, { "epoch": 1.3409431610887352, "grad_norm": 6.576085152661771, "learning_rate": 1.2934786646825937e-06, "loss": 0.6522, "step": 18561 }, { "epoch": 1.3410154062889448, "grad_norm": 5.523959770492587, "learning_rate": 1.2932224887686568e-06, "loss": 0.577, "step": 18562 }, { "epoch": 1.341087651489154, "grad_norm": 6.858803275575622, "learning_rate": 1.292966329374567e-06, "loss": 0.6013, "step": 18563 }, { "epoch": 1.3411598966893636, "grad_norm": 7.433409263097592, "learning_rate": 1.292710186503831e-06, "loss": 0.6353, "step": 18564 }, { "epoch": 1.3412321418895732, "grad_norm": 7.161096522583795, "learning_rate": 1.292454060159955e-06, "loss": 0.6546, "step": 18565 }, { "epoch": 1.3413043870897827, "grad_norm": 8.625433869437245, "learning_rate": 1.2921979503464465e-06, "loss": 0.7818, "step": 18566 }, { "epoch": 1.3413766322899923, "grad_norm": 7.1829126729031545, "learning_rate": 1.2919418570668093e-06, "loss": 0.6495, "step": 18567 }, { "epoch": 1.3414488774902018, "grad_norm": 6.87656592828604, "learning_rate": 1.2916857803245503e-06, "loss": 0.6393, "step": 18568 }, { "epoch": 1.3415211226904113, "grad_norm": 6.54929032303131, "learning_rate": 1.2914297201231743e-06, "loss": 0.5683, "step": 18569 }, { "epoch": 1.3415933678906207, "grad_norm": 6.25042479025176, "learning_rate": 1.2911736764661881e-06, "loss": 0.6671, "step": 18570 }, { "epoch": 1.3416656130908302, "grad_norm": 6.443762242539595, "learning_rate": 1.2909176493570949e-06, "loss": 0.606, "step": 18571 }, { "epoch": 1.3417378582910398, "grad_norm": 6.282015777519498, "learning_rate": 1.2906616387994004e-06, "loss": 0.7267, "step": 18572 }, { "epoch": 1.3418101034912493, "grad_norm": 5.849364169526816, "learning_rate": 1.2904056447966085e-06, "loss": 0.6468, "step": 18573 }, { "epoch": 1.3418823486914588, "grad_norm": 5.9100319999951605, "learning_rate": 1.2901496673522247e-06, "loss": 0.5939, "step": 18574 }, { "epoch": 1.3419545938916684, "grad_norm": 7.50081909793141, "learning_rate": 1.2898937064697524e-06, "loss": 0.6685, "step": 18575 }, { "epoch": 1.342026839091878, "grad_norm": 8.605556963772786, "learning_rate": 1.2896377621526956e-06, "loss": 0.7219, "step": 18576 }, { "epoch": 1.3420990842920872, "grad_norm": 6.483225083938832, "learning_rate": 1.2893818344045589e-06, "loss": 0.6281, "step": 18577 }, { "epoch": 1.3421713294922968, "grad_norm": 7.144015719594033, "learning_rate": 1.2891259232288441e-06, "loss": 0.6672, "step": 18578 }, { "epoch": 1.3422435746925063, "grad_norm": 6.291294853221854, "learning_rate": 1.2888700286290551e-06, "loss": 0.567, "step": 18579 }, { "epoch": 1.3423158198927159, "grad_norm": 8.258287371043368, "learning_rate": 1.2886141506086951e-06, "loss": 0.6888, "step": 18580 }, { "epoch": 1.3423880650929254, "grad_norm": 6.247573992049459, "learning_rate": 1.2883582891712682e-06, "loss": 0.6453, "step": 18581 }, { "epoch": 1.342460310293135, "grad_norm": 7.362179750171014, "learning_rate": 1.288102444320273e-06, "loss": 0.6475, "step": 18582 }, { "epoch": 1.3425325554933445, "grad_norm": 7.7460294827610054, "learning_rate": 1.2878466160592157e-06, "loss": 0.6558, "step": 18583 }, { "epoch": 1.3426048006935538, "grad_norm": 6.059446844154086, "learning_rate": 1.2875908043915978e-06, "loss": 0.6045, "step": 18584 }, { "epoch": 1.3426770458937634, "grad_norm": 6.651077768282095, "learning_rate": 1.2873350093209194e-06, "loss": 0.6204, "step": 18585 }, { "epoch": 1.342749291093973, "grad_norm": 6.859728688916704, "learning_rate": 1.2870792308506832e-06, "loss": 0.6999, "step": 18586 }, { "epoch": 1.3428215362941824, "grad_norm": 6.623998890217859, "learning_rate": 1.2868234689843907e-06, "loss": 0.6226, "step": 18587 }, { "epoch": 1.342893781494392, "grad_norm": 7.27308111413741, "learning_rate": 1.2865677237255437e-06, "loss": 0.6603, "step": 18588 }, { "epoch": 1.3429660266946015, "grad_norm": 6.50941782702396, "learning_rate": 1.2863119950776414e-06, "loss": 0.6399, "step": 18589 }, { "epoch": 1.343038271894811, "grad_norm": 6.774459040254501, "learning_rate": 1.2860562830441859e-06, "loss": 0.6156, "step": 18590 }, { "epoch": 1.3431105170950204, "grad_norm": 6.436169829485127, "learning_rate": 1.2858005876286771e-06, "loss": 0.6539, "step": 18591 }, { "epoch": 1.3431827622952301, "grad_norm": 7.800727702698665, "learning_rate": 1.2855449088346155e-06, "loss": 0.6419, "step": 18592 }, { "epoch": 1.3432550074954395, "grad_norm": 8.391229646800971, "learning_rate": 1.2852892466655008e-06, "loss": 0.6285, "step": 18593 }, { "epoch": 1.343327252695649, "grad_norm": 7.904340068528511, "learning_rate": 1.2850336011248338e-06, "loss": 0.6008, "step": 18594 }, { "epoch": 1.3433994978958586, "grad_norm": 6.3129187718843776, "learning_rate": 1.2847779722161138e-06, "loss": 0.6913, "step": 18595 }, { "epoch": 1.343471743096068, "grad_norm": 7.08292642060381, "learning_rate": 1.2845223599428392e-06, "loss": 0.5871, "step": 18596 }, { "epoch": 1.3435439882962776, "grad_norm": 7.302862353452022, "learning_rate": 1.2842667643085096e-06, "loss": 0.687, "step": 18597 }, { "epoch": 1.343616233496487, "grad_norm": 7.435479923605197, "learning_rate": 1.284011185316625e-06, "loss": 0.6868, "step": 18598 }, { "epoch": 1.3436884786966967, "grad_norm": 6.2116333385761076, "learning_rate": 1.283755622970682e-06, "loss": 0.6497, "step": 18599 }, { "epoch": 1.343760723896906, "grad_norm": 5.698543349073475, "learning_rate": 1.2835000772741808e-06, "loss": 0.6204, "step": 18600 }, { "epoch": 1.3438329690971156, "grad_norm": 8.099230468364727, "learning_rate": 1.2832445482306184e-06, "loss": 0.7095, "step": 18601 }, { "epoch": 1.3439052142973251, "grad_norm": 6.396686244429062, "learning_rate": 1.2829890358434936e-06, "loss": 0.5949, "step": 18602 }, { "epoch": 1.3439774594975347, "grad_norm": 7.047145119372532, "learning_rate": 1.282733540116304e-06, "loss": 0.5847, "step": 18603 }, { "epoch": 1.3440497046977442, "grad_norm": 5.980539551956791, "learning_rate": 1.2824780610525467e-06, "loss": 0.6434, "step": 18604 }, { "epoch": 1.3441219498979535, "grad_norm": 6.508174744194916, "learning_rate": 1.2822225986557195e-06, "loss": 0.6164, "step": 18605 }, { "epoch": 1.3441941950981633, "grad_norm": 7.317481797446239, "learning_rate": 1.281967152929321e-06, "loss": 0.6943, "step": 18606 }, { "epoch": 1.3442664402983726, "grad_norm": 6.4375112589024726, "learning_rate": 1.2817117238768447e-06, "loss": 0.617, "step": 18607 }, { "epoch": 1.3443386854985822, "grad_norm": 6.962143302286464, "learning_rate": 1.2814563115017893e-06, "loss": 0.7545, "step": 18608 }, { "epoch": 1.3444109306987917, "grad_norm": 7.1716776635773245, "learning_rate": 1.2812009158076516e-06, "loss": 0.6998, "step": 18609 }, { "epoch": 1.3444831758990012, "grad_norm": 7.084072687793884, "learning_rate": 1.2809455367979255e-06, "loss": 0.6384, "step": 18610 }, { "epoch": 1.3445554210992108, "grad_norm": 6.11073104755453, "learning_rate": 1.2806901744761092e-06, "loss": 0.6686, "step": 18611 }, { "epoch": 1.34462766629942, "grad_norm": 7.223181219090943, "learning_rate": 1.2804348288456976e-06, "loss": 0.6375, "step": 18612 }, { "epoch": 1.3446999114996299, "grad_norm": 7.601872625033029, "learning_rate": 1.2801794999101874e-06, "loss": 0.6124, "step": 18613 }, { "epoch": 1.3447721566998392, "grad_norm": 9.121609410610942, "learning_rate": 1.2799241876730716e-06, "loss": 0.6725, "step": 18614 }, { "epoch": 1.3448444019000487, "grad_norm": 6.629145638724047, "learning_rate": 1.2796688921378464e-06, "loss": 0.7125, "step": 18615 }, { "epoch": 1.3449166471002583, "grad_norm": 7.402239135969212, "learning_rate": 1.2794136133080076e-06, "loss": 0.665, "step": 18616 }, { "epoch": 1.3449888923004678, "grad_norm": 7.117696398432829, "learning_rate": 1.2791583511870475e-06, "loss": 0.7193, "step": 18617 }, { "epoch": 1.3450611375006774, "grad_norm": 6.260423437063033, "learning_rate": 1.2789031057784617e-06, "loss": 0.5433, "step": 18618 }, { "epoch": 1.345133382700887, "grad_norm": 7.002802696454381, "learning_rate": 1.2786478770857446e-06, "loss": 0.6334, "step": 18619 }, { "epoch": 1.3452056279010964, "grad_norm": 8.679528840105924, "learning_rate": 1.2783926651123895e-06, "loss": 0.6647, "step": 18620 }, { "epoch": 1.3452778731013058, "grad_norm": 6.953939120175656, "learning_rate": 1.2781374698618903e-06, "loss": 0.6351, "step": 18621 }, { "epoch": 1.3453501183015153, "grad_norm": 6.512811506146912, "learning_rate": 1.2778822913377403e-06, "loss": 0.5747, "step": 18622 }, { "epoch": 1.3454223635017248, "grad_norm": 6.513718726405717, "learning_rate": 1.277627129543434e-06, "loss": 0.5982, "step": 18623 }, { "epoch": 1.3454946087019344, "grad_norm": 8.138320098664117, "learning_rate": 1.2773719844824622e-06, "loss": 0.6568, "step": 18624 }, { "epoch": 1.345566853902144, "grad_norm": 6.602489302546001, "learning_rate": 1.2771168561583187e-06, "loss": 0.6255, "step": 18625 }, { "epoch": 1.3456390991023535, "grad_norm": 6.199049526085065, "learning_rate": 1.2768617445744954e-06, "loss": 0.649, "step": 18626 }, { "epoch": 1.345711344302563, "grad_norm": 6.219838722812109, "learning_rate": 1.2766066497344865e-06, "loss": 0.6616, "step": 18627 }, { "epoch": 1.3457835895027723, "grad_norm": 6.160890331082908, "learning_rate": 1.2763515716417818e-06, "loss": 0.6646, "step": 18628 }, { "epoch": 1.3458558347029819, "grad_norm": 6.395889166009442, "learning_rate": 1.2760965102998738e-06, "loss": 0.5992, "step": 18629 }, { "epoch": 1.3459280799031914, "grad_norm": 7.042559264302356, "learning_rate": 1.2758414657122542e-06, "loss": 0.62, "step": 18630 }, { "epoch": 1.346000325103401, "grad_norm": 7.5056957233980865, "learning_rate": 1.2755864378824145e-06, "loss": 0.6268, "step": 18631 }, { "epoch": 1.3460725703036105, "grad_norm": 9.44538277346687, "learning_rate": 1.2753314268138462e-06, "loss": 0.6297, "step": 18632 }, { "epoch": 1.34614481550382, "grad_norm": 6.772702222292495, "learning_rate": 1.275076432510039e-06, "loss": 0.6409, "step": 18633 }, { "epoch": 1.3462170607040296, "grad_norm": 6.6193498233391646, "learning_rate": 1.2748214549744857e-06, "loss": 0.5889, "step": 18634 }, { "epoch": 1.346289305904239, "grad_norm": 7.680227173385854, "learning_rate": 1.2745664942106745e-06, "loss": 0.6266, "step": 18635 }, { "epoch": 1.3463615511044484, "grad_norm": 6.928445216561684, "learning_rate": 1.2743115502220965e-06, "loss": 0.6167, "step": 18636 }, { "epoch": 1.346433796304658, "grad_norm": 7.111793458838064, "learning_rate": 1.2740566230122416e-06, "loss": 0.6342, "step": 18637 }, { "epoch": 1.3465060415048675, "grad_norm": 7.86188880958621, "learning_rate": 1.2738017125845997e-06, "loss": 0.5887, "step": 18638 }, { "epoch": 1.346578286705077, "grad_norm": 6.756963175945536, "learning_rate": 1.2735468189426604e-06, "loss": 0.616, "step": 18639 }, { "epoch": 1.3466505319052866, "grad_norm": 6.938698330045835, "learning_rate": 1.2732919420899125e-06, "loss": 0.6704, "step": 18640 }, { "epoch": 1.3467227771054961, "grad_norm": 7.629448515434009, "learning_rate": 1.2730370820298469e-06, "loss": 0.541, "step": 18641 }, { "epoch": 1.3467950223057055, "grad_norm": 7.433276090986007, "learning_rate": 1.2727822387659495e-06, "loss": 0.7149, "step": 18642 }, { "epoch": 1.346867267505915, "grad_norm": 7.240900840987513, "learning_rate": 1.2725274123017106e-06, "loss": 0.631, "step": 18643 }, { "epoch": 1.3469395127061246, "grad_norm": 6.461362887012771, "learning_rate": 1.2722726026406184e-06, "loss": 0.6164, "step": 18644 }, { "epoch": 1.347011757906334, "grad_norm": 7.036324123982236, "learning_rate": 1.272017809786162e-06, "loss": 0.6492, "step": 18645 }, { "epoch": 1.3470840031065436, "grad_norm": 6.530854281616232, "learning_rate": 1.2717630337418273e-06, "loss": 0.6509, "step": 18646 }, { "epoch": 1.3471562483067532, "grad_norm": 6.57396082335318, "learning_rate": 1.2715082745111028e-06, "loss": 0.6614, "step": 18647 }, { "epoch": 1.3472284935069627, "grad_norm": 6.961810706927602, "learning_rate": 1.2712535320974768e-06, "loss": 0.6283, "step": 18648 }, { "epoch": 1.347300738707172, "grad_norm": 6.206659727219015, "learning_rate": 1.2709988065044351e-06, "loss": 0.5839, "step": 18649 }, { "epoch": 1.3473729839073816, "grad_norm": 7.592719365261641, "learning_rate": 1.2707440977354657e-06, "loss": 0.6194, "step": 18650 }, { "epoch": 1.3474452291075911, "grad_norm": 6.621327948015112, "learning_rate": 1.2704894057940554e-06, "loss": 0.6509, "step": 18651 }, { "epoch": 1.3475174743078007, "grad_norm": 7.3439162215230915, "learning_rate": 1.2702347306836914e-06, "loss": 0.6703, "step": 18652 }, { "epoch": 1.3475897195080102, "grad_norm": 6.969913402659762, "learning_rate": 1.269980072407858e-06, "loss": 0.5899, "step": 18653 }, { "epoch": 1.3476619647082198, "grad_norm": 8.946293273198577, "learning_rate": 1.2697254309700425e-06, "loss": 0.6454, "step": 18654 }, { "epoch": 1.3477342099084293, "grad_norm": 6.433800087765856, "learning_rate": 1.2694708063737313e-06, "loss": 0.636, "step": 18655 }, { "epoch": 1.3478064551086386, "grad_norm": 7.251931558007252, "learning_rate": 1.2692161986224082e-06, "loss": 0.6057, "step": 18656 }, { "epoch": 1.3478787003088482, "grad_norm": 7.418528748115834, "learning_rate": 1.2689616077195604e-06, "loss": 0.6955, "step": 18657 }, { "epoch": 1.3479509455090577, "grad_norm": 7.335099209957686, "learning_rate": 1.268707033668672e-06, "loss": 0.5967, "step": 18658 }, { "epoch": 1.3480231907092672, "grad_norm": 7.513798545283309, "learning_rate": 1.2684524764732282e-06, "loss": 0.7004, "step": 18659 }, { "epoch": 1.3480954359094768, "grad_norm": 7.57103959598448, "learning_rate": 1.2681979361367136e-06, "loss": 0.5856, "step": 18660 }, { "epoch": 1.3481676811096863, "grad_norm": 7.683315471813213, "learning_rate": 1.267943412662613e-06, "loss": 0.665, "step": 18661 }, { "epoch": 1.3482399263098959, "grad_norm": 6.982345934901727, "learning_rate": 1.2676889060544106e-06, "loss": 0.6188, "step": 18662 }, { "epoch": 1.3483121715101052, "grad_norm": 6.209440845126948, "learning_rate": 1.267434416315591e-06, "loss": 0.6661, "step": 18663 }, { "epoch": 1.3483844167103147, "grad_norm": 6.6070858573664175, "learning_rate": 1.2671799434496363e-06, "loss": 0.6706, "step": 18664 }, { "epoch": 1.3484566619105243, "grad_norm": 6.514689938712248, "learning_rate": 1.2669254874600311e-06, "loss": 0.632, "step": 18665 }, { "epoch": 1.3485289071107338, "grad_norm": 7.515535413561048, "learning_rate": 1.2666710483502592e-06, "loss": 0.6063, "step": 18666 }, { "epoch": 1.3486011523109434, "grad_norm": 7.262667671408507, "learning_rate": 1.2664166261238015e-06, "loss": 0.5835, "step": 18667 }, { "epoch": 1.348673397511153, "grad_norm": 6.425547642505152, "learning_rate": 1.266162220784143e-06, "loss": 0.518, "step": 18668 }, { "epoch": 1.3487456427113624, "grad_norm": 7.041271612458233, "learning_rate": 1.265907832334766e-06, "loss": 0.6453, "step": 18669 }, { "epoch": 1.3488178879115718, "grad_norm": 6.868145386567279, "learning_rate": 1.2656534607791532e-06, "loss": 0.5628, "step": 18670 }, { "epoch": 1.3488901331117815, "grad_norm": 6.609898467560969, "learning_rate": 1.2653991061207853e-06, "loss": 0.5833, "step": 18671 }, { "epoch": 1.3489623783119908, "grad_norm": 7.4352318647154565, "learning_rate": 1.2651447683631452e-06, "loss": 0.5332, "step": 18672 }, { "epoch": 1.3490346235122004, "grad_norm": 5.915138297959261, "learning_rate": 1.2648904475097151e-06, "loss": 0.6073, "step": 18673 }, { "epoch": 1.34910686871241, "grad_norm": 6.06469466608355, "learning_rate": 1.2646361435639747e-06, "loss": 0.6918, "step": 18674 }, { "epoch": 1.3491791139126195, "grad_norm": 7.0805457819688336, "learning_rate": 1.264381856529407e-06, "loss": 0.6718, "step": 18675 }, { "epoch": 1.349251359112829, "grad_norm": 6.543202246300074, "learning_rate": 1.2641275864094915e-06, "loss": 0.6677, "step": 18676 }, { "epoch": 1.3493236043130383, "grad_norm": 7.5578485481328395, "learning_rate": 1.2638733332077103e-06, "loss": 0.6484, "step": 18677 }, { "epoch": 1.349395849513248, "grad_norm": 6.743132594626974, "learning_rate": 1.263619096927543e-06, "loss": 0.7328, "step": 18678 }, { "epoch": 1.3494680947134574, "grad_norm": 7.233912541745379, "learning_rate": 1.2633648775724704e-06, "loss": 0.5879, "step": 18679 }, { "epoch": 1.349540339913667, "grad_norm": 8.739467712534758, "learning_rate": 1.263110675145973e-06, "loss": 0.5874, "step": 18680 }, { "epoch": 1.3496125851138765, "grad_norm": 6.495105074061212, "learning_rate": 1.26285648965153e-06, "loss": 0.6343, "step": 18681 }, { "epoch": 1.349684830314086, "grad_norm": 7.96993734452378, "learning_rate": 1.2626023210926203e-06, "loss": 0.5738, "step": 18682 }, { "epoch": 1.3497570755142956, "grad_norm": 6.66460056078124, "learning_rate": 1.2623481694727247e-06, "loss": 0.6695, "step": 18683 }, { "epoch": 1.349829320714505, "grad_norm": 6.961935363557152, "learning_rate": 1.262094034795322e-06, "loss": 0.6455, "step": 18684 }, { "epoch": 1.3499015659147147, "grad_norm": 6.150998116731239, "learning_rate": 1.26183991706389e-06, "loss": 0.588, "step": 18685 }, { "epoch": 1.349973811114924, "grad_norm": 8.58588374002164, "learning_rate": 1.2615858162819084e-06, "loss": 0.6306, "step": 18686 }, { "epoch": 1.3500460563151335, "grad_norm": 6.260257390979891, "learning_rate": 1.2613317324528545e-06, "loss": 0.6145, "step": 18687 }, { "epoch": 1.350118301515343, "grad_norm": 6.372462945877523, "learning_rate": 1.2610776655802094e-06, "loss": 0.6225, "step": 18688 }, { "epoch": 1.3501905467155526, "grad_norm": 7.2208101049520295, "learning_rate": 1.2608236156674474e-06, "loss": 0.6719, "step": 18689 }, { "epoch": 1.3502627919157622, "grad_norm": 8.560293539843201, "learning_rate": 1.2605695827180487e-06, "loss": 0.6519, "step": 18690 }, { "epoch": 1.3503350371159715, "grad_norm": 7.098623878472332, "learning_rate": 1.2603155667354907e-06, "loss": 0.5971, "step": 18691 }, { "epoch": 1.3504072823161812, "grad_norm": 6.55619901886384, "learning_rate": 1.260061567723249e-06, "loss": 0.5727, "step": 18692 }, { "epoch": 1.3504795275163906, "grad_norm": 6.549735598287209, "learning_rate": 1.259807585684802e-06, "loss": 0.7077, "step": 18693 }, { "epoch": 1.3505517727166, "grad_norm": 7.389052629050818, "learning_rate": 1.2595536206236262e-06, "loss": 0.59, "step": 18694 }, { "epoch": 1.3506240179168096, "grad_norm": 6.745809031068296, "learning_rate": 1.259299672543198e-06, "loss": 0.6254, "step": 18695 }, { "epoch": 1.3506962631170192, "grad_norm": 7.383304123429614, "learning_rate": 1.2590457414469938e-06, "loss": 0.6458, "step": 18696 }, { "epoch": 1.3507685083172287, "grad_norm": 6.323975057071378, "learning_rate": 1.2587918273384903e-06, "loss": 0.6102, "step": 18697 }, { "epoch": 1.3508407535174383, "grad_norm": 7.2984541628007715, "learning_rate": 1.2585379302211637e-06, "loss": 0.6406, "step": 18698 }, { "epoch": 1.3509129987176478, "grad_norm": 8.813888115823897, "learning_rate": 1.2582840500984877e-06, "loss": 0.7165, "step": 18699 }, { "epoch": 1.3509852439178571, "grad_norm": 7.752793823696972, "learning_rate": 1.2580301869739391e-06, "loss": 0.695, "step": 18700 }, { "epoch": 1.3510574891180667, "grad_norm": 6.394217155446463, "learning_rate": 1.257776340850993e-06, "loss": 0.5783, "step": 18701 }, { "epoch": 1.3511297343182762, "grad_norm": 8.307266308657091, "learning_rate": 1.2575225117331253e-06, "loss": 0.6875, "step": 18702 }, { "epoch": 1.3512019795184858, "grad_norm": 6.7654113030988485, "learning_rate": 1.2572686996238085e-06, "loss": 0.6506, "step": 18703 }, { "epoch": 1.3512742247186953, "grad_norm": 6.471259915582091, "learning_rate": 1.2570149045265185e-06, "loss": 0.6849, "step": 18704 }, { "epoch": 1.3513464699189048, "grad_norm": 7.02293399283204, "learning_rate": 1.2567611264447287e-06, "loss": 0.68, "step": 18705 }, { "epoch": 1.3514187151191144, "grad_norm": 5.3993800619803505, "learning_rate": 1.2565073653819145e-06, "loss": 0.6156, "step": 18706 }, { "epoch": 1.3514909603193237, "grad_norm": 7.738465123541084, "learning_rate": 1.2562536213415488e-06, "loss": 0.7006, "step": 18707 }, { "epoch": 1.3515632055195332, "grad_norm": 6.767053043753437, "learning_rate": 1.255999894327105e-06, "loss": 0.6653, "step": 18708 }, { "epoch": 1.3516354507197428, "grad_norm": 5.7432772270616335, "learning_rate": 1.255746184342058e-06, "loss": 0.6445, "step": 18709 }, { "epoch": 1.3517076959199523, "grad_norm": 9.469467488415605, "learning_rate": 1.255492491389879e-06, "loss": 0.6302, "step": 18710 }, { "epoch": 1.3517799411201619, "grad_norm": 6.232546430614507, "learning_rate": 1.2552388154740409e-06, "loss": 0.6799, "step": 18711 }, { "epoch": 1.3518521863203714, "grad_norm": 7.368249939960939, "learning_rate": 1.2549851565980173e-06, "loss": 0.7348, "step": 18712 }, { "epoch": 1.351924431520581, "grad_norm": 7.202910513177284, "learning_rate": 1.2547315147652811e-06, "loss": 0.6586, "step": 18713 }, { "epoch": 1.3519966767207903, "grad_norm": 5.897792674811365, "learning_rate": 1.2544778899793026e-06, "loss": 0.5964, "step": 18714 }, { "epoch": 1.3520689219209998, "grad_norm": 7.0695136699105925, "learning_rate": 1.2542242822435535e-06, "loss": 0.6479, "step": 18715 }, { "epoch": 1.3521411671212094, "grad_norm": 7.220264886873687, "learning_rate": 1.2539706915615088e-06, "loss": 0.6415, "step": 18716 }, { "epoch": 1.352213412321419, "grad_norm": 6.192706451116286, "learning_rate": 1.253717117936637e-06, "loss": 0.6031, "step": 18717 }, { "epoch": 1.3522856575216284, "grad_norm": 7.4249687733619725, "learning_rate": 1.25346356137241e-06, "loss": 0.6301, "step": 18718 }, { "epoch": 1.352357902721838, "grad_norm": 7.299797567408735, "learning_rate": 1.253210021872299e-06, "loss": 0.682, "step": 18719 }, { "epoch": 1.3524301479220475, "grad_norm": 7.245117384944902, "learning_rate": 1.2529564994397759e-06, "loss": 0.644, "step": 18720 }, { "epoch": 1.3525023931222568, "grad_norm": 8.382868927466188, "learning_rate": 1.2527029940783086e-06, "loss": 0.5717, "step": 18721 }, { "epoch": 1.3525746383224664, "grad_norm": 6.875210013216329, "learning_rate": 1.2524495057913693e-06, "loss": 0.6365, "step": 18722 }, { "epoch": 1.352646883522676, "grad_norm": 8.260773069398624, "learning_rate": 1.2521960345824275e-06, "loss": 0.6203, "step": 18723 }, { "epoch": 1.3527191287228855, "grad_norm": 6.769706781029962, "learning_rate": 1.2519425804549532e-06, "loss": 0.6525, "step": 18724 }, { "epoch": 1.352791373923095, "grad_norm": 7.083706236354579, "learning_rate": 1.2516891434124156e-06, "loss": 0.6371, "step": 18725 }, { "epoch": 1.3528636191233046, "grad_norm": 6.087023343140031, "learning_rate": 1.2514357234582847e-06, "loss": 0.6016, "step": 18726 }, { "epoch": 1.352935864323514, "grad_norm": 6.1565189206016875, "learning_rate": 1.2511823205960302e-06, "loss": 0.609, "step": 18727 }, { "epoch": 1.3530081095237234, "grad_norm": 6.2398351789372395, "learning_rate": 1.2509289348291194e-06, "loss": 0.6102, "step": 18728 }, { "epoch": 1.353080354723933, "grad_norm": 6.73106157611162, "learning_rate": 1.250675566161021e-06, "loss": 0.6327, "step": 18729 }, { "epoch": 1.3531525999241425, "grad_norm": 6.451562133404718, "learning_rate": 1.2504222145952054e-06, "loss": 0.6401, "step": 18730 }, { "epoch": 1.353224845124352, "grad_norm": 6.91112584394961, "learning_rate": 1.2501688801351387e-06, "loss": 0.6701, "step": 18731 }, { "epoch": 1.3532970903245616, "grad_norm": 7.259594061145374, "learning_rate": 1.249915562784289e-06, "loss": 0.6805, "step": 18732 }, { "epoch": 1.3533693355247711, "grad_norm": 7.633467428481597, "learning_rate": 1.249662262546125e-06, "loss": 0.6308, "step": 18733 }, { "epoch": 1.3534415807249807, "grad_norm": 6.591399872304162, "learning_rate": 1.2494089794241138e-06, "loss": 0.5768, "step": 18734 }, { "epoch": 1.35351382592519, "grad_norm": 6.361399485322804, "learning_rate": 1.2491557134217225e-06, "loss": 0.5905, "step": 18735 }, { "epoch": 1.3535860711253995, "grad_norm": 7.2694375812577645, "learning_rate": 1.2489024645424184e-06, "loss": 0.6861, "step": 18736 }, { "epoch": 1.353658316325609, "grad_norm": 6.8244507285628515, "learning_rate": 1.2486492327896681e-06, "loss": 0.64, "step": 18737 }, { "epoch": 1.3537305615258186, "grad_norm": 6.619513200915143, "learning_rate": 1.2483960181669391e-06, "loss": 0.5828, "step": 18738 }, { "epoch": 1.3538028067260282, "grad_norm": 6.87074925186347, "learning_rate": 1.248142820677696e-06, "loss": 0.6538, "step": 18739 }, { "epoch": 1.3538750519262377, "grad_norm": 7.367920144343176, "learning_rate": 1.2478896403254058e-06, "loss": 0.6179, "step": 18740 }, { "epoch": 1.3539472971264472, "grad_norm": 7.227748794100085, "learning_rate": 1.2476364771135348e-06, "loss": 0.6692, "step": 18741 }, { "epoch": 1.3540195423266566, "grad_norm": 6.202409448358589, "learning_rate": 1.2473833310455472e-06, "loss": 0.5699, "step": 18742 }, { "epoch": 1.3540917875268663, "grad_norm": 6.722360324054046, "learning_rate": 1.2471302021249094e-06, "loss": 0.6529, "step": 18743 }, { "epoch": 1.3541640327270756, "grad_norm": 6.754976486741148, "learning_rate": 1.246877090355085e-06, "loss": 0.5929, "step": 18744 }, { "epoch": 1.3542362779272852, "grad_norm": 5.980226996460855, "learning_rate": 1.2466239957395425e-06, "loss": 0.6255, "step": 18745 }, { "epoch": 1.3543085231274947, "grad_norm": 7.459738174630072, "learning_rate": 1.2463709182817433e-06, "loss": 0.675, "step": 18746 }, { "epoch": 1.3543807683277043, "grad_norm": 6.601872219065967, "learning_rate": 1.246117857985153e-06, "loss": 0.662, "step": 18747 }, { "epoch": 1.3544530135279138, "grad_norm": 6.771106076738231, "learning_rate": 1.2458648148532363e-06, "loss": 0.6509, "step": 18748 }, { "epoch": 1.3545252587281231, "grad_norm": 7.218089416136097, "learning_rate": 1.2456117888894557e-06, "loss": 0.6287, "step": 18749 }, { "epoch": 1.354597503928333, "grad_norm": 5.968014656826154, "learning_rate": 1.2453587800972755e-06, "loss": 0.5914, "step": 18750 }, { "epoch": 1.3546697491285422, "grad_norm": 7.847464240589072, "learning_rate": 1.2451057884801599e-06, "loss": 0.6682, "step": 18751 }, { "epoch": 1.3547419943287518, "grad_norm": 6.0428440744451315, "learning_rate": 1.2448528140415714e-06, "loss": 0.6174, "step": 18752 }, { "epoch": 1.3548142395289613, "grad_norm": 6.010165186689053, "learning_rate": 1.2445998567849732e-06, "loss": 0.6739, "step": 18753 }, { "epoch": 1.3548864847291708, "grad_norm": 6.923514901677471, "learning_rate": 1.2443469167138284e-06, "loss": 0.6559, "step": 18754 }, { "epoch": 1.3549587299293804, "grad_norm": 7.466551133806038, "learning_rate": 1.2440939938316002e-06, "loss": 0.6749, "step": 18755 }, { "epoch": 1.3550309751295897, "grad_norm": 6.321977205152728, "learning_rate": 1.2438410881417493e-06, "loss": 0.6288, "step": 18756 }, { "epoch": 1.3551032203297995, "grad_norm": 6.962280828837602, "learning_rate": 1.2435881996477383e-06, "loss": 0.6848, "step": 18757 }, { "epoch": 1.3551754655300088, "grad_norm": 7.994293561861652, "learning_rate": 1.2433353283530294e-06, "loss": 0.6297, "step": 18758 }, { "epoch": 1.3552477107302183, "grad_norm": 6.679449922675613, "learning_rate": 1.2430824742610854e-06, "loss": 0.6636, "step": 18759 }, { "epoch": 1.3553199559304279, "grad_norm": 6.74163582711854, "learning_rate": 1.242829637375365e-06, "loss": 0.6873, "step": 18760 }, { "epoch": 1.3553922011306374, "grad_norm": 6.927079631450687, "learning_rate": 1.242576817699331e-06, "loss": 0.6791, "step": 18761 }, { "epoch": 1.355464446330847, "grad_norm": 6.322245713592371, "learning_rate": 1.2423240152364439e-06, "loss": 0.7018, "step": 18762 }, { "epoch": 1.3555366915310563, "grad_norm": 7.201037851343, "learning_rate": 1.2420712299901646e-06, "loss": 0.6416, "step": 18763 }, { "epoch": 1.355608936731266, "grad_norm": 7.958466959791795, "learning_rate": 1.2418184619639534e-06, "loss": 0.6324, "step": 18764 }, { "epoch": 1.3556811819314754, "grad_norm": 7.338609040390766, "learning_rate": 1.2415657111612705e-06, "loss": 0.6328, "step": 18765 }, { "epoch": 1.355753427131685, "grad_norm": 8.23940637320508, "learning_rate": 1.2413129775855768e-06, "loss": 0.6765, "step": 18766 }, { "epoch": 1.3558256723318944, "grad_norm": 6.502696431678812, "learning_rate": 1.2410602612403302e-06, "loss": 0.5803, "step": 18767 }, { "epoch": 1.355897917532104, "grad_norm": 5.554635478230818, "learning_rate": 1.2408075621289914e-06, "loss": 0.5566, "step": 18768 }, { "epoch": 1.3559701627323135, "grad_norm": 6.914483181499275, "learning_rate": 1.240554880255019e-06, "loss": 0.6515, "step": 18769 }, { "epoch": 1.356042407932523, "grad_norm": 8.72469354217013, "learning_rate": 1.2403022156218735e-06, "loss": 0.7008, "step": 18770 }, { "epoch": 1.3561146531327326, "grad_norm": 6.086895182993543, "learning_rate": 1.2400495682330116e-06, "loss": 0.6404, "step": 18771 }, { "epoch": 1.356186898332942, "grad_norm": 7.197120132942607, "learning_rate": 1.239796938091892e-06, "loss": 0.6388, "step": 18772 }, { "epoch": 1.3562591435331515, "grad_norm": 6.758688527034572, "learning_rate": 1.2395443252019755e-06, "loss": 0.6131, "step": 18773 }, { "epoch": 1.356331388733361, "grad_norm": 8.610172961687574, "learning_rate": 1.2392917295667179e-06, "loss": 0.6425, "step": 18774 }, { "epoch": 1.3564036339335706, "grad_norm": 8.475019660859385, "learning_rate": 1.2390391511895774e-06, "loss": 0.6047, "step": 18775 }, { "epoch": 1.35647587913378, "grad_norm": 5.743332687676044, "learning_rate": 1.2387865900740118e-06, "loss": 0.5869, "step": 18776 }, { "epoch": 1.3565481243339896, "grad_norm": 6.5968461564163965, "learning_rate": 1.2385340462234796e-06, "loss": 0.6884, "step": 18777 }, { "epoch": 1.3566203695341992, "grad_norm": 9.727812341260359, "learning_rate": 1.2382815196414359e-06, "loss": 0.6415, "step": 18778 }, { "epoch": 1.3566926147344085, "grad_norm": 6.477165313754852, "learning_rate": 1.2380290103313384e-06, "loss": 0.5652, "step": 18779 }, { "epoch": 1.356764859934618, "grad_norm": 7.071181910893211, "learning_rate": 1.2377765182966438e-06, "loss": 0.6247, "step": 18780 }, { "epoch": 1.3568371051348276, "grad_norm": 7.454960946812925, "learning_rate": 1.237524043540809e-06, "loss": 0.6573, "step": 18781 }, { "epoch": 1.3569093503350371, "grad_norm": 6.67205624479376, "learning_rate": 1.2372715860672896e-06, "loss": 0.6199, "step": 18782 }, { "epoch": 1.3569815955352467, "grad_norm": 6.343194645969048, "learning_rate": 1.2370191458795417e-06, "loss": 0.6482, "step": 18783 }, { "epoch": 1.3570538407354562, "grad_norm": 6.733483912732497, "learning_rate": 1.236766722981022e-06, "loss": 0.5973, "step": 18784 }, { "epoch": 1.3571260859356657, "grad_norm": 8.021228757416695, "learning_rate": 1.2365143173751842e-06, "loss": 0.6703, "step": 18785 }, { "epoch": 1.357198331135875, "grad_norm": 7.911139261836188, "learning_rate": 1.2362619290654843e-06, "loss": 0.6493, "step": 18786 }, { "epoch": 1.3572705763360846, "grad_norm": 7.328604676381825, "learning_rate": 1.2360095580553782e-06, "loss": 0.6194, "step": 18787 }, { "epoch": 1.3573428215362942, "grad_norm": 7.056558996552511, "learning_rate": 1.2357572043483187e-06, "loss": 0.6375, "step": 18788 }, { "epoch": 1.3574150667365037, "grad_norm": 6.060519494681142, "learning_rate": 1.235504867947762e-06, "loss": 0.5817, "step": 18789 }, { "epoch": 1.3574873119367132, "grad_norm": 6.86299055392485, "learning_rate": 1.2352525488571614e-06, "loss": 0.6397, "step": 18790 }, { "epoch": 1.3575595571369228, "grad_norm": 7.1883677083228426, "learning_rate": 1.2350002470799715e-06, "loss": 0.6285, "step": 18791 }, { "epoch": 1.3576318023371323, "grad_norm": 5.605173902034253, "learning_rate": 1.234747962619646e-06, "loss": 0.5711, "step": 18792 }, { "epoch": 1.3577040475373416, "grad_norm": 7.666698068747963, "learning_rate": 1.2344956954796387e-06, "loss": 0.6523, "step": 18793 }, { "epoch": 1.3577762927375512, "grad_norm": 5.888211917152679, "learning_rate": 1.2342434456634025e-06, "loss": 0.644, "step": 18794 }, { "epoch": 1.3578485379377607, "grad_norm": 7.754726322345145, "learning_rate": 1.2339912131743918e-06, "loss": 0.6879, "step": 18795 }, { "epoch": 1.3579207831379703, "grad_norm": 7.056072990420563, "learning_rate": 1.2337389980160572e-06, "loss": 0.5749, "step": 18796 }, { "epoch": 1.3579930283381798, "grad_norm": 6.6151354782059855, "learning_rate": 1.2334868001918526e-06, "loss": 0.5861, "step": 18797 }, { "epoch": 1.3580652735383894, "grad_norm": 6.271107546196049, "learning_rate": 1.2332346197052316e-06, "loss": 0.6172, "step": 18798 }, { "epoch": 1.358137518738599, "grad_norm": 6.853351981339274, "learning_rate": 1.2329824565596438e-06, "loss": 0.6797, "step": 18799 }, { "epoch": 1.3582097639388082, "grad_norm": 6.494231672294093, "learning_rate": 1.2327303107585416e-06, "loss": 0.7131, "step": 18800 }, { "epoch": 1.3582820091390178, "grad_norm": 7.560376838697608, "learning_rate": 1.2324781823053784e-06, "loss": 0.6924, "step": 18801 }, { "epoch": 1.3583542543392273, "grad_norm": 6.967767894290892, "learning_rate": 1.2322260712036058e-06, "loss": 0.6708, "step": 18802 }, { "epoch": 1.3584264995394368, "grad_norm": 7.12086768531735, "learning_rate": 1.2319739774566727e-06, "loss": 0.6497, "step": 18803 }, { "epoch": 1.3584987447396464, "grad_norm": 8.432721176345645, "learning_rate": 1.2317219010680315e-06, "loss": 0.5425, "step": 18804 }, { "epoch": 1.358570989939856, "grad_norm": 6.168905350140666, "learning_rate": 1.2314698420411333e-06, "loss": 0.5952, "step": 18805 }, { "epoch": 1.3586432351400655, "grad_norm": 6.598789404613949, "learning_rate": 1.2312178003794275e-06, "loss": 0.6128, "step": 18806 }, { "epoch": 1.3587154803402748, "grad_norm": 7.1767192096738714, "learning_rate": 1.2309657760863646e-06, "loss": 0.5449, "step": 18807 }, { "epoch": 1.3587877255404843, "grad_norm": 7.896436066046886, "learning_rate": 1.230713769165395e-06, "loss": 0.649, "step": 18808 }, { "epoch": 1.3588599707406939, "grad_norm": 8.456069066944842, "learning_rate": 1.2304617796199683e-06, "loss": 0.6583, "step": 18809 }, { "epoch": 1.3589322159409034, "grad_norm": 6.265324535442983, "learning_rate": 1.2302098074535343e-06, "loss": 0.6073, "step": 18810 }, { "epoch": 1.359004461141113, "grad_norm": 8.20606299693513, "learning_rate": 1.229957852669542e-06, "loss": 0.652, "step": 18811 }, { "epoch": 1.3590767063413225, "grad_norm": 6.19447134303861, "learning_rate": 1.2297059152714413e-06, "loss": 0.6267, "step": 18812 }, { "epoch": 1.359148951541532, "grad_norm": 6.387433661248895, "learning_rate": 1.2294539952626797e-06, "loss": 0.5965, "step": 18813 }, { "epoch": 1.3592211967417414, "grad_norm": 8.449629140085408, "learning_rate": 1.2292020926467063e-06, "loss": 0.6136, "step": 18814 }, { "epoch": 1.3592934419419511, "grad_norm": 6.838036229169567, "learning_rate": 1.2289502074269693e-06, "loss": 0.6198, "step": 18815 }, { "epoch": 1.3593656871421604, "grad_norm": 6.747681961461087, "learning_rate": 1.2286983396069182e-06, "loss": 0.6842, "step": 18816 }, { "epoch": 1.35943793234237, "grad_norm": 5.903691782279648, "learning_rate": 1.228446489189999e-06, "loss": 0.5277, "step": 18817 }, { "epoch": 1.3595101775425795, "grad_norm": 6.581280094378605, "learning_rate": 1.2281946561796595e-06, "loss": 0.6268, "step": 18818 }, { "epoch": 1.359582422742789, "grad_norm": 7.081295962070884, "learning_rate": 1.2279428405793482e-06, "loss": 0.6447, "step": 18819 }, { "epoch": 1.3596546679429986, "grad_norm": 5.7039650494332665, "learning_rate": 1.2276910423925115e-06, "loss": 0.6072, "step": 18820 }, { "epoch": 1.359726913143208, "grad_norm": 6.75862531239018, "learning_rate": 1.2274392616225964e-06, "loss": 0.6167, "step": 18821 }, { "epoch": 1.3597991583434177, "grad_norm": 7.769811595463812, "learning_rate": 1.22718749827305e-06, "loss": 0.6573, "step": 18822 }, { "epoch": 1.359871403543627, "grad_norm": 7.073030705906714, "learning_rate": 1.2269357523473195e-06, "loss": 0.644, "step": 18823 }, { "epoch": 1.3599436487438366, "grad_norm": 7.512882104889751, "learning_rate": 1.2266840238488486e-06, "loss": 0.663, "step": 18824 }, { "epoch": 1.360015893944046, "grad_norm": 7.234147729455215, "learning_rate": 1.226432312781085e-06, "loss": 0.6532, "step": 18825 }, { "epoch": 1.3600881391442556, "grad_norm": 10.384570751474651, "learning_rate": 1.2261806191474745e-06, "loss": 0.6169, "step": 18826 }, { "epoch": 1.3601603843444652, "grad_norm": 7.493572532709516, "learning_rate": 1.225928942951463e-06, "loss": 0.6133, "step": 18827 }, { "epoch": 1.3602326295446745, "grad_norm": 6.386599291696271, "learning_rate": 1.225677284196494e-06, "loss": 0.6357, "step": 18828 }, { "epoch": 1.3603048747448843, "grad_norm": 6.6885181258062145, "learning_rate": 1.2254256428860126e-06, "loss": 0.6048, "step": 18829 }, { "epoch": 1.3603771199450936, "grad_norm": 7.1989659944439355, "learning_rate": 1.2251740190234664e-06, "loss": 0.5692, "step": 18830 }, { "epoch": 1.3604493651453031, "grad_norm": 6.75314512095351, "learning_rate": 1.2249224126122971e-06, "loss": 0.6173, "step": 18831 }, { "epoch": 1.3605216103455127, "grad_norm": 5.842774661671483, "learning_rate": 1.2246708236559498e-06, "loss": 0.6781, "step": 18832 }, { "epoch": 1.3605938555457222, "grad_norm": 7.841661152312407, "learning_rate": 1.224419252157869e-06, "loss": 0.6351, "step": 18833 }, { "epoch": 1.3606661007459318, "grad_norm": 6.40975186538684, "learning_rate": 1.224167698121499e-06, "loss": 0.5761, "step": 18834 }, { "epoch": 1.360738345946141, "grad_norm": 8.901003285982446, "learning_rate": 1.2239161615502819e-06, "loss": 0.6892, "step": 18835 }, { "epoch": 1.3608105911463508, "grad_norm": 6.90879478002495, "learning_rate": 1.2236646424476615e-06, "loss": 0.6625, "step": 18836 }, { "epoch": 1.3608828363465602, "grad_norm": 7.857651508265163, "learning_rate": 1.223413140817081e-06, "loss": 0.6155, "step": 18837 }, { "epoch": 1.3609550815467697, "grad_norm": 7.346517357990775, "learning_rate": 1.2231616566619834e-06, "loss": 0.5893, "step": 18838 }, { "epoch": 1.3610273267469792, "grad_norm": 6.331641155183826, "learning_rate": 1.2229101899858114e-06, "loss": 0.6424, "step": 18839 }, { "epoch": 1.3610995719471888, "grad_norm": 6.8263741699280835, "learning_rate": 1.2226587407920074e-06, "loss": 0.6427, "step": 18840 }, { "epoch": 1.3611718171473983, "grad_norm": 6.286555692165872, "learning_rate": 1.2224073090840143e-06, "loss": 0.6529, "step": 18841 }, { "epoch": 1.3612440623476079, "grad_norm": 6.244207520852951, "learning_rate": 1.2221558948652723e-06, "loss": 0.5599, "step": 18842 }, { "epoch": 1.3613163075478174, "grad_norm": 7.648845470797068, "learning_rate": 1.221904498139224e-06, "loss": 0.632, "step": 18843 }, { "epoch": 1.3613885527480267, "grad_norm": 9.24975874302672, "learning_rate": 1.2216531189093107e-06, "loss": 0.729, "step": 18844 }, { "epoch": 1.3614607979482363, "grad_norm": 7.204909759316391, "learning_rate": 1.2214017571789744e-06, "loss": 0.6348, "step": 18845 }, { "epoch": 1.3615330431484458, "grad_norm": 7.06984497547824, "learning_rate": 1.2211504129516544e-06, "loss": 0.6347, "step": 18846 }, { "epoch": 1.3616052883486554, "grad_norm": 6.552738158996672, "learning_rate": 1.2208990862307924e-06, "loss": 0.6343, "step": 18847 }, { "epoch": 1.361677533548865, "grad_norm": 7.257014071562993, "learning_rate": 1.2206477770198286e-06, "loss": 0.6057, "step": 18848 }, { "epoch": 1.3617497787490744, "grad_norm": 7.687670698054044, "learning_rate": 1.2203964853222033e-06, "loss": 0.6382, "step": 18849 }, { "epoch": 1.361822023949284, "grad_norm": 5.9513731453903835, "learning_rate": 1.2201452111413566e-06, "loss": 0.5782, "step": 18850 }, { "epoch": 1.3618942691494933, "grad_norm": 6.973777155176282, "learning_rate": 1.2198939544807283e-06, "loss": 0.6488, "step": 18851 }, { "epoch": 1.3619665143497028, "grad_norm": 5.574878934221405, "learning_rate": 1.219642715343759e-06, "loss": 0.5272, "step": 18852 }, { "epoch": 1.3620387595499124, "grad_norm": 6.339084958249901, "learning_rate": 1.2193914937338855e-06, "loss": 0.6957, "step": 18853 }, { "epoch": 1.362111004750122, "grad_norm": 8.69082248741812, "learning_rate": 1.2191402896545481e-06, "loss": 0.6821, "step": 18854 }, { "epoch": 1.3621832499503315, "grad_norm": 6.383639894831535, "learning_rate": 1.2188891031091868e-06, "loss": 0.6859, "step": 18855 }, { "epoch": 1.362255495150541, "grad_norm": 6.806543948829225, "learning_rate": 1.218637934101238e-06, "loss": 0.6589, "step": 18856 }, { "epoch": 1.3623277403507505, "grad_norm": 7.5275745841454835, "learning_rate": 1.21838678263414e-06, "loss": 0.5493, "step": 18857 }, { "epoch": 1.3623999855509599, "grad_norm": 6.913397081327064, "learning_rate": 1.2181356487113327e-06, "loss": 0.5604, "step": 18858 }, { "epoch": 1.3624722307511694, "grad_norm": 6.3180837579955265, "learning_rate": 1.217884532336254e-06, "loss": 0.6262, "step": 18859 }, { "epoch": 1.362544475951379, "grad_norm": 5.739027085209272, "learning_rate": 1.2176334335123396e-06, "loss": 0.6349, "step": 18860 }, { "epoch": 1.3626167211515885, "grad_norm": 6.099750713820357, "learning_rate": 1.217382352243028e-06, "loss": 0.5932, "step": 18861 }, { "epoch": 1.362688966351798, "grad_norm": 7.09706985830987, "learning_rate": 1.2171312885317569e-06, "loss": 0.5981, "step": 18862 }, { "epoch": 1.3627612115520076, "grad_norm": 7.81001742782309, "learning_rate": 1.2168802423819615e-06, "loss": 0.6254, "step": 18863 }, { "epoch": 1.3628334567522171, "grad_norm": 6.9041918971863625, "learning_rate": 1.2166292137970793e-06, "loss": 0.6555, "step": 18864 }, { "epoch": 1.3629057019524264, "grad_norm": 8.649659660567712, "learning_rate": 1.2163782027805466e-06, "loss": 0.6115, "step": 18865 }, { "epoch": 1.362977947152636, "grad_norm": 6.556060829015393, "learning_rate": 1.2161272093357994e-06, "loss": 0.6243, "step": 18866 }, { "epoch": 1.3630501923528455, "grad_norm": 5.991776552992136, "learning_rate": 1.2158762334662744e-06, "loss": 0.5558, "step": 18867 }, { "epoch": 1.363122437553055, "grad_norm": 6.130551683203534, "learning_rate": 1.2156252751754063e-06, "loss": 0.6087, "step": 18868 }, { "epoch": 1.3631946827532646, "grad_norm": 5.761703853264641, "learning_rate": 1.215374334466631e-06, "loss": 0.6106, "step": 18869 }, { "epoch": 1.3632669279534742, "grad_norm": 5.861420866789615, "learning_rate": 1.2151234113433845e-06, "loss": 0.6196, "step": 18870 }, { "epoch": 1.3633391731536837, "grad_norm": 6.451428797771455, "learning_rate": 1.2148725058091002e-06, "loss": 0.6197, "step": 18871 }, { "epoch": 1.363411418353893, "grad_norm": 7.159528089773914, "learning_rate": 1.2146216178672132e-06, "loss": 0.691, "step": 18872 }, { "epoch": 1.3634836635541026, "grad_norm": 6.704376886302988, "learning_rate": 1.2143707475211593e-06, "loss": 0.6381, "step": 18873 }, { "epoch": 1.363555908754312, "grad_norm": 7.700381519724617, "learning_rate": 1.2141198947743708e-06, "loss": 0.6163, "step": 18874 }, { "epoch": 1.3636281539545216, "grad_norm": 8.502091543138674, "learning_rate": 1.2138690596302821e-06, "loss": 0.698, "step": 18875 }, { "epoch": 1.3637003991547312, "grad_norm": 6.126695359376972, "learning_rate": 1.2136182420923277e-06, "loss": 0.6409, "step": 18876 }, { "epoch": 1.3637726443549407, "grad_norm": 6.460337895865801, "learning_rate": 1.2133674421639408e-06, "loss": 0.598, "step": 18877 }, { "epoch": 1.3638448895551503, "grad_norm": 6.7587452503862835, "learning_rate": 1.2131166598485545e-06, "loss": 0.6438, "step": 18878 }, { "epoch": 1.3639171347553596, "grad_norm": 8.381584198367062, "learning_rate": 1.2128658951496016e-06, "loss": 0.6223, "step": 18879 }, { "epoch": 1.3639893799555691, "grad_norm": 7.213436400452224, "learning_rate": 1.2126151480705167e-06, "loss": 0.6569, "step": 18880 }, { "epoch": 1.3640616251557787, "grad_norm": 8.942742790699365, "learning_rate": 1.2123644186147296e-06, "loss": 0.7125, "step": 18881 }, { "epoch": 1.3641338703559882, "grad_norm": 7.853995027024239, "learning_rate": 1.2121137067856738e-06, "loss": 0.6272, "step": 18882 }, { "epoch": 1.3642061155561978, "grad_norm": 5.834736282939879, "learning_rate": 1.2118630125867813e-06, "loss": 0.6297, "step": 18883 }, { "epoch": 1.3642783607564073, "grad_norm": 6.139653330407029, "learning_rate": 1.2116123360214852e-06, "loss": 0.6187, "step": 18884 }, { "epoch": 1.3643506059566168, "grad_norm": 6.52473204782376, "learning_rate": 1.2113616770932138e-06, "loss": 0.7255, "step": 18885 }, { "epoch": 1.3644228511568262, "grad_norm": 6.933681777610223, "learning_rate": 1.2111110358054015e-06, "loss": 0.671, "step": 18886 }, { "epoch": 1.3644950963570357, "grad_norm": 6.6126348874733205, "learning_rate": 1.2108604121614793e-06, "loss": 0.6399, "step": 18887 }, { "epoch": 1.3645673415572452, "grad_norm": 5.665879288367479, "learning_rate": 1.2106098061648761e-06, "loss": 0.6081, "step": 18888 }, { "epoch": 1.3646395867574548, "grad_norm": 6.478324549205894, "learning_rate": 1.2103592178190235e-06, "loss": 0.6402, "step": 18889 }, { "epoch": 1.3647118319576643, "grad_norm": 7.23839851033257, "learning_rate": 1.2101086471273518e-06, "loss": 0.5884, "step": 18890 }, { "epoch": 1.3647840771578739, "grad_norm": 6.9183131024015765, "learning_rate": 1.2098580940932923e-06, "loss": 0.6012, "step": 18891 }, { "epoch": 1.3648563223580834, "grad_norm": 5.807189484340188, "learning_rate": 1.2096075587202727e-06, "loss": 0.527, "step": 18892 }, { "epoch": 1.3649285675582927, "grad_norm": 6.489639241165008, "learning_rate": 1.209357041011724e-06, "loss": 0.6611, "step": 18893 }, { "epoch": 1.3650008127585025, "grad_norm": 7.073274209185593, "learning_rate": 1.2091065409710748e-06, "loss": 0.591, "step": 18894 }, { "epoch": 1.3650730579587118, "grad_norm": 6.63699119327496, "learning_rate": 1.2088560586017551e-06, "loss": 0.6589, "step": 18895 }, { "epoch": 1.3651453031589214, "grad_norm": 6.197459210949816, "learning_rate": 1.208605593907193e-06, "loss": 0.6747, "step": 18896 }, { "epoch": 1.365217548359131, "grad_norm": 6.928069707488961, "learning_rate": 1.208355146890818e-06, "loss": 0.6878, "step": 18897 }, { "epoch": 1.3652897935593404, "grad_norm": 6.649543356454279, "learning_rate": 1.2081047175560593e-06, "loss": 0.5901, "step": 18898 }, { "epoch": 1.36536203875955, "grad_norm": 8.57600529508285, "learning_rate": 1.2078543059063425e-06, "loss": 0.6367, "step": 18899 }, { "epoch": 1.3654342839597593, "grad_norm": 7.419028288354763, "learning_rate": 1.2076039119450971e-06, "loss": 0.6361, "step": 18900 }, { "epoch": 1.365506529159969, "grad_norm": 7.220431837867661, "learning_rate": 1.2073535356757508e-06, "loss": 0.644, "step": 18901 }, { "epoch": 1.3655787743601784, "grad_norm": 8.477419311535424, "learning_rate": 1.2071031771017319e-06, "loss": 0.7471, "step": 18902 }, { "epoch": 1.365651019560388, "grad_norm": 7.231066480729552, "learning_rate": 1.2068528362264655e-06, "loss": 0.6024, "step": 18903 }, { "epoch": 1.3657232647605975, "grad_norm": 7.495868307742054, "learning_rate": 1.2066025130533797e-06, "loss": 0.632, "step": 18904 }, { "epoch": 1.365795509960807, "grad_norm": 7.618662217020176, "learning_rate": 1.2063522075859013e-06, "loss": 0.6673, "step": 18905 }, { "epoch": 1.3658677551610166, "grad_norm": 8.206905285950278, "learning_rate": 1.2061019198274568e-06, "loss": 0.6276, "step": 18906 }, { "epoch": 1.3659400003612259, "grad_norm": 6.4619736123191185, "learning_rate": 1.2058516497814724e-06, "loss": 0.64, "step": 18907 }, { "epoch": 1.3660122455614356, "grad_norm": 7.3721163009601804, "learning_rate": 1.205601397451374e-06, "loss": 0.5869, "step": 18908 }, { "epoch": 1.366084490761645, "grad_norm": 7.072063621200299, "learning_rate": 1.2053511628405883e-06, "loss": 0.6194, "step": 18909 }, { "epoch": 1.3661567359618545, "grad_norm": 6.768675506266411, "learning_rate": 1.2051009459525392e-06, "loss": 0.6192, "step": 18910 }, { "epoch": 1.366228981162064, "grad_norm": 6.49665452987662, "learning_rate": 1.2048507467906525e-06, "loss": 0.5871, "step": 18911 }, { "epoch": 1.3663012263622736, "grad_norm": 7.830163381925468, "learning_rate": 1.2046005653583546e-06, "loss": 0.6066, "step": 18912 }, { "epoch": 1.3663734715624831, "grad_norm": 8.032067876946932, "learning_rate": 1.204350401659067e-06, "loss": 0.6688, "step": 18913 }, { "epoch": 1.3664457167626924, "grad_norm": 7.417279107456464, "learning_rate": 1.204100255696218e-06, "loss": 0.6337, "step": 18914 }, { "epoch": 1.3665179619629022, "grad_norm": 7.565044384893598, "learning_rate": 1.2038501274732295e-06, "loss": 0.6619, "step": 18915 }, { "epoch": 1.3665902071631115, "grad_norm": 7.450892083950732, "learning_rate": 1.2036000169935278e-06, "loss": 0.6532, "step": 18916 }, { "epoch": 1.366662452363321, "grad_norm": 7.322673013973796, "learning_rate": 1.203349924260534e-06, "loss": 0.6656, "step": 18917 }, { "epoch": 1.3667346975635306, "grad_norm": 6.9188076826687315, "learning_rate": 1.203099849277673e-06, "loss": 0.573, "step": 18918 }, { "epoch": 1.3668069427637402, "grad_norm": 6.927807885235167, "learning_rate": 1.2028497920483691e-06, "loss": 0.645, "step": 18919 }, { "epoch": 1.3668791879639497, "grad_norm": 5.9652974793663045, "learning_rate": 1.2025997525760435e-06, "loss": 0.6425, "step": 18920 }, { "epoch": 1.3669514331641592, "grad_norm": 5.518216224164369, "learning_rate": 1.20234973086412e-06, "loss": 0.5522, "step": 18921 }, { "epoch": 1.3670236783643688, "grad_norm": 8.3377906577077, "learning_rate": 1.202099726916021e-06, "loss": 0.6578, "step": 18922 }, { "epoch": 1.367095923564578, "grad_norm": 9.944215053230815, "learning_rate": 1.201849740735169e-06, "loss": 0.7088, "step": 18923 }, { "epoch": 1.3671681687647876, "grad_norm": 8.60004208576639, "learning_rate": 1.2015997723249859e-06, "loss": 0.6369, "step": 18924 }, { "epoch": 1.3672404139649972, "grad_norm": 5.949313007091463, "learning_rate": 1.201349821688894e-06, "loss": 0.5857, "step": 18925 }, { "epoch": 1.3673126591652067, "grad_norm": 9.567550621885612, "learning_rate": 1.2010998888303147e-06, "loss": 0.7483, "step": 18926 }, { "epoch": 1.3673849043654163, "grad_norm": 8.063127360221246, "learning_rate": 1.2008499737526702e-06, "loss": 0.6254, "step": 18927 }, { "epoch": 1.3674571495656258, "grad_norm": 7.567055583263236, "learning_rate": 1.2006000764593801e-06, "loss": 0.6233, "step": 18928 }, { "epoch": 1.3675293947658353, "grad_norm": 6.986374263808368, "learning_rate": 1.2003501969538658e-06, "loss": 0.6098, "step": 18929 }, { "epoch": 1.3676016399660447, "grad_norm": 8.061006156272605, "learning_rate": 1.2001003352395494e-06, "loss": 0.6044, "step": 18930 }, { "epoch": 1.3676738851662542, "grad_norm": 5.846470816509801, "learning_rate": 1.1998504913198492e-06, "loss": 0.6466, "step": 18931 }, { "epoch": 1.3677461303664638, "grad_norm": 7.2056194628402155, "learning_rate": 1.199600665198186e-06, "loss": 0.6325, "step": 18932 }, { "epoch": 1.3678183755666733, "grad_norm": 6.2232254256523865, "learning_rate": 1.1993508568779799e-06, "loss": 0.6836, "step": 18933 }, { "epoch": 1.3678906207668828, "grad_norm": 6.133907458482604, "learning_rate": 1.199101066362651e-06, "loss": 0.5795, "step": 18934 }, { "epoch": 1.3679628659670924, "grad_norm": 6.97037340080714, "learning_rate": 1.1988512936556182e-06, "loss": 0.6311, "step": 18935 }, { "epoch": 1.368035111167302, "grad_norm": 8.050253860546155, "learning_rate": 1.1986015387603011e-06, "loss": 0.6535, "step": 18936 }, { "epoch": 1.3681073563675112, "grad_norm": 8.044572637858474, "learning_rate": 1.1983518016801194e-06, "loss": 0.6036, "step": 18937 }, { "epoch": 1.3681796015677208, "grad_norm": 6.889009385781297, "learning_rate": 1.1981020824184897e-06, "loss": 0.6878, "step": 18938 }, { "epoch": 1.3682518467679303, "grad_norm": 7.120077206053739, "learning_rate": 1.1978523809788318e-06, "loss": 0.7241, "step": 18939 }, { "epoch": 1.3683240919681399, "grad_norm": 7.01372626423495, "learning_rate": 1.1976026973645637e-06, "loss": 0.6466, "step": 18940 }, { "epoch": 1.3683963371683494, "grad_norm": 5.90203351683037, "learning_rate": 1.1973530315791043e-06, "loss": 0.6457, "step": 18941 }, { "epoch": 1.368468582368559, "grad_norm": 5.788770907699334, "learning_rate": 1.1971033836258689e-06, "loss": 0.6203, "step": 18942 }, { "epoch": 1.3685408275687685, "grad_norm": 7.527767405047276, "learning_rate": 1.1968537535082771e-06, "loss": 0.69, "step": 18943 }, { "epoch": 1.3686130727689778, "grad_norm": 6.348815398971279, "learning_rate": 1.1966041412297469e-06, "loss": 0.5615, "step": 18944 }, { "epoch": 1.3686853179691874, "grad_norm": 6.7030737814913826, "learning_rate": 1.1963545467936927e-06, "loss": 0.5966, "step": 18945 }, { "epoch": 1.368757563169397, "grad_norm": 6.6043858376445925, "learning_rate": 1.1961049702035326e-06, "loss": 0.5853, "step": 18946 }, { "epoch": 1.3688298083696064, "grad_norm": 8.435639805071009, "learning_rate": 1.195855411462683e-06, "loss": 0.6393, "step": 18947 }, { "epoch": 1.368902053569816, "grad_norm": 6.967296773836808, "learning_rate": 1.1956058705745617e-06, "loss": 0.6269, "step": 18948 }, { "epoch": 1.3689742987700255, "grad_norm": 6.838825282897641, "learning_rate": 1.1953563475425817e-06, "loss": 0.5987, "step": 18949 }, { "epoch": 1.369046543970235, "grad_norm": 5.4168549529387215, "learning_rate": 1.1951068423701604e-06, "loss": 0.6454, "step": 18950 }, { "epoch": 1.3691187891704444, "grad_norm": 7.049472913585455, "learning_rate": 1.1948573550607134e-06, "loss": 0.5859, "step": 18951 }, { "epoch": 1.369191034370654, "grad_norm": 7.215788791229873, "learning_rate": 1.1946078856176557e-06, "loss": 0.6924, "step": 18952 }, { "epoch": 1.3692632795708635, "grad_norm": 6.42964151952347, "learning_rate": 1.1943584340444025e-06, "loss": 0.6073, "step": 18953 }, { "epoch": 1.369335524771073, "grad_norm": 8.075275088133123, "learning_rate": 1.1941090003443686e-06, "loss": 0.6398, "step": 18954 }, { "epoch": 1.3694077699712826, "grad_norm": 6.805766286204048, "learning_rate": 1.1938595845209694e-06, "loss": 0.6547, "step": 18955 }, { "epoch": 1.369480015171492, "grad_norm": 8.21143970351086, "learning_rate": 1.1936101865776176e-06, "loss": 0.6626, "step": 18956 }, { "epoch": 1.3695522603717016, "grad_norm": 6.092966508626495, "learning_rate": 1.1933608065177277e-06, "loss": 0.5754, "step": 18957 }, { "epoch": 1.369624505571911, "grad_norm": 5.960343277537292, "learning_rate": 1.1931114443447139e-06, "loss": 0.6368, "step": 18958 }, { "epoch": 1.3696967507721205, "grad_norm": 7.536180836899106, "learning_rate": 1.1928621000619907e-06, "loss": 0.611, "step": 18959 }, { "epoch": 1.36976899597233, "grad_norm": 6.682961037683907, "learning_rate": 1.1926127736729695e-06, "loss": 0.5931, "step": 18960 }, { "epoch": 1.3698412411725396, "grad_norm": 6.473985405453838, "learning_rate": 1.1923634651810644e-06, "loss": 0.5621, "step": 18961 }, { "epoch": 1.3699134863727491, "grad_norm": 6.313139628117798, "learning_rate": 1.192114174589688e-06, "loss": 0.6242, "step": 18962 }, { "epoch": 1.3699857315729587, "grad_norm": 6.677177376960168, "learning_rate": 1.1918649019022532e-06, "loss": 0.6687, "step": 18963 }, { "epoch": 1.3700579767731682, "grad_norm": 6.287933286143097, "learning_rate": 1.191615647122172e-06, "loss": 0.6151, "step": 18964 }, { "epoch": 1.3701302219733775, "grad_norm": 6.832033762596022, "learning_rate": 1.191366410252857e-06, "loss": 0.6218, "step": 18965 }, { "epoch": 1.3702024671735873, "grad_norm": 7.367575059811939, "learning_rate": 1.1911171912977203e-06, "loss": 0.7387, "step": 18966 }, { "epoch": 1.3702747123737966, "grad_norm": 6.6984266341103265, "learning_rate": 1.1908679902601721e-06, "loss": 0.6195, "step": 18967 }, { "epoch": 1.3703469575740062, "grad_norm": 5.968264255431076, "learning_rate": 1.1906188071436248e-06, "loss": 0.6539, "step": 18968 }, { "epoch": 1.3704192027742157, "grad_norm": 6.178678447070613, "learning_rate": 1.1903696419514905e-06, "loss": 0.6371, "step": 18969 }, { "epoch": 1.3704914479744252, "grad_norm": 6.869828099483645, "learning_rate": 1.1901204946871767e-06, "loss": 0.5629, "step": 18970 }, { "epoch": 1.3705636931746348, "grad_norm": 7.200419615068044, "learning_rate": 1.1898713653540973e-06, "loss": 0.6546, "step": 18971 }, { "epoch": 1.370635938374844, "grad_norm": 6.495131797011272, "learning_rate": 1.1896222539556615e-06, "loss": 0.6416, "step": 18972 }, { "epoch": 1.3707081835750539, "grad_norm": 6.392846651669942, "learning_rate": 1.1893731604952807e-06, "loss": 0.5355, "step": 18973 }, { "epoch": 1.3707804287752632, "grad_norm": 7.723459390141951, "learning_rate": 1.189124084976363e-06, "loss": 0.7169, "step": 18974 }, { "epoch": 1.3708526739754727, "grad_norm": 6.730060372386476, "learning_rate": 1.1888750274023183e-06, "loss": 0.6121, "step": 18975 }, { "epoch": 1.3709249191756823, "grad_norm": 7.28353361411609, "learning_rate": 1.188625987776557e-06, "loss": 0.5761, "step": 18976 }, { "epoch": 1.3709971643758918, "grad_norm": 6.307672751076103, "learning_rate": 1.188376966102488e-06, "loss": 0.6077, "step": 18977 }, { "epoch": 1.3710694095761014, "grad_norm": 7.086172753984791, "learning_rate": 1.1881279623835193e-06, "loss": 0.5695, "step": 18978 }, { "epoch": 1.3711416547763107, "grad_norm": 8.303898771194534, "learning_rate": 1.1878789766230603e-06, "loss": 0.7175, "step": 18979 }, { "epoch": 1.3712138999765204, "grad_norm": 7.297012491176558, "learning_rate": 1.1876300088245193e-06, "loss": 0.6933, "step": 18980 }, { "epoch": 1.3712861451767298, "grad_norm": 7.4766877581428846, "learning_rate": 1.1873810589913042e-06, "loss": 0.6101, "step": 18981 }, { "epoch": 1.3713583903769393, "grad_norm": 8.0056189831102, "learning_rate": 1.1871321271268235e-06, "loss": 0.6191, "step": 18982 }, { "epoch": 1.3714306355771488, "grad_norm": 7.613881243594014, "learning_rate": 1.1868832132344846e-06, "loss": 0.6719, "step": 18983 }, { "epoch": 1.3715028807773584, "grad_norm": 8.107857328780964, "learning_rate": 1.1866343173176959e-06, "loss": 0.6458, "step": 18984 }, { "epoch": 1.371575125977568, "grad_norm": 6.455553535348763, "learning_rate": 1.1863854393798623e-06, "loss": 0.7093, "step": 18985 }, { "epoch": 1.3716473711777772, "grad_norm": 7.349249416095529, "learning_rate": 1.1861365794243925e-06, "loss": 0.6448, "step": 18986 }, { "epoch": 1.371719616377987, "grad_norm": 6.98529524575806, "learning_rate": 1.1858877374546937e-06, "loss": 0.5818, "step": 18987 }, { "epoch": 1.3717918615781963, "grad_norm": 7.133147550403684, "learning_rate": 1.1856389134741703e-06, "loss": 0.6102, "step": 18988 }, { "epoch": 1.3718641067784059, "grad_norm": 7.3490879868376355, "learning_rate": 1.1853901074862299e-06, "loss": 0.648, "step": 18989 }, { "epoch": 1.3719363519786154, "grad_norm": 6.125263130608305, "learning_rate": 1.1851413194942771e-06, "loss": 0.617, "step": 18990 }, { "epoch": 1.372008597178825, "grad_norm": 8.180789439713847, "learning_rate": 1.1848925495017205e-06, "loss": 0.644, "step": 18991 }, { "epoch": 1.3720808423790345, "grad_norm": 6.319213017102492, "learning_rate": 1.1846437975119628e-06, "loss": 0.5924, "step": 18992 }, { "epoch": 1.372153087579244, "grad_norm": 6.2019257046734335, "learning_rate": 1.1843950635284105e-06, "loss": 0.6204, "step": 18993 }, { "epoch": 1.3722253327794536, "grad_norm": 8.296646028796724, "learning_rate": 1.184146347554469e-06, "loss": 0.5843, "step": 18994 }, { "epoch": 1.372297577979663, "grad_norm": 6.932576124706711, "learning_rate": 1.1838976495935415e-06, "loss": 0.5889, "step": 18995 }, { "epoch": 1.3723698231798724, "grad_norm": 7.287278197169998, "learning_rate": 1.1836489696490331e-06, "loss": 0.6734, "step": 18996 }, { "epoch": 1.372442068380082, "grad_norm": 6.8303199574716515, "learning_rate": 1.1834003077243484e-06, "loss": 0.601, "step": 18997 }, { "epoch": 1.3725143135802915, "grad_norm": 7.233435287577426, "learning_rate": 1.183151663822891e-06, "loss": 0.6145, "step": 18998 }, { "epoch": 1.372586558780501, "grad_norm": 7.273736966008848, "learning_rate": 1.1829030379480652e-06, "loss": 0.6655, "step": 18999 }, { "epoch": 1.3726588039807106, "grad_norm": 6.469920176937832, "learning_rate": 1.1826544301032739e-06, "loss": 0.6507, "step": 19000 }, { "epoch": 1.3727310491809201, "grad_norm": 6.421157091392017, "learning_rate": 1.1824058402919213e-06, "loss": 0.6615, "step": 19001 }, { "epoch": 1.3728032943811295, "grad_norm": 6.879639603246426, "learning_rate": 1.1821572685174093e-06, "loss": 0.5916, "step": 19002 }, { "epoch": 1.372875539581339, "grad_norm": 6.009103544534372, "learning_rate": 1.1819087147831407e-06, "loss": 0.6717, "step": 19003 }, { "epoch": 1.3729477847815486, "grad_norm": 7.669432707774548, "learning_rate": 1.1816601790925186e-06, "loss": 0.6522, "step": 19004 }, { "epoch": 1.373020029981758, "grad_norm": 5.9835038708935455, "learning_rate": 1.1814116614489458e-06, "loss": 0.6462, "step": 19005 }, { "epoch": 1.3730922751819676, "grad_norm": 7.840447570325774, "learning_rate": 1.1811631618558225e-06, "loss": 0.6673, "step": 19006 }, { "epoch": 1.3731645203821772, "grad_norm": 6.963359852264488, "learning_rate": 1.1809146803165516e-06, "loss": 0.6083, "step": 19007 }, { "epoch": 1.3732367655823867, "grad_norm": 9.716680864571286, "learning_rate": 1.1806662168345348e-06, "loss": 0.65, "step": 19008 }, { "epoch": 1.373309010782596, "grad_norm": 7.132981230462538, "learning_rate": 1.1804177714131729e-06, "loss": 0.656, "step": 19009 }, { "epoch": 1.3733812559828056, "grad_norm": 7.080981623386155, "learning_rate": 1.180169344055867e-06, "loss": 0.5845, "step": 19010 }, { "epoch": 1.3734535011830151, "grad_norm": 6.956731859433889, "learning_rate": 1.1799209347660179e-06, "loss": 0.6611, "step": 19011 }, { "epoch": 1.3735257463832247, "grad_norm": 6.413942064090825, "learning_rate": 1.1796725435470274e-06, "loss": 0.6339, "step": 19012 }, { "epoch": 1.3735979915834342, "grad_norm": 6.319040667963683, "learning_rate": 1.1794241704022937e-06, "loss": 0.6105, "step": 19013 }, { "epoch": 1.3736702367836437, "grad_norm": 6.788995634919574, "learning_rate": 1.1791758153352176e-06, "loss": 0.6114, "step": 19014 }, { "epoch": 1.3737424819838533, "grad_norm": 5.919378635170559, "learning_rate": 1.178927478349199e-06, "loss": 0.6501, "step": 19015 }, { "epoch": 1.3738147271840626, "grad_norm": 6.936778537936395, "learning_rate": 1.1786791594476384e-06, "loss": 0.6043, "step": 19016 }, { "epoch": 1.3738869723842722, "grad_norm": 6.799621448077694, "learning_rate": 1.1784308586339336e-06, "loss": 0.655, "step": 19017 }, { "epoch": 1.3739592175844817, "grad_norm": 6.237162413855089, "learning_rate": 1.178182575911484e-06, "loss": 0.5699, "step": 19018 }, { "epoch": 1.3740314627846912, "grad_norm": 5.3227988551689505, "learning_rate": 1.1779343112836883e-06, "loss": 0.6578, "step": 19019 }, { "epoch": 1.3741037079849008, "grad_norm": 7.2232411603176745, "learning_rate": 1.1776860647539457e-06, "loss": 0.6395, "step": 19020 }, { "epoch": 1.3741759531851103, "grad_norm": 9.682730208067689, "learning_rate": 1.177437836325654e-06, "loss": 0.6353, "step": 19021 }, { "epoch": 1.3742481983853199, "grad_norm": 7.158561499811399, "learning_rate": 1.1771896260022118e-06, "loss": 0.6251, "step": 19022 }, { "epoch": 1.3743204435855292, "grad_norm": 6.982017853258659, "learning_rate": 1.176941433787017e-06, "loss": 0.6776, "step": 19023 }, { "epoch": 1.3743926887857387, "grad_norm": 6.892063950784916, "learning_rate": 1.176693259683466e-06, "loss": 0.6246, "step": 19024 }, { "epoch": 1.3744649339859483, "grad_norm": 6.343610188864972, "learning_rate": 1.1764451036949565e-06, "loss": 0.552, "step": 19025 }, { "epoch": 1.3745371791861578, "grad_norm": 6.389558213413323, "learning_rate": 1.1761969658248862e-06, "loss": 0.6142, "step": 19026 }, { "epoch": 1.3746094243863674, "grad_norm": 6.875236923296887, "learning_rate": 1.1759488460766514e-06, "loss": 0.6098, "step": 19027 }, { "epoch": 1.374681669586577, "grad_norm": 6.248011463442945, "learning_rate": 1.1757007444536488e-06, "loss": 0.6044, "step": 19028 }, { "epoch": 1.3747539147867864, "grad_norm": 6.937148626553602, "learning_rate": 1.1754526609592748e-06, "loss": 0.6529, "step": 19029 }, { "epoch": 1.3748261599869958, "grad_norm": 7.242023157520452, "learning_rate": 1.1752045955969264e-06, "loss": 0.6006, "step": 19030 }, { "epoch": 1.3748984051872053, "grad_norm": 6.440664161885124, "learning_rate": 1.1749565483699977e-06, "loss": 0.6592, "step": 19031 }, { "epoch": 1.3749706503874148, "grad_norm": 5.41035085390076, "learning_rate": 1.1747085192818848e-06, "loss": 0.549, "step": 19032 }, { "epoch": 1.3750428955876244, "grad_norm": 7.96557075059989, "learning_rate": 1.174460508335983e-06, "loss": 0.6313, "step": 19033 }, { "epoch": 1.375115140787834, "grad_norm": 8.54541717314526, "learning_rate": 1.174212515535689e-06, "loss": 0.6269, "step": 19034 }, { "epoch": 1.3751873859880435, "grad_norm": 6.199892523049353, "learning_rate": 1.173964540884395e-06, "loss": 0.584, "step": 19035 }, { "epoch": 1.375259631188253, "grad_norm": 8.189617785802891, "learning_rate": 1.1737165843854969e-06, "loss": 0.6348, "step": 19036 }, { "epoch": 1.3753318763884623, "grad_norm": 7.819176834372497, "learning_rate": 1.1734686460423892e-06, "loss": 0.7232, "step": 19037 }, { "epoch": 1.375404121588672, "grad_norm": 8.207493256569915, "learning_rate": 1.1732207258584657e-06, "loss": 0.6475, "step": 19038 }, { "epoch": 1.3754763667888814, "grad_norm": 6.64436532990225, "learning_rate": 1.1729728238371201e-06, "loss": 0.6056, "step": 19039 }, { "epoch": 1.375548611989091, "grad_norm": 6.803942704198903, "learning_rate": 1.172724939981746e-06, "loss": 0.6477, "step": 19040 }, { "epoch": 1.3756208571893005, "grad_norm": 6.787961951846578, "learning_rate": 1.1724770742957384e-06, "loss": 0.6322, "step": 19041 }, { "epoch": 1.37569310238951, "grad_norm": 9.53321292863407, "learning_rate": 1.1722292267824875e-06, "loss": 0.6248, "step": 19042 }, { "epoch": 1.3757653475897196, "grad_norm": 7.039873726319211, "learning_rate": 1.1719813974453878e-06, "loss": 0.6448, "step": 19043 }, { "epoch": 1.375837592789929, "grad_norm": 6.402035997312782, "learning_rate": 1.1717335862878326e-06, "loss": 0.6724, "step": 19044 }, { "epoch": 1.3759098379901387, "grad_norm": 6.85006854169204, "learning_rate": 1.171485793313212e-06, "loss": 0.5797, "step": 19045 }, { "epoch": 1.375982083190348, "grad_norm": 7.6131910589834755, "learning_rate": 1.1712380185249198e-06, "loss": 0.641, "step": 19046 }, { "epoch": 1.3760543283905575, "grad_norm": 8.426254351141836, "learning_rate": 1.1709902619263462e-06, "loss": 0.6246, "step": 19047 }, { "epoch": 1.376126573590767, "grad_norm": 7.071467285270462, "learning_rate": 1.1707425235208857e-06, "loss": 0.6344, "step": 19048 }, { "epoch": 1.3761988187909766, "grad_norm": 6.115658233973317, "learning_rate": 1.1704948033119272e-06, "loss": 0.6872, "step": 19049 }, { "epoch": 1.3762710639911861, "grad_norm": 7.127021636591772, "learning_rate": 1.1702471013028627e-06, "loss": 0.6049, "step": 19050 }, { "epoch": 1.3763433091913955, "grad_norm": 8.057709919570772, "learning_rate": 1.1699994174970837e-06, "loss": 0.6035, "step": 19051 }, { "epoch": 1.3764155543916052, "grad_norm": 7.313061032414427, "learning_rate": 1.1697517518979792e-06, "loss": 0.6874, "step": 19052 }, { "epoch": 1.3764877995918146, "grad_norm": 6.439941239651649, "learning_rate": 1.1695041045089402e-06, "loss": 0.6364, "step": 19053 }, { "epoch": 1.376560044792024, "grad_norm": 6.078823490753641, "learning_rate": 1.1692564753333568e-06, "loss": 0.6126, "step": 19054 }, { "epoch": 1.3766322899922336, "grad_norm": 7.466251993017795, "learning_rate": 1.1690088643746193e-06, "loss": 0.6407, "step": 19055 }, { "epoch": 1.3767045351924432, "grad_norm": 7.057568203809016, "learning_rate": 1.1687612716361168e-06, "loss": 0.5931, "step": 19056 }, { "epoch": 1.3767767803926527, "grad_norm": 6.509207439784096, "learning_rate": 1.1685136971212392e-06, "loss": 0.5801, "step": 19057 }, { "epoch": 1.376849025592862, "grad_norm": 5.318452798539072, "learning_rate": 1.1682661408333749e-06, "loss": 0.5083, "step": 19058 }, { "epoch": 1.3769212707930718, "grad_norm": 5.97731178922314, "learning_rate": 1.1680186027759137e-06, "loss": 0.6385, "step": 19059 }, { "epoch": 1.3769935159932811, "grad_norm": 7.3727066871022995, "learning_rate": 1.1677710829522435e-06, "loss": 0.6466, "step": 19060 }, { "epoch": 1.3770657611934907, "grad_norm": 7.579621912401762, "learning_rate": 1.1675235813657521e-06, "loss": 0.6, "step": 19061 }, { "epoch": 1.3771380063937002, "grad_norm": 6.896720283684609, "learning_rate": 1.1672760980198294e-06, "loss": 0.5707, "step": 19062 }, { "epoch": 1.3772102515939098, "grad_norm": 6.191156713228466, "learning_rate": 1.1670286329178615e-06, "loss": 0.505, "step": 19063 }, { "epoch": 1.3772824967941193, "grad_norm": 7.724125646595511, "learning_rate": 1.1667811860632364e-06, "loss": 0.6612, "step": 19064 }, { "epoch": 1.3773547419943288, "grad_norm": 6.7397266905296025, "learning_rate": 1.1665337574593417e-06, "loss": 0.6655, "step": 19065 }, { "epoch": 1.3774269871945384, "grad_norm": 5.6840779835601625, "learning_rate": 1.1662863471095642e-06, "loss": 0.5559, "step": 19066 }, { "epoch": 1.3774992323947477, "grad_norm": 6.0589359464733406, "learning_rate": 1.1660389550172912e-06, "loss": 0.6971, "step": 19067 }, { "epoch": 1.3775714775949572, "grad_norm": 6.713370507196406, "learning_rate": 1.1657915811859093e-06, "loss": 0.6379, "step": 19068 }, { "epoch": 1.3776437227951668, "grad_norm": 7.008928871328332, "learning_rate": 1.1655442256188058e-06, "loss": 0.6156, "step": 19069 }, { "epoch": 1.3777159679953763, "grad_norm": 6.128942349718414, "learning_rate": 1.1652968883193645e-06, "loss": 0.6292, "step": 19070 }, { "epoch": 1.3777882131955859, "grad_norm": 7.457955832136029, "learning_rate": 1.1650495692909725e-06, "loss": 0.6845, "step": 19071 }, { "epoch": 1.3778604583957954, "grad_norm": 6.867932657861456, "learning_rate": 1.1648022685370155e-06, "loss": 0.6847, "step": 19072 }, { "epoch": 1.377932703596005, "grad_norm": 7.442004466604211, "learning_rate": 1.1645549860608797e-06, "loss": 0.6582, "step": 19073 }, { "epoch": 1.3780049487962143, "grad_norm": 5.963362734135578, "learning_rate": 1.164307721865948e-06, "loss": 0.6301, "step": 19074 }, { "epoch": 1.3780771939964238, "grad_norm": 6.5342283621944715, "learning_rate": 1.1640604759556057e-06, "loss": 0.636, "step": 19075 }, { "epoch": 1.3781494391966334, "grad_norm": 6.444937548279324, "learning_rate": 1.16381324833324e-06, "loss": 0.6402, "step": 19076 }, { "epoch": 1.378221684396843, "grad_norm": 6.858505715312385, "learning_rate": 1.1635660390022327e-06, "loss": 0.7035, "step": 19077 }, { "epoch": 1.3782939295970524, "grad_norm": 7.089025596302308, "learning_rate": 1.1633188479659685e-06, "loss": 0.6792, "step": 19078 }, { "epoch": 1.378366174797262, "grad_norm": 6.907934199246765, "learning_rate": 1.1630716752278315e-06, "loss": 0.5406, "step": 19079 }, { "epoch": 1.3784384199974715, "grad_norm": 7.2911370075952755, "learning_rate": 1.1628245207912062e-06, "loss": 0.5829, "step": 19080 }, { "epoch": 1.3785106651976808, "grad_norm": 6.30676522697443, "learning_rate": 1.162577384659474e-06, "loss": 0.5981, "step": 19081 }, { "epoch": 1.3785829103978904, "grad_norm": 6.6108913260434, "learning_rate": 1.162330266836019e-06, "loss": 0.6697, "step": 19082 }, { "epoch": 1.3786551555981, "grad_norm": 7.2906287044903255, "learning_rate": 1.162083167324224e-06, "loss": 0.6276, "step": 19083 }, { "epoch": 1.3787274007983095, "grad_norm": 6.812579513225768, "learning_rate": 1.1618360861274713e-06, "loss": 0.6767, "step": 19084 }, { "epoch": 1.378799645998519, "grad_norm": 6.984909412667699, "learning_rate": 1.1615890232491439e-06, "loss": 0.6217, "step": 19085 }, { "epoch": 1.3788718911987285, "grad_norm": 6.597543791133768, "learning_rate": 1.1613419786926233e-06, "loss": 0.6182, "step": 19086 }, { "epoch": 1.378944136398938, "grad_norm": 6.96080159098523, "learning_rate": 1.1610949524612928e-06, "loss": 0.6311, "step": 19087 }, { "epoch": 1.3790163815991474, "grad_norm": 6.083183321562031, "learning_rate": 1.1608479445585316e-06, "loss": 0.6466, "step": 19088 }, { "epoch": 1.379088626799357, "grad_norm": 6.645226748855239, "learning_rate": 1.1606009549877226e-06, "loss": 0.6155, "step": 19089 }, { "epoch": 1.3791608719995665, "grad_norm": 7.093889008222946, "learning_rate": 1.1603539837522461e-06, "loss": 0.6253, "step": 19090 }, { "epoch": 1.379233117199776, "grad_norm": 5.889564479405933, "learning_rate": 1.1601070308554849e-06, "loss": 0.5791, "step": 19091 }, { "epoch": 1.3793053623999856, "grad_norm": 6.734853351358635, "learning_rate": 1.159860096300817e-06, "loss": 0.5984, "step": 19092 }, { "epoch": 1.3793776076001951, "grad_norm": 6.364871787003273, "learning_rate": 1.1596131800916236e-06, "loss": 0.6818, "step": 19093 }, { "epoch": 1.3794498528004047, "grad_norm": 6.9321584672022825, "learning_rate": 1.159366282231285e-06, "loss": 0.6247, "step": 19094 }, { "epoch": 1.379522098000614, "grad_norm": 7.059407325052568, "learning_rate": 1.1591194027231812e-06, "loss": 0.6311, "step": 19095 }, { "epoch": 1.3795943432008235, "grad_norm": 6.273855535087375, "learning_rate": 1.1588725415706917e-06, "loss": 0.6231, "step": 19096 }, { "epoch": 1.379666588401033, "grad_norm": 6.252452521740657, "learning_rate": 1.1586256987771958e-06, "loss": 0.5909, "step": 19097 }, { "epoch": 1.3797388336012426, "grad_norm": 6.619694438693521, "learning_rate": 1.1583788743460733e-06, "loss": 0.6295, "step": 19098 }, { "epoch": 1.3798110788014522, "grad_norm": 6.793825644990549, "learning_rate": 1.1581320682807015e-06, "loss": 0.6474, "step": 19099 }, { "epoch": 1.3798833240016617, "grad_norm": 6.269681535387247, "learning_rate": 1.1578852805844599e-06, "loss": 0.6451, "step": 19100 }, { "epoch": 1.3799555692018712, "grad_norm": 6.606998963427675, "learning_rate": 1.1576385112607275e-06, "loss": 0.6596, "step": 19101 }, { "epoch": 1.3800278144020806, "grad_norm": 5.798275184672378, "learning_rate": 1.1573917603128807e-06, "loss": 0.6659, "step": 19102 }, { "epoch": 1.38010005960229, "grad_norm": 8.438926293655259, "learning_rate": 1.1571450277442972e-06, "loss": 0.6156, "step": 19103 }, { "epoch": 1.3801723048024996, "grad_norm": 7.43029333426367, "learning_rate": 1.1568983135583567e-06, "loss": 0.6066, "step": 19104 }, { "epoch": 1.3802445500027092, "grad_norm": 10.82900429330301, "learning_rate": 1.1566516177584363e-06, "loss": 0.5904, "step": 19105 }, { "epoch": 1.3803167952029187, "grad_norm": 6.579956669534323, "learning_rate": 1.1564049403479114e-06, "loss": 0.5998, "step": 19106 }, { "epoch": 1.3803890404031283, "grad_norm": 6.765136701128879, "learning_rate": 1.1561582813301595e-06, "loss": 0.6145, "step": 19107 }, { "epoch": 1.3804612856033378, "grad_norm": 6.921989629412016, "learning_rate": 1.1559116407085571e-06, "loss": 0.611, "step": 19108 }, { "epoch": 1.3805335308035471, "grad_norm": 6.7451120268593625, "learning_rate": 1.155665018486482e-06, "loss": 0.6244, "step": 19109 }, { "epoch": 1.3806057760037567, "grad_norm": 6.808904136284294, "learning_rate": 1.155418414667308e-06, "loss": 0.6514, "step": 19110 }, { "epoch": 1.3806780212039662, "grad_norm": 7.20951404346506, "learning_rate": 1.1551718292544118e-06, "loss": 0.64, "step": 19111 }, { "epoch": 1.3807502664041758, "grad_norm": 7.405211556945892, "learning_rate": 1.154925262251169e-06, "loss": 0.6198, "step": 19112 }, { "epoch": 1.3808225116043853, "grad_norm": 6.823568329496998, "learning_rate": 1.1546787136609552e-06, "loss": 0.5964, "step": 19113 }, { "epoch": 1.3808947568045948, "grad_norm": 8.256942140154512, "learning_rate": 1.1544321834871452e-06, "loss": 0.6157, "step": 19114 }, { "epoch": 1.3809670020048044, "grad_norm": 6.190405577651622, "learning_rate": 1.1541856717331137e-06, "loss": 0.589, "step": 19115 }, { "epoch": 1.3810392472050137, "grad_norm": 5.542553402889047, "learning_rate": 1.1539391784022366e-06, "loss": 0.6551, "step": 19116 }, { "epoch": 1.3811114924052235, "grad_norm": 5.732272477290857, "learning_rate": 1.1536927034978857e-06, "loss": 0.5573, "step": 19117 }, { "epoch": 1.3811837376054328, "grad_norm": 6.567473933498899, "learning_rate": 1.1534462470234368e-06, "loss": 0.5962, "step": 19118 }, { "epoch": 1.3812559828056423, "grad_norm": 5.757966535104625, "learning_rate": 1.1531998089822638e-06, "loss": 0.5606, "step": 19119 }, { "epoch": 1.3813282280058519, "grad_norm": 6.6848846740589885, "learning_rate": 1.1529533893777386e-06, "loss": 0.6097, "step": 19120 }, { "epoch": 1.3814004732060614, "grad_norm": 7.6474037601816365, "learning_rate": 1.152706988213236e-06, "loss": 0.689, "step": 19121 }, { "epoch": 1.381472718406271, "grad_norm": 7.224471561945914, "learning_rate": 1.1524606054921284e-06, "loss": 0.6454, "step": 19122 }, { "epoch": 1.3815449636064803, "grad_norm": 6.035007077444457, "learning_rate": 1.152214241217789e-06, "loss": 0.6252, "step": 19123 }, { "epoch": 1.38161720880669, "grad_norm": 7.766673947877977, "learning_rate": 1.15196789539359e-06, "loss": 0.6273, "step": 19124 }, { "epoch": 1.3816894540068994, "grad_norm": 7.307108660414026, "learning_rate": 1.1517215680229038e-06, "loss": 0.6275, "step": 19125 }, { "epoch": 1.381761699207109, "grad_norm": 6.890081613442077, "learning_rate": 1.1514752591091036e-06, "loss": 0.6084, "step": 19126 }, { "epoch": 1.3818339444073184, "grad_norm": 5.743012868509118, "learning_rate": 1.151228968655559e-06, "loss": 0.5687, "step": 19127 }, { "epoch": 1.381906189607528, "grad_norm": 6.197988232966107, "learning_rate": 1.1509826966656423e-06, "loss": 0.5991, "step": 19128 }, { "epoch": 1.3819784348077375, "grad_norm": 6.61480417758994, "learning_rate": 1.1507364431427253e-06, "loss": 0.6698, "step": 19129 }, { "epoch": 1.3820506800079468, "grad_norm": 7.448839278975209, "learning_rate": 1.1504902080901797e-06, "loss": 0.6941, "step": 19130 }, { "epoch": 1.3821229252081566, "grad_norm": 6.2906802922115155, "learning_rate": 1.1502439915113744e-06, "loss": 0.5908, "step": 19131 }, { "epoch": 1.382195170408366, "grad_norm": 6.550692735687029, "learning_rate": 1.14999779340968e-06, "loss": 0.6207, "step": 19132 }, { "epoch": 1.3822674156085755, "grad_norm": 7.343114687446212, "learning_rate": 1.1497516137884692e-06, "loss": 0.6206, "step": 19133 }, { "epoch": 1.382339660808785, "grad_norm": 8.108917282211362, "learning_rate": 1.1495054526511096e-06, "loss": 0.6351, "step": 19134 }, { "epoch": 1.3824119060089946, "grad_norm": 6.284615445386195, "learning_rate": 1.1492593100009717e-06, "loss": 0.6326, "step": 19135 }, { "epoch": 1.382484151209204, "grad_norm": 7.068936276194905, "learning_rate": 1.1490131858414252e-06, "loss": 0.5587, "step": 19136 }, { "epoch": 1.3825563964094134, "grad_norm": 8.539029441553117, "learning_rate": 1.14876708017584e-06, "loss": 0.6126, "step": 19137 }, { "epoch": 1.3826286416096232, "grad_norm": 6.413059393146869, "learning_rate": 1.1485209930075836e-06, "loss": 0.6617, "step": 19138 }, { "epoch": 1.3827008868098325, "grad_norm": 5.59077707065701, "learning_rate": 1.148274924340025e-06, "loss": 0.5495, "step": 19139 }, { "epoch": 1.382773132010042, "grad_norm": 7.56746718531571, "learning_rate": 1.1480288741765335e-06, "loss": 0.57, "step": 19140 }, { "epoch": 1.3828453772102516, "grad_norm": 6.675203801733527, "learning_rate": 1.147782842520477e-06, "loss": 0.5967, "step": 19141 }, { "epoch": 1.3829176224104611, "grad_norm": 7.0795107531285035, "learning_rate": 1.1475368293752234e-06, "loss": 0.5892, "step": 19142 }, { "epoch": 1.3829898676106707, "grad_norm": 7.154583199634906, "learning_rate": 1.1472908347441405e-06, "loss": 0.6485, "step": 19143 }, { "epoch": 1.3830621128108802, "grad_norm": 6.517511986478927, "learning_rate": 1.1470448586305966e-06, "loss": 0.6257, "step": 19144 }, { "epoch": 1.3831343580110897, "grad_norm": 8.02062047370421, "learning_rate": 1.1467989010379571e-06, "loss": 0.7072, "step": 19145 }, { "epoch": 1.383206603211299, "grad_norm": 6.2054940031196475, "learning_rate": 1.14655296196959e-06, "loss": 0.5896, "step": 19146 }, { "epoch": 1.3832788484115086, "grad_norm": 6.3990110110036, "learning_rate": 1.146307041428862e-06, "loss": 0.6221, "step": 19147 }, { "epoch": 1.3833510936117182, "grad_norm": 7.5400374814829645, "learning_rate": 1.1460611394191406e-06, "loss": 0.6908, "step": 19148 }, { "epoch": 1.3834233388119277, "grad_norm": 7.334602795164971, "learning_rate": 1.1458152559437898e-06, "loss": 0.5652, "step": 19149 }, { "epoch": 1.3834955840121372, "grad_norm": 7.736264762821187, "learning_rate": 1.145569391006177e-06, "loss": 0.665, "step": 19150 }, { "epoch": 1.3835678292123468, "grad_norm": 8.034096536221128, "learning_rate": 1.1453235446096671e-06, "loss": 0.6742, "step": 19151 }, { "epoch": 1.3836400744125563, "grad_norm": 6.534383359855365, "learning_rate": 1.1450777167576266e-06, "loss": 0.6636, "step": 19152 }, { "epoch": 1.3837123196127656, "grad_norm": 6.518069753146614, "learning_rate": 1.1448319074534198e-06, "loss": 0.6557, "step": 19153 }, { "epoch": 1.3837845648129752, "grad_norm": 6.494182624394265, "learning_rate": 1.1445861167004118e-06, "loss": 0.5401, "step": 19154 }, { "epoch": 1.3838568100131847, "grad_norm": 8.493581704780455, "learning_rate": 1.144340344501969e-06, "loss": 0.6912, "step": 19155 }, { "epoch": 1.3839290552133943, "grad_norm": 6.735626458455204, "learning_rate": 1.1440945908614527e-06, "loss": 0.6299, "step": 19156 }, { "epoch": 1.3840013004136038, "grad_norm": 7.361307396660919, "learning_rate": 1.1438488557822288e-06, "loss": 0.64, "step": 19157 }, { "epoch": 1.3840735456138133, "grad_norm": 6.764054818786786, "learning_rate": 1.1436031392676622e-06, "loss": 0.5838, "step": 19158 }, { "epoch": 1.384145790814023, "grad_norm": 6.74541091334639, "learning_rate": 1.1433574413211142e-06, "loss": 0.6443, "step": 19159 }, { "epoch": 1.3842180360142322, "grad_norm": 7.364563103359842, "learning_rate": 1.143111761945949e-06, "loss": 0.6939, "step": 19160 }, { "epoch": 1.3842902812144418, "grad_norm": 6.696369599582223, "learning_rate": 1.142866101145531e-06, "loss": 0.5522, "step": 19161 }, { "epoch": 1.3843625264146513, "grad_norm": 7.4168905445841515, "learning_rate": 1.1426204589232229e-06, "loss": 0.6504, "step": 19162 }, { "epoch": 1.3844347716148608, "grad_norm": 6.624121265794076, "learning_rate": 1.1423748352823861e-06, "loss": 0.6468, "step": 19163 }, { "epoch": 1.3845070168150704, "grad_norm": 7.357031187778751, "learning_rate": 1.1421292302263837e-06, "loss": 0.6676, "step": 19164 }, { "epoch": 1.38457926201528, "grad_norm": 6.093883649876524, "learning_rate": 1.1418836437585778e-06, "loss": 0.5629, "step": 19165 }, { "epoch": 1.3846515072154895, "grad_norm": 8.204054076851179, "learning_rate": 1.141638075882331e-06, "loss": 0.66, "step": 19166 }, { "epoch": 1.3847237524156988, "grad_norm": 6.190322078281657, "learning_rate": 1.1413925266010037e-06, "loss": 0.6475, "step": 19167 }, { "epoch": 1.3847959976159083, "grad_norm": 6.7124070145117685, "learning_rate": 1.1411469959179579e-06, "loss": 0.6083, "step": 19168 }, { "epoch": 1.3848682428161179, "grad_norm": 7.865128642665282, "learning_rate": 1.1409014838365542e-06, "loss": 0.6135, "step": 19169 }, { "epoch": 1.3849404880163274, "grad_norm": 8.006678654484135, "learning_rate": 1.1406559903601541e-06, "loss": 0.6703, "step": 19170 }, { "epoch": 1.385012733216537, "grad_norm": 6.530417357305405, "learning_rate": 1.1404105154921184e-06, "loss": 0.5827, "step": 19171 }, { "epoch": 1.3850849784167465, "grad_norm": 8.680164341227801, "learning_rate": 1.1401650592358067e-06, "loss": 0.6935, "step": 19172 }, { "epoch": 1.385157223616956, "grad_norm": 7.3194330719246805, "learning_rate": 1.1399196215945805e-06, "loss": 0.6182, "step": 19173 }, { "epoch": 1.3852294688171654, "grad_norm": 7.712513740317101, "learning_rate": 1.1396742025717975e-06, "loss": 0.58, "step": 19174 }, { "epoch": 1.385301714017375, "grad_norm": 6.436310889777858, "learning_rate": 1.1394288021708185e-06, "loss": 0.693, "step": 19175 }, { "epoch": 1.3853739592175844, "grad_norm": 6.9119004829448185, "learning_rate": 1.139183420395004e-06, "loss": 0.6944, "step": 19176 }, { "epoch": 1.385446204417794, "grad_norm": 6.576139970593828, "learning_rate": 1.1389380572477107e-06, "loss": 0.6536, "step": 19177 }, { "epoch": 1.3855184496180035, "grad_norm": 6.423812982176588, "learning_rate": 1.1386927127322988e-06, "loss": 0.6951, "step": 19178 }, { "epoch": 1.385590694818213, "grad_norm": 5.408075101748527, "learning_rate": 1.1384473868521265e-06, "loss": 0.5726, "step": 19179 }, { "epoch": 1.3856629400184226, "grad_norm": 6.953409184852577, "learning_rate": 1.1382020796105526e-06, "loss": 0.7119, "step": 19180 }, { "epoch": 1.385735185218632, "grad_norm": 6.431567968102359, "learning_rate": 1.1379567910109346e-06, "loss": 0.5976, "step": 19181 }, { "epoch": 1.3858074304188415, "grad_norm": 5.543192067933598, "learning_rate": 1.1377115210566306e-06, "loss": 0.6107, "step": 19182 }, { "epoch": 1.385879675619051, "grad_norm": 8.578557238085514, "learning_rate": 1.137466269750999e-06, "loss": 0.7718, "step": 19183 }, { "epoch": 1.3859519208192606, "grad_norm": 7.162560757950199, "learning_rate": 1.137221037097395e-06, "loss": 0.645, "step": 19184 }, { "epoch": 1.38602416601947, "grad_norm": 6.3848083438651955, "learning_rate": 1.1369758230991775e-06, "loss": 0.5997, "step": 19185 }, { "epoch": 1.3860964112196796, "grad_norm": 7.81077251408884, "learning_rate": 1.1367306277597022e-06, "loss": 0.7069, "step": 19186 }, { "epoch": 1.3861686564198892, "grad_norm": 7.519113726233122, "learning_rate": 1.1364854510823275e-06, "loss": 0.6406, "step": 19187 }, { "epoch": 1.3862409016200985, "grad_norm": 6.998779735471358, "learning_rate": 1.1362402930704062e-06, "loss": 0.6534, "step": 19188 }, { "epoch": 1.3863131468203083, "grad_norm": 6.1972296154997, "learning_rate": 1.1359951537272971e-06, "loss": 0.6023, "step": 19189 }, { "epoch": 1.3863853920205176, "grad_norm": 6.501667102356386, "learning_rate": 1.1357500330563553e-06, "loss": 0.6134, "step": 19190 }, { "epoch": 1.3864576372207271, "grad_norm": 6.827034660245784, "learning_rate": 1.1355049310609376e-06, "loss": 0.6569, "step": 19191 }, { "epoch": 1.3865298824209367, "grad_norm": 5.729848962094723, "learning_rate": 1.1352598477443966e-06, "loss": 0.6219, "step": 19192 }, { "epoch": 1.3866021276211462, "grad_norm": 6.679423651587225, "learning_rate": 1.1350147831100887e-06, "loss": 0.6314, "step": 19193 }, { "epoch": 1.3866743728213557, "grad_norm": 8.350456599925904, "learning_rate": 1.1347697371613697e-06, "loss": 0.5734, "step": 19194 }, { "epoch": 1.386746618021565, "grad_norm": 7.28941810451906, "learning_rate": 1.1345247099015919e-06, "loss": 0.642, "step": 19195 }, { "epoch": 1.3868188632217748, "grad_norm": 7.088933309311577, "learning_rate": 1.1342797013341105e-06, "loss": 0.6211, "step": 19196 }, { "epoch": 1.3868911084219842, "grad_norm": 6.131912379206795, "learning_rate": 1.1340347114622795e-06, "loss": 0.6635, "step": 19197 }, { "epoch": 1.3869633536221937, "grad_norm": 6.576765268743175, "learning_rate": 1.1337897402894529e-06, "loss": 0.6275, "step": 19198 }, { "epoch": 1.3870355988224032, "grad_norm": 6.061532798773382, "learning_rate": 1.133544787818984e-06, "loss": 0.5991, "step": 19199 }, { "epoch": 1.3871078440226128, "grad_norm": 7.7396801565556475, "learning_rate": 1.133299854054226e-06, "loss": 0.6815, "step": 19200 }, { "epoch": 1.3871800892228223, "grad_norm": 6.866256147291958, "learning_rate": 1.1330549389985326e-06, "loss": 0.5897, "step": 19201 }, { "epoch": 1.3872523344230316, "grad_norm": 6.737869948950826, "learning_rate": 1.132810042655255e-06, "loss": 0.6137, "step": 19202 }, { "epoch": 1.3873245796232414, "grad_norm": 8.16210259818156, "learning_rate": 1.1325651650277462e-06, "loss": 0.6429, "step": 19203 }, { "epoch": 1.3873968248234507, "grad_norm": 6.772331314661656, "learning_rate": 1.1323203061193585e-06, "loss": 0.6094, "step": 19204 }, { "epoch": 1.3874690700236603, "grad_norm": 6.871594504564151, "learning_rate": 1.1320754659334449e-06, "loss": 0.647, "step": 19205 }, { "epoch": 1.3875413152238698, "grad_norm": 7.414251327119901, "learning_rate": 1.1318306444733551e-06, "loss": 0.7119, "step": 19206 }, { "epoch": 1.3876135604240794, "grad_norm": 6.159679096830267, "learning_rate": 1.1315858417424414e-06, "loss": 0.5813, "step": 19207 }, { "epoch": 1.387685805624289, "grad_norm": 8.52270392767281, "learning_rate": 1.131341057744055e-06, "loss": 0.6605, "step": 19208 }, { "epoch": 1.3877580508244982, "grad_norm": 7.1122584940530995, "learning_rate": 1.1310962924815469e-06, "loss": 0.611, "step": 19209 }, { "epoch": 1.387830296024708, "grad_norm": 7.525682092720457, "learning_rate": 1.1308515459582678e-06, "loss": 0.6278, "step": 19210 }, { "epoch": 1.3879025412249173, "grad_norm": 6.839373857270963, "learning_rate": 1.1306068181775675e-06, "loss": 0.6258, "step": 19211 }, { "epoch": 1.3879747864251268, "grad_norm": 7.505388739961406, "learning_rate": 1.130362109142798e-06, "loss": 0.6164, "step": 19212 }, { "epoch": 1.3880470316253364, "grad_norm": 6.4651355320294535, "learning_rate": 1.1301174188573067e-06, "loss": 0.552, "step": 19213 }, { "epoch": 1.388119276825546, "grad_norm": 7.6586688521992174, "learning_rate": 1.1298727473244442e-06, "loss": 0.6208, "step": 19214 }, { "epoch": 1.3881915220257555, "grad_norm": 6.533294211808974, "learning_rate": 1.1296280945475602e-06, "loss": 0.6017, "step": 19215 }, { "epoch": 1.388263767225965, "grad_norm": 5.995168966245896, "learning_rate": 1.1293834605300032e-06, "loss": 0.664, "step": 19216 }, { "epoch": 1.3883360124261745, "grad_norm": 6.762298682525025, "learning_rate": 1.1291388452751226e-06, "loss": 0.6518, "step": 19217 }, { "epoch": 1.3884082576263839, "grad_norm": 6.922062925108393, "learning_rate": 1.128894248786267e-06, "loss": 0.6245, "step": 19218 }, { "epoch": 1.3884805028265934, "grad_norm": 6.509249048942001, "learning_rate": 1.1286496710667851e-06, "loss": 0.606, "step": 19219 }, { "epoch": 1.388552748026803, "grad_norm": 6.479860062206687, "learning_rate": 1.1284051121200237e-06, "loss": 0.6301, "step": 19220 }, { "epoch": 1.3886249932270125, "grad_norm": 5.543140626464454, "learning_rate": 1.1281605719493313e-06, "loss": 0.5895, "step": 19221 }, { "epoch": 1.388697238427222, "grad_norm": 7.055057891889347, "learning_rate": 1.1279160505580556e-06, "loss": 0.578, "step": 19222 }, { "epoch": 1.3887694836274316, "grad_norm": 6.518825559095893, "learning_rate": 1.1276715479495445e-06, "loss": 0.6286, "step": 19223 }, { "epoch": 1.3888417288276411, "grad_norm": 6.6647075953520245, "learning_rate": 1.1274270641271438e-06, "loss": 0.5661, "step": 19224 }, { "epoch": 1.3889139740278504, "grad_norm": 7.059519991455908, "learning_rate": 1.1271825990942007e-06, "loss": 0.6101, "step": 19225 }, { "epoch": 1.38898621922806, "grad_norm": 5.900696742410886, "learning_rate": 1.1269381528540615e-06, "loss": 0.6252, "step": 19226 }, { "epoch": 1.3890584644282695, "grad_norm": 7.068022872533744, "learning_rate": 1.1266937254100736e-06, "loss": 0.636, "step": 19227 }, { "epoch": 1.389130709628479, "grad_norm": 7.8406876745109875, "learning_rate": 1.1264493167655815e-06, "loss": 0.6386, "step": 19228 }, { "epoch": 1.3892029548286886, "grad_norm": 8.642120618187233, "learning_rate": 1.1262049269239323e-06, "loss": 0.6928, "step": 19229 }, { "epoch": 1.3892752000288981, "grad_norm": 7.560875079659421, "learning_rate": 1.1259605558884718e-06, "loss": 0.7074, "step": 19230 }, { "epoch": 1.3893474452291077, "grad_norm": 5.924118542996469, "learning_rate": 1.1257162036625435e-06, "loss": 0.579, "step": 19231 }, { "epoch": 1.389419690429317, "grad_norm": 6.5950587994063365, "learning_rate": 1.1254718702494932e-06, "loss": 0.6022, "step": 19232 }, { "epoch": 1.3894919356295266, "grad_norm": 7.768146312824648, "learning_rate": 1.1252275556526668e-06, "loss": 0.7072, "step": 19233 }, { "epoch": 1.389564180829736, "grad_norm": 8.080501361815415, "learning_rate": 1.1249832598754068e-06, "loss": 0.6944, "step": 19234 }, { "epoch": 1.3896364260299456, "grad_norm": 8.123698438418266, "learning_rate": 1.124738982921058e-06, "loss": 0.6478, "step": 19235 }, { "epoch": 1.3897086712301552, "grad_norm": 7.3581269779265845, "learning_rate": 1.1244947247929653e-06, "loss": 0.611, "step": 19236 }, { "epoch": 1.3897809164303647, "grad_norm": 7.047045788101537, "learning_rate": 1.1242504854944714e-06, "loss": 0.6554, "step": 19237 }, { "epoch": 1.3898531616305743, "grad_norm": 7.035280420371438, "learning_rate": 1.1240062650289202e-06, "loss": 0.5835, "step": 19238 }, { "epoch": 1.3899254068307836, "grad_norm": 7.920516219221767, "learning_rate": 1.1237620633996548e-06, "loss": 0.6192, "step": 19239 }, { "epoch": 1.3899976520309931, "grad_norm": 6.605906484978519, "learning_rate": 1.1235178806100183e-06, "loss": 0.5591, "step": 19240 }, { "epoch": 1.3900698972312027, "grad_norm": 7.802681818893715, "learning_rate": 1.123273716663354e-06, "loss": 0.5672, "step": 19241 }, { "epoch": 1.3901421424314122, "grad_norm": 5.8678662431966595, "learning_rate": 1.1230295715630028e-06, "loss": 0.5151, "step": 19242 }, { "epoch": 1.3902143876316218, "grad_norm": 6.942979452949722, "learning_rate": 1.1227854453123075e-06, "loss": 0.6139, "step": 19243 }, { "epoch": 1.3902866328318313, "grad_norm": 7.713417836976239, "learning_rate": 1.1225413379146111e-06, "loss": 0.6288, "step": 19244 }, { "epoch": 1.3903588780320408, "grad_norm": 8.521737968593678, "learning_rate": 1.1222972493732526e-06, "loss": 0.6313, "step": 19245 }, { "epoch": 1.3904311232322502, "grad_norm": 6.625137615573957, "learning_rate": 1.1220531796915756e-06, "loss": 0.6917, "step": 19246 }, { "epoch": 1.3905033684324597, "grad_norm": 7.9478999213112695, "learning_rate": 1.1218091288729207e-06, "loss": 0.614, "step": 19247 }, { "epoch": 1.3905756136326692, "grad_norm": 7.941541949237197, "learning_rate": 1.12156509692063e-06, "loss": 0.5952, "step": 19248 }, { "epoch": 1.3906478588328788, "grad_norm": 7.004495267253352, "learning_rate": 1.1213210838380418e-06, "loss": 0.6931, "step": 19249 }, { "epoch": 1.3907201040330883, "grad_norm": 8.017095895592186, "learning_rate": 1.1210770896284973e-06, "loss": 0.5865, "step": 19250 }, { "epoch": 1.3907923492332979, "grad_norm": 5.718247355066122, "learning_rate": 1.1208331142953377e-06, "loss": 0.546, "step": 19251 }, { "epoch": 1.3908645944335074, "grad_norm": 6.085133569234115, "learning_rate": 1.1205891578419013e-06, "loss": 0.6759, "step": 19252 }, { "epoch": 1.3909368396337167, "grad_norm": 7.034027771618089, "learning_rate": 1.1203452202715279e-06, "loss": 0.6299, "step": 19253 }, { "epoch": 1.3910090848339263, "grad_norm": 8.042538083418828, "learning_rate": 1.1201013015875574e-06, "loss": 0.6222, "step": 19254 }, { "epoch": 1.3910813300341358, "grad_norm": 7.718746540516684, "learning_rate": 1.1198574017933285e-06, "loss": 0.6471, "step": 19255 }, { "epoch": 1.3911535752343454, "grad_norm": 7.857612427390526, "learning_rate": 1.1196135208921804e-06, "loss": 0.7007, "step": 19256 }, { "epoch": 1.391225820434555, "grad_norm": 7.921532136317996, "learning_rate": 1.1193696588874513e-06, "loss": 0.6703, "step": 19257 }, { "epoch": 1.3912980656347644, "grad_norm": 7.98356728844756, "learning_rate": 1.1191258157824805e-06, "loss": 0.6119, "step": 19258 }, { "epoch": 1.391370310834974, "grad_norm": 9.463289505589563, "learning_rate": 1.1188819915806042e-06, "loss": 0.5806, "step": 19259 }, { "epoch": 1.3914425560351833, "grad_norm": 6.37122756939091, "learning_rate": 1.118638186285161e-06, "loss": 0.6204, "step": 19260 }, { "epoch": 1.391514801235393, "grad_norm": 7.65685833440992, "learning_rate": 1.1183943998994883e-06, "loss": 0.6435, "step": 19261 }, { "epoch": 1.3915870464356024, "grad_norm": 8.96600448640281, "learning_rate": 1.1181506324269247e-06, "loss": 0.6293, "step": 19262 }, { "epoch": 1.391659291635812, "grad_norm": 6.219576526712318, "learning_rate": 1.1179068838708051e-06, "loss": 0.6361, "step": 19263 }, { "epoch": 1.3917315368360215, "grad_norm": 6.908460444564428, "learning_rate": 1.117663154234467e-06, "loss": 0.6816, "step": 19264 }, { "epoch": 1.391803782036231, "grad_norm": 7.37457610383008, "learning_rate": 1.1174194435212471e-06, "loss": 0.6451, "step": 19265 }, { "epoch": 1.3918760272364405, "grad_norm": 7.15465091346905, "learning_rate": 1.1171757517344816e-06, "loss": 0.5579, "step": 19266 }, { "epoch": 1.3919482724366499, "grad_norm": 7.83159190631824, "learning_rate": 1.1169320788775062e-06, "loss": 0.6064, "step": 19267 }, { "epoch": 1.3920205176368596, "grad_norm": 6.72897455120334, "learning_rate": 1.116688424953657e-06, "loss": 0.564, "step": 19268 }, { "epoch": 1.392092762837069, "grad_norm": 5.824871719977979, "learning_rate": 1.1164447899662697e-06, "loss": 0.604, "step": 19269 }, { "epoch": 1.3921650080372785, "grad_norm": 7.725115788156815, "learning_rate": 1.1162011739186785e-06, "loss": 0.6672, "step": 19270 }, { "epoch": 1.392237253237488, "grad_norm": 6.539348345380185, "learning_rate": 1.1159575768142183e-06, "loss": 0.6369, "step": 19271 }, { "epoch": 1.3923094984376976, "grad_norm": 6.404197103427625, "learning_rate": 1.1157139986562245e-06, "loss": 0.5192, "step": 19272 }, { "epoch": 1.3923817436379071, "grad_norm": 6.951708840158644, "learning_rate": 1.1154704394480313e-06, "loss": 0.62, "step": 19273 }, { "epoch": 1.3924539888381164, "grad_norm": 7.730950847934786, "learning_rate": 1.1152268991929727e-06, "loss": 0.6432, "step": 19274 }, { "epoch": 1.3925262340383262, "grad_norm": 7.519702463554051, "learning_rate": 1.1149833778943826e-06, "loss": 0.6733, "step": 19275 }, { "epoch": 1.3925984792385355, "grad_norm": 8.573768051224118, "learning_rate": 1.1147398755555954e-06, "loss": 0.6522, "step": 19276 }, { "epoch": 1.392670724438745, "grad_norm": 6.981178320206797, "learning_rate": 1.1144963921799432e-06, "loss": 0.6048, "step": 19277 }, { "epoch": 1.3927429696389546, "grad_norm": 7.011551725900656, "learning_rate": 1.1142529277707592e-06, "loss": 0.5737, "step": 19278 }, { "epoch": 1.3928152148391642, "grad_norm": 6.489596036732233, "learning_rate": 1.1140094823313768e-06, "loss": 0.7077, "step": 19279 }, { "epoch": 1.3928874600393737, "grad_norm": 7.835903422470766, "learning_rate": 1.1137660558651294e-06, "loss": 0.6077, "step": 19280 }, { "epoch": 1.392959705239583, "grad_norm": 7.022284866139435, "learning_rate": 1.1135226483753474e-06, "loss": 0.6029, "step": 19281 }, { "epoch": 1.3930319504397928, "grad_norm": 6.407905774017858, "learning_rate": 1.113279259865364e-06, "loss": 0.5583, "step": 19282 }, { "epoch": 1.393104195640002, "grad_norm": 7.786646430934235, "learning_rate": 1.1130358903385107e-06, "loss": 0.629, "step": 19283 }, { "epoch": 1.3931764408402116, "grad_norm": 6.062397749520289, "learning_rate": 1.1127925397981192e-06, "loss": 0.6201, "step": 19284 }, { "epoch": 1.3932486860404212, "grad_norm": 5.7514447594747065, "learning_rate": 1.1125492082475205e-06, "loss": 0.5622, "step": 19285 }, { "epoch": 1.3933209312406307, "grad_norm": 6.065038719714068, "learning_rate": 1.1123058956900462e-06, "loss": 0.6288, "step": 19286 }, { "epoch": 1.3933931764408403, "grad_norm": 6.499080299455418, "learning_rate": 1.1120626021290272e-06, "loss": 0.5992, "step": 19287 }, { "epoch": 1.3934654216410496, "grad_norm": 5.991778462958216, "learning_rate": 1.111819327567793e-06, "loss": 0.6049, "step": 19288 }, { "epoch": 1.3935376668412593, "grad_norm": 6.677848339642465, "learning_rate": 1.1115760720096743e-06, "loss": 0.5969, "step": 19289 }, { "epoch": 1.3936099120414687, "grad_norm": 6.422040134357982, "learning_rate": 1.111332835458002e-06, "loss": 0.622, "step": 19290 }, { "epoch": 1.3936821572416782, "grad_norm": 7.358959534533955, "learning_rate": 1.111089617916104e-06, "loss": 0.6117, "step": 19291 }, { "epoch": 1.3937544024418878, "grad_norm": 7.609634676973146, "learning_rate": 1.1108464193873106e-06, "loss": 0.5979, "step": 19292 }, { "epoch": 1.3938266476420973, "grad_norm": 7.026595138870993, "learning_rate": 1.1106032398749503e-06, "loss": 0.6159, "step": 19293 }, { "epoch": 1.3938988928423068, "grad_norm": 6.732234322525374, "learning_rate": 1.1103600793823546e-06, "loss": 0.5623, "step": 19294 }, { "epoch": 1.3939711380425164, "grad_norm": 7.526088607976086, "learning_rate": 1.1101169379128496e-06, "loss": 0.6566, "step": 19295 }, { "epoch": 1.394043383242726, "grad_norm": 8.351913287768632, "learning_rate": 1.1098738154697646e-06, "loss": 0.6363, "step": 19296 }, { "epoch": 1.3941156284429352, "grad_norm": 6.680317381668645, "learning_rate": 1.1096307120564276e-06, "loss": 0.6824, "step": 19297 }, { "epoch": 1.3941878736431448, "grad_norm": 6.233077676735527, "learning_rate": 1.1093876276761676e-06, "loss": 0.615, "step": 19298 }, { "epoch": 1.3942601188433543, "grad_norm": 6.218332861323745, "learning_rate": 1.1091445623323103e-06, "loss": 0.6119, "step": 19299 }, { "epoch": 1.3943323640435639, "grad_norm": 5.335667437174291, "learning_rate": 1.1089015160281838e-06, "loss": 0.6224, "step": 19300 }, { "epoch": 1.3944046092437734, "grad_norm": 6.906463481658781, "learning_rate": 1.1086584887671157e-06, "loss": 0.6217, "step": 19301 }, { "epoch": 1.394476854443983, "grad_norm": 7.157877774410404, "learning_rate": 1.1084154805524324e-06, "loss": 0.6839, "step": 19302 }, { "epoch": 1.3945490996441925, "grad_norm": 6.598212445102319, "learning_rate": 1.108172491387461e-06, "loss": 0.5969, "step": 19303 }, { "epoch": 1.3946213448444018, "grad_norm": 6.739426419899417, "learning_rate": 1.107929521275527e-06, "loss": 0.5587, "step": 19304 }, { "epoch": 1.3946935900446114, "grad_norm": 8.297455675742894, "learning_rate": 1.1076865702199581e-06, "loss": 0.6943, "step": 19305 }, { "epoch": 1.394765835244821, "grad_norm": 6.885926043607159, "learning_rate": 1.1074436382240782e-06, "loss": 0.7106, "step": 19306 }, { "epoch": 1.3948380804450304, "grad_norm": 7.878732674634266, "learning_rate": 1.1072007252912137e-06, "loss": 0.6151, "step": 19307 }, { "epoch": 1.39491032564524, "grad_norm": 8.106651833838562, "learning_rate": 1.1069578314246906e-06, "loss": 0.6415, "step": 19308 }, { "epoch": 1.3949825708454495, "grad_norm": 7.2032399199414625, "learning_rate": 1.1067149566278324e-06, "loss": 0.5644, "step": 19309 }, { "epoch": 1.395054816045659, "grad_norm": 6.971783307138099, "learning_rate": 1.1064721009039645e-06, "loss": 0.6248, "step": 19310 }, { "epoch": 1.3951270612458684, "grad_norm": 6.761399427396437, "learning_rate": 1.1062292642564114e-06, "loss": 0.5968, "step": 19311 }, { "epoch": 1.395199306446078, "grad_norm": 7.585153745477365, "learning_rate": 1.1059864466884976e-06, "loss": 0.6913, "step": 19312 }, { "epoch": 1.3952715516462875, "grad_norm": 7.17624027816999, "learning_rate": 1.105743648203547e-06, "loss": 0.6167, "step": 19313 }, { "epoch": 1.395343796846497, "grad_norm": 7.0186981063378235, "learning_rate": 1.1055008688048831e-06, "loss": 0.6585, "step": 19314 }, { "epoch": 1.3954160420467066, "grad_norm": 6.717403117177938, "learning_rate": 1.1052581084958308e-06, "loss": 0.5806, "step": 19315 }, { "epoch": 1.395488287246916, "grad_norm": 6.101101710153398, "learning_rate": 1.105015367279711e-06, "loss": 0.6787, "step": 19316 }, { "epoch": 1.3955605324471256, "grad_norm": 7.417767675164466, "learning_rate": 1.1047726451598476e-06, "loss": 0.7041, "step": 19317 }, { "epoch": 1.395632777647335, "grad_norm": 7.352188521515767, "learning_rate": 1.1045299421395637e-06, "loss": 0.6312, "step": 19318 }, { "epoch": 1.3957050228475445, "grad_norm": 7.888243665262793, "learning_rate": 1.1042872582221817e-06, "loss": 0.6807, "step": 19319 }, { "epoch": 1.395777268047754, "grad_norm": 7.278246608060498, "learning_rate": 1.104044593411023e-06, "loss": 0.6023, "step": 19320 }, { "epoch": 1.3958495132479636, "grad_norm": 7.045156332735672, "learning_rate": 1.1038019477094097e-06, "loss": 0.6401, "step": 19321 }, { "epoch": 1.3959217584481731, "grad_norm": 8.23948878359811, "learning_rate": 1.1035593211206626e-06, "loss": 0.6243, "step": 19322 }, { "epoch": 1.3959940036483827, "grad_norm": 6.431535049809169, "learning_rate": 1.1033167136481063e-06, "loss": 0.6333, "step": 19323 }, { "epoch": 1.3960662488485922, "grad_norm": 7.956449462116434, "learning_rate": 1.1030741252950588e-06, "loss": 0.6167, "step": 19324 }, { "epoch": 1.3961384940488015, "grad_norm": 6.689796980213571, "learning_rate": 1.1028315560648418e-06, "loss": 0.583, "step": 19325 }, { "epoch": 1.396210739249011, "grad_norm": 7.212954882684935, "learning_rate": 1.1025890059607766e-06, "loss": 0.6664, "step": 19326 }, { "epoch": 1.3962829844492206, "grad_norm": 6.432666334187728, "learning_rate": 1.1023464749861823e-06, "loss": 0.5726, "step": 19327 }, { "epoch": 1.3963552296494302, "grad_norm": 6.790222420062248, "learning_rate": 1.1021039631443794e-06, "loss": 0.6416, "step": 19328 }, { "epoch": 1.3964274748496397, "grad_norm": 7.747739154464045, "learning_rate": 1.1018614704386879e-06, "loss": 0.6057, "step": 19329 }, { "epoch": 1.3964997200498492, "grad_norm": 6.927368464134893, "learning_rate": 1.1016189968724275e-06, "loss": 0.6727, "step": 19330 }, { "epoch": 1.3965719652500588, "grad_norm": 6.817476187518576, "learning_rate": 1.101376542448917e-06, "loss": 0.6356, "step": 19331 }, { "epoch": 1.396644210450268, "grad_norm": 9.035562825239369, "learning_rate": 1.1011341071714758e-06, "loss": 0.6315, "step": 19332 }, { "epoch": 1.3967164556504776, "grad_norm": 7.814942001156842, "learning_rate": 1.1008916910434233e-06, "loss": 0.6496, "step": 19333 }, { "epoch": 1.3967887008506872, "grad_norm": 6.58064334170843, "learning_rate": 1.1006492940680762e-06, "loss": 0.6232, "step": 19334 }, { "epoch": 1.3968609460508967, "grad_norm": 6.831599907320479, "learning_rate": 1.100406916248754e-06, "loss": 0.6361, "step": 19335 }, { "epoch": 1.3969331912511063, "grad_norm": 6.01979899074014, "learning_rate": 1.1001645575887742e-06, "loss": 0.6125, "step": 19336 }, { "epoch": 1.3970054364513158, "grad_norm": 7.3286145662861575, "learning_rate": 1.099922218091456e-06, "loss": 0.6717, "step": 19337 }, { "epoch": 1.3970776816515253, "grad_norm": 6.282831249194855, "learning_rate": 1.099679897760114e-06, "loss": 0.5638, "step": 19338 }, { "epoch": 1.3971499268517347, "grad_norm": 7.22786543363991, "learning_rate": 1.0994375965980675e-06, "loss": 0.7035, "step": 19339 }, { "epoch": 1.3972221720519444, "grad_norm": 6.982296491444172, "learning_rate": 1.0991953146086325e-06, "loss": 0.6359, "step": 19340 }, { "epoch": 1.3972944172521538, "grad_norm": 6.831548535244721, "learning_rate": 1.0989530517951264e-06, "loss": 0.6406, "step": 19341 }, { "epoch": 1.3973666624523633, "grad_norm": 6.956086700250111, "learning_rate": 1.0987108081608647e-06, "loss": 0.7585, "step": 19342 }, { "epoch": 1.3974389076525728, "grad_norm": 6.705364004519477, "learning_rate": 1.0984685837091642e-06, "loss": 0.6724, "step": 19343 }, { "epoch": 1.3975111528527824, "grad_norm": 7.54807741052083, "learning_rate": 1.0982263784433414e-06, "loss": 0.5744, "step": 19344 }, { "epoch": 1.397583398052992, "grad_norm": 7.520887915359489, "learning_rate": 1.0979841923667104e-06, "loss": 0.662, "step": 19345 }, { "epoch": 1.3976556432532012, "grad_norm": 7.805131290560757, "learning_rate": 1.0977420254825868e-06, "loss": 0.6904, "step": 19346 }, { "epoch": 1.397727888453411, "grad_norm": 6.855831264898785, "learning_rate": 1.0974998777942863e-06, "loss": 0.6153, "step": 19347 }, { "epoch": 1.3978001336536203, "grad_norm": 7.0667096310635, "learning_rate": 1.0972577493051246e-06, "loss": 0.6347, "step": 19348 }, { "epoch": 1.3978723788538299, "grad_norm": 7.577608763424858, "learning_rate": 1.0970156400184139e-06, "loss": 0.638, "step": 19349 }, { "epoch": 1.3979446240540394, "grad_norm": 7.808364139633178, "learning_rate": 1.0967735499374693e-06, "loss": 0.6127, "step": 19350 }, { "epoch": 1.398016869254249, "grad_norm": 7.005658995775336, "learning_rate": 1.0965314790656067e-06, "loss": 0.6116, "step": 19351 }, { "epoch": 1.3980891144544585, "grad_norm": 7.1543717897217505, "learning_rate": 1.0962894274061375e-06, "loss": 0.6293, "step": 19352 }, { "epoch": 1.3981613596546678, "grad_norm": 7.078233595935009, "learning_rate": 1.0960473949623761e-06, "loss": 0.6285, "step": 19353 }, { "epoch": 1.3982336048548776, "grad_norm": 7.034250661800934, "learning_rate": 1.0958053817376362e-06, "loss": 0.5223, "step": 19354 }, { "epoch": 1.398305850055087, "grad_norm": 6.078101150740485, "learning_rate": 1.095563387735231e-06, "loss": 0.6628, "step": 19355 }, { "epoch": 1.3983780952552964, "grad_norm": 6.141240601100519, "learning_rate": 1.0953214129584716e-06, "loss": 0.5903, "step": 19356 }, { "epoch": 1.398450340455506, "grad_norm": 6.385317363454321, "learning_rate": 1.0950794574106712e-06, "loss": 0.6191, "step": 19357 }, { "epoch": 1.3985225856557155, "grad_norm": 6.6097008013856335, "learning_rate": 1.0948375210951425e-06, "loss": 0.6637, "step": 19358 }, { "epoch": 1.398594830855925, "grad_norm": 6.245854337969531, "learning_rate": 1.094595604015197e-06, "loss": 0.6533, "step": 19359 }, { "epoch": 1.3986670760561344, "grad_norm": 6.017819645834771, "learning_rate": 1.0943537061741464e-06, "loss": 0.6467, "step": 19360 }, { "epoch": 1.3987393212563441, "grad_norm": 6.9632604214564235, "learning_rate": 1.094111827575302e-06, "loss": 0.6837, "step": 19361 }, { "epoch": 1.3988115664565535, "grad_norm": 8.125219254103, "learning_rate": 1.0938699682219763e-06, "loss": 0.6502, "step": 19362 }, { "epoch": 1.398883811656763, "grad_norm": 6.897168294376833, "learning_rate": 1.093628128117478e-06, "loss": 0.5889, "step": 19363 }, { "epoch": 1.3989560568569726, "grad_norm": 7.642200518160043, "learning_rate": 1.0933863072651185e-06, "loss": 0.5859, "step": 19364 }, { "epoch": 1.399028302057182, "grad_norm": 6.258661600672965, "learning_rate": 1.0931445056682092e-06, "loss": 0.5787, "step": 19365 }, { "epoch": 1.3991005472573916, "grad_norm": 8.32705767521561, "learning_rate": 1.0929027233300584e-06, "loss": 0.6241, "step": 19366 }, { "epoch": 1.3991727924576012, "grad_norm": 8.130250613395695, "learning_rate": 1.0926609602539767e-06, "loss": 0.6204, "step": 19367 }, { "epoch": 1.3992450376578107, "grad_norm": 7.629816003983233, "learning_rate": 1.0924192164432739e-06, "loss": 0.679, "step": 19368 }, { "epoch": 1.39931728285802, "grad_norm": 6.9906027249973395, "learning_rate": 1.092177491901259e-06, "loss": 0.6427, "step": 19369 }, { "epoch": 1.3993895280582296, "grad_norm": 7.25949684858592, "learning_rate": 1.091935786631241e-06, "loss": 0.7182, "step": 19370 }, { "epoch": 1.3994617732584391, "grad_norm": 7.594209884720948, "learning_rate": 1.091694100636529e-06, "loss": 0.6228, "step": 19371 }, { "epoch": 1.3995340184586487, "grad_norm": 7.702593121533711, "learning_rate": 1.0914524339204321e-06, "loss": 0.6619, "step": 19372 }, { "epoch": 1.3996062636588582, "grad_norm": 7.338635810661504, "learning_rate": 1.091210786486257e-06, "loss": 0.6355, "step": 19373 }, { "epoch": 1.3996785088590677, "grad_norm": 6.4104740266296805, "learning_rate": 1.0909691583373122e-06, "loss": 0.604, "step": 19374 }, { "epoch": 1.3997507540592773, "grad_norm": 7.190431212757036, "learning_rate": 1.0907275494769055e-06, "loss": 0.6201, "step": 19375 }, { "epoch": 1.3998229992594866, "grad_norm": 5.661639433218734, "learning_rate": 1.0904859599083454e-06, "loss": 0.5581, "step": 19376 }, { "epoch": 1.3998952444596962, "grad_norm": 7.856312695528474, "learning_rate": 1.090244389634937e-06, "loss": 0.6421, "step": 19377 }, { "epoch": 1.3999674896599057, "grad_norm": 7.585842960919662, "learning_rate": 1.0900028386599878e-06, "loss": 0.6316, "step": 19378 }, { "epoch": 1.4000397348601152, "grad_norm": 7.783403336116906, "learning_rate": 1.0897613069868056e-06, "loss": 0.6213, "step": 19379 }, { "epoch": 1.4001119800603248, "grad_norm": 7.490908643868981, "learning_rate": 1.0895197946186971e-06, "loss": 0.5918, "step": 19380 }, { "epoch": 1.4001842252605343, "grad_norm": 7.0024973637862935, "learning_rate": 1.0892783015589664e-06, "loss": 0.6237, "step": 19381 }, { "epoch": 1.4002564704607439, "grad_norm": 7.078742330884812, "learning_rate": 1.0890368278109207e-06, "loss": 0.6217, "step": 19382 }, { "epoch": 1.4003287156609532, "grad_norm": 5.998526074251621, "learning_rate": 1.0887953733778657e-06, "loss": 0.5441, "step": 19383 }, { "epoch": 1.4004009608611627, "grad_norm": 5.934266826332743, "learning_rate": 1.088553938263106e-06, "loss": 0.6159, "step": 19384 }, { "epoch": 1.4004732060613723, "grad_norm": 7.044056533872551, "learning_rate": 1.0883125224699467e-06, "loss": 0.6564, "step": 19385 }, { "epoch": 1.4005454512615818, "grad_norm": 5.874976949443644, "learning_rate": 1.0880711260016927e-06, "loss": 0.6159, "step": 19386 }, { "epoch": 1.4006176964617914, "grad_norm": 6.565853751481208, "learning_rate": 1.087829748861649e-06, "loss": 0.6149, "step": 19387 }, { "epoch": 1.400689941662001, "grad_norm": 7.437753624959333, "learning_rate": 1.0875883910531192e-06, "loss": 0.6226, "step": 19388 }, { "epoch": 1.4007621868622104, "grad_norm": 7.474707872685221, "learning_rate": 1.087347052579408e-06, "loss": 0.6398, "step": 19389 }, { "epoch": 1.4008344320624198, "grad_norm": 8.460728749300399, "learning_rate": 1.0871057334438195e-06, "loss": 0.6604, "step": 19390 }, { "epoch": 1.4009066772626293, "grad_norm": 7.249351472458777, "learning_rate": 1.0868644336496556e-06, "loss": 0.6931, "step": 19391 }, { "epoch": 1.4009789224628388, "grad_norm": 7.342289686090617, "learning_rate": 1.0866231532002205e-06, "loss": 0.669, "step": 19392 }, { "epoch": 1.4010511676630484, "grad_norm": 7.783925527171289, "learning_rate": 1.0863818920988168e-06, "loss": 0.6185, "step": 19393 }, { "epoch": 1.401123412863258, "grad_norm": 7.597534743565672, "learning_rate": 1.0861406503487484e-06, "loss": 0.5492, "step": 19394 }, { "epoch": 1.4011956580634675, "grad_norm": 5.918836956734841, "learning_rate": 1.0858994279533159e-06, "loss": 0.6106, "step": 19395 }, { "epoch": 1.401267903263677, "grad_norm": 6.559357680997116, "learning_rate": 1.085658224915822e-06, "loss": 0.6187, "step": 19396 }, { "epoch": 1.4013401484638863, "grad_norm": 7.393596930705005, "learning_rate": 1.085417041239569e-06, "loss": 0.6508, "step": 19397 }, { "epoch": 1.4014123936640959, "grad_norm": 6.4479871344426165, "learning_rate": 1.0851758769278583e-06, "loss": 0.5887, "step": 19398 }, { "epoch": 1.4014846388643054, "grad_norm": 5.792697110434173, "learning_rate": 1.0849347319839915e-06, "loss": 0.5963, "step": 19399 }, { "epoch": 1.401556884064515, "grad_norm": 6.458728934036431, "learning_rate": 1.0846936064112693e-06, "loss": 0.6318, "step": 19400 }, { "epoch": 1.4016291292647245, "grad_norm": 9.073243816680646, "learning_rate": 1.0844525002129939e-06, "loss": 0.6774, "step": 19401 }, { "epoch": 1.401701374464934, "grad_norm": 7.030006943071173, "learning_rate": 1.0842114133924633e-06, "loss": 0.589, "step": 19402 }, { "epoch": 1.4017736196651436, "grad_norm": 7.3181211597205476, "learning_rate": 1.0839703459529794e-06, "loss": 0.6885, "step": 19403 }, { "epoch": 1.401845864865353, "grad_norm": 8.29757428859224, "learning_rate": 1.083729297897842e-06, "loss": 0.5888, "step": 19404 }, { "epoch": 1.4019181100655624, "grad_norm": 8.081981218126918, "learning_rate": 1.0834882692303517e-06, "loss": 0.6784, "step": 19405 }, { "epoch": 1.401990355265772, "grad_norm": 7.389670570474486, "learning_rate": 1.0832472599538064e-06, "loss": 0.6534, "step": 19406 }, { "epoch": 1.4020626004659815, "grad_norm": 6.424151163396298, "learning_rate": 1.083006270071505e-06, "loss": 0.5835, "step": 19407 }, { "epoch": 1.402134845666191, "grad_norm": 6.239293808713075, "learning_rate": 1.0827652995867493e-06, "loss": 0.6617, "step": 19408 }, { "epoch": 1.4022070908664006, "grad_norm": 6.616678445775127, "learning_rate": 1.0825243485028356e-06, "loss": 0.6308, "step": 19409 }, { "epoch": 1.4022793360666101, "grad_norm": 6.511919683275108, "learning_rate": 1.0822834168230625e-06, "loss": 0.5862, "step": 19410 }, { "epoch": 1.4023515812668195, "grad_norm": 7.478752811921662, "learning_rate": 1.082042504550729e-06, "loss": 0.5319, "step": 19411 }, { "epoch": 1.4024238264670292, "grad_norm": 6.634128543338681, "learning_rate": 1.0818016116891337e-06, "loss": 0.6084, "step": 19412 }, { "epoch": 1.4024960716672386, "grad_norm": 6.430108725190689, "learning_rate": 1.081560738241572e-06, "loss": 0.6675, "step": 19413 }, { "epoch": 1.402568316867448, "grad_norm": 6.850503176286436, "learning_rate": 1.0813198842113424e-06, "loss": 0.619, "step": 19414 }, { "epoch": 1.4026405620676576, "grad_norm": 6.802712785728733, "learning_rate": 1.081079049601742e-06, "loss": 0.6221, "step": 19415 }, { "epoch": 1.4027128072678672, "grad_norm": 7.0883905944496615, "learning_rate": 1.0808382344160682e-06, "loss": 0.6545, "step": 19416 }, { "epoch": 1.4027850524680767, "grad_norm": 6.342517751895178, "learning_rate": 1.0805974386576166e-06, "loss": 0.5496, "step": 19417 }, { "epoch": 1.402857297668286, "grad_norm": 7.6475810895338725, "learning_rate": 1.080356662329684e-06, "loss": 0.642, "step": 19418 }, { "epoch": 1.4029295428684958, "grad_norm": 6.644192803053685, "learning_rate": 1.0801159054355672e-06, "loss": 0.6915, "step": 19419 }, { "epoch": 1.4030017880687051, "grad_norm": 6.791550648952355, "learning_rate": 1.0798751679785605e-06, "loss": 0.6618, "step": 19420 }, { "epoch": 1.4030740332689147, "grad_norm": 8.05092955725237, "learning_rate": 1.0796344499619602e-06, "loss": 0.5985, "step": 19421 }, { "epoch": 1.4031462784691242, "grad_norm": 6.869074261745822, "learning_rate": 1.079393751389062e-06, "loss": 0.6151, "step": 19422 }, { "epoch": 1.4032185236693338, "grad_norm": 8.204267498563675, "learning_rate": 1.0791530722631596e-06, "loss": 0.6554, "step": 19423 }, { "epoch": 1.4032907688695433, "grad_norm": 7.049683140336904, "learning_rate": 1.0789124125875485e-06, "loss": 0.5982, "step": 19424 }, { "epoch": 1.4033630140697526, "grad_norm": 7.019938546805644, "learning_rate": 1.0786717723655231e-06, "loss": 0.6614, "step": 19425 }, { "epoch": 1.4034352592699624, "grad_norm": 6.91003203940796, "learning_rate": 1.0784311516003773e-06, "loss": 0.6205, "step": 19426 }, { "epoch": 1.4035075044701717, "grad_norm": 6.499884384300796, "learning_rate": 1.0781905502954054e-06, "loss": 0.5741, "step": 19427 }, { "epoch": 1.4035797496703812, "grad_norm": 7.8043189587035515, "learning_rate": 1.0779499684539011e-06, "loss": 0.6629, "step": 19428 }, { "epoch": 1.4036519948705908, "grad_norm": 6.332896000000803, "learning_rate": 1.0777094060791574e-06, "loss": 0.6679, "step": 19429 }, { "epoch": 1.4037242400708003, "grad_norm": 6.8449474663177945, "learning_rate": 1.0774688631744683e-06, "loss": 0.6344, "step": 19430 }, { "epoch": 1.4037964852710099, "grad_norm": 8.73049758803962, "learning_rate": 1.0772283397431255e-06, "loss": 0.6234, "step": 19431 }, { "epoch": 1.4038687304712192, "grad_norm": 7.932072984399096, "learning_rate": 1.0769878357884217e-06, "loss": 0.6372, "step": 19432 }, { "epoch": 1.403940975671429, "grad_norm": 6.270935395720856, "learning_rate": 1.0767473513136503e-06, "loss": 0.6236, "step": 19433 }, { "epoch": 1.4040132208716383, "grad_norm": 7.619468809020785, "learning_rate": 1.076506886322102e-06, "loss": 0.6369, "step": 19434 }, { "epoch": 1.4040854660718478, "grad_norm": 5.821363029144449, "learning_rate": 1.076266440817068e-06, "loss": 0.616, "step": 19435 }, { "epoch": 1.4041577112720574, "grad_norm": 6.039634766304297, "learning_rate": 1.076026014801842e-06, "loss": 0.7188, "step": 19436 }, { "epoch": 1.404229956472267, "grad_norm": 7.83745745049567, "learning_rate": 1.0757856082797149e-06, "loss": 0.6926, "step": 19437 }, { "epoch": 1.4043022016724764, "grad_norm": 6.8923130173584, "learning_rate": 1.0755452212539764e-06, "loss": 0.6136, "step": 19438 }, { "epoch": 1.404374446872686, "grad_norm": 7.752212946798466, "learning_rate": 1.0753048537279177e-06, "loss": 0.6536, "step": 19439 }, { "epoch": 1.4044466920728955, "grad_norm": 7.501856002039881, "learning_rate": 1.0750645057048303e-06, "loss": 0.6292, "step": 19440 }, { "epoch": 1.4045189372731048, "grad_norm": 6.642331754472567, "learning_rate": 1.0748241771880026e-06, "loss": 0.6351, "step": 19441 }, { "epoch": 1.4045911824733144, "grad_norm": 7.991883452030557, "learning_rate": 1.074583868180725e-06, "loss": 0.5952, "step": 19442 }, { "epoch": 1.404663427673524, "grad_norm": 6.968298653667836, "learning_rate": 1.0743435786862876e-06, "loss": 0.6119, "step": 19443 }, { "epoch": 1.4047356728737335, "grad_norm": 9.096326954463544, "learning_rate": 1.0741033087079797e-06, "loss": 0.747, "step": 19444 }, { "epoch": 1.404807918073943, "grad_norm": 6.027704807822306, "learning_rate": 1.0738630582490906e-06, "loss": 0.6169, "step": 19445 }, { "epoch": 1.4048801632741525, "grad_norm": 5.4559605612350195, "learning_rate": 1.0736228273129085e-06, "loss": 0.5828, "step": 19446 }, { "epoch": 1.404952408474362, "grad_norm": 7.493417776091295, "learning_rate": 1.0733826159027236e-06, "loss": 0.6844, "step": 19447 }, { "epoch": 1.4050246536745714, "grad_norm": 7.596578691259638, "learning_rate": 1.073142424021822e-06, "loss": 0.5526, "step": 19448 }, { "epoch": 1.405096898874781, "grad_norm": 7.2953234703715175, "learning_rate": 1.0729022516734928e-06, "loss": 0.6615, "step": 19449 }, { "epoch": 1.4051691440749905, "grad_norm": 6.734552863442963, "learning_rate": 1.0726620988610235e-06, "loss": 0.6077, "step": 19450 }, { "epoch": 1.4052413892752, "grad_norm": 7.348491290684611, "learning_rate": 1.0724219655877026e-06, "loss": 0.5831, "step": 19451 }, { "epoch": 1.4053136344754096, "grad_norm": 6.956737617066284, "learning_rate": 1.0721818518568162e-06, "loss": 0.6331, "step": 19452 }, { "epoch": 1.4053858796756191, "grad_norm": 8.591010305479074, "learning_rate": 1.0719417576716512e-06, "loss": 0.6197, "step": 19453 }, { "epoch": 1.4054581248758287, "grad_norm": 7.3456358740440555, "learning_rate": 1.0717016830354948e-06, "loss": 0.5585, "step": 19454 }, { "epoch": 1.405530370076038, "grad_norm": 7.466638753475512, "learning_rate": 1.0714616279516333e-06, "loss": 0.6799, "step": 19455 }, { "epoch": 1.4056026152762475, "grad_norm": 6.151161530759068, "learning_rate": 1.071221592423353e-06, "loss": 0.5677, "step": 19456 }, { "epoch": 1.405674860476457, "grad_norm": 6.357701186740599, "learning_rate": 1.0709815764539396e-06, "loss": 0.6052, "step": 19457 }, { "epoch": 1.4057471056766666, "grad_norm": 7.281801129222555, "learning_rate": 1.07074158004668e-06, "loss": 0.641, "step": 19458 }, { "epoch": 1.4058193508768762, "grad_norm": 8.246564207476187, "learning_rate": 1.0705016032048574e-06, "loss": 0.6815, "step": 19459 }, { "epoch": 1.4058915960770857, "grad_norm": 7.988358371800702, "learning_rate": 1.070261645931758e-06, "loss": 0.6676, "step": 19460 }, { "epoch": 1.4059638412772952, "grad_norm": 5.918832445225024, "learning_rate": 1.0700217082306662e-06, "loss": 0.5838, "step": 19461 }, { "epoch": 1.4060360864775046, "grad_norm": 5.966949670711387, "learning_rate": 1.0697817901048683e-06, "loss": 0.6236, "step": 19462 }, { "epoch": 1.406108331677714, "grad_norm": 6.815677566710399, "learning_rate": 1.0695418915576453e-06, "loss": 0.6071, "step": 19463 }, { "epoch": 1.4061805768779236, "grad_norm": 6.468868530956614, "learning_rate": 1.0693020125922837e-06, "loss": 0.6421, "step": 19464 }, { "epoch": 1.4062528220781332, "grad_norm": 8.01619178597367, "learning_rate": 1.069062153212068e-06, "loss": 0.6392, "step": 19465 }, { "epoch": 1.4063250672783427, "grad_norm": 6.059761923466284, "learning_rate": 1.068822313420279e-06, "loss": 0.5545, "step": 19466 }, { "epoch": 1.4063973124785523, "grad_norm": 5.0960180931668395, "learning_rate": 1.0685824932202016e-06, "loss": 0.5925, "step": 19467 }, { "epoch": 1.4064695576787618, "grad_norm": 7.271653822226601, "learning_rate": 1.0683426926151184e-06, "loss": 0.6593, "step": 19468 }, { "epoch": 1.4065418028789711, "grad_norm": 7.237447988161826, "learning_rate": 1.068102911608313e-06, "loss": 0.6535, "step": 19469 }, { "epoch": 1.4066140480791807, "grad_norm": 6.991340185218369, "learning_rate": 1.0678631502030661e-06, "loss": 0.579, "step": 19470 }, { "epoch": 1.4066862932793902, "grad_norm": 6.873288305648786, "learning_rate": 1.0676234084026608e-06, "loss": 0.611, "step": 19471 }, { "epoch": 1.4067585384795998, "grad_norm": 7.6389622663576455, "learning_rate": 1.0673836862103785e-06, "loss": 0.6085, "step": 19472 }, { "epoch": 1.4068307836798093, "grad_norm": 7.787068225530856, "learning_rate": 1.0671439836295014e-06, "loss": 0.7337, "step": 19473 }, { "epoch": 1.4069030288800188, "grad_norm": 7.117608502830185, "learning_rate": 1.0669043006633107e-06, "loss": 0.6106, "step": 19474 }, { "epoch": 1.4069752740802284, "grad_norm": 7.350131764171551, "learning_rate": 1.0666646373150874e-06, "loss": 0.588, "step": 19475 }, { "epoch": 1.4070475192804377, "grad_norm": 7.231699240667487, "learning_rate": 1.066424993588113e-06, "loss": 0.5848, "step": 19476 }, { "epoch": 1.4071197644806472, "grad_norm": 8.166622187694431, "learning_rate": 1.0661853694856667e-06, "loss": 0.6242, "step": 19477 }, { "epoch": 1.4071920096808568, "grad_norm": 8.148313439102319, "learning_rate": 1.0659457650110291e-06, "loss": 0.6807, "step": 19478 }, { "epoch": 1.4072642548810663, "grad_norm": 7.182251182997157, "learning_rate": 1.0657061801674806e-06, "loss": 0.6177, "step": 19479 }, { "epoch": 1.4073365000812759, "grad_norm": 6.416733217100689, "learning_rate": 1.065466614958302e-06, "loss": 0.6385, "step": 19480 }, { "epoch": 1.4074087452814854, "grad_norm": 7.2162743488947, "learning_rate": 1.0652270693867705e-06, "loss": 0.7143, "step": 19481 }, { "epoch": 1.407480990481695, "grad_norm": 6.39657651412646, "learning_rate": 1.0649875434561662e-06, "loss": 0.6164, "step": 19482 }, { "epoch": 1.4075532356819043, "grad_norm": 6.417327681673627, "learning_rate": 1.0647480371697685e-06, "loss": 0.6645, "step": 19483 }, { "epoch": 1.407625480882114, "grad_norm": 10.3411020998325, "learning_rate": 1.064508550530856e-06, "loss": 0.606, "step": 19484 }, { "epoch": 1.4076977260823234, "grad_norm": 6.972119530288397, "learning_rate": 1.0642690835427064e-06, "loss": 0.6878, "step": 19485 }, { "epoch": 1.407769971282533, "grad_norm": 7.158949163885962, "learning_rate": 1.0640296362085984e-06, "loss": 0.7206, "step": 19486 }, { "epoch": 1.4078422164827424, "grad_norm": 6.293283957810327, "learning_rate": 1.0637902085318106e-06, "loss": 0.6129, "step": 19487 }, { "epoch": 1.407914461682952, "grad_norm": 6.321167990522345, "learning_rate": 1.0635508005156187e-06, "loss": 0.6217, "step": 19488 }, { "epoch": 1.4079867068831615, "grad_norm": 5.4235328975849795, "learning_rate": 1.0633114121633011e-06, "loss": 0.582, "step": 19489 }, { "epoch": 1.4080589520833708, "grad_norm": 7.40644160155642, "learning_rate": 1.0630720434781359e-06, "loss": 0.6025, "step": 19490 }, { "epoch": 1.4081311972835806, "grad_norm": 5.931433539972553, "learning_rate": 1.0628326944633965e-06, "loss": 0.5702, "step": 19491 }, { "epoch": 1.40820344248379, "grad_norm": 6.787528651617848, "learning_rate": 1.062593365122363e-06, "loss": 0.6123, "step": 19492 }, { "epoch": 1.4082756876839995, "grad_norm": 5.778302075162406, "learning_rate": 1.0623540554583097e-06, "loss": 0.6061, "step": 19493 }, { "epoch": 1.408347932884209, "grad_norm": 5.956404936337438, "learning_rate": 1.0621147654745145e-06, "loss": 0.6221, "step": 19494 }, { "epoch": 1.4084201780844186, "grad_norm": 6.32132549679076, "learning_rate": 1.0618754951742507e-06, "loss": 0.5584, "step": 19495 }, { "epoch": 1.408492423284628, "grad_norm": 8.125459628676158, "learning_rate": 1.0616362445607948e-06, "loss": 0.5956, "step": 19496 }, { "epoch": 1.4085646684848374, "grad_norm": 6.34175958084969, "learning_rate": 1.0613970136374223e-06, "loss": 0.5709, "step": 19497 }, { "epoch": 1.4086369136850472, "grad_norm": 7.242317075094886, "learning_rate": 1.0611578024074073e-06, "loss": 0.6219, "step": 19498 }, { "epoch": 1.4087091588852565, "grad_norm": 7.39959598160048, "learning_rate": 1.0609186108740248e-06, "loss": 0.7192, "step": 19499 }, { "epoch": 1.408781404085466, "grad_norm": 7.993483989187718, "learning_rate": 1.0606794390405489e-06, "loss": 0.6828, "step": 19500 }, { "epoch": 1.4088536492856756, "grad_norm": 7.546322504200463, "learning_rate": 1.060440286910254e-06, "loss": 0.5926, "step": 19501 }, { "epoch": 1.4089258944858851, "grad_norm": 9.141370328854302, "learning_rate": 1.0602011544864137e-06, "loss": 0.647, "step": 19502 }, { "epoch": 1.4089981396860947, "grad_norm": 8.05536850752293, "learning_rate": 1.0599620417723018e-06, "loss": 0.6215, "step": 19503 }, { "epoch": 1.409070384886304, "grad_norm": 7.600566802723087, "learning_rate": 1.0597229487711924e-06, "loss": 0.6867, "step": 19504 }, { "epoch": 1.4091426300865137, "grad_norm": 6.552974217853563, "learning_rate": 1.0594838754863564e-06, "loss": 0.662, "step": 19505 }, { "epoch": 1.409214875286723, "grad_norm": 7.096720472675048, "learning_rate": 1.0592448219210679e-06, "loss": 0.6164, "step": 19506 }, { "epoch": 1.4092871204869326, "grad_norm": 8.153750215583317, "learning_rate": 1.0590057880785991e-06, "loss": 0.6356, "step": 19507 }, { "epoch": 1.4093593656871422, "grad_norm": 5.89947282407935, "learning_rate": 1.058766773962223e-06, "loss": 0.6367, "step": 19508 }, { "epoch": 1.4094316108873517, "grad_norm": 7.599737624608538, "learning_rate": 1.0585277795752099e-06, "loss": 0.6817, "step": 19509 }, { "epoch": 1.4095038560875612, "grad_norm": 6.603798680527705, "learning_rate": 1.0582888049208323e-06, "loss": 0.6202, "step": 19510 }, { "epoch": 1.4095761012877706, "grad_norm": 6.435807385186318, "learning_rate": 1.0580498500023614e-06, "loss": 0.6466, "step": 19511 }, { "epoch": 1.4096483464879803, "grad_norm": 7.211630476926989, "learning_rate": 1.0578109148230685e-06, "loss": 0.6092, "step": 19512 }, { "epoch": 1.4097205916881896, "grad_norm": 5.867854866443322, "learning_rate": 1.0575719993862244e-06, "loss": 0.5327, "step": 19513 }, { "epoch": 1.4097928368883992, "grad_norm": 6.401420745509168, "learning_rate": 1.0573331036950995e-06, "loss": 0.6579, "step": 19514 }, { "epoch": 1.4098650820886087, "grad_norm": 7.410226004135977, "learning_rate": 1.0570942277529656e-06, "loss": 0.5846, "step": 19515 }, { "epoch": 1.4099373272888183, "grad_norm": 5.629981823039821, "learning_rate": 1.05685537156309e-06, "loss": 0.6288, "step": 19516 }, { "epoch": 1.4100095724890278, "grad_norm": 7.100768797178678, "learning_rate": 1.056616535128744e-06, "loss": 0.5358, "step": 19517 }, { "epoch": 1.4100818176892373, "grad_norm": 8.003611226413625, "learning_rate": 1.056377718453197e-06, "loss": 0.632, "step": 19518 }, { "epoch": 1.410154062889447, "grad_norm": 6.58975662870315, "learning_rate": 1.0561389215397192e-06, "loss": 0.6199, "step": 19519 }, { "epoch": 1.4102263080896562, "grad_norm": 7.248154767116675, "learning_rate": 1.0559001443915761e-06, "loss": 0.6727, "step": 19520 }, { "epoch": 1.4102985532898658, "grad_norm": 6.430817923012246, "learning_rate": 1.0556613870120403e-06, "loss": 0.6105, "step": 19521 }, { "epoch": 1.4103707984900753, "grad_norm": 7.364877251601166, "learning_rate": 1.055422649404379e-06, "loss": 0.6907, "step": 19522 }, { "epoch": 1.4104430436902848, "grad_norm": 6.561863168970458, "learning_rate": 1.0551839315718596e-06, "loss": 0.6366, "step": 19523 }, { "epoch": 1.4105152888904944, "grad_norm": 6.391382746087299, "learning_rate": 1.0549452335177499e-06, "loss": 0.5849, "step": 19524 }, { "epoch": 1.410587534090704, "grad_norm": 7.495222477375275, "learning_rate": 1.054706555245318e-06, "loss": 0.6625, "step": 19525 }, { "epoch": 1.4106597792909135, "grad_norm": 6.955348244456215, "learning_rate": 1.0544678967578324e-06, "loss": 0.6051, "step": 19526 }, { "epoch": 1.4107320244911228, "grad_norm": 7.559538710715015, "learning_rate": 1.0542292580585578e-06, "loss": 0.6297, "step": 19527 }, { "epoch": 1.4108042696913323, "grad_norm": 6.629709549147284, "learning_rate": 1.0539906391507621e-06, "loss": 0.6071, "step": 19528 }, { "epoch": 1.4108765148915419, "grad_norm": 6.501551809671126, "learning_rate": 1.0537520400377119e-06, "loss": 0.5831, "step": 19529 }, { "epoch": 1.4109487600917514, "grad_norm": 7.097754604264878, "learning_rate": 1.0535134607226733e-06, "loss": 0.585, "step": 19530 }, { "epoch": 1.411021005291961, "grad_norm": 6.680319094774426, "learning_rate": 1.0532749012089118e-06, "loss": 0.6433, "step": 19531 }, { "epoch": 1.4110932504921705, "grad_norm": 6.726178839505077, "learning_rate": 1.0530363614996942e-06, "loss": 0.6056, "step": 19532 }, { "epoch": 1.41116549569238, "grad_norm": 6.73371958364412, "learning_rate": 1.0527978415982863e-06, "loss": 0.6294, "step": 19533 }, { "epoch": 1.4112377408925894, "grad_norm": 7.539533815821248, "learning_rate": 1.052559341507951e-06, "loss": 0.5931, "step": 19534 }, { "epoch": 1.411309986092799, "grad_norm": 6.799255375202976, "learning_rate": 1.0523208612319543e-06, "loss": 0.6312, "step": 19535 }, { "epoch": 1.4113822312930084, "grad_norm": 8.00153526833386, "learning_rate": 1.0520824007735615e-06, "loss": 0.6165, "step": 19536 }, { "epoch": 1.411454476493218, "grad_norm": 6.845279052485478, "learning_rate": 1.051843960136037e-06, "loss": 0.5477, "step": 19537 }, { "epoch": 1.4115267216934275, "grad_norm": 7.412279981979229, "learning_rate": 1.0516055393226432e-06, "loss": 0.6045, "step": 19538 }, { "epoch": 1.411598966893637, "grad_norm": 6.776892883958292, "learning_rate": 1.0513671383366452e-06, "loss": 0.6154, "step": 19539 }, { "epoch": 1.4116712120938466, "grad_norm": 5.723109183760873, "learning_rate": 1.0511287571813061e-06, "loss": 0.5618, "step": 19540 }, { "epoch": 1.411743457294056, "grad_norm": 7.134786476837857, "learning_rate": 1.0508903958598896e-06, "loss": 0.6504, "step": 19541 }, { "epoch": 1.4118157024942655, "grad_norm": 6.680689400829927, "learning_rate": 1.0506520543756581e-06, "loss": 0.6503, "step": 19542 }, { "epoch": 1.411887947694475, "grad_norm": 7.223407250369282, "learning_rate": 1.050413732731875e-06, "loss": 0.6726, "step": 19543 }, { "epoch": 1.4119601928946846, "grad_norm": 5.9759245089418815, "learning_rate": 1.0501754309318029e-06, "loss": 0.5718, "step": 19544 }, { "epoch": 1.412032438094894, "grad_norm": 7.21858142474299, "learning_rate": 1.0499371489787027e-06, "loss": 0.5944, "step": 19545 }, { "epoch": 1.4121046832951036, "grad_norm": 6.630745465700535, "learning_rate": 1.049698886875837e-06, "loss": 0.6712, "step": 19546 }, { "epoch": 1.4121769284953132, "grad_norm": 8.062800911899156, "learning_rate": 1.0494606446264686e-06, "loss": 0.6068, "step": 19547 }, { "epoch": 1.4122491736955225, "grad_norm": 6.266412775835026, "learning_rate": 1.0492224222338557e-06, "loss": 0.6513, "step": 19548 }, { "epoch": 1.412321418895732, "grad_norm": 6.837707081227018, "learning_rate": 1.0489842197012622e-06, "loss": 0.679, "step": 19549 }, { "epoch": 1.4123936640959416, "grad_norm": 6.7346100732971514, "learning_rate": 1.0487460370319482e-06, "loss": 0.6062, "step": 19550 }, { "epoch": 1.4124659092961511, "grad_norm": 6.173669327283935, "learning_rate": 1.048507874229175e-06, "loss": 0.6519, "step": 19551 }, { "epoch": 1.4125381544963607, "grad_norm": 7.324941370602476, "learning_rate": 1.0482697312962013e-06, "loss": 0.5835, "step": 19552 }, { "epoch": 1.4126103996965702, "grad_norm": 7.766311216065524, "learning_rate": 1.0480316082362873e-06, "loss": 0.6762, "step": 19553 }, { "epoch": 1.4126826448967797, "grad_norm": 6.804048387753472, "learning_rate": 1.0477935050526943e-06, "loss": 0.6802, "step": 19554 }, { "epoch": 1.412754890096989, "grad_norm": 8.870071117025711, "learning_rate": 1.04755542174868e-06, "loss": 0.6594, "step": 19555 }, { "epoch": 1.4128271352971986, "grad_norm": 6.22775031310684, "learning_rate": 1.047317358327504e-06, "loss": 0.5889, "step": 19556 }, { "epoch": 1.4128993804974082, "grad_norm": 7.452789235099333, "learning_rate": 1.0470793147924254e-06, "loss": 0.6126, "step": 19557 }, { "epoch": 1.4129716256976177, "grad_norm": 8.113880236374849, "learning_rate": 1.046841291146703e-06, "loss": 0.6363, "step": 19558 }, { "epoch": 1.4130438708978272, "grad_norm": 6.4239151213134695, "learning_rate": 1.046603287393595e-06, "loss": 0.6444, "step": 19559 }, { "epoch": 1.4131161160980368, "grad_norm": 7.061957954603517, "learning_rate": 1.0463653035363593e-06, "loss": 0.5083, "step": 19560 }, { "epoch": 1.4131883612982463, "grad_norm": 6.334671816013957, "learning_rate": 1.0461273395782541e-06, "loss": 0.687, "step": 19561 }, { "epoch": 1.4132606064984556, "grad_norm": 6.857040529395852, "learning_rate": 1.0458893955225378e-06, "loss": 0.6319, "step": 19562 }, { "epoch": 1.4133328516986654, "grad_norm": 7.155169143010379, "learning_rate": 1.0456514713724656e-06, "loss": 0.5807, "step": 19563 }, { "epoch": 1.4134050968988747, "grad_norm": 7.641125746468242, "learning_rate": 1.0454135671312954e-06, "loss": 0.6176, "step": 19564 }, { "epoch": 1.4134773420990843, "grad_norm": 8.82067491920394, "learning_rate": 1.0451756828022853e-06, "loss": 0.6189, "step": 19565 }, { "epoch": 1.4135495872992938, "grad_norm": 6.323718385181925, "learning_rate": 1.0449378183886893e-06, "loss": 0.6781, "step": 19566 }, { "epoch": 1.4136218324995034, "grad_norm": 6.16860852296684, "learning_rate": 1.0446999738937647e-06, "loss": 0.5769, "step": 19567 }, { "epoch": 1.413694077699713, "grad_norm": 7.64935490499993, "learning_rate": 1.0444621493207669e-06, "loss": 0.596, "step": 19568 }, { "epoch": 1.4137663228999222, "grad_norm": 7.994532385648678, "learning_rate": 1.0442243446729536e-06, "loss": 0.6582, "step": 19569 }, { "epoch": 1.413838568100132, "grad_norm": 7.55285685469283, "learning_rate": 1.0439865599535782e-06, "loss": 0.6747, "step": 19570 }, { "epoch": 1.4139108133003413, "grad_norm": 6.511417045537628, "learning_rate": 1.043748795165896e-06, "loss": 0.6268, "step": 19571 }, { "epoch": 1.4139830585005508, "grad_norm": 7.074670078955924, "learning_rate": 1.043511050313163e-06, "loss": 0.6297, "step": 19572 }, { "epoch": 1.4140553037007604, "grad_norm": 7.450716473222926, "learning_rate": 1.0432733253986319e-06, "loss": 0.699, "step": 19573 }, { "epoch": 1.41412754890097, "grad_norm": 7.556366001245268, "learning_rate": 1.0430356204255579e-06, "loss": 0.7047, "step": 19574 }, { "epoch": 1.4141997941011795, "grad_norm": 7.894677177337589, "learning_rate": 1.0427979353971952e-06, "loss": 0.6312, "step": 19575 }, { "epoch": 1.4142720393013888, "grad_norm": 7.169399391035055, "learning_rate": 1.042560270316797e-06, "loss": 0.6455, "step": 19576 }, { "epoch": 1.4143442845015985, "grad_norm": 6.676082669010833, "learning_rate": 1.042322625187617e-06, "loss": 0.6649, "step": 19577 }, { "epoch": 1.4144165297018079, "grad_norm": 6.2031517508672245, "learning_rate": 1.0420850000129087e-06, "loss": 0.6001, "step": 19578 }, { "epoch": 1.4144887749020174, "grad_norm": 7.387854542752791, "learning_rate": 1.0418473947959256e-06, "loss": 0.6231, "step": 19579 }, { "epoch": 1.414561020102227, "grad_norm": 7.312448093849565, "learning_rate": 1.0416098095399187e-06, "loss": 0.5901, "step": 19580 }, { "epoch": 1.4146332653024365, "grad_norm": 7.426852514702745, "learning_rate": 1.041372244248141e-06, "loss": 0.6125, "step": 19581 }, { "epoch": 1.414705510502646, "grad_norm": 8.192001386642339, "learning_rate": 1.041134698923845e-06, "loss": 0.6413, "step": 19582 }, { "epoch": 1.4147777557028554, "grad_norm": 6.438154168825293, "learning_rate": 1.0408971735702828e-06, "loss": 0.7118, "step": 19583 }, { "epoch": 1.4148500009030651, "grad_norm": 6.273256900225788, "learning_rate": 1.0406596681907047e-06, "loss": 0.5682, "step": 19584 }, { "epoch": 1.4149222461032744, "grad_norm": 8.031256174760516, "learning_rate": 1.0404221827883624e-06, "loss": 0.6679, "step": 19585 }, { "epoch": 1.414994491303484, "grad_norm": 6.276781001144466, "learning_rate": 1.0401847173665073e-06, "loss": 0.6892, "step": 19586 }, { "epoch": 1.4150667365036935, "grad_norm": 6.600547432593157, "learning_rate": 1.0399472719283899e-06, "loss": 0.6303, "step": 19587 }, { "epoch": 1.415138981703903, "grad_norm": 7.176669244950335, "learning_rate": 1.0397098464772606e-06, "loss": 0.7047, "step": 19588 }, { "epoch": 1.4152112269041126, "grad_norm": 7.807311020908368, "learning_rate": 1.0394724410163698e-06, "loss": 0.6192, "step": 19589 }, { "epoch": 1.4152834721043221, "grad_norm": 6.870244826572834, "learning_rate": 1.0392350555489681e-06, "loss": 0.5592, "step": 19590 }, { "epoch": 1.4153557173045317, "grad_norm": 6.705151231668887, "learning_rate": 1.0389976900783033e-06, "loss": 0.6299, "step": 19591 }, { "epoch": 1.415427962504741, "grad_norm": 8.116413450371, "learning_rate": 1.038760344607626e-06, "loss": 0.6512, "step": 19592 }, { "epoch": 1.4155002077049506, "grad_norm": 7.210853386054648, "learning_rate": 1.0385230191401846e-06, "loss": 0.6228, "step": 19593 }, { "epoch": 1.41557245290516, "grad_norm": 6.665539487280391, "learning_rate": 1.0382857136792296e-06, "loss": 0.6435, "step": 19594 }, { "epoch": 1.4156446981053696, "grad_norm": 6.555598817384434, "learning_rate": 1.038048428228007e-06, "loss": 0.6377, "step": 19595 }, { "epoch": 1.4157169433055792, "grad_norm": 7.184808641830801, "learning_rate": 1.0378111627897664e-06, "loss": 0.6656, "step": 19596 }, { "epoch": 1.4157891885057887, "grad_norm": 6.5046766036833725, "learning_rate": 1.0375739173677556e-06, "loss": 0.6717, "step": 19597 }, { "epoch": 1.4158614337059983, "grad_norm": 7.8625544947932635, "learning_rate": 1.0373366919652224e-06, "loss": 0.6216, "step": 19598 }, { "epoch": 1.4159336789062076, "grad_norm": 6.518703547574633, "learning_rate": 1.0370994865854146e-06, "loss": 0.5498, "step": 19599 }, { "epoch": 1.4160059241064171, "grad_norm": 7.251974954939538, "learning_rate": 1.0368623012315785e-06, "loss": 0.7057, "step": 19600 }, { "epoch": 1.4160781693066267, "grad_norm": 6.632287965125139, "learning_rate": 1.0366251359069626e-06, "loss": 0.6718, "step": 19601 }, { "epoch": 1.4161504145068362, "grad_norm": 6.741379778981665, "learning_rate": 1.036387990614811e-06, "loss": 0.633, "step": 19602 }, { "epoch": 1.4162226597070458, "grad_norm": 5.981174818501183, "learning_rate": 1.036150865358372e-06, "loss": 0.5959, "step": 19603 }, { "epoch": 1.4162949049072553, "grad_norm": 6.291495853488493, "learning_rate": 1.0359137601408906e-06, "loss": 0.5579, "step": 19604 }, { "epoch": 1.4163671501074648, "grad_norm": 7.271503523259102, "learning_rate": 1.0356766749656133e-06, "loss": 0.6575, "step": 19605 }, { "epoch": 1.4164393953076742, "grad_norm": 7.071537952760629, "learning_rate": 1.0354396098357853e-06, "loss": 0.7104, "step": 19606 }, { "epoch": 1.4165116405078837, "grad_norm": 7.331741449220823, "learning_rate": 1.0352025647546518e-06, "loss": 0.6163, "step": 19607 }, { "epoch": 1.4165838857080932, "grad_norm": 6.874685106868788, "learning_rate": 1.0349655397254588e-06, "loss": 0.5518, "step": 19608 }, { "epoch": 1.4166561309083028, "grad_norm": 6.29910519012733, "learning_rate": 1.034728534751449e-06, "loss": 0.6645, "step": 19609 }, { "epoch": 1.4167283761085123, "grad_norm": 6.900067492168781, "learning_rate": 1.034491549835868e-06, "loss": 0.5894, "step": 19610 }, { "epoch": 1.4168006213087219, "grad_norm": 6.568387251836473, "learning_rate": 1.0342545849819596e-06, "loss": 0.5975, "step": 19611 }, { "epoch": 1.4168728665089314, "grad_norm": 8.100276951705784, "learning_rate": 1.0340176401929691e-06, "loss": 0.7088, "step": 19612 }, { "epoch": 1.4169451117091407, "grad_norm": 6.432704287330655, "learning_rate": 1.0337807154721376e-06, "loss": 0.6625, "step": 19613 }, { "epoch": 1.4170173569093503, "grad_norm": 7.291039692399334, "learning_rate": 1.03354381082271e-06, "loss": 0.5896, "step": 19614 }, { "epoch": 1.4170896021095598, "grad_norm": 6.750006499110731, "learning_rate": 1.0333069262479287e-06, "loss": 0.6063, "step": 19615 }, { "epoch": 1.4171618473097694, "grad_norm": 6.270606897602321, "learning_rate": 1.033070061751037e-06, "loss": 0.5786, "step": 19616 }, { "epoch": 1.417234092509979, "grad_norm": 7.87643855443295, "learning_rate": 1.0328332173352768e-06, "loss": 0.6804, "step": 19617 }, { "epoch": 1.4173063377101884, "grad_norm": 6.739596225857871, "learning_rate": 1.032596393003891e-06, "loss": 0.6076, "step": 19618 }, { "epoch": 1.417378582910398, "grad_norm": 6.8172898558741, "learning_rate": 1.032359588760122e-06, "loss": 0.6141, "step": 19619 }, { "epoch": 1.4174508281106073, "grad_norm": 7.727746824520503, "learning_rate": 1.03212280460721e-06, "loss": 0.6596, "step": 19620 }, { "epoch": 1.4175230733108168, "grad_norm": 5.798538010593941, "learning_rate": 1.0318860405483967e-06, "loss": 0.5972, "step": 19621 }, { "epoch": 1.4175953185110264, "grad_norm": 6.148652476487131, "learning_rate": 1.0316492965869247e-06, "loss": 0.6175, "step": 19622 }, { "epoch": 1.417667563711236, "grad_norm": 6.507922699397107, "learning_rate": 1.0314125727260327e-06, "loss": 0.5796, "step": 19623 }, { "epoch": 1.4177398089114455, "grad_norm": 8.10919765506346, "learning_rate": 1.0311758689689624e-06, "loss": 0.6273, "step": 19624 }, { "epoch": 1.417812054111655, "grad_norm": 7.8014208551005915, "learning_rate": 1.030939185318953e-06, "loss": 0.5742, "step": 19625 }, { "epoch": 1.4178842993118645, "grad_norm": 6.797835779795103, "learning_rate": 1.0307025217792473e-06, "loss": 0.5674, "step": 19626 }, { "epoch": 1.4179565445120739, "grad_norm": 6.950114560726791, "learning_rate": 1.0304658783530825e-06, "loss": 0.7024, "step": 19627 }, { "epoch": 1.4180287897122834, "grad_norm": 7.343534166126843, "learning_rate": 1.0302292550436987e-06, "loss": 0.5866, "step": 19628 }, { "epoch": 1.418101034912493, "grad_norm": 6.620146503037503, "learning_rate": 1.029992651854336e-06, "loss": 0.6388, "step": 19629 }, { "epoch": 1.4181732801127025, "grad_norm": 5.644653403468461, "learning_rate": 1.0297560687882319e-06, "loss": 0.6337, "step": 19630 }, { "epoch": 1.418245525312912, "grad_norm": 6.340665922601586, "learning_rate": 1.0295195058486253e-06, "loss": 0.6573, "step": 19631 }, { "epoch": 1.4183177705131216, "grad_norm": 8.087109761794778, "learning_rate": 1.0292829630387551e-06, "loss": 0.5594, "step": 19632 }, { "epoch": 1.4183900157133311, "grad_norm": 7.731846128161148, "learning_rate": 1.0290464403618592e-06, "loss": 0.6698, "step": 19633 }, { "epoch": 1.4184622609135404, "grad_norm": 7.1192877106719195, "learning_rate": 1.0288099378211755e-06, "loss": 0.6223, "step": 19634 }, { "epoch": 1.4185345061137502, "grad_norm": 7.289159579923586, "learning_rate": 1.028573455419941e-06, "loss": 0.5991, "step": 19635 }, { "epoch": 1.4186067513139595, "grad_norm": 6.742151006111402, "learning_rate": 1.028336993161395e-06, "loss": 0.6676, "step": 19636 }, { "epoch": 1.418678996514169, "grad_norm": 7.900077732524639, "learning_rate": 1.0281005510487715e-06, "loss": 0.6669, "step": 19637 }, { "epoch": 1.4187512417143786, "grad_norm": 7.436633147684655, "learning_rate": 1.0278641290853086e-06, "loss": 0.6392, "step": 19638 }, { "epoch": 1.4188234869145882, "grad_norm": 7.503078337575513, "learning_rate": 1.027627727274243e-06, "loss": 0.7185, "step": 19639 }, { "epoch": 1.4188957321147977, "grad_norm": 8.784263881006327, "learning_rate": 1.0273913456188115e-06, "loss": 0.6157, "step": 19640 }, { "epoch": 1.418967977315007, "grad_norm": 9.393304883572245, "learning_rate": 1.0271549841222479e-06, "loss": 0.6044, "step": 19641 }, { "epoch": 1.4190402225152168, "grad_norm": 6.740697596640534, "learning_rate": 1.0269186427877893e-06, "loss": 0.6417, "step": 19642 }, { "epoch": 1.419112467715426, "grad_norm": 7.447284833521648, "learning_rate": 1.0266823216186703e-06, "loss": 0.6544, "step": 19643 }, { "epoch": 1.4191847129156356, "grad_norm": 6.4201048850169995, "learning_rate": 1.0264460206181264e-06, "loss": 0.6641, "step": 19644 }, { "epoch": 1.4192569581158452, "grad_norm": 8.143537296963233, "learning_rate": 1.0262097397893927e-06, "loss": 0.6096, "step": 19645 }, { "epoch": 1.4193292033160547, "grad_norm": 6.465527012609355, "learning_rate": 1.0259734791357032e-06, "loss": 0.6179, "step": 19646 }, { "epoch": 1.4194014485162643, "grad_norm": 7.270055720182655, "learning_rate": 1.0257372386602928e-06, "loss": 0.6431, "step": 19647 }, { "epoch": 1.4194736937164736, "grad_norm": 6.324843924183679, "learning_rate": 1.0255010183663943e-06, "loss": 0.6489, "step": 19648 }, { "epoch": 1.4195459389166833, "grad_norm": 8.461869376054109, "learning_rate": 1.0252648182572422e-06, "loss": 0.5737, "step": 19649 }, { "epoch": 1.4196181841168927, "grad_norm": 8.638850052212353, "learning_rate": 1.0250286383360695e-06, "loss": 0.6218, "step": 19650 }, { "epoch": 1.4196904293171022, "grad_norm": 6.859165614240845, "learning_rate": 1.0247924786061103e-06, "loss": 0.6529, "step": 19651 }, { "epoch": 1.4197626745173118, "grad_norm": 6.887103437302471, "learning_rate": 1.024556339070596e-06, "loss": 0.5654, "step": 19652 }, { "epoch": 1.4198349197175213, "grad_norm": 8.485539421419297, "learning_rate": 1.0243202197327587e-06, "loss": 0.6784, "step": 19653 }, { "epoch": 1.4199071649177308, "grad_norm": 7.079009348210438, "learning_rate": 1.0240841205958335e-06, "loss": 0.5494, "step": 19654 }, { "epoch": 1.4199794101179402, "grad_norm": 7.103296519338778, "learning_rate": 1.02384804166305e-06, "loss": 0.546, "step": 19655 }, { "epoch": 1.42005165531815, "grad_norm": 6.730098915676866, "learning_rate": 1.0236119829376406e-06, "loss": 0.6314, "step": 19656 }, { "epoch": 1.4201239005183592, "grad_norm": 6.569974294115754, "learning_rate": 1.0233759444228367e-06, "loss": 0.5918, "step": 19657 }, { "epoch": 1.4201961457185688, "grad_norm": 7.7141507303447545, "learning_rate": 1.0231399261218709e-06, "loss": 0.5835, "step": 19658 }, { "epoch": 1.4202683909187783, "grad_norm": 7.0174696776166785, "learning_rate": 1.0229039280379713e-06, "loss": 0.5824, "step": 19659 }, { "epoch": 1.4203406361189879, "grad_norm": 7.164777846902092, "learning_rate": 1.0226679501743706e-06, "loss": 0.641, "step": 19660 }, { "epoch": 1.4204128813191974, "grad_norm": 8.704802702410355, "learning_rate": 1.0224319925342982e-06, "loss": 0.6445, "step": 19661 }, { "epoch": 1.420485126519407, "grad_norm": 8.759158763468804, "learning_rate": 1.0221960551209848e-06, "loss": 0.6336, "step": 19662 }, { "epoch": 1.4205573717196165, "grad_norm": 7.2950763977108215, "learning_rate": 1.0219601379376598e-06, "loss": 0.6196, "step": 19663 }, { "epoch": 1.4206296169198258, "grad_norm": 6.391321270234012, "learning_rate": 1.0217242409875531e-06, "loss": 0.5981, "step": 19664 }, { "epoch": 1.4207018621200354, "grad_norm": 8.490421058808531, "learning_rate": 1.0214883642738943e-06, "loss": 0.6362, "step": 19665 }, { "epoch": 1.420774107320245, "grad_norm": 7.125534572959383, "learning_rate": 1.021252507799911e-06, "loss": 0.6651, "step": 19666 }, { "epoch": 1.4208463525204544, "grad_norm": 8.104013894282911, "learning_rate": 1.0210166715688327e-06, "loss": 0.6157, "step": 19667 }, { "epoch": 1.420918597720664, "grad_norm": 7.433433895827862, "learning_rate": 1.020780855583888e-06, "loss": 0.6351, "step": 19668 }, { "epoch": 1.4209908429208735, "grad_norm": 6.2083693467019465, "learning_rate": 1.0205450598483057e-06, "loss": 0.6356, "step": 19669 }, { "epoch": 1.421063088121083, "grad_norm": 5.4016736015566424, "learning_rate": 1.0203092843653118e-06, "loss": 0.6528, "step": 19670 }, { "epoch": 1.4211353333212924, "grad_norm": 6.118414627874916, "learning_rate": 1.020073529138135e-06, "loss": 0.6617, "step": 19671 }, { "epoch": 1.421207578521502, "grad_norm": 7.1226422608382425, "learning_rate": 1.0198377941700025e-06, "loss": 0.5945, "step": 19672 }, { "epoch": 1.4212798237217115, "grad_norm": 6.156137223348365, "learning_rate": 1.0196020794641412e-06, "loss": 0.5603, "step": 19673 }, { "epoch": 1.421352068921921, "grad_norm": 7.270337223590565, "learning_rate": 1.0193663850237782e-06, "loss": 0.6363, "step": 19674 }, { "epoch": 1.4214243141221305, "grad_norm": 7.810670928465954, "learning_rate": 1.0191307108521394e-06, "loss": 0.6524, "step": 19675 }, { "epoch": 1.42149655932234, "grad_norm": 7.738575790630598, "learning_rate": 1.0188950569524525e-06, "loss": 0.6156, "step": 19676 }, { "epoch": 1.4215688045225496, "grad_norm": 7.232016259217341, "learning_rate": 1.0186594233279413e-06, "loss": 0.6628, "step": 19677 }, { "epoch": 1.421641049722759, "grad_norm": 7.703746716400986, "learning_rate": 1.0184238099818322e-06, "loss": 0.5833, "step": 19678 }, { "epoch": 1.4217132949229685, "grad_norm": 7.349509719793415, "learning_rate": 1.018188216917352e-06, "loss": 0.6706, "step": 19679 }, { "epoch": 1.421785540123178, "grad_norm": 7.784344283892882, "learning_rate": 1.0179526441377235e-06, "loss": 0.6258, "step": 19680 }, { "epoch": 1.4218577853233876, "grad_norm": 7.108771061658976, "learning_rate": 1.0177170916461719e-06, "loss": 0.5553, "step": 19681 }, { "epoch": 1.4219300305235971, "grad_norm": 9.28255117010329, "learning_rate": 1.0174815594459232e-06, "loss": 0.696, "step": 19682 }, { "epoch": 1.4220022757238067, "grad_norm": 7.210195517673302, "learning_rate": 1.0172460475402012e-06, "loss": 0.6498, "step": 19683 }, { "epoch": 1.4220745209240162, "grad_norm": 7.599375960177971, "learning_rate": 1.0170105559322295e-06, "loss": 0.6067, "step": 19684 }, { "epoch": 1.4221467661242255, "grad_norm": 6.878358055509681, "learning_rate": 1.0167750846252312e-06, "loss": 0.5621, "step": 19685 }, { "epoch": 1.422219011324435, "grad_norm": 7.084179038630178, "learning_rate": 1.0165396336224314e-06, "loss": 0.6035, "step": 19686 }, { "epoch": 1.4222912565246446, "grad_norm": 6.470644466135417, "learning_rate": 1.0163042029270514e-06, "loss": 0.5941, "step": 19687 }, { "epoch": 1.4223635017248542, "grad_norm": 7.835114242801691, "learning_rate": 1.0160687925423145e-06, "loss": 0.6453, "step": 19688 }, { "epoch": 1.4224357469250637, "grad_norm": 7.31396130689651, "learning_rate": 1.0158334024714443e-06, "loss": 0.6876, "step": 19689 }, { "epoch": 1.4225079921252732, "grad_norm": 6.739843568882575, "learning_rate": 1.0155980327176617e-06, "loss": 0.6338, "step": 19690 }, { "epoch": 1.4225802373254828, "grad_norm": 8.00381521803406, "learning_rate": 1.01536268328419e-06, "loss": 0.674, "step": 19691 }, { "epoch": 1.422652482525692, "grad_norm": 6.484768260382875, "learning_rate": 1.01512735417425e-06, "loss": 0.665, "step": 19692 }, { "epoch": 1.4227247277259016, "grad_norm": 7.077585768211961, "learning_rate": 1.0148920453910642e-06, "loss": 0.6329, "step": 19693 }, { "epoch": 1.4227969729261112, "grad_norm": 6.682575444781399, "learning_rate": 1.0146567569378536e-06, "loss": 0.5886, "step": 19694 }, { "epoch": 1.4228692181263207, "grad_norm": 7.210090761076381, "learning_rate": 1.014421488817838e-06, "loss": 0.6241, "step": 19695 }, { "epoch": 1.4229414633265303, "grad_norm": 6.403524274837078, "learning_rate": 1.0141862410342385e-06, "loss": 0.6009, "step": 19696 }, { "epoch": 1.4230137085267398, "grad_norm": 8.06483934160855, "learning_rate": 1.013951013590277e-06, "loss": 0.6725, "step": 19697 }, { "epoch": 1.4230859537269493, "grad_norm": 6.021854969539695, "learning_rate": 1.013715806489171e-06, "loss": 0.6167, "step": 19698 }, { "epoch": 1.4231581989271587, "grad_norm": 6.957817226008248, "learning_rate": 1.0134806197341419e-06, "loss": 0.6274, "step": 19699 }, { "epoch": 1.4232304441273682, "grad_norm": 7.723762892147281, "learning_rate": 1.0132454533284086e-06, "loss": 0.5643, "step": 19700 }, { "epoch": 1.4233026893275778, "grad_norm": 6.199445502803966, "learning_rate": 1.0130103072751907e-06, "loss": 0.6702, "step": 19701 }, { "epoch": 1.4233749345277873, "grad_norm": 7.334238979019377, "learning_rate": 1.0127751815777073e-06, "loss": 0.6127, "step": 19702 }, { "epoch": 1.4234471797279968, "grad_norm": 6.25179753203763, "learning_rate": 1.0125400762391768e-06, "loss": 0.6459, "step": 19703 }, { "epoch": 1.4235194249282064, "grad_norm": 7.026432268743939, "learning_rate": 1.0123049912628185e-06, "loss": 0.5751, "step": 19704 }, { "epoch": 1.423591670128416, "grad_norm": 7.378477149712407, "learning_rate": 1.012069926651849e-06, "loss": 0.6487, "step": 19705 }, { "epoch": 1.4236639153286252, "grad_norm": 6.882604658645376, "learning_rate": 1.011834882409487e-06, "loss": 0.5711, "step": 19706 }, { "epoch": 1.423736160528835, "grad_norm": 6.62743030812197, "learning_rate": 1.0115998585389498e-06, "loss": 0.6328, "step": 19707 }, { "epoch": 1.4238084057290443, "grad_norm": 7.049788927746791, "learning_rate": 1.0113648550434558e-06, "loss": 0.6282, "step": 19708 }, { "epoch": 1.4238806509292539, "grad_norm": 7.173154363609494, "learning_rate": 1.0111298719262203e-06, "loss": 0.599, "step": 19709 }, { "epoch": 1.4239528961294634, "grad_norm": 5.651100301365113, "learning_rate": 1.0108949091904596e-06, "loss": 0.5786, "step": 19710 }, { "epoch": 1.424025141329673, "grad_norm": 6.473063776681211, "learning_rate": 1.0106599668393935e-06, "loss": 0.6332, "step": 19711 }, { "epoch": 1.4240973865298825, "grad_norm": 6.523227564922868, "learning_rate": 1.0104250448762351e-06, "loss": 0.6026, "step": 19712 }, { "epoch": 1.4241696317300918, "grad_norm": 6.314548575484886, "learning_rate": 1.010190143304201e-06, "loss": 0.593, "step": 19713 }, { "epoch": 1.4242418769303016, "grad_norm": 8.150290698150311, "learning_rate": 1.0099552621265073e-06, "loss": 0.6176, "step": 19714 }, { "epoch": 1.424314122130511, "grad_norm": 8.614598733564554, "learning_rate": 1.0097204013463702e-06, "loss": 0.7113, "step": 19715 }, { "epoch": 1.4243863673307204, "grad_norm": 7.477161475157931, "learning_rate": 1.0094855609670026e-06, "loss": 0.6524, "step": 19716 }, { "epoch": 1.42445861253093, "grad_norm": 7.91462908841455, "learning_rate": 1.0092507409916203e-06, "loss": 0.6335, "step": 19717 }, { "epoch": 1.4245308577311395, "grad_norm": 6.63430133191112, "learning_rate": 1.009015941423438e-06, "loss": 0.6203, "step": 19718 }, { "epoch": 1.424603102931349, "grad_norm": 6.77333668916474, "learning_rate": 1.0087811622656696e-06, "loss": 0.6522, "step": 19719 }, { "epoch": 1.4246753481315584, "grad_norm": 7.817291255417153, "learning_rate": 1.0085464035215295e-06, "loss": 0.6802, "step": 19720 }, { "epoch": 1.4247475933317681, "grad_norm": 7.076764041172603, "learning_rate": 1.0083116651942306e-06, "loss": 0.6462, "step": 19721 }, { "epoch": 1.4248198385319775, "grad_norm": 6.667392055465845, "learning_rate": 1.0080769472869884e-06, "loss": 0.5888, "step": 19722 }, { "epoch": 1.424892083732187, "grad_norm": 7.62035015473127, "learning_rate": 1.007842249803013e-06, "loss": 0.6032, "step": 19723 }, { "epoch": 1.4249643289323966, "grad_norm": 7.87022448867866, "learning_rate": 1.0076075727455186e-06, "loss": 0.6069, "step": 19724 }, { "epoch": 1.425036574132606, "grad_norm": 6.978658841764392, "learning_rate": 1.0073729161177178e-06, "loss": 0.5966, "step": 19725 }, { "epoch": 1.4251088193328156, "grad_norm": 6.7459673668134, "learning_rate": 1.0071382799228239e-06, "loss": 0.6101, "step": 19726 }, { "epoch": 1.425181064533025, "grad_norm": 7.4047888751072515, "learning_rate": 1.0069036641640468e-06, "loss": 0.6578, "step": 19727 }, { "epoch": 1.4252533097332347, "grad_norm": 5.9465131913182105, "learning_rate": 1.0066690688445993e-06, "loss": 0.5515, "step": 19728 }, { "epoch": 1.425325554933444, "grad_norm": 6.7719747510463, "learning_rate": 1.0064344939676927e-06, "loss": 0.6317, "step": 19729 }, { "epoch": 1.4253978001336536, "grad_norm": 8.078903662453651, "learning_rate": 1.0061999395365383e-06, "loss": 0.6419, "step": 19730 }, { "epoch": 1.4254700453338631, "grad_norm": 6.962842139119404, "learning_rate": 1.0059654055543466e-06, "loss": 0.6467, "step": 19731 }, { "epoch": 1.4255422905340727, "grad_norm": 7.865950213420485, "learning_rate": 1.0057308920243287e-06, "loss": 0.6663, "step": 19732 }, { "epoch": 1.4256145357342822, "grad_norm": 7.229257310048025, "learning_rate": 1.0054963989496955e-06, "loss": 0.7073, "step": 19733 }, { "epoch": 1.4256867809344915, "grad_norm": 6.726101140627836, "learning_rate": 1.0052619263336553e-06, "loss": 0.5975, "step": 19734 }, { "epoch": 1.4257590261347013, "grad_norm": 9.170507377806652, "learning_rate": 1.0050274741794186e-06, "loss": 0.6341, "step": 19735 }, { "epoch": 1.4258312713349106, "grad_norm": 9.409163825314211, "learning_rate": 1.004793042490196e-06, "loss": 0.7347, "step": 19736 }, { "epoch": 1.4259035165351202, "grad_norm": 7.723968595025073, "learning_rate": 1.004558631269195e-06, "loss": 0.6404, "step": 19737 }, { "epoch": 1.4259757617353297, "grad_norm": 6.8986752772582856, "learning_rate": 1.0043242405196237e-06, "loss": 0.5744, "step": 19738 }, { "epoch": 1.4260480069355392, "grad_norm": 7.431116522255184, "learning_rate": 1.0040898702446938e-06, "loss": 0.6422, "step": 19739 }, { "epoch": 1.4261202521357488, "grad_norm": 6.484885616190784, "learning_rate": 1.0038555204476127e-06, "loss": 0.5668, "step": 19740 }, { "epoch": 1.4261924973359583, "grad_norm": 7.162435864865427, "learning_rate": 1.003621191131587e-06, "loss": 0.673, "step": 19741 }, { "epoch": 1.4262647425361679, "grad_norm": 6.906601288435715, "learning_rate": 1.0033868822998252e-06, "loss": 0.5614, "step": 19742 }, { "epoch": 1.4263369877363772, "grad_norm": 6.82860795676522, "learning_rate": 1.0031525939555358e-06, "loss": 0.6393, "step": 19743 }, { "epoch": 1.4264092329365867, "grad_norm": 5.812508859935284, "learning_rate": 1.0029183261019245e-06, "loss": 0.6158, "step": 19744 }, { "epoch": 1.4264814781367963, "grad_norm": 6.332397224444649, "learning_rate": 1.0026840787421989e-06, "loss": 0.6818, "step": 19745 }, { "epoch": 1.4265537233370058, "grad_norm": 6.007179097888736, "learning_rate": 1.0024498518795656e-06, "loss": 0.6061, "step": 19746 }, { "epoch": 1.4266259685372153, "grad_norm": 7.389964035834527, "learning_rate": 1.0022156455172312e-06, "loss": 0.5995, "step": 19747 }, { "epoch": 1.426698213737425, "grad_norm": 6.8274724373528235, "learning_rate": 1.0019814596584013e-06, "loss": 0.6008, "step": 19748 }, { "epoch": 1.4267704589376344, "grad_norm": 8.213743592374279, "learning_rate": 1.0017472943062824e-06, "loss": 0.6108, "step": 19749 }, { "epoch": 1.4268427041378438, "grad_norm": 6.408793618976567, "learning_rate": 1.0015131494640794e-06, "loss": 0.6119, "step": 19750 }, { "epoch": 1.4269149493380533, "grad_norm": 7.4565232564987705, "learning_rate": 1.0012790251349991e-06, "loss": 0.6695, "step": 19751 }, { "epoch": 1.4269871945382628, "grad_norm": 8.02997884821214, "learning_rate": 1.0010449213222442e-06, "loss": 0.7106, "step": 19752 }, { "epoch": 1.4270594397384724, "grad_norm": 6.328593973748855, "learning_rate": 1.0008108380290206e-06, "loss": 0.6394, "step": 19753 }, { "epoch": 1.427131684938682, "grad_norm": 8.492618721802259, "learning_rate": 1.0005767752585332e-06, "loss": 0.6447, "step": 19754 }, { "epoch": 1.4272039301388915, "grad_norm": 7.385673945312292, "learning_rate": 1.0003427330139848e-06, "loss": 0.6325, "step": 19755 }, { "epoch": 1.427276175339101, "grad_norm": 6.954944295433853, "learning_rate": 1.0001087112985799e-06, "loss": 0.6306, "step": 19756 }, { "epoch": 1.4273484205393103, "grad_norm": 6.810876215330642, "learning_rate": 9.998747101155223e-07, "loss": 0.5835, "step": 19757 }, { "epoch": 1.4274206657395199, "grad_norm": 7.791831302093443, "learning_rate": 9.996407294680149e-07, "loss": 0.6785, "step": 19758 }, { "epoch": 1.4274929109397294, "grad_norm": 6.987569944410384, "learning_rate": 9.994067693592612e-07, "loss": 0.6964, "step": 19759 }, { "epoch": 1.427565156139939, "grad_norm": 6.652241104059817, "learning_rate": 9.991728297924638e-07, "loss": 0.6563, "step": 19760 }, { "epoch": 1.4276374013401485, "grad_norm": 6.79062437082947, "learning_rate": 9.989389107708258e-07, "loss": 0.6138, "step": 19761 }, { "epoch": 1.427709646540358, "grad_norm": 7.237377886491419, "learning_rate": 9.987050122975475e-07, "loss": 0.5907, "step": 19762 }, { "epoch": 1.4277818917405676, "grad_norm": 7.652812824603641, "learning_rate": 9.98471134375832e-07, "loss": 0.6398, "step": 19763 }, { "epoch": 1.427854136940777, "grad_norm": 6.631563785761915, "learning_rate": 9.98237277008881e-07, "loss": 0.6475, "step": 19764 }, { "epoch": 1.4279263821409864, "grad_norm": 5.874739945520162, "learning_rate": 9.980034401998964e-07, "loss": 0.6096, "step": 19765 }, { "epoch": 1.427998627341196, "grad_norm": 6.8216561198888925, "learning_rate": 9.977696239520767e-07, "loss": 0.6398, "step": 19766 }, { "epoch": 1.4280708725414055, "grad_norm": 6.8872612941244595, "learning_rate": 9.975358282686254e-07, "loss": 0.7253, "step": 19767 }, { "epoch": 1.428143117741615, "grad_norm": 6.816777277688724, "learning_rate": 9.97302053152743e-07, "loss": 0.6322, "step": 19768 }, { "epoch": 1.4282153629418246, "grad_norm": 6.6513714172143725, "learning_rate": 9.970682986076277e-07, "loss": 0.6349, "step": 19769 }, { "epoch": 1.4282876081420341, "grad_norm": 7.228409022957529, "learning_rate": 9.968345646364804e-07, "loss": 0.5967, "step": 19770 }, { "epoch": 1.4283598533422435, "grad_norm": 7.485956140477862, "learning_rate": 9.966008512425008e-07, "loss": 0.6226, "step": 19771 }, { "epoch": 1.428432098542453, "grad_norm": 6.571998624777113, "learning_rate": 9.963671584288892e-07, "loss": 0.6722, "step": 19772 }, { "epoch": 1.4285043437426626, "grad_norm": 6.299920569025089, "learning_rate": 9.961334861988425e-07, "loss": 0.5882, "step": 19773 }, { "epoch": 1.428576588942872, "grad_norm": 6.597599298063102, "learning_rate": 9.958998345555612e-07, "loss": 0.5289, "step": 19774 }, { "epoch": 1.4286488341430816, "grad_norm": 5.8793254107215605, "learning_rate": 9.95666203502243e-07, "loss": 0.6076, "step": 19775 }, { "epoch": 1.4287210793432912, "grad_norm": 6.869153120101658, "learning_rate": 9.954325930420863e-07, "loss": 0.5849, "step": 19776 }, { "epoch": 1.4287933245435007, "grad_norm": 7.479974842947204, "learning_rate": 9.951990031782895e-07, "loss": 0.5533, "step": 19777 }, { "epoch": 1.42886556974371, "grad_norm": 6.380743991916684, "learning_rate": 9.949654339140498e-07, "loss": 0.6442, "step": 19778 }, { "epoch": 1.4289378149439196, "grad_norm": 6.917617763100056, "learning_rate": 9.947318852525657e-07, "loss": 0.6328, "step": 19779 }, { "epoch": 1.4290100601441291, "grad_norm": 6.407260424317053, "learning_rate": 9.944983571970324e-07, "loss": 0.6013, "step": 19780 }, { "epoch": 1.4290823053443387, "grad_norm": 7.291359101121265, "learning_rate": 9.942648497506476e-07, "loss": 0.6506, "step": 19781 }, { "epoch": 1.4291545505445482, "grad_norm": 7.391978746755193, "learning_rate": 9.94031362916608e-07, "loss": 0.6238, "step": 19782 }, { "epoch": 1.4292267957447577, "grad_norm": 6.6244872272629465, "learning_rate": 9.937978966981105e-07, "loss": 0.596, "step": 19783 }, { "epoch": 1.4292990409449673, "grad_norm": 6.650941836554611, "learning_rate": 9.935644510983492e-07, "loss": 0.6273, "step": 19784 }, { "epoch": 1.4293712861451766, "grad_norm": 7.864998172825276, "learning_rate": 9.933310261205211e-07, "loss": 0.6346, "step": 19785 }, { "epoch": 1.4294435313453864, "grad_norm": 6.264528706596489, "learning_rate": 9.930976217678214e-07, "loss": 0.5693, "step": 19786 }, { "epoch": 1.4295157765455957, "grad_norm": 8.62393491498914, "learning_rate": 9.928642380434453e-07, "loss": 0.6102, "step": 19787 }, { "epoch": 1.4295880217458052, "grad_norm": 8.902655696048388, "learning_rate": 9.926308749505876e-07, "loss": 0.5609, "step": 19788 }, { "epoch": 1.4296602669460148, "grad_norm": 7.052380088373617, "learning_rate": 9.923975324924427e-07, "loss": 0.6309, "step": 19789 }, { "epoch": 1.4297325121462243, "grad_norm": 6.433275632978354, "learning_rate": 9.92164210672206e-07, "loss": 0.6548, "step": 19790 }, { "epoch": 1.4298047573464339, "grad_norm": 5.661439990938356, "learning_rate": 9.919309094930695e-07, "loss": 0.5913, "step": 19791 }, { "epoch": 1.4298770025466432, "grad_norm": 7.8017257248565635, "learning_rate": 9.916976289582279e-07, "loss": 0.6796, "step": 19792 }, { "epoch": 1.429949247746853, "grad_norm": 6.785136853653191, "learning_rate": 9.914643690708755e-07, "loss": 0.6686, "step": 19793 }, { "epoch": 1.4300214929470623, "grad_norm": 7.589706034841699, "learning_rate": 9.912311298342028e-07, "loss": 0.6395, "step": 19794 }, { "epoch": 1.4300937381472718, "grad_norm": 10.208420692121958, "learning_rate": 9.909979112514056e-07, "loss": 0.7103, "step": 19795 }, { "epoch": 1.4301659833474814, "grad_norm": 6.679245747910937, "learning_rate": 9.907647133256752e-07, "loss": 0.6124, "step": 19796 }, { "epoch": 1.430238228547691, "grad_norm": 6.399244991114094, "learning_rate": 9.905315360602047e-07, "loss": 0.6097, "step": 19797 }, { "epoch": 1.4303104737479004, "grad_norm": 6.611924421502719, "learning_rate": 9.902983794581847e-07, "loss": 0.5927, "step": 19798 }, { "epoch": 1.4303827189481098, "grad_norm": 6.387333626269375, "learning_rate": 9.900652435228079e-07, "loss": 0.5487, "step": 19799 }, { "epoch": 1.4304549641483195, "grad_norm": 6.55484143191197, "learning_rate": 9.898321282572652e-07, "loss": 0.5886, "step": 19800 }, { "epoch": 1.4305272093485288, "grad_norm": 7.481926312387348, "learning_rate": 9.89599033664749e-07, "loss": 0.6396, "step": 19801 }, { "epoch": 1.4305994545487384, "grad_norm": 6.862591452067381, "learning_rate": 9.893659597484488e-07, "loss": 0.5815, "step": 19802 }, { "epoch": 1.430671699748948, "grad_norm": 6.413943253592343, "learning_rate": 9.89132906511555e-07, "loss": 0.6318, "step": 19803 }, { "epoch": 1.4307439449491575, "grad_norm": 7.497385713800772, "learning_rate": 9.88899873957259e-07, "loss": 0.5931, "step": 19804 }, { "epoch": 1.430816190149367, "grad_norm": 5.160490199912558, "learning_rate": 9.886668620887504e-07, "loss": 0.5453, "step": 19805 }, { "epoch": 1.4308884353495763, "grad_norm": 6.286371524930967, "learning_rate": 9.884338709092191e-07, "loss": 0.592, "step": 19806 }, { "epoch": 1.430960680549786, "grad_norm": 6.820598023895299, "learning_rate": 9.882009004218544e-07, "loss": 0.6416, "step": 19807 }, { "epoch": 1.4310329257499954, "grad_norm": 7.136548771580043, "learning_rate": 9.879679506298464e-07, "loss": 0.6311, "step": 19808 }, { "epoch": 1.431105170950205, "grad_norm": 7.038690182474526, "learning_rate": 9.87735021536382e-07, "loss": 0.6881, "step": 19809 }, { "epoch": 1.4311774161504145, "grad_norm": 6.76697440488994, "learning_rate": 9.875021131446514e-07, "loss": 0.6146, "step": 19810 }, { "epoch": 1.431249661350624, "grad_norm": 6.855676301167283, "learning_rate": 9.87269225457843e-07, "loss": 0.6382, "step": 19811 }, { "epoch": 1.4313219065508336, "grad_norm": 6.596122134214364, "learning_rate": 9.870363584791437e-07, "loss": 0.5792, "step": 19812 }, { "epoch": 1.4313941517510431, "grad_norm": 7.585774570212203, "learning_rate": 9.86803512211742e-07, "loss": 0.6411, "step": 19813 }, { "epoch": 1.4314663969512527, "grad_norm": 6.89994503496917, "learning_rate": 9.865706866588252e-07, "loss": 0.6296, "step": 19814 }, { "epoch": 1.431538642151462, "grad_norm": 7.321005284637182, "learning_rate": 9.863378818235802e-07, "loss": 0.6287, "step": 19815 }, { "epoch": 1.4316108873516715, "grad_norm": 7.661838033686392, "learning_rate": 9.861050977091946e-07, "loss": 0.6185, "step": 19816 }, { "epoch": 1.431683132551881, "grad_norm": 7.113835203752779, "learning_rate": 9.85872334318855e-07, "loss": 0.6834, "step": 19817 }, { "epoch": 1.4317553777520906, "grad_norm": 6.141645895078705, "learning_rate": 9.856395916557482e-07, "loss": 0.653, "step": 19818 }, { "epoch": 1.4318276229523001, "grad_norm": 6.118568625017045, "learning_rate": 9.854068697230584e-07, "loss": 0.5798, "step": 19819 }, { "epoch": 1.4318998681525097, "grad_norm": 6.987780668732842, "learning_rate": 9.851741685239729e-07, "loss": 0.6495, "step": 19820 }, { "epoch": 1.4319721133527192, "grad_norm": 7.452866779708597, "learning_rate": 9.849414880616767e-07, "loss": 0.6171, "step": 19821 }, { "epoch": 1.4320443585529286, "grad_norm": 6.526816291431804, "learning_rate": 9.84708828339356e-07, "loss": 0.5827, "step": 19822 }, { "epoch": 1.432116603753138, "grad_norm": 7.881353086301061, "learning_rate": 9.844761893601933e-07, "loss": 0.6488, "step": 19823 }, { "epoch": 1.4321888489533476, "grad_norm": 7.21556384299007, "learning_rate": 9.842435711273758e-07, "loss": 0.6227, "step": 19824 }, { "epoch": 1.4322610941535572, "grad_norm": 8.163319998382772, "learning_rate": 9.840109736440867e-07, "loss": 0.6932, "step": 19825 }, { "epoch": 1.4323333393537667, "grad_norm": 7.672942978745387, "learning_rate": 9.837783969135113e-07, "loss": 0.7145, "step": 19826 }, { "epoch": 1.4324055845539763, "grad_norm": 6.695938633025701, "learning_rate": 9.835458409388312e-07, "loss": 0.6393, "step": 19827 }, { "epoch": 1.4324778297541858, "grad_norm": 7.259263007638069, "learning_rate": 9.833133057232313e-07, "loss": 0.6517, "step": 19828 }, { "epoch": 1.4325500749543951, "grad_norm": 6.90565066951939, "learning_rate": 9.830807912698957e-07, "loss": 0.6033, "step": 19829 }, { "epoch": 1.4326223201546047, "grad_norm": 7.295157187469821, "learning_rate": 9.82848297582005e-07, "loss": 0.6811, "step": 19830 }, { "epoch": 1.4326945653548142, "grad_norm": 7.8042102014807675, "learning_rate": 9.826158246627433e-07, "loss": 0.631, "step": 19831 }, { "epoch": 1.4327668105550238, "grad_norm": 6.543645895464134, "learning_rate": 9.823833725152926e-07, "loss": 0.6925, "step": 19832 }, { "epoch": 1.4328390557552333, "grad_norm": 6.785903669883869, "learning_rate": 9.821509411428353e-07, "loss": 0.6982, "step": 19833 }, { "epoch": 1.4329113009554428, "grad_norm": 6.435019894964116, "learning_rate": 9.81918530548553e-07, "loss": 0.6589, "step": 19834 }, { "epoch": 1.4329835461556524, "grad_norm": 8.031337396168446, "learning_rate": 9.816861407356275e-07, "loss": 0.6841, "step": 19835 }, { "epoch": 1.4330557913558617, "grad_norm": 7.681502320853222, "learning_rate": 9.814537717072405e-07, "loss": 0.6445, "step": 19836 }, { "epoch": 1.4331280365560712, "grad_norm": 6.653324540972713, "learning_rate": 9.812214234665717e-07, "loss": 0.5502, "step": 19837 }, { "epoch": 1.4332002817562808, "grad_norm": 7.959555194254316, "learning_rate": 9.809890960168022e-07, "loss": 0.631, "step": 19838 }, { "epoch": 1.4332725269564903, "grad_norm": 6.19857844391291, "learning_rate": 9.807567893611124e-07, "loss": 0.6253, "step": 19839 }, { "epoch": 1.4333447721566999, "grad_norm": 6.631079421514539, "learning_rate": 9.80524503502684e-07, "loss": 0.579, "step": 19840 }, { "epoch": 1.4334170173569094, "grad_norm": 6.339455038530274, "learning_rate": 9.80292238444694e-07, "loss": 0.6283, "step": 19841 }, { "epoch": 1.433489262557119, "grad_norm": 5.9623221892708775, "learning_rate": 9.80059994190323e-07, "loss": 0.5813, "step": 19842 }, { "epoch": 1.4335615077573283, "grad_norm": 8.246502221497316, "learning_rate": 9.798277707427508e-07, "loss": 0.6328, "step": 19843 }, { "epoch": 1.4336337529575378, "grad_norm": 6.368758493404696, "learning_rate": 9.795955681051563e-07, "loss": 0.6152, "step": 19844 }, { "epoch": 1.4337059981577474, "grad_norm": 7.318069814711273, "learning_rate": 9.793633862807178e-07, "loss": 0.5857, "step": 19845 }, { "epoch": 1.433778243357957, "grad_norm": 6.439343234863841, "learning_rate": 9.791312252726137e-07, "loss": 0.619, "step": 19846 }, { "epoch": 1.4338504885581664, "grad_norm": 7.483923147813121, "learning_rate": 9.788990850840232e-07, "loss": 0.6506, "step": 19847 }, { "epoch": 1.433922733758376, "grad_norm": 6.825729822954231, "learning_rate": 9.786669657181225e-07, "loss": 0.6355, "step": 19848 }, { "epoch": 1.4339949789585855, "grad_norm": 7.133038453516545, "learning_rate": 9.784348671780893e-07, "loss": 0.6078, "step": 19849 }, { "epoch": 1.4340672241587948, "grad_norm": 7.215508331769831, "learning_rate": 9.78202789467102e-07, "loss": 0.5401, "step": 19850 }, { "epoch": 1.4341394693590044, "grad_norm": 6.1775117653735085, "learning_rate": 9.779707325883365e-07, "loss": 0.5882, "step": 19851 }, { "epoch": 1.434211714559214, "grad_norm": 9.579664196906293, "learning_rate": 9.777386965449701e-07, "loss": 0.6144, "step": 19852 }, { "epoch": 1.4342839597594235, "grad_norm": 8.432527108034362, "learning_rate": 9.775066813401788e-07, "loss": 0.6998, "step": 19853 }, { "epoch": 1.434356204959633, "grad_norm": 6.274828308917821, "learning_rate": 9.7727468697714e-07, "loss": 0.5855, "step": 19854 }, { "epoch": 1.4344284501598425, "grad_norm": 6.397806995892378, "learning_rate": 9.77042713459027e-07, "loss": 0.5605, "step": 19855 }, { "epoch": 1.434500695360052, "grad_norm": 8.096241924546305, "learning_rate": 9.768107607890173e-07, "loss": 0.6236, "step": 19856 }, { "epoch": 1.4345729405602614, "grad_norm": 7.0360422037234915, "learning_rate": 9.765788289702855e-07, "loss": 0.6409, "step": 19857 }, { "epoch": 1.4346451857604712, "grad_norm": 7.772600506525033, "learning_rate": 9.763469180060072e-07, "loss": 0.638, "step": 19858 }, { "epoch": 1.4347174309606805, "grad_norm": 6.48957957782504, "learning_rate": 9.761150278993561e-07, "loss": 0.5734, "step": 19859 }, { "epoch": 1.43478967616089, "grad_norm": 7.320302859435354, "learning_rate": 9.75883158653507e-07, "loss": 0.6958, "step": 19860 }, { "epoch": 1.4348619213610996, "grad_norm": 6.211003832192959, "learning_rate": 9.75651310271634e-07, "loss": 0.6086, "step": 19861 }, { "epoch": 1.4349341665613091, "grad_norm": 7.2569499411757326, "learning_rate": 9.754194827569107e-07, "loss": 0.6699, "step": 19862 }, { "epoch": 1.4350064117615187, "grad_norm": 6.697467826489742, "learning_rate": 9.751876761125111e-07, "loss": 0.584, "step": 19863 }, { "epoch": 1.435078656961728, "grad_norm": 6.524891655851879, "learning_rate": 9.749558903416085e-07, "loss": 0.5938, "step": 19864 }, { "epoch": 1.4351509021619377, "grad_norm": 7.069522573276253, "learning_rate": 9.747241254473762e-07, "loss": 0.5945, "step": 19865 }, { "epoch": 1.435223147362147, "grad_norm": 7.081626985755523, "learning_rate": 9.744923814329856e-07, "loss": 0.6289, "step": 19866 }, { "epoch": 1.4352953925623566, "grad_norm": 6.545943819890609, "learning_rate": 9.742606583016102e-07, "loss": 0.6244, "step": 19867 }, { "epoch": 1.4353676377625662, "grad_norm": 7.4120216253863775, "learning_rate": 9.74028956056422e-07, "loss": 0.5605, "step": 19868 }, { "epoch": 1.4354398829627757, "grad_norm": 6.566030660640766, "learning_rate": 9.73797274700592e-07, "loss": 0.5853, "step": 19869 }, { "epoch": 1.4355121281629852, "grad_norm": 6.954646460558396, "learning_rate": 9.735656142372921e-07, "loss": 0.5968, "step": 19870 }, { "epoch": 1.4355843733631946, "grad_norm": 6.05197047151874, "learning_rate": 9.73333974669693e-07, "loss": 0.6214, "step": 19871 }, { "epoch": 1.4356566185634043, "grad_norm": 8.51633857747056, "learning_rate": 9.73102356000968e-07, "loss": 0.6218, "step": 19872 }, { "epoch": 1.4357288637636136, "grad_norm": 6.838286147872546, "learning_rate": 9.728707582342856e-07, "loss": 0.691, "step": 19873 }, { "epoch": 1.4358011089638232, "grad_norm": 8.608303550331415, "learning_rate": 9.726391813728164e-07, "loss": 0.6158, "step": 19874 }, { "epoch": 1.4358733541640327, "grad_norm": 7.545576090423908, "learning_rate": 9.72407625419732e-07, "loss": 0.6629, "step": 19875 }, { "epoch": 1.4359455993642423, "grad_norm": 5.8830271323312875, "learning_rate": 9.721760903782e-07, "loss": 0.6023, "step": 19876 }, { "epoch": 1.4360178445644518, "grad_norm": 6.835727396324786, "learning_rate": 9.719445762513916e-07, "loss": 0.6121, "step": 19877 }, { "epoch": 1.4360900897646611, "grad_norm": 6.239872776592998, "learning_rate": 9.717130830424752e-07, "loss": 0.5728, "step": 19878 }, { "epoch": 1.436162334964871, "grad_norm": 7.133452905858944, "learning_rate": 9.714816107546199e-07, "loss": 0.6039, "step": 19879 }, { "epoch": 1.4362345801650802, "grad_norm": 6.573789640351728, "learning_rate": 9.712501593909945e-07, "loss": 0.6652, "step": 19880 }, { "epoch": 1.4363068253652898, "grad_norm": 6.458419438270094, "learning_rate": 9.710187289547676e-07, "loss": 0.712, "step": 19881 }, { "epoch": 1.4363790705654993, "grad_norm": 6.141066363054502, "learning_rate": 9.707873194491072e-07, "loss": 0.6091, "step": 19882 }, { "epoch": 1.4364513157657088, "grad_norm": 6.290975301116498, "learning_rate": 9.705559308771817e-07, "loss": 0.6241, "step": 19883 }, { "epoch": 1.4365235609659184, "grad_norm": 6.0453812678831325, "learning_rate": 9.70324563242157e-07, "loss": 0.6048, "step": 19884 }, { "epoch": 1.436595806166128, "grad_norm": 7.521399676555051, "learning_rate": 9.700932165472015e-07, "loss": 0.6194, "step": 19885 }, { "epoch": 1.4366680513663375, "grad_norm": 6.121553346558033, "learning_rate": 9.69861890795483e-07, "loss": 0.6122, "step": 19886 }, { "epoch": 1.4367402965665468, "grad_norm": 8.261429239165004, "learning_rate": 9.696305859901661e-07, "loss": 0.6472, "step": 19887 }, { "epoch": 1.4368125417667563, "grad_norm": 7.325595964267307, "learning_rate": 9.69399302134418e-07, "loss": 0.6349, "step": 19888 }, { "epoch": 1.4368847869669659, "grad_norm": 6.652415715901836, "learning_rate": 9.691680392314053e-07, "loss": 0.5954, "step": 19889 }, { "epoch": 1.4369570321671754, "grad_norm": 6.068536643492251, "learning_rate": 9.689367972842936e-07, "loss": 0.6126, "step": 19890 }, { "epoch": 1.437029277367385, "grad_norm": 6.508948988212486, "learning_rate": 9.68705576296248e-07, "loss": 0.6266, "step": 19891 }, { "epoch": 1.4371015225675945, "grad_norm": 7.500345095006593, "learning_rate": 9.684743762704343e-07, "loss": 0.5892, "step": 19892 }, { "epoch": 1.437173767767804, "grad_norm": 6.603612385142175, "learning_rate": 9.682431972100182e-07, "loss": 0.6884, "step": 19893 }, { "epoch": 1.4372460129680134, "grad_norm": 7.51178299057619, "learning_rate": 9.680120391181624e-07, "loss": 0.5416, "step": 19894 }, { "epoch": 1.437318258168223, "grad_norm": 8.143507785664344, "learning_rate": 9.677809019980324e-07, "loss": 0.6031, "step": 19895 }, { "epoch": 1.4373905033684324, "grad_norm": 6.903812858741566, "learning_rate": 9.67549785852792e-07, "loss": 0.6777, "step": 19896 }, { "epoch": 1.437462748568642, "grad_norm": 6.425288200594069, "learning_rate": 9.673186906856061e-07, "loss": 0.5766, "step": 19897 }, { "epoch": 1.4375349937688515, "grad_norm": 7.421009149740829, "learning_rate": 9.670876164996366e-07, "loss": 0.631, "step": 19898 }, { "epoch": 1.437607238969061, "grad_norm": 6.43756340514133, "learning_rate": 9.66856563298047e-07, "loss": 0.5626, "step": 19899 }, { "epoch": 1.4376794841692706, "grad_norm": 6.533568632339169, "learning_rate": 9.666255310840008e-07, "loss": 0.6207, "step": 19900 }, { "epoch": 1.43775172936948, "grad_norm": 6.75633055915056, "learning_rate": 9.663945198606605e-07, "loss": 0.6256, "step": 19901 }, { "epoch": 1.4378239745696895, "grad_norm": 7.290819420411188, "learning_rate": 9.661635296311885e-07, "loss": 0.6101, "step": 19902 }, { "epoch": 1.437896219769899, "grad_norm": 7.045761933990443, "learning_rate": 9.659325603987472e-07, "loss": 0.6645, "step": 19903 }, { "epoch": 1.4379684649701086, "grad_norm": 7.007209062909309, "learning_rate": 9.657016121664984e-07, "loss": 0.6214, "step": 19904 }, { "epoch": 1.438040710170318, "grad_norm": 8.239417947748239, "learning_rate": 9.654706849376024e-07, "loss": 0.6043, "step": 19905 }, { "epoch": 1.4381129553705276, "grad_norm": 8.900555848326535, "learning_rate": 9.652397787152212e-07, "loss": 0.702, "step": 19906 }, { "epoch": 1.4381852005707372, "grad_norm": 6.657838443829996, "learning_rate": 9.650088935025159e-07, "loss": 0.6448, "step": 19907 }, { "epoch": 1.4382574457709465, "grad_norm": 6.6329672068603776, "learning_rate": 9.64778029302647e-07, "loss": 0.6065, "step": 19908 }, { "epoch": 1.438329690971156, "grad_norm": 6.786932326827456, "learning_rate": 9.645471861187749e-07, "loss": 0.5673, "step": 19909 }, { "epoch": 1.4384019361713656, "grad_norm": 6.977679224121465, "learning_rate": 9.643163639540596e-07, "loss": 0.718, "step": 19910 }, { "epoch": 1.4384741813715751, "grad_norm": 8.139298753830182, "learning_rate": 9.640855628116616e-07, "loss": 0.6458, "step": 19911 }, { "epoch": 1.4385464265717847, "grad_norm": 7.801458017087755, "learning_rate": 9.638547826947388e-07, "loss": 0.6167, "step": 19912 }, { "epoch": 1.4386186717719942, "grad_norm": 7.526722920525458, "learning_rate": 9.636240236064516e-07, "loss": 0.633, "step": 19913 }, { "epoch": 1.4386909169722037, "grad_norm": 6.12908612407336, "learning_rate": 9.633932855499584e-07, "loss": 0.6069, "step": 19914 }, { "epoch": 1.438763162172413, "grad_norm": 6.311620972115982, "learning_rate": 9.63162568528419e-07, "loss": 0.5483, "step": 19915 }, { "epoch": 1.4388354073726226, "grad_norm": 7.072744585583477, "learning_rate": 9.629318725449898e-07, "loss": 0.7714, "step": 19916 }, { "epoch": 1.4389076525728322, "grad_norm": 6.785505655853161, "learning_rate": 9.627011976028297e-07, "loss": 0.6263, "step": 19917 }, { "epoch": 1.4389798977730417, "grad_norm": 6.6199425160658025, "learning_rate": 9.624705437050968e-07, "loss": 0.6857, "step": 19918 }, { "epoch": 1.4390521429732512, "grad_norm": 5.799776322885894, "learning_rate": 9.622399108549483e-07, "loss": 0.629, "step": 19919 }, { "epoch": 1.4391243881734608, "grad_norm": 6.897569819971866, "learning_rate": 9.620092990555412e-07, "loss": 0.7035, "step": 19920 }, { "epoch": 1.4391966333736703, "grad_norm": 6.148336678711969, "learning_rate": 9.617787083100328e-07, "loss": 0.5732, "step": 19921 }, { "epoch": 1.4392688785738796, "grad_norm": 9.223364344375732, "learning_rate": 9.615481386215805e-07, "loss": 0.6696, "step": 19922 }, { "epoch": 1.4393411237740892, "grad_norm": 6.90783617955679, "learning_rate": 9.613175899933386e-07, "loss": 0.6258, "step": 19923 }, { "epoch": 1.4394133689742987, "grad_norm": 7.256875296896525, "learning_rate": 9.610870624284643e-07, "loss": 0.5589, "step": 19924 }, { "epoch": 1.4394856141745083, "grad_norm": 6.759897111188707, "learning_rate": 9.60856555930114e-07, "loss": 0.597, "step": 19925 }, { "epoch": 1.4395578593747178, "grad_norm": 8.350184784632015, "learning_rate": 9.606260705014415e-07, "loss": 0.723, "step": 19926 }, { "epoch": 1.4396301045749273, "grad_norm": 5.725281034676411, "learning_rate": 9.60395606145603e-07, "loss": 0.656, "step": 19927 }, { "epoch": 1.439702349775137, "grad_norm": 5.9454908707638205, "learning_rate": 9.601651628657521e-07, "loss": 0.6302, "step": 19928 }, { "epoch": 1.4397745949753462, "grad_norm": 6.775448053050077, "learning_rate": 9.599347406650464e-07, "loss": 0.5657, "step": 19929 }, { "epoch": 1.4398468401755558, "grad_norm": 6.138379489636419, "learning_rate": 9.597043395466374e-07, "loss": 0.6195, "step": 19930 }, { "epoch": 1.4399190853757653, "grad_norm": 7.994801024059738, "learning_rate": 9.594739595136801e-07, "loss": 0.6602, "step": 19931 }, { "epoch": 1.4399913305759748, "grad_norm": 7.732590101415804, "learning_rate": 9.592436005693282e-07, "loss": 0.6424, "step": 19932 }, { "epoch": 1.4400635757761844, "grad_norm": 7.625406723431473, "learning_rate": 9.590132627167359e-07, "loss": 0.6253, "step": 19933 }, { "epoch": 1.440135820976394, "grad_norm": 5.83954130174161, "learning_rate": 9.587829459590546e-07, "loss": 0.6474, "step": 19934 }, { "epoch": 1.4402080661766035, "grad_norm": 8.125892942218623, "learning_rate": 9.585526502994382e-07, "loss": 0.5978, "step": 19935 }, { "epoch": 1.4402803113768128, "grad_norm": 6.339588322315223, "learning_rate": 9.583223757410392e-07, "loss": 0.6066, "step": 19936 }, { "epoch": 1.4403525565770225, "grad_norm": 8.051082126079631, "learning_rate": 9.580921222870097e-07, "loss": 0.5692, "step": 19937 }, { "epoch": 1.4404248017772319, "grad_norm": 7.1680831692162155, "learning_rate": 9.578618899405019e-07, "loss": 0.6668, "step": 19938 }, { "epoch": 1.4404970469774414, "grad_norm": 7.23006038656382, "learning_rate": 9.576316787046675e-07, "loss": 0.657, "step": 19939 }, { "epoch": 1.440569292177651, "grad_norm": 7.920158847410378, "learning_rate": 9.574014885826585e-07, "loss": 0.682, "step": 19940 }, { "epoch": 1.4406415373778605, "grad_norm": 6.642898565300915, "learning_rate": 9.571713195776248e-07, "loss": 0.599, "step": 19941 }, { "epoch": 1.44071378257807, "grad_norm": 5.936487693823345, "learning_rate": 9.569411716927174e-07, "loss": 0.6483, "step": 19942 }, { "epoch": 1.4407860277782794, "grad_norm": 8.117554055125881, "learning_rate": 9.567110449310884e-07, "loss": 0.6527, "step": 19943 }, { "epoch": 1.4408582729784891, "grad_norm": 8.542260969623003, "learning_rate": 9.56480939295886e-07, "loss": 0.6233, "step": 19944 }, { "epoch": 1.4409305181786984, "grad_norm": 7.082239433043964, "learning_rate": 9.56250854790261e-07, "loss": 0.6673, "step": 19945 }, { "epoch": 1.441002763378908, "grad_norm": 6.482468952262018, "learning_rate": 9.560207914173634e-07, "loss": 0.639, "step": 19946 }, { "epoch": 1.4410750085791175, "grad_norm": 6.487699754759784, "learning_rate": 9.557907491803422e-07, "loss": 0.6383, "step": 19947 }, { "epoch": 1.441147253779327, "grad_norm": 7.477487983145539, "learning_rate": 9.555607280823465e-07, "loss": 0.6528, "step": 19948 }, { "epoch": 1.4412194989795366, "grad_norm": 6.608617139432838, "learning_rate": 9.553307281265254e-07, "loss": 0.5761, "step": 19949 }, { "epoch": 1.441291744179746, "grad_norm": 7.448181176101401, "learning_rate": 9.551007493160282e-07, "loss": 0.6574, "step": 19950 }, { "epoch": 1.4413639893799557, "grad_norm": 5.3749180277406685, "learning_rate": 9.548707916540011e-07, "loss": 0.6154, "step": 19951 }, { "epoch": 1.441436234580165, "grad_norm": 7.0238464711643775, "learning_rate": 9.546408551435935e-07, "loss": 0.6183, "step": 19952 }, { "epoch": 1.4415084797803746, "grad_norm": 7.716621630587341, "learning_rate": 9.544109397879525e-07, "loss": 0.729, "step": 19953 }, { "epoch": 1.441580724980584, "grad_norm": 6.469641877393872, "learning_rate": 9.541810455902264e-07, "loss": 0.5862, "step": 19954 }, { "epoch": 1.4416529701807936, "grad_norm": 7.09755171350852, "learning_rate": 9.539511725535608e-07, "loss": 0.7134, "step": 19955 }, { "epoch": 1.4417252153810032, "grad_norm": 5.711660524618592, "learning_rate": 9.537213206811025e-07, "loss": 0.5952, "step": 19956 }, { "epoch": 1.4417974605812125, "grad_norm": 6.655383416300578, "learning_rate": 9.534914899759992e-07, "loss": 0.6912, "step": 19957 }, { "epoch": 1.4418697057814223, "grad_norm": 6.913618618784656, "learning_rate": 9.532616804413977e-07, "loss": 0.6538, "step": 19958 }, { "epoch": 1.4419419509816316, "grad_norm": 9.758600598678647, "learning_rate": 9.530318920804421e-07, "loss": 0.6666, "step": 19959 }, { "epoch": 1.4420141961818411, "grad_norm": 7.251976269994038, "learning_rate": 9.528021248962785e-07, "loss": 0.7157, "step": 19960 }, { "epoch": 1.4420864413820507, "grad_norm": 6.905906979426822, "learning_rate": 9.525723788920536e-07, "loss": 0.64, "step": 19961 }, { "epoch": 1.4421586865822602, "grad_norm": 7.3961960681201635, "learning_rate": 9.523426540709104e-07, "loss": 0.6926, "step": 19962 }, { "epoch": 1.4422309317824697, "grad_norm": 6.241758787785105, "learning_rate": 9.521129504359944e-07, "loss": 0.6368, "step": 19963 }, { "epoch": 1.4423031769826793, "grad_norm": 7.202741302399185, "learning_rate": 9.518832679904505e-07, "loss": 0.6993, "step": 19964 }, { "epoch": 1.4423754221828888, "grad_norm": 6.439314799374947, "learning_rate": 9.516536067374224e-07, "loss": 0.5786, "step": 19965 }, { "epoch": 1.4424476673830982, "grad_norm": 5.553219715313178, "learning_rate": 9.514239666800543e-07, "loss": 0.55, "step": 19966 }, { "epoch": 1.4425199125833077, "grad_norm": 6.230308028393308, "learning_rate": 9.5119434782149e-07, "loss": 0.6153, "step": 19967 }, { "epoch": 1.4425921577835172, "grad_norm": 5.820103747829418, "learning_rate": 9.509647501648731e-07, "loss": 0.5973, "step": 19968 }, { "epoch": 1.4426644029837268, "grad_norm": 6.442030118268443, "learning_rate": 9.50735173713345e-07, "loss": 0.5739, "step": 19969 }, { "epoch": 1.4427366481839363, "grad_norm": 7.03997234599144, "learning_rate": 9.505056184700495e-07, "loss": 0.6452, "step": 19970 }, { "epoch": 1.4428088933841459, "grad_norm": 6.1904049614242345, "learning_rate": 9.502760844381293e-07, "loss": 0.6798, "step": 19971 }, { "epoch": 1.4428811385843554, "grad_norm": 6.105121976116589, "learning_rate": 9.500465716207266e-07, "loss": 0.592, "step": 19972 }, { "epoch": 1.4429533837845647, "grad_norm": 7.436920399881909, "learning_rate": 9.498170800209824e-07, "loss": 0.6555, "step": 19973 }, { "epoch": 1.4430256289847743, "grad_norm": 6.717995960594387, "learning_rate": 9.495876096420386e-07, "loss": 0.6283, "step": 19974 }, { "epoch": 1.4430978741849838, "grad_norm": 7.204505506776157, "learning_rate": 9.493581604870367e-07, "loss": 0.523, "step": 19975 }, { "epoch": 1.4431701193851934, "grad_norm": 6.611521703683748, "learning_rate": 9.491287325591175e-07, "loss": 0.6204, "step": 19976 }, { "epoch": 1.443242364585403, "grad_norm": 6.828114385181967, "learning_rate": 9.488993258614218e-07, "loss": 0.5954, "step": 19977 }, { "epoch": 1.4433146097856124, "grad_norm": 5.62866684816848, "learning_rate": 9.486699403970897e-07, "loss": 0.5313, "step": 19978 }, { "epoch": 1.443386854985822, "grad_norm": 7.561384891234454, "learning_rate": 9.484405761692628e-07, "loss": 0.6457, "step": 19979 }, { "epoch": 1.4434591001860313, "grad_norm": 7.350875968375566, "learning_rate": 9.482112331810789e-07, "loss": 0.7207, "step": 19980 }, { "epoch": 1.4435313453862408, "grad_norm": 7.1138987474804765, "learning_rate": 9.479819114356781e-07, "loss": 0.6289, "step": 19981 }, { "epoch": 1.4436035905864504, "grad_norm": 6.6457428004237995, "learning_rate": 9.477526109362001e-07, "loss": 0.6688, "step": 19982 }, { "epoch": 1.44367583578666, "grad_norm": 6.484164094367565, "learning_rate": 9.475233316857843e-07, "loss": 0.6495, "step": 19983 }, { "epoch": 1.4437480809868695, "grad_norm": 7.849711914607694, "learning_rate": 9.472940736875677e-07, "loss": 0.6492, "step": 19984 }, { "epoch": 1.443820326187079, "grad_norm": 7.590278240024296, "learning_rate": 9.470648369446889e-07, "loss": 0.7549, "step": 19985 }, { "epoch": 1.4438925713872885, "grad_norm": 6.652416862762495, "learning_rate": 9.468356214602883e-07, "loss": 0.6071, "step": 19986 }, { "epoch": 1.4439648165874979, "grad_norm": 6.973869325061742, "learning_rate": 9.466064272375014e-07, "loss": 0.6318, "step": 19987 }, { "epoch": 1.4440370617877074, "grad_norm": 7.527085288440215, "learning_rate": 9.463772542794664e-07, "loss": 0.5915, "step": 19988 }, { "epoch": 1.444109306987917, "grad_norm": 6.752530224198255, "learning_rate": 9.461481025893205e-07, "loss": 0.6566, "step": 19989 }, { "epoch": 1.4441815521881265, "grad_norm": 6.916596959999916, "learning_rate": 9.459189721702014e-07, "loss": 0.5704, "step": 19990 }, { "epoch": 1.444253797388336, "grad_norm": 6.4215619458747755, "learning_rate": 9.45689863025244e-07, "loss": 0.6069, "step": 19991 }, { "epoch": 1.4443260425885456, "grad_norm": 8.833347632438555, "learning_rate": 9.454607751575856e-07, "loss": 0.6107, "step": 19992 }, { "epoch": 1.4443982877887551, "grad_norm": 6.616273422790729, "learning_rate": 9.452317085703619e-07, "loss": 0.6995, "step": 19993 }, { "epoch": 1.4444705329889644, "grad_norm": 8.920744014211474, "learning_rate": 9.450026632667092e-07, "loss": 0.5902, "step": 19994 }, { "epoch": 1.444542778189174, "grad_norm": 7.417657364531466, "learning_rate": 9.447736392497625e-07, "loss": 0.6495, "step": 19995 }, { "epoch": 1.4446150233893835, "grad_norm": 6.839229118229673, "learning_rate": 9.445446365226574e-07, "loss": 0.6903, "step": 19996 }, { "epoch": 1.444687268589593, "grad_norm": 6.860107938845947, "learning_rate": 9.443156550885291e-07, "loss": 0.6485, "step": 19997 }, { "epoch": 1.4447595137898026, "grad_norm": 7.602977029071562, "learning_rate": 9.440866949505106e-07, "loss": 0.6817, "step": 19998 }, { "epoch": 1.4448317589900121, "grad_norm": 6.709673761433061, "learning_rate": 9.438577561117374e-07, "loss": 0.6018, "step": 19999 }, { "epoch": 1.4449040041902217, "grad_norm": 7.312909497915794, "learning_rate": 9.436288385753442e-07, "loss": 0.6884, "step": 20000 }, { "epoch": 1.444976249390431, "grad_norm": 5.701706466885286, "learning_rate": 9.433999423444626e-07, "loss": 0.606, "step": 20001 }, { "epoch": 1.4450484945906406, "grad_norm": 6.736581816946634, "learning_rate": 9.431710674222275e-07, "loss": 0.5683, "step": 20002 }, { "epoch": 1.44512073979085, "grad_norm": 5.96734219071145, "learning_rate": 9.429422138117713e-07, "loss": 0.5678, "step": 20003 }, { "epoch": 1.4451929849910596, "grad_norm": 6.886543154844465, "learning_rate": 9.427133815162273e-07, "loss": 0.6331, "step": 20004 }, { "epoch": 1.4452652301912692, "grad_norm": 7.209901613365751, "learning_rate": 9.424845705387281e-07, "loss": 0.6155, "step": 20005 }, { "epoch": 1.4453374753914787, "grad_norm": 6.932166721546897, "learning_rate": 9.42255780882406e-07, "loss": 0.5423, "step": 20006 }, { "epoch": 1.4454097205916883, "grad_norm": 7.246072428792756, "learning_rate": 9.420270125503933e-07, "loss": 0.6697, "step": 20007 }, { "epoch": 1.4454819657918976, "grad_norm": 6.969335420249995, "learning_rate": 9.417982655458202e-07, "loss": 0.5751, "step": 20008 }, { "epoch": 1.4455542109921073, "grad_norm": 7.1007456965463485, "learning_rate": 9.415695398718192e-07, "loss": 0.696, "step": 20009 }, { "epoch": 1.4456264561923167, "grad_norm": 6.619699336942988, "learning_rate": 9.413408355315209e-07, "loss": 0.6713, "step": 20010 }, { "epoch": 1.4456987013925262, "grad_norm": 7.549343550482577, "learning_rate": 9.411121525280575e-07, "loss": 0.7539, "step": 20011 }, { "epoch": 1.4457709465927358, "grad_norm": 6.053427603291462, "learning_rate": 9.408834908645573e-07, "loss": 0.5707, "step": 20012 }, { "epoch": 1.4458431917929453, "grad_norm": 5.597775337565666, "learning_rate": 9.406548505441507e-07, "loss": 0.6381, "step": 20013 }, { "epoch": 1.4459154369931548, "grad_norm": 6.442938127996528, "learning_rate": 9.404262315699691e-07, "loss": 0.5828, "step": 20014 }, { "epoch": 1.4459876821933642, "grad_norm": 6.99702881017437, "learning_rate": 9.401976339451427e-07, "loss": 0.6882, "step": 20015 }, { "epoch": 1.446059927393574, "grad_norm": 6.7739547655179395, "learning_rate": 9.399690576727985e-07, "loss": 0.6163, "step": 20016 }, { "epoch": 1.4461321725937832, "grad_norm": 6.752738114713056, "learning_rate": 9.397405027560666e-07, "loss": 0.5963, "step": 20017 }, { "epoch": 1.4462044177939928, "grad_norm": 6.846043868570419, "learning_rate": 9.395119691980767e-07, "loss": 0.6094, "step": 20018 }, { "epoch": 1.4462766629942023, "grad_norm": 6.322458702064146, "learning_rate": 9.392834570019555e-07, "loss": 0.6722, "step": 20019 }, { "epoch": 1.4463489081944119, "grad_norm": 7.069950730989788, "learning_rate": 9.390549661708318e-07, "loss": 0.6435, "step": 20020 }, { "epoch": 1.4464211533946214, "grad_norm": 6.732222989882172, "learning_rate": 9.388264967078337e-07, "loss": 0.6031, "step": 20021 }, { "epoch": 1.4464933985948307, "grad_norm": 5.430549125093044, "learning_rate": 9.385980486160887e-07, "loss": 0.5775, "step": 20022 }, { "epoch": 1.4465656437950405, "grad_norm": 6.9445300525051, "learning_rate": 9.38369621898724e-07, "loss": 0.5867, "step": 20023 }, { "epoch": 1.4466378889952498, "grad_norm": 7.180162216946767, "learning_rate": 9.381412165588666e-07, "loss": 0.5975, "step": 20024 }, { "epoch": 1.4467101341954594, "grad_norm": 8.579601278933, "learning_rate": 9.379128325996442e-07, "loss": 0.6669, "step": 20025 }, { "epoch": 1.446782379395669, "grad_norm": 6.571686917314989, "learning_rate": 9.376844700241813e-07, "loss": 0.5993, "step": 20026 }, { "epoch": 1.4468546245958784, "grad_norm": 8.415417518706969, "learning_rate": 9.374561288356051e-07, "loss": 0.6122, "step": 20027 }, { "epoch": 1.446926869796088, "grad_norm": 7.636542916773699, "learning_rate": 9.372278090370413e-07, "loss": 0.6638, "step": 20028 }, { "epoch": 1.4469991149962973, "grad_norm": 8.050492682692285, "learning_rate": 9.36999510631616e-07, "loss": 0.6366, "step": 20029 }, { "epoch": 1.447071360196507, "grad_norm": 5.880123744882156, "learning_rate": 9.36771233622453e-07, "loss": 0.6287, "step": 20030 }, { "epoch": 1.4471436053967164, "grad_norm": 7.615655645246995, "learning_rate": 9.365429780126781e-07, "loss": 0.6725, "step": 20031 }, { "epoch": 1.447215850596926, "grad_norm": 6.18293516757025, "learning_rate": 9.363147438054159e-07, "loss": 0.6614, "step": 20032 }, { "epoch": 1.4472880957971355, "grad_norm": 7.308747200754125, "learning_rate": 9.360865310037909e-07, "loss": 0.5971, "step": 20033 }, { "epoch": 1.447360340997345, "grad_norm": 6.710082527028356, "learning_rate": 9.358583396109266e-07, "loss": 0.6174, "step": 20034 }, { "epoch": 1.4474325861975545, "grad_norm": 6.83514309000119, "learning_rate": 9.356301696299475e-07, "loss": 0.6379, "step": 20035 }, { "epoch": 1.447504831397764, "grad_norm": 6.856895885107139, "learning_rate": 9.354020210639775e-07, "loss": 0.6718, "step": 20036 }, { "epoch": 1.4475770765979736, "grad_norm": 6.559417581987945, "learning_rate": 9.351738939161381e-07, "loss": 0.625, "step": 20037 }, { "epoch": 1.447649321798183, "grad_norm": 6.9493543366129185, "learning_rate": 9.34945788189553e-07, "loss": 0.6921, "step": 20038 }, { "epoch": 1.4477215669983925, "grad_norm": 6.4520289342317785, "learning_rate": 9.347177038873448e-07, "loss": 0.5459, "step": 20039 }, { "epoch": 1.447793812198602, "grad_norm": 6.845606164612914, "learning_rate": 9.344896410126369e-07, "loss": 0.6498, "step": 20040 }, { "epoch": 1.4478660573988116, "grad_norm": 6.620627633851135, "learning_rate": 9.342615995685487e-07, "loss": 0.6072, "step": 20041 }, { "epoch": 1.4479383025990211, "grad_norm": 7.031148002732422, "learning_rate": 9.340335795582039e-07, "loss": 0.6367, "step": 20042 }, { "epoch": 1.4480105477992307, "grad_norm": 7.246336175611141, "learning_rate": 9.338055809847249e-07, "loss": 0.6059, "step": 20043 }, { "epoch": 1.4480827929994402, "grad_norm": 5.743816533912286, "learning_rate": 9.335776038512301e-07, "loss": 0.6345, "step": 20044 }, { "epoch": 1.4481550381996495, "grad_norm": 7.141029129445411, "learning_rate": 9.33349648160842e-07, "loss": 0.6758, "step": 20045 }, { "epoch": 1.448227283399859, "grad_norm": 7.52224408247811, "learning_rate": 9.331217139166807e-07, "loss": 0.6452, "step": 20046 }, { "epoch": 1.4482995286000686, "grad_norm": 8.093559586925235, "learning_rate": 9.328938011218671e-07, "loss": 0.677, "step": 20047 }, { "epoch": 1.4483717738002782, "grad_norm": 7.232680845350039, "learning_rate": 9.326659097795202e-07, "loss": 0.608, "step": 20048 }, { "epoch": 1.4484440190004877, "grad_norm": 7.198705546259553, "learning_rate": 9.324380398927596e-07, "loss": 0.6409, "step": 20049 }, { "epoch": 1.4485162642006972, "grad_norm": 6.164837265156265, "learning_rate": 9.322101914647052e-07, "loss": 0.6218, "step": 20050 }, { "epoch": 1.4485885094009068, "grad_norm": 7.35050413517738, "learning_rate": 9.319823644984763e-07, "loss": 0.6313, "step": 20051 }, { "epoch": 1.448660754601116, "grad_norm": 6.8233600812101045, "learning_rate": 9.317545589971911e-07, "loss": 0.6403, "step": 20052 }, { "epoch": 1.4487329998013256, "grad_norm": 6.308307728616747, "learning_rate": 9.315267749639684e-07, "loss": 0.6172, "step": 20053 }, { "epoch": 1.4488052450015352, "grad_norm": 6.664929672293707, "learning_rate": 9.31299012401927e-07, "loss": 0.6259, "step": 20054 }, { "epoch": 1.4488774902017447, "grad_norm": 6.6418002176997115, "learning_rate": 9.310712713141834e-07, "loss": 0.664, "step": 20055 }, { "epoch": 1.4489497354019543, "grad_norm": 7.497841333631077, "learning_rate": 9.308435517038559e-07, "loss": 0.6834, "step": 20056 }, { "epoch": 1.4490219806021638, "grad_norm": 5.635169098423069, "learning_rate": 9.306158535740625e-07, "loss": 0.5152, "step": 20057 }, { "epoch": 1.4490942258023733, "grad_norm": 6.778419571943783, "learning_rate": 9.303881769279188e-07, "loss": 0.6591, "step": 20058 }, { "epoch": 1.4491664710025827, "grad_norm": 7.081001556159276, "learning_rate": 9.301605217685423e-07, "loss": 0.7181, "step": 20059 }, { "epoch": 1.4492387162027922, "grad_norm": 7.826476755432186, "learning_rate": 9.299328880990491e-07, "loss": 0.5988, "step": 20060 }, { "epoch": 1.4493109614030018, "grad_norm": 7.973321059907549, "learning_rate": 9.297052759225558e-07, "loss": 0.6942, "step": 20061 }, { "epoch": 1.4493832066032113, "grad_norm": 6.865129355526758, "learning_rate": 9.29477685242178e-07, "loss": 0.6623, "step": 20062 }, { "epoch": 1.4494554518034208, "grad_norm": 5.876969352337184, "learning_rate": 9.292501160610312e-07, "loss": 0.5926, "step": 20063 }, { "epoch": 1.4495276970036304, "grad_norm": 7.053711140302676, "learning_rate": 9.290225683822308e-07, "loss": 0.5574, "step": 20064 }, { "epoch": 1.44959994220384, "grad_norm": 7.915226480758413, "learning_rate": 9.287950422088923e-07, "loss": 0.6699, "step": 20065 }, { "epoch": 1.4496721874040492, "grad_norm": 6.848181556968757, "learning_rate": 9.285675375441292e-07, "loss": 0.6034, "step": 20066 }, { "epoch": 1.4497444326042588, "grad_norm": 7.368521479589105, "learning_rate": 9.283400543910559e-07, "loss": 0.6442, "step": 20067 }, { "epoch": 1.4498166778044683, "grad_norm": 7.695847360060845, "learning_rate": 9.281125927527881e-07, "loss": 0.6175, "step": 20068 }, { "epoch": 1.4498889230046779, "grad_norm": 7.670132751907542, "learning_rate": 9.278851526324367e-07, "loss": 0.5921, "step": 20069 }, { "epoch": 1.4499611682048874, "grad_norm": 7.75450274193376, "learning_rate": 9.276577340331177e-07, "loss": 0.631, "step": 20070 }, { "epoch": 1.450033413405097, "grad_norm": 7.592567383156599, "learning_rate": 9.274303369579435e-07, "loss": 0.6345, "step": 20071 }, { "epoch": 1.4501056586053065, "grad_norm": 7.118039931200035, "learning_rate": 9.272029614100278e-07, "loss": 0.6172, "step": 20072 }, { "epoch": 1.4501779038055158, "grad_norm": 6.22433236460022, "learning_rate": 9.269756073924815e-07, "loss": 0.5791, "step": 20073 }, { "epoch": 1.4502501490057254, "grad_norm": 7.3310931280837455, "learning_rate": 9.267482749084178e-07, "loss": 0.6214, "step": 20074 }, { "epoch": 1.450322394205935, "grad_norm": 6.289319676418706, "learning_rate": 9.265209639609496e-07, "loss": 0.618, "step": 20075 }, { "epoch": 1.4503946394061444, "grad_norm": 6.870625439288767, "learning_rate": 9.262936745531867e-07, "loss": 0.6437, "step": 20076 }, { "epoch": 1.450466884606354, "grad_norm": 7.396113287413458, "learning_rate": 9.260664066882413e-07, "loss": 0.5338, "step": 20077 }, { "epoch": 1.4505391298065635, "grad_norm": 5.625706946135644, "learning_rate": 9.258391603692249e-07, "loss": 0.5385, "step": 20078 }, { "epoch": 1.450611375006773, "grad_norm": 6.406102025253655, "learning_rate": 9.256119355992482e-07, "loss": 0.5861, "step": 20079 }, { "epoch": 1.4506836202069824, "grad_norm": 6.625647171367724, "learning_rate": 9.253847323814216e-07, "loss": 0.5546, "step": 20080 }, { "epoch": 1.4507558654071921, "grad_norm": 7.984164778299058, "learning_rate": 9.251575507188554e-07, "loss": 0.6614, "step": 20081 }, { "epoch": 1.4508281106074015, "grad_norm": 8.31810233262465, "learning_rate": 9.249303906146606e-07, "loss": 0.6662, "step": 20082 }, { "epoch": 1.450900355807611, "grad_norm": 6.920659976864985, "learning_rate": 9.247032520719446e-07, "loss": 0.6331, "step": 20083 }, { "epoch": 1.4509726010078206, "grad_norm": 6.735235668157071, "learning_rate": 9.24476135093818e-07, "loss": 0.656, "step": 20084 }, { "epoch": 1.45104484620803, "grad_norm": 6.808796567188608, "learning_rate": 9.2424903968339e-07, "loss": 0.6318, "step": 20085 }, { "epoch": 1.4511170914082396, "grad_norm": 7.369195496582086, "learning_rate": 9.240219658437699e-07, "loss": 0.6345, "step": 20086 }, { "epoch": 1.451189336608449, "grad_norm": 6.925747375921814, "learning_rate": 9.237949135780646e-07, "loss": 0.5804, "step": 20087 }, { "epoch": 1.4512615818086587, "grad_norm": 6.526960652750162, "learning_rate": 9.235678828893829e-07, "loss": 0.6219, "step": 20088 }, { "epoch": 1.451333827008868, "grad_norm": 6.473214935152369, "learning_rate": 9.23340873780833e-07, "loss": 0.6433, "step": 20089 }, { "epoch": 1.4514060722090776, "grad_norm": 7.097870424069797, "learning_rate": 9.231138862555225e-07, "loss": 0.6152, "step": 20090 }, { "epoch": 1.4514783174092871, "grad_norm": 7.946848252971848, "learning_rate": 9.228869203165583e-07, "loss": 0.592, "step": 20091 }, { "epoch": 1.4515505626094967, "grad_norm": 7.470767718266508, "learning_rate": 9.226599759670479e-07, "loss": 0.6393, "step": 20092 }, { "epoch": 1.4516228078097062, "grad_norm": 7.038695331104432, "learning_rate": 9.224330532100984e-07, "loss": 0.6182, "step": 20093 }, { "epoch": 1.4516950530099155, "grad_norm": 6.65636891719992, "learning_rate": 9.222061520488146e-07, "loss": 0.6453, "step": 20094 }, { "epoch": 1.4517672982101253, "grad_norm": 6.629889645125548, "learning_rate": 9.219792724863033e-07, "loss": 0.5422, "step": 20095 }, { "epoch": 1.4518395434103346, "grad_norm": 5.609547561923983, "learning_rate": 9.217524145256706e-07, "loss": 0.5163, "step": 20096 }, { "epoch": 1.4519117886105442, "grad_norm": 6.452938788362063, "learning_rate": 9.21525578170023e-07, "loss": 0.6047, "step": 20097 }, { "epoch": 1.4519840338107537, "grad_norm": 8.00903191934932, "learning_rate": 9.212987634224629e-07, "loss": 0.6351, "step": 20098 }, { "epoch": 1.4520562790109632, "grad_norm": 8.34721479734689, "learning_rate": 9.210719702860976e-07, "loss": 0.6354, "step": 20099 }, { "epoch": 1.4521285242111728, "grad_norm": 7.399756824518744, "learning_rate": 9.208451987640321e-07, "loss": 0.6313, "step": 20100 }, { "epoch": 1.452200769411382, "grad_norm": 6.389589258404118, "learning_rate": 9.206184488593686e-07, "loss": 0.5998, "step": 20101 }, { "epoch": 1.4522730146115919, "grad_norm": 5.559275517637695, "learning_rate": 9.203917205752125e-07, "loss": 0.5755, "step": 20102 }, { "epoch": 1.4523452598118012, "grad_norm": 5.842785434369474, "learning_rate": 9.20165013914667e-07, "loss": 0.6442, "step": 20103 }, { "epoch": 1.4524175050120107, "grad_norm": 6.611485354020364, "learning_rate": 9.19938328880837e-07, "loss": 0.6177, "step": 20104 }, { "epoch": 1.4524897502122203, "grad_norm": 7.659033444530932, "learning_rate": 9.197116654768231e-07, "loss": 0.5936, "step": 20105 }, { "epoch": 1.4525619954124298, "grad_norm": 6.867719089386421, "learning_rate": 9.194850237057299e-07, "loss": 0.6631, "step": 20106 }, { "epoch": 1.4526342406126393, "grad_norm": 7.155473025591719, "learning_rate": 9.192584035706595e-07, "loss": 0.6533, "step": 20107 }, { "epoch": 1.452706485812849, "grad_norm": 6.752919731215545, "learning_rate": 9.190318050747141e-07, "loss": 0.5925, "step": 20108 }, { "epoch": 1.4527787310130584, "grad_norm": 8.339892692332972, "learning_rate": 9.188052282209956e-07, "loss": 0.5644, "step": 20109 }, { "epoch": 1.4528509762132678, "grad_norm": 7.625140079399771, "learning_rate": 9.185786730126059e-07, "loss": 0.6089, "step": 20110 }, { "epoch": 1.4529232214134773, "grad_norm": 6.851264625259488, "learning_rate": 9.183521394526473e-07, "loss": 0.6068, "step": 20111 }, { "epoch": 1.4529954666136868, "grad_norm": 5.415715217169064, "learning_rate": 9.181256275442188e-07, "loss": 0.6067, "step": 20112 }, { "epoch": 1.4530677118138964, "grad_norm": 6.979318584857374, "learning_rate": 9.178991372904223e-07, "loss": 0.5491, "step": 20113 }, { "epoch": 1.453139957014106, "grad_norm": 6.274663859935111, "learning_rate": 9.17672668694359e-07, "loss": 0.5344, "step": 20114 }, { "epoch": 1.4532122022143155, "grad_norm": 6.487324312804313, "learning_rate": 9.174462217591274e-07, "loss": 0.5982, "step": 20115 }, { "epoch": 1.453284447414525, "grad_norm": 6.434173020559526, "learning_rate": 9.172197964878282e-07, "loss": 0.6445, "step": 20116 }, { "epoch": 1.4533566926147343, "grad_norm": 6.788870612309077, "learning_rate": 9.169933928835612e-07, "loss": 0.5934, "step": 20117 }, { "epoch": 1.4534289378149439, "grad_norm": 7.995256925251449, "learning_rate": 9.167670109494253e-07, "loss": 0.5725, "step": 20118 }, { "epoch": 1.4535011830151534, "grad_norm": 7.492287294043414, "learning_rate": 9.165406506885199e-07, "loss": 0.6321, "step": 20119 }, { "epoch": 1.453573428215363, "grad_norm": 8.80522212357575, "learning_rate": 9.163143121039436e-07, "loss": 0.6345, "step": 20120 }, { "epoch": 1.4536456734155725, "grad_norm": 7.12224030170867, "learning_rate": 9.160879951987945e-07, "loss": 0.6027, "step": 20121 }, { "epoch": 1.453717918615782, "grad_norm": 6.911110112938736, "learning_rate": 9.158616999761719e-07, "loss": 0.6575, "step": 20122 }, { "epoch": 1.4537901638159916, "grad_norm": 7.653994356422945, "learning_rate": 9.156354264391717e-07, "loss": 0.6884, "step": 20123 }, { "epoch": 1.453862409016201, "grad_norm": 7.788945448750638, "learning_rate": 9.154091745908925e-07, "loss": 0.6254, "step": 20124 }, { "epoch": 1.4539346542164104, "grad_norm": 6.747045364808596, "learning_rate": 9.151829444344321e-07, "loss": 0.6424, "step": 20125 }, { "epoch": 1.45400689941662, "grad_norm": 5.934782671271511, "learning_rate": 9.149567359728848e-07, "loss": 0.6354, "step": 20126 }, { "epoch": 1.4540791446168295, "grad_norm": 6.707717993652763, "learning_rate": 9.1473054920935e-07, "loss": 0.6119, "step": 20127 }, { "epoch": 1.454151389817039, "grad_norm": 6.940830015738251, "learning_rate": 9.145043841469231e-07, "loss": 0.6772, "step": 20128 }, { "epoch": 1.4542236350172486, "grad_norm": 6.9838511976752145, "learning_rate": 9.14278240788701e-07, "loss": 0.6766, "step": 20129 }, { "epoch": 1.4542958802174581, "grad_norm": 6.450429727596007, "learning_rate": 9.140521191377777e-07, "loss": 0.5429, "step": 20130 }, { "epoch": 1.4543681254176675, "grad_norm": 6.445647481825309, "learning_rate": 9.138260191972495e-07, "loss": 0.5734, "step": 20131 }, { "epoch": 1.454440370617877, "grad_norm": 7.639088856555447, "learning_rate": 9.135999409702123e-07, "loss": 0.6233, "step": 20132 }, { "epoch": 1.4545126158180866, "grad_norm": 7.2981481319582615, "learning_rate": 9.133738844597595e-07, "loss": 0.6168, "step": 20133 }, { "epoch": 1.454584861018296, "grad_norm": 7.213680452372791, "learning_rate": 9.13147849668986e-07, "loss": 0.5943, "step": 20134 }, { "epoch": 1.4546571062185056, "grad_norm": 6.556672915190819, "learning_rate": 9.129218366009865e-07, "loss": 0.616, "step": 20135 }, { "epoch": 1.4547293514187152, "grad_norm": 6.884195455113763, "learning_rate": 9.126958452588547e-07, "loss": 0.6107, "step": 20136 }, { "epoch": 1.4548015966189247, "grad_norm": 6.831327407551911, "learning_rate": 9.124698756456843e-07, "loss": 0.6229, "step": 20137 }, { "epoch": 1.454873841819134, "grad_norm": 6.566063195139715, "learning_rate": 9.122439277645689e-07, "loss": 0.6381, "step": 20138 }, { "epoch": 1.4549460870193436, "grad_norm": 5.710893748229296, "learning_rate": 9.12018001618602e-07, "loss": 0.5697, "step": 20139 }, { "epoch": 1.4550183322195531, "grad_norm": 6.497969970614951, "learning_rate": 9.117920972108749e-07, "loss": 0.5969, "step": 20140 }, { "epoch": 1.4550905774197627, "grad_norm": 7.562400628059674, "learning_rate": 9.115662145444806e-07, "loss": 0.6196, "step": 20141 }, { "epoch": 1.4551628226199722, "grad_norm": 6.2822009832809345, "learning_rate": 9.113403536225115e-07, "loss": 0.6704, "step": 20142 }, { "epoch": 1.4552350678201817, "grad_norm": 6.130854708404564, "learning_rate": 9.111145144480604e-07, "loss": 0.5794, "step": 20143 }, { "epoch": 1.4553073130203913, "grad_norm": 6.9587538662531925, "learning_rate": 9.10888697024217e-07, "loss": 0.611, "step": 20144 }, { "epoch": 1.4553795582206006, "grad_norm": 7.625011006331708, "learning_rate": 9.106629013540736e-07, "loss": 0.6721, "step": 20145 }, { "epoch": 1.4554518034208102, "grad_norm": 7.169709853256463, "learning_rate": 9.104371274407203e-07, "loss": 0.6617, "step": 20146 }, { "epoch": 1.4555240486210197, "grad_norm": 6.073066548176793, "learning_rate": 9.102113752872499e-07, "loss": 0.5296, "step": 20147 }, { "epoch": 1.4555962938212292, "grad_norm": 6.4496253917643775, "learning_rate": 9.099856448967506e-07, "loss": 0.6028, "step": 20148 }, { "epoch": 1.4556685390214388, "grad_norm": 6.489126647588064, "learning_rate": 9.097599362723134e-07, "loss": 0.5781, "step": 20149 }, { "epoch": 1.4557407842216483, "grad_norm": 6.512810627563788, "learning_rate": 9.095342494170287e-07, "loss": 0.6174, "step": 20150 }, { "epoch": 1.4558130294218579, "grad_norm": 6.605745080799725, "learning_rate": 9.093085843339844e-07, "loss": 0.6478, "step": 20151 }, { "epoch": 1.4558852746220672, "grad_norm": 8.023327196174602, "learning_rate": 9.090829410262706e-07, "loss": 0.6671, "step": 20152 }, { "epoch": 1.4559575198222767, "grad_norm": 6.334806705889025, "learning_rate": 9.088573194969758e-07, "loss": 0.5955, "step": 20153 }, { "epoch": 1.4560297650224863, "grad_norm": 6.069212667819984, "learning_rate": 9.086317197491889e-07, "loss": 0.5385, "step": 20154 }, { "epoch": 1.4561020102226958, "grad_norm": 6.561225694730178, "learning_rate": 9.084061417859982e-07, "loss": 0.6212, "step": 20155 }, { "epoch": 1.4561742554229054, "grad_norm": 7.414305350343011, "learning_rate": 9.081805856104916e-07, "loss": 0.6252, "step": 20156 }, { "epoch": 1.456246500623115, "grad_norm": 7.0481131876279655, "learning_rate": 9.079550512257579e-07, "loss": 0.6712, "step": 20157 }, { "epoch": 1.4563187458233244, "grad_norm": 6.324115302292444, "learning_rate": 9.077295386348822e-07, "loss": 0.5941, "step": 20158 }, { "epoch": 1.4563909910235338, "grad_norm": 6.1531664876473995, "learning_rate": 9.07504047840953e-07, "loss": 0.6882, "step": 20159 }, { "epoch": 1.4564632362237435, "grad_norm": 6.89943002751603, "learning_rate": 9.072785788470568e-07, "loss": 0.6989, "step": 20160 }, { "epoch": 1.4565354814239528, "grad_norm": 6.45053854180006, "learning_rate": 9.070531316562811e-07, "loss": 0.6759, "step": 20161 }, { "epoch": 1.4566077266241624, "grad_norm": 6.37986928441184, "learning_rate": 9.068277062717105e-07, "loss": 0.6201, "step": 20162 }, { "epoch": 1.456679971824372, "grad_norm": 6.783890030095542, "learning_rate": 9.066023026964312e-07, "loss": 0.6045, "step": 20163 }, { "epoch": 1.4567522170245815, "grad_norm": 7.526167678218065, "learning_rate": 9.063769209335293e-07, "loss": 0.6528, "step": 20164 }, { "epoch": 1.456824462224791, "grad_norm": 6.1863149413613705, "learning_rate": 9.061515609860902e-07, "loss": 0.6024, "step": 20165 }, { "epoch": 1.4568967074250003, "grad_norm": 5.733483518067627, "learning_rate": 9.059262228571985e-07, "loss": 0.544, "step": 20166 }, { "epoch": 1.45696895262521, "grad_norm": 7.843521175618323, "learning_rate": 9.057009065499392e-07, "loss": 0.6414, "step": 20167 }, { "epoch": 1.4570411978254194, "grad_norm": 9.075539514567096, "learning_rate": 9.054756120673975e-07, "loss": 0.6507, "step": 20168 }, { "epoch": 1.457113443025629, "grad_norm": 6.5544343337953395, "learning_rate": 9.052503394126555e-07, "loss": 0.6273, "step": 20169 }, { "epoch": 1.4571856882258385, "grad_norm": 6.159412172186951, "learning_rate": 9.050250885887982e-07, "loss": 0.5897, "step": 20170 }, { "epoch": 1.457257933426048, "grad_norm": 6.627121207896411, "learning_rate": 9.047998595989091e-07, "loss": 0.5607, "step": 20171 }, { "epoch": 1.4573301786262576, "grad_norm": 7.069883824625492, "learning_rate": 9.045746524460722e-07, "loss": 0.6926, "step": 20172 }, { "epoch": 1.457402423826467, "grad_norm": 5.540452790744168, "learning_rate": 9.043494671333686e-07, "loss": 0.6116, "step": 20173 }, { "epoch": 1.4574746690266767, "grad_norm": 7.333002025171674, "learning_rate": 9.041243036638819e-07, "loss": 0.6854, "step": 20174 }, { "epoch": 1.457546914226886, "grad_norm": 6.591583040513141, "learning_rate": 9.038991620406945e-07, "loss": 0.6265, "step": 20175 }, { "epoch": 1.4576191594270955, "grad_norm": 6.397998985755509, "learning_rate": 9.03674042266888e-07, "loss": 0.6444, "step": 20176 }, { "epoch": 1.457691404627305, "grad_norm": 5.922536526169222, "learning_rate": 9.034489443455446e-07, "loss": 0.598, "step": 20177 }, { "epoch": 1.4577636498275146, "grad_norm": 7.217017556250404, "learning_rate": 9.032238682797453e-07, "loss": 0.5855, "step": 20178 }, { "epoch": 1.4578358950277241, "grad_norm": 7.905490280862443, "learning_rate": 9.029988140725726e-07, "loss": 0.6277, "step": 20179 }, { "epoch": 1.4579081402279335, "grad_norm": 6.2819292450873085, "learning_rate": 9.027737817271051e-07, "loss": 0.646, "step": 20180 }, { "epoch": 1.4579803854281432, "grad_norm": 7.449395164157126, "learning_rate": 9.025487712464243e-07, "loss": 0.6956, "step": 20181 }, { "epoch": 1.4580526306283526, "grad_norm": 6.8285247196896135, "learning_rate": 9.023237826336106e-07, "loss": 0.6961, "step": 20182 }, { "epoch": 1.458124875828562, "grad_norm": 7.014697041131386, "learning_rate": 9.020988158917437e-07, "loss": 0.6249, "step": 20183 }, { "epoch": 1.4581971210287716, "grad_norm": 6.835137229942676, "learning_rate": 9.018738710239036e-07, "loss": 0.5941, "step": 20184 }, { "epoch": 1.4582693662289812, "grad_norm": 6.539546971393695, "learning_rate": 9.016489480331688e-07, "loss": 0.574, "step": 20185 }, { "epoch": 1.4583416114291907, "grad_norm": 7.733120903298312, "learning_rate": 9.014240469226201e-07, "loss": 0.6892, "step": 20186 }, { "epoch": 1.4584138566294003, "grad_norm": 6.3297379310197845, "learning_rate": 9.011991676953341e-07, "loss": 0.5655, "step": 20187 }, { "epoch": 1.4584861018296098, "grad_norm": 6.62852010237267, "learning_rate": 9.009743103543902e-07, "loss": 0.59, "step": 20188 }, { "epoch": 1.4585583470298191, "grad_norm": 7.134534646719724, "learning_rate": 9.007494749028673e-07, "loss": 0.6638, "step": 20189 }, { "epoch": 1.4586305922300287, "grad_norm": 6.81706434860401, "learning_rate": 9.005246613438412e-07, "loss": 0.6175, "step": 20190 }, { "epoch": 1.4587028374302382, "grad_norm": 6.607824265385452, "learning_rate": 9.002998696803908e-07, "loss": 0.621, "step": 20191 }, { "epoch": 1.4587750826304478, "grad_norm": 6.559525460548051, "learning_rate": 9.00075099915593e-07, "loss": 0.5905, "step": 20192 }, { "epoch": 1.4588473278306573, "grad_norm": 6.275744098452216, "learning_rate": 8.998503520525248e-07, "loss": 0.6495, "step": 20193 }, { "epoch": 1.4589195730308668, "grad_norm": 7.985932618141361, "learning_rate": 8.996256260942629e-07, "loss": 0.6587, "step": 20194 }, { "epoch": 1.4589918182310764, "grad_norm": 7.623036866666502, "learning_rate": 8.994009220438835e-07, "loss": 0.5956, "step": 20195 }, { "epoch": 1.4590640634312857, "grad_norm": 6.809330194053626, "learning_rate": 8.991762399044626e-07, "loss": 0.6484, "step": 20196 }, { "epoch": 1.4591363086314952, "grad_norm": 7.966322106394646, "learning_rate": 8.989515796790771e-07, "loss": 0.5415, "step": 20197 }, { "epoch": 1.4592085538317048, "grad_norm": 7.073109177978488, "learning_rate": 8.987269413708005e-07, "loss": 0.6362, "step": 20198 }, { "epoch": 1.4592807990319143, "grad_norm": 8.347022854128516, "learning_rate": 8.985023249827085e-07, "loss": 0.666, "step": 20199 }, { "epoch": 1.4593530442321239, "grad_norm": 7.011861288592976, "learning_rate": 8.982777305178775e-07, "loss": 0.6182, "step": 20200 }, { "epoch": 1.4594252894323334, "grad_norm": 7.752589623733701, "learning_rate": 8.980531579793795e-07, "loss": 0.5983, "step": 20201 }, { "epoch": 1.459497534632543, "grad_norm": 7.5613432701035235, "learning_rate": 8.978286073702899e-07, "loss": 0.5482, "step": 20202 }, { "epoch": 1.4595697798327523, "grad_norm": 5.468818533331738, "learning_rate": 8.976040786936818e-07, "loss": 0.6184, "step": 20203 }, { "epoch": 1.4596420250329618, "grad_norm": 7.29381031315439, "learning_rate": 8.973795719526316e-07, "loss": 0.5884, "step": 20204 }, { "epoch": 1.4597142702331714, "grad_norm": 7.394658155606739, "learning_rate": 8.971550871502096e-07, "loss": 0.6577, "step": 20205 }, { "epoch": 1.459786515433381, "grad_norm": 6.1829857590591715, "learning_rate": 8.969306242894904e-07, "loss": 0.5984, "step": 20206 }, { "epoch": 1.4598587606335904, "grad_norm": 7.25969862887771, "learning_rate": 8.967061833735466e-07, "loss": 0.6573, "step": 20207 }, { "epoch": 1.4599310058338, "grad_norm": 6.021281330200564, "learning_rate": 8.964817644054496e-07, "loss": 0.6282, "step": 20208 }, { "epoch": 1.4600032510340095, "grad_norm": 8.063342146546036, "learning_rate": 8.962573673882721e-07, "loss": 0.5467, "step": 20209 }, { "epoch": 1.4600754962342188, "grad_norm": 6.524884640196414, "learning_rate": 8.960329923250863e-07, "loss": 0.5848, "step": 20210 }, { "epoch": 1.4601477414344284, "grad_norm": 7.648333508853185, "learning_rate": 8.958086392189633e-07, "loss": 0.5886, "step": 20211 }, { "epoch": 1.460219986634638, "grad_norm": 8.38079223748161, "learning_rate": 8.955843080729742e-07, "loss": 0.6479, "step": 20212 }, { "epoch": 1.4602922318348475, "grad_norm": 6.556558589745956, "learning_rate": 8.953599988901904e-07, "loss": 0.6074, "step": 20213 }, { "epoch": 1.460364477035057, "grad_norm": 7.116743959577416, "learning_rate": 8.951357116736834e-07, "loss": 0.5975, "step": 20214 }, { "epoch": 1.4604367222352665, "grad_norm": 7.835266875667953, "learning_rate": 8.94911446426521e-07, "loss": 0.6489, "step": 20215 }, { "epoch": 1.460508967435476, "grad_norm": 7.550329077414182, "learning_rate": 8.94687203151775e-07, "loss": 0.6839, "step": 20216 }, { "epoch": 1.4605812126356854, "grad_norm": 7.328554445872206, "learning_rate": 8.944629818525147e-07, "loss": 0.617, "step": 20217 }, { "epoch": 1.460653457835895, "grad_norm": 6.429987699932953, "learning_rate": 8.942387825318102e-07, "loss": 0.6224, "step": 20218 }, { "epoch": 1.4607257030361045, "grad_norm": 6.4376522898021245, "learning_rate": 8.940146051927295e-07, "loss": 0.5735, "step": 20219 }, { "epoch": 1.460797948236314, "grad_norm": 8.617261870712616, "learning_rate": 8.937904498383415e-07, "loss": 0.5431, "step": 20220 }, { "epoch": 1.4608701934365236, "grad_norm": 7.841373159474242, "learning_rate": 8.935663164717154e-07, "loss": 0.597, "step": 20221 }, { "epoch": 1.4609424386367331, "grad_norm": 7.14785901006269, "learning_rate": 8.933422050959189e-07, "loss": 0.6214, "step": 20222 }, { "epoch": 1.4610146838369427, "grad_norm": 7.164747232476065, "learning_rate": 8.931181157140203e-07, "loss": 0.6163, "step": 20223 }, { "epoch": 1.461086929037152, "grad_norm": 8.258495695928403, "learning_rate": 8.928940483290869e-07, "loss": 0.6275, "step": 20224 }, { "epoch": 1.4611591742373615, "grad_norm": 6.552344611638491, "learning_rate": 8.926700029441871e-07, "loss": 0.6152, "step": 20225 }, { "epoch": 1.461231419437571, "grad_norm": 6.518765870291683, "learning_rate": 8.924459795623861e-07, "loss": 0.5773, "step": 20226 }, { "epoch": 1.4613036646377806, "grad_norm": 8.531249552855986, "learning_rate": 8.922219781867511e-07, "loss": 0.6084, "step": 20227 }, { "epoch": 1.4613759098379902, "grad_norm": 6.4077602188072325, "learning_rate": 8.919979988203492e-07, "loss": 0.6004, "step": 20228 }, { "epoch": 1.4614481550381997, "grad_norm": 6.719740683683934, "learning_rate": 8.917740414662471e-07, "loss": 0.6446, "step": 20229 }, { "epoch": 1.4615204002384092, "grad_norm": 7.0694394747606895, "learning_rate": 8.915501061275087e-07, "loss": 0.6761, "step": 20230 }, { "epoch": 1.4615926454386186, "grad_norm": 6.706143924268835, "learning_rate": 8.913261928071995e-07, "loss": 0.5974, "step": 20231 }, { "epoch": 1.4616648906388283, "grad_norm": 6.383056635956026, "learning_rate": 8.911023015083875e-07, "loss": 0.5993, "step": 20232 }, { "epoch": 1.4617371358390376, "grad_norm": 7.247284479321807, "learning_rate": 8.908784322341349e-07, "loss": 0.5908, "step": 20233 }, { "epoch": 1.4618093810392472, "grad_norm": 7.102027668002309, "learning_rate": 8.90654584987507e-07, "loss": 0.5743, "step": 20234 }, { "epoch": 1.4618816262394567, "grad_norm": 7.138697791933119, "learning_rate": 8.904307597715683e-07, "loss": 0.6131, "step": 20235 }, { "epoch": 1.4619538714396663, "grad_norm": 6.274681490531387, "learning_rate": 8.902069565893839e-07, "loss": 0.5348, "step": 20236 }, { "epoch": 1.4620261166398758, "grad_norm": 7.952812503453599, "learning_rate": 8.899831754440152e-07, "loss": 0.6443, "step": 20237 }, { "epoch": 1.4620983618400851, "grad_norm": 6.9709291343999595, "learning_rate": 8.897594163385268e-07, "loss": 0.641, "step": 20238 }, { "epoch": 1.462170607040295, "grad_norm": 5.724827273232758, "learning_rate": 8.895356792759818e-07, "loss": 0.6373, "step": 20239 }, { "epoch": 1.4622428522405042, "grad_norm": 6.331204039697754, "learning_rate": 8.893119642594428e-07, "loss": 0.5562, "step": 20240 }, { "epoch": 1.4623150974407138, "grad_norm": 6.654277097215251, "learning_rate": 8.890882712919724e-07, "loss": 0.5466, "step": 20241 }, { "epoch": 1.4623873426409233, "grad_norm": 7.283942907504503, "learning_rate": 8.888646003766327e-07, "loss": 0.6416, "step": 20242 }, { "epoch": 1.4624595878411328, "grad_norm": 6.848267061765887, "learning_rate": 8.886409515164868e-07, "loss": 0.5698, "step": 20243 }, { "epoch": 1.4625318330413424, "grad_norm": 6.716468885111281, "learning_rate": 8.884173247145941e-07, "loss": 0.5766, "step": 20244 }, { "epoch": 1.4626040782415517, "grad_norm": 7.119903079884119, "learning_rate": 8.881937199740167e-07, "loss": 0.5941, "step": 20245 }, { "epoch": 1.4626763234417615, "grad_norm": 8.341806706927594, "learning_rate": 8.87970137297817e-07, "loss": 0.726, "step": 20246 }, { "epoch": 1.4627485686419708, "grad_norm": 8.795177126473074, "learning_rate": 8.877465766890533e-07, "loss": 0.6809, "step": 20247 }, { "epoch": 1.4628208138421803, "grad_norm": 6.844369520600234, "learning_rate": 8.875230381507874e-07, "loss": 0.6218, "step": 20248 }, { "epoch": 1.4628930590423899, "grad_norm": 7.294412266754794, "learning_rate": 8.872995216860788e-07, "loss": 0.6284, "step": 20249 }, { "epoch": 1.4629653042425994, "grad_norm": 6.6083423718666845, "learning_rate": 8.870760272979878e-07, "loss": 0.6744, "step": 20250 }, { "epoch": 1.463037549442809, "grad_norm": 5.64133100238903, "learning_rate": 8.868525549895737e-07, "loss": 0.6208, "step": 20251 }, { "epoch": 1.4631097946430183, "grad_norm": 6.014140314960015, "learning_rate": 8.866291047638953e-07, "loss": 0.5714, "step": 20252 }, { "epoch": 1.463182039843228, "grad_norm": 9.004135029631438, "learning_rate": 8.86405676624012e-07, "loss": 0.6548, "step": 20253 }, { "epoch": 1.4632542850434374, "grad_norm": 6.962027690215414, "learning_rate": 8.861822705729831e-07, "loss": 0.5843, "step": 20254 }, { "epoch": 1.463326530243647, "grad_norm": 7.2762620073804944, "learning_rate": 8.859588866138647e-07, "loss": 0.6434, "step": 20255 }, { "epoch": 1.4633987754438564, "grad_norm": 6.759794405457368, "learning_rate": 8.85735524749716e-07, "loss": 0.5506, "step": 20256 }, { "epoch": 1.463471020644066, "grad_norm": 6.024272776675453, "learning_rate": 8.855121849835954e-07, "loss": 0.5351, "step": 20257 }, { "epoch": 1.4635432658442755, "grad_norm": 7.830750898697466, "learning_rate": 8.852888673185586e-07, "loss": 0.6103, "step": 20258 }, { "epoch": 1.463615511044485, "grad_norm": 6.328410125713905, "learning_rate": 8.850655717576626e-07, "loss": 0.6371, "step": 20259 }, { "epoch": 1.4636877562446946, "grad_norm": 6.193150262060257, "learning_rate": 8.848422983039659e-07, "loss": 0.6648, "step": 20260 }, { "epoch": 1.463760001444904, "grad_norm": 7.280973895817923, "learning_rate": 8.846190469605248e-07, "loss": 0.5697, "step": 20261 }, { "epoch": 1.4638322466451135, "grad_norm": 8.872916823350796, "learning_rate": 8.843958177303941e-07, "loss": 0.7284, "step": 20262 }, { "epoch": 1.463904491845323, "grad_norm": 8.038459362244549, "learning_rate": 8.841726106166298e-07, "loss": 0.6697, "step": 20263 }, { "epoch": 1.4639767370455326, "grad_norm": 6.309878267543733, "learning_rate": 8.839494256222891e-07, "loss": 0.6051, "step": 20264 }, { "epoch": 1.464048982245742, "grad_norm": 8.415106550098562, "learning_rate": 8.83726262750425e-07, "loss": 0.6608, "step": 20265 }, { "epoch": 1.4641212274459516, "grad_norm": 6.480774249693341, "learning_rate": 8.835031220040932e-07, "loss": 0.6716, "step": 20266 }, { "epoch": 1.4641934726461612, "grad_norm": 7.270783723882361, "learning_rate": 8.83280003386349e-07, "loss": 0.6128, "step": 20267 }, { "epoch": 1.4642657178463705, "grad_norm": 6.675501246687865, "learning_rate": 8.830569069002459e-07, "loss": 0.6093, "step": 20268 }, { "epoch": 1.46433796304658, "grad_norm": 6.414141302519117, "learning_rate": 8.828338325488383e-07, "loss": 0.5787, "step": 20269 }, { "epoch": 1.4644102082467896, "grad_norm": 6.283847556418336, "learning_rate": 8.826107803351799e-07, "loss": 0.6007, "step": 20270 }, { "epoch": 1.4644824534469991, "grad_norm": 8.420679577294045, "learning_rate": 8.823877502623249e-07, "loss": 0.5861, "step": 20271 }, { "epoch": 1.4645546986472087, "grad_norm": 8.429790223974429, "learning_rate": 8.821647423333249e-07, "loss": 0.6331, "step": 20272 }, { "epoch": 1.4646269438474182, "grad_norm": 7.440947430901094, "learning_rate": 8.819417565512334e-07, "loss": 0.6569, "step": 20273 }, { "epoch": 1.4646991890476277, "grad_norm": 7.932201870071661, "learning_rate": 8.817187929191026e-07, "loss": 0.6321, "step": 20274 }, { "epoch": 1.464771434247837, "grad_norm": 7.83342437150147, "learning_rate": 8.814958514399863e-07, "loss": 0.6898, "step": 20275 }, { "epoch": 1.4648436794480466, "grad_norm": 6.01675681824214, "learning_rate": 8.812729321169338e-07, "loss": 0.5654, "step": 20276 }, { "epoch": 1.4649159246482562, "grad_norm": 6.901933382591041, "learning_rate": 8.810500349529983e-07, "loss": 0.5914, "step": 20277 }, { "epoch": 1.4649881698484657, "grad_norm": 7.538717153278711, "learning_rate": 8.808271599512308e-07, "loss": 0.6875, "step": 20278 }, { "epoch": 1.4650604150486752, "grad_norm": 6.961763035450688, "learning_rate": 8.806043071146822e-07, "loss": 0.611, "step": 20279 }, { "epoch": 1.4651326602488848, "grad_norm": 7.054892975274545, "learning_rate": 8.803814764464033e-07, "loss": 0.583, "step": 20280 }, { "epoch": 1.4652049054490943, "grad_norm": 9.450656234319252, "learning_rate": 8.801586679494445e-07, "loss": 0.6857, "step": 20281 }, { "epoch": 1.4652771506493036, "grad_norm": 7.0962649017225905, "learning_rate": 8.799358816268563e-07, "loss": 0.5831, "step": 20282 }, { "epoch": 1.4653493958495132, "grad_norm": 7.527607016916003, "learning_rate": 8.797131174816875e-07, "loss": 0.6568, "step": 20283 }, { "epoch": 1.4654216410497227, "grad_norm": 8.33696937073504, "learning_rate": 8.794903755169879e-07, "loss": 0.5917, "step": 20284 }, { "epoch": 1.4654938862499323, "grad_norm": 7.325183790589692, "learning_rate": 8.792676557358071e-07, "loss": 0.6969, "step": 20285 }, { "epoch": 1.4655661314501418, "grad_norm": 6.288713414710001, "learning_rate": 8.790449581411941e-07, "loss": 0.5641, "step": 20286 }, { "epoch": 1.4656383766503513, "grad_norm": 7.723988843991774, "learning_rate": 8.788222827361965e-07, "loss": 0.6615, "step": 20287 }, { "epoch": 1.465710621850561, "grad_norm": 6.986136468226594, "learning_rate": 8.785996295238619e-07, "loss": 0.6337, "step": 20288 }, { "epoch": 1.4657828670507702, "grad_norm": 7.010099754331163, "learning_rate": 8.783769985072416e-07, "loss": 0.6387, "step": 20289 }, { "epoch": 1.4658551122509798, "grad_norm": 6.808880325563212, "learning_rate": 8.781543896893798e-07, "loss": 0.5896, "step": 20290 }, { "epoch": 1.4659273574511893, "grad_norm": 12.071191058236678, "learning_rate": 8.779318030733253e-07, "loss": 0.6053, "step": 20291 }, { "epoch": 1.4659996026513988, "grad_norm": 6.290635418113424, "learning_rate": 8.777092386621249e-07, "loss": 0.5797, "step": 20292 }, { "epoch": 1.4660718478516084, "grad_norm": 7.238034601841921, "learning_rate": 8.774866964588263e-07, "loss": 0.5828, "step": 20293 }, { "epoch": 1.466144093051818, "grad_norm": 6.327285221091504, "learning_rate": 8.772641764664741e-07, "loss": 0.6595, "step": 20294 }, { "epoch": 1.4662163382520275, "grad_norm": 5.961736103123087, "learning_rate": 8.770416786881156e-07, "loss": 0.6061, "step": 20295 }, { "epoch": 1.4662885834522368, "grad_norm": 8.354751021067202, "learning_rate": 8.768192031267961e-07, "loss": 0.668, "step": 20296 }, { "epoch": 1.4663608286524463, "grad_norm": 6.774023750040539, "learning_rate": 8.765967497855615e-07, "loss": 0.6512, "step": 20297 }, { "epoch": 1.4664330738526559, "grad_norm": 8.160110436982455, "learning_rate": 8.76374318667457e-07, "loss": 0.6135, "step": 20298 }, { "epoch": 1.4665053190528654, "grad_norm": 7.149219507686961, "learning_rate": 8.761519097755272e-07, "loss": 0.657, "step": 20299 }, { "epoch": 1.466577564253075, "grad_norm": 6.465062661512659, "learning_rate": 8.759295231128179e-07, "loss": 0.6318, "step": 20300 }, { "epoch": 1.4666498094532845, "grad_norm": 6.822296378401913, "learning_rate": 8.757071586823715e-07, "loss": 0.6579, "step": 20301 }, { "epoch": 1.466722054653494, "grad_norm": 7.0140593890962695, "learning_rate": 8.754848164872332e-07, "loss": 0.5805, "step": 20302 }, { "epoch": 1.4667942998537034, "grad_norm": 7.144364661125075, "learning_rate": 8.752624965304459e-07, "loss": 0.6463, "step": 20303 }, { "epoch": 1.4668665450539131, "grad_norm": 6.219332720421435, "learning_rate": 8.750401988150547e-07, "loss": 0.6732, "step": 20304 }, { "epoch": 1.4669387902541224, "grad_norm": 6.464095205436811, "learning_rate": 8.748179233441007e-07, "loss": 0.5292, "step": 20305 }, { "epoch": 1.467011035454332, "grad_norm": 7.179236131163395, "learning_rate": 8.74595670120627e-07, "loss": 0.6356, "step": 20306 }, { "epoch": 1.4670832806545415, "grad_norm": 7.725095789046966, "learning_rate": 8.743734391476772e-07, "loss": 0.5837, "step": 20307 }, { "epoch": 1.467155525854751, "grad_norm": 7.225909229340888, "learning_rate": 8.741512304282923e-07, "loss": 0.6132, "step": 20308 }, { "epoch": 1.4672277710549606, "grad_norm": 6.429458780960062, "learning_rate": 8.739290439655149e-07, "loss": 0.5739, "step": 20309 }, { "epoch": 1.46730001625517, "grad_norm": 7.479084350926354, "learning_rate": 8.73706879762386e-07, "loss": 0.5536, "step": 20310 }, { "epoch": 1.4673722614553797, "grad_norm": 8.145696484391026, "learning_rate": 8.734847378219483e-07, "loss": 0.6877, "step": 20311 }, { "epoch": 1.467444506655589, "grad_norm": 6.629282592808403, "learning_rate": 8.732626181472409e-07, "loss": 0.6019, "step": 20312 }, { "epoch": 1.4675167518557986, "grad_norm": 7.614253491425765, "learning_rate": 8.730405207413048e-07, "loss": 0.6518, "step": 20313 }, { "epoch": 1.467588997056008, "grad_norm": 8.788297275715454, "learning_rate": 8.728184456071819e-07, "loss": 0.6891, "step": 20314 }, { "epoch": 1.4676612422562176, "grad_norm": 6.739539341338343, "learning_rate": 8.7259639274791e-07, "loss": 0.5497, "step": 20315 }, { "epoch": 1.4677334874564272, "grad_norm": 6.828668847674573, "learning_rate": 8.723743621665293e-07, "loss": 0.6798, "step": 20316 }, { "epoch": 1.4678057326566365, "grad_norm": 6.9718987574358495, "learning_rate": 8.721523538660803e-07, "loss": 0.6051, "step": 20317 }, { "epoch": 1.4678779778568463, "grad_norm": 6.375273455570418, "learning_rate": 8.719303678496027e-07, "loss": 0.563, "step": 20318 }, { "epoch": 1.4679502230570556, "grad_norm": 6.386993794249624, "learning_rate": 8.717084041201335e-07, "loss": 0.5444, "step": 20319 }, { "epoch": 1.4680224682572651, "grad_norm": 6.838529084642684, "learning_rate": 8.714864626807118e-07, "loss": 0.6539, "step": 20320 }, { "epoch": 1.4680947134574747, "grad_norm": 7.943306308603785, "learning_rate": 8.712645435343767e-07, "loss": 0.694, "step": 20321 }, { "epoch": 1.4681669586576842, "grad_norm": 7.143582121978249, "learning_rate": 8.710426466841648e-07, "loss": 0.6575, "step": 20322 }, { "epoch": 1.4682392038578937, "grad_norm": 7.186377661608362, "learning_rate": 8.708207721331141e-07, "loss": 0.6034, "step": 20323 }, { "epoch": 1.468311449058103, "grad_norm": 6.611026927223236, "learning_rate": 8.705989198842621e-07, "loss": 0.6747, "step": 20324 }, { "epoch": 1.4683836942583128, "grad_norm": 6.523086629785588, "learning_rate": 8.703770899406458e-07, "loss": 0.5948, "step": 20325 }, { "epoch": 1.4684559394585222, "grad_norm": 6.84841439488887, "learning_rate": 8.701552823053016e-07, "loss": 0.6046, "step": 20326 }, { "epoch": 1.4685281846587317, "grad_norm": 6.585038201779583, "learning_rate": 8.699334969812662e-07, "loss": 0.5917, "step": 20327 }, { "epoch": 1.4686004298589412, "grad_norm": 7.55793990429428, "learning_rate": 8.697117339715755e-07, "loss": 0.6183, "step": 20328 }, { "epoch": 1.4686726750591508, "grad_norm": 7.639646627432516, "learning_rate": 8.694899932792664e-07, "loss": 0.5783, "step": 20329 }, { "epoch": 1.4687449202593603, "grad_norm": 6.003437646908833, "learning_rate": 8.692682749073722e-07, "loss": 0.5865, "step": 20330 }, { "epoch": 1.4688171654595699, "grad_norm": 7.222025866977424, "learning_rate": 8.690465788589295e-07, "loss": 0.5849, "step": 20331 }, { "epoch": 1.4688894106597794, "grad_norm": 6.660980119955866, "learning_rate": 8.688249051369732e-07, "loss": 0.5772, "step": 20332 }, { "epoch": 1.4689616558599887, "grad_norm": 7.159958856986556, "learning_rate": 8.686032537445369e-07, "loss": 0.5992, "step": 20333 }, { "epoch": 1.4690339010601983, "grad_norm": 7.307381166946403, "learning_rate": 8.683816246846549e-07, "loss": 0.5674, "step": 20334 }, { "epoch": 1.4691061462604078, "grad_norm": 6.933068311956959, "learning_rate": 8.68160017960362e-07, "loss": 0.6435, "step": 20335 }, { "epoch": 1.4691783914606174, "grad_norm": 6.16153393460152, "learning_rate": 8.679384335746913e-07, "loss": 0.5951, "step": 20336 }, { "epoch": 1.469250636660827, "grad_norm": 6.725143158817647, "learning_rate": 8.677168715306764e-07, "loss": 0.6554, "step": 20337 }, { "epoch": 1.4693228818610364, "grad_norm": 7.491449441048321, "learning_rate": 8.674953318313498e-07, "loss": 0.5798, "step": 20338 }, { "epoch": 1.469395127061246, "grad_norm": 7.025426190572333, "learning_rate": 8.672738144797454e-07, "loss": 0.6354, "step": 20339 }, { "epoch": 1.4694673722614553, "grad_norm": 6.765224383256666, "learning_rate": 8.670523194788944e-07, "loss": 0.6898, "step": 20340 }, { "epoch": 1.4695396174616648, "grad_norm": 7.784301159606881, "learning_rate": 8.668308468318287e-07, "loss": 0.5971, "step": 20341 }, { "epoch": 1.4696118626618744, "grad_norm": 7.097547951241049, "learning_rate": 8.666093965415809e-07, "loss": 0.6, "step": 20342 }, { "epoch": 1.469684107862084, "grad_norm": 7.363683261572147, "learning_rate": 8.663879686111831e-07, "loss": 0.6405, "step": 20343 }, { "epoch": 1.4697563530622935, "grad_norm": 6.5112020359627145, "learning_rate": 8.661665630436638e-07, "loss": 0.5947, "step": 20344 }, { "epoch": 1.469828598262503, "grad_norm": 7.445238975650212, "learning_rate": 8.659451798420566e-07, "loss": 0.6004, "step": 20345 }, { "epoch": 1.4699008434627125, "grad_norm": 6.2156002176518586, "learning_rate": 8.657238190093917e-07, "loss": 0.6263, "step": 20346 }, { "epoch": 1.4699730886629219, "grad_norm": 7.3095227562957, "learning_rate": 8.655024805486981e-07, "loss": 0.6257, "step": 20347 }, { "epoch": 1.4700453338631314, "grad_norm": 7.000464287755536, "learning_rate": 8.652811644630066e-07, "loss": 0.622, "step": 20348 }, { "epoch": 1.470117579063341, "grad_norm": 7.319719190787473, "learning_rate": 8.650598707553465e-07, "loss": 0.6965, "step": 20349 }, { "epoch": 1.4701898242635505, "grad_norm": 7.566436247601223, "learning_rate": 8.648385994287481e-07, "loss": 0.5754, "step": 20350 }, { "epoch": 1.47026206946376, "grad_norm": 6.289837636626019, "learning_rate": 8.64617350486239e-07, "loss": 0.5679, "step": 20351 }, { "epoch": 1.4703343146639696, "grad_norm": 7.557525511965783, "learning_rate": 8.643961239308485e-07, "loss": 0.6029, "step": 20352 }, { "epoch": 1.4704065598641791, "grad_norm": 7.121647899533621, "learning_rate": 8.641749197656052e-07, "loss": 0.6876, "step": 20353 }, { "epoch": 1.4704788050643884, "grad_norm": 7.119369155711268, "learning_rate": 8.639537379935369e-07, "loss": 0.6097, "step": 20354 }, { "epoch": 1.470551050264598, "grad_norm": 7.525465140430456, "learning_rate": 8.637325786176718e-07, "loss": 0.6187, "step": 20355 }, { "epoch": 1.4706232954648075, "grad_norm": 6.532032604876669, "learning_rate": 8.63511441641037e-07, "loss": 0.5873, "step": 20356 }, { "epoch": 1.470695540665017, "grad_norm": 5.745353479890225, "learning_rate": 8.63290327066661e-07, "loss": 0.6216, "step": 20357 }, { "epoch": 1.4707677858652266, "grad_norm": 6.5522590294242695, "learning_rate": 8.630692348975686e-07, "loss": 0.6493, "step": 20358 }, { "epoch": 1.4708400310654361, "grad_norm": 7.983441143329966, "learning_rate": 8.628481651367876e-07, "loss": 0.6517, "step": 20359 }, { "epoch": 1.4709122762656457, "grad_norm": 6.5683010074904455, "learning_rate": 8.626271177873438e-07, "loss": 0.5636, "step": 20360 }, { "epoch": 1.470984521465855, "grad_norm": 6.34684429903179, "learning_rate": 8.624060928522643e-07, "loss": 0.6468, "step": 20361 }, { "epoch": 1.4710567666660646, "grad_norm": 6.887801580178552, "learning_rate": 8.621850903345732e-07, "loss": 0.6051, "step": 20362 }, { "epoch": 1.471129011866274, "grad_norm": 7.160024655262358, "learning_rate": 8.619641102372964e-07, "loss": 0.6244, "step": 20363 }, { "epoch": 1.4712012570664836, "grad_norm": 7.167637722260949, "learning_rate": 8.61743152563459e-07, "loss": 0.6784, "step": 20364 }, { "epoch": 1.4712735022666932, "grad_norm": 7.881310492819612, "learning_rate": 8.615222173160859e-07, "loss": 0.6236, "step": 20365 }, { "epoch": 1.4713457474669027, "grad_norm": 7.464737201211728, "learning_rate": 8.613013044982016e-07, "loss": 0.6225, "step": 20366 }, { "epoch": 1.4714179926671123, "grad_norm": 7.464073600545179, "learning_rate": 8.610804141128299e-07, "loss": 0.6025, "step": 20367 }, { "epoch": 1.4714902378673216, "grad_norm": 6.9227454195779226, "learning_rate": 8.608595461629957e-07, "loss": 0.5845, "step": 20368 }, { "epoch": 1.4715624830675311, "grad_norm": 8.318011528959216, "learning_rate": 8.606387006517209e-07, "loss": 0.5977, "step": 20369 }, { "epoch": 1.4716347282677407, "grad_norm": 7.210118537595056, "learning_rate": 8.604178775820291e-07, "loss": 0.6219, "step": 20370 }, { "epoch": 1.4717069734679502, "grad_norm": 7.2515246662582316, "learning_rate": 8.601970769569445e-07, "loss": 0.6623, "step": 20371 }, { "epoch": 1.4717792186681597, "grad_norm": 6.8612800333676125, "learning_rate": 8.599762987794869e-07, "loss": 0.6068, "step": 20372 }, { "epoch": 1.4718514638683693, "grad_norm": 7.322807416105461, "learning_rate": 8.597555430526813e-07, "loss": 0.7034, "step": 20373 }, { "epoch": 1.4719237090685788, "grad_norm": 7.200327961128072, "learning_rate": 8.595348097795489e-07, "loss": 0.6274, "step": 20374 }, { "epoch": 1.4719959542687882, "grad_norm": 6.0521724867259055, "learning_rate": 8.593140989631119e-07, "loss": 0.5845, "step": 20375 }, { "epoch": 1.4720681994689977, "grad_norm": 8.207669871928482, "learning_rate": 8.5909341060639e-07, "loss": 0.6039, "step": 20376 }, { "epoch": 1.4721404446692072, "grad_norm": 6.336048280474195, "learning_rate": 8.588727447124054e-07, "loss": 0.5918, "step": 20377 }, { "epoch": 1.4722126898694168, "grad_norm": 6.244420727510917, "learning_rate": 8.586521012841795e-07, "loss": 0.6875, "step": 20378 }, { "epoch": 1.4722849350696263, "grad_norm": 8.000185010678074, "learning_rate": 8.584314803247312e-07, "loss": 0.6649, "step": 20379 }, { "epoch": 1.4723571802698359, "grad_norm": 7.113175602421109, "learning_rate": 8.58210881837081e-07, "loss": 0.6334, "step": 20380 }, { "epoch": 1.4724294254700454, "grad_norm": 8.348917414675409, "learning_rate": 8.579903058242494e-07, "loss": 0.6636, "step": 20381 }, { "epoch": 1.4725016706702547, "grad_norm": 6.157070047202818, "learning_rate": 8.577697522892553e-07, "loss": 0.6412, "step": 20382 }, { "epoch": 1.4725739158704645, "grad_norm": 7.206302099378694, "learning_rate": 8.575492212351183e-07, "loss": 0.6252, "step": 20383 }, { "epoch": 1.4726461610706738, "grad_norm": 7.411191683967814, "learning_rate": 8.573287126648571e-07, "loss": 0.6144, "step": 20384 }, { "epoch": 1.4727184062708834, "grad_norm": 7.622440846715041, "learning_rate": 8.571082265814907e-07, "loss": 0.683, "step": 20385 }, { "epoch": 1.472790651471093, "grad_norm": 6.698708241754845, "learning_rate": 8.568877629880376e-07, "loss": 0.5387, "step": 20386 }, { "epoch": 1.4728628966713024, "grad_norm": 8.753758958209753, "learning_rate": 8.566673218875146e-07, "loss": 0.7862, "step": 20387 }, { "epoch": 1.472935141871512, "grad_norm": 6.383506933230477, "learning_rate": 8.564469032829398e-07, "loss": 0.6279, "step": 20388 }, { "epoch": 1.4730073870717213, "grad_norm": 8.997181345052557, "learning_rate": 8.562265071773315e-07, "loss": 0.6808, "step": 20389 }, { "epoch": 1.473079632271931, "grad_norm": 7.292074515426004, "learning_rate": 8.560061335737055e-07, "loss": 0.6223, "step": 20390 }, { "epoch": 1.4731518774721404, "grad_norm": 6.883928362210412, "learning_rate": 8.557857824750787e-07, "loss": 0.6661, "step": 20391 }, { "epoch": 1.47322412267235, "grad_norm": 6.578643415958638, "learning_rate": 8.555654538844683e-07, "loss": 0.6683, "step": 20392 }, { "epoch": 1.4732963678725595, "grad_norm": 6.5200648560838, "learning_rate": 8.553451478048896e-07, "loss": 0.5839, "step": 20393 }, { "epoch": 1.473368613072769, "grad_norm": 7.416758104539341, "learning_rate": 8.551248642393589e-07, "loss": 0.6433, "step": 20394 }, { "epoch": 1.4734408582729785, "grad_norm": 8.54987066516876, "learning_rate": 8.549046031908919e-07, "loss": 0.6085, "step": 20395 }, { "epoch": 1.4735131034731879, "grad_norm": 6.6247598856472125, "learning_rate": 8.546843646625041e-07, "loss": 0.5883, "step": 20396 }, { "epoch": 1.4735853486733976, "grad_norm": 7.691641335602362, "learning_rate": 8.544641486572092e-07, "loss": 0.6424, "step": 20397 }, { "epoch": 1.473657593873607, "grad_norm": 8.447962173232732, "learning_rate": 8.542439551780224e-07, "loss": 0.6849, "step": 20398 }, { "epoch": 1.4737298390738165, "grad_norm": 6.607753257098017, "learning_rate": 8.54023784227958e-07, "loss": 0.578, "step": 20399 }, { "epoch": 1.473802084274026, "grad_norm": 6.716274071413684, "learning_rate": 8.538036358100308e-07, "loss": 0.6656, "step": 20400 }, { "epoch": 1.4738743294742356, "grad_norm": 6.698988129230146, "learning_rate": 8.535835099272519e-07, "loss": 0.6575, "step": 20401 }, { "epoch": 1.4739465746744451, "grad_norm": 6.929403168049963, "learning_rate": 8.533634065826374e-07, "loss": 0.5944, "step": 20402 }, { "epoch": 1.4740188198746544, "grad_norm": 8.916169007100729, "learning_rate": 8.531433257791999e-07, "loss": 0.6767, "step": 20403 }, { "epoch": 1.4740910650748642, "grad_norm": 7.403994446144927, "learning_rate": 8.529232675199509e-07, "loss": 0.6508, "step": 20404 }, { "epoch": 1.4741633102750735, "grad_norm": 7.605146733360081, "learning_rate": 8.527032318079034e-07, "loss": 0.5917, "step": 20405 }, { "epoch": 1.474235555475283, "grad_norm": 6.247082754232764, "learning_rate": 8.524832186460699e-07, "loss": 0.5571, "step": 20406 }, { "epoch": 1.4743078006754926, "grad_norm": 7.041023751729013, "learning_rate": 8.522632280374624e-07, "loss": 0.6297, "step": 20407 }, { "epoch": 1.4743800458757021, "grad_norm": 7.440054062267484, "learning_rate": 8.520432599850914e-07, "loss": 0.7903, "step": 20408 }, { "epoch": 1.4744522910759117, "grad_norm": 6.377297679244498, "learning_rate": 8.518233144919683e-07, "loss": 0.5757, "step": 20409 }, { "epoch": 1.4745245362761212, "grad_norm": 6.79295723789637, "learning_rate": 8.516033915611046e-07, "loss": 0.606, "step": 20410 }, { "epoch": 1.4745967814763308, "grad_norm": 9.886581389800442, "learning_rate": 8.513834911955104e-07, "loss": 0.7021, "step": 20411 }, { "epoch": 1.47466902667654, "grad_norm": 6.661559087504843, "learning_rate": 8.511636133981963e-07, "loss": 0.6081, "step": 20412 }, { "epoch": 1.4747412718767496, "grad_norm": 7.093834964730709, "learning_rate": 8.509437581721719e-07, "loss": 0.6421, "step": 20413 }, { "epoch": 1.4748135170769592, "grad_norm": 6.385821264827129, "learning_rate": 8.507239255204478e-07, "loss": 0.6518, "step": 20414 }, { "epoch": 1.4748857622771687, "grad_norm": 6.709770696277992, "learning_rate": 8.505041154460319e-07, "loss": 0.6233, "step": 20415 }, { "epoch": 1.4749580074773783, "grad_norm": 6.181484187890212, "learning_rate": 8.502843279519338e-07, "loss": 0.5605, "step": 20416 }, { "epoch": 1.4750302526775878, "grad_norm": 6.788139535129539, "learning_rate": 8.500645630411624e-07, "loss": 0.6321, "step": 20417 }, { "epoch": 1.4751024978777973, "grad_norm": 7.350086611282848, "learning_rate": 8.49844820716727e-07, "loss": 0.6403, "step": 20418 }, { "epoch": 1.4751747430780067, "grad_norm": 6.680192894805988, "learning_rate": 8.496251009816337e-07, "loss": 0.5852, "step": 20419 }, { "epoch": 1.4752469882782162, "grad_norm": 7.4574839642896125, "learning_rate": 8.494054038388916e-07, "loss": 0.6287, "step": 20420 }, { "epoch": 1.4753192334784258, "grad_norm": 7.080583225517271, "learning_rate": 8.491857292915076e-07, "loss": 0.6944, "step": 20421 }, { "epoch": 1.4753914786786353, "grad_norm": 5.915899880738837, "learning_rate": 8.489660773424893e-07, "loss": 0.6603, "step": 20422 }, { "epoch": 1.4754637238788448, "grad_norm": 7.057355509301481, "learning_rate": 8.487464479948434e-07, "loss": 0.6028, "step": 20423 }, { "epoch": 1.4755359690790544, "grad_norm": 6.285217853858288, "learning_rate": 8.485268412515768e-07, "loss": 0.644, "step": 20424 }, { "epoch": 1.475608214279264, "grad_norm": 7.2412000721639656, "learning_rate": 8.483072571156961e-07, "loss": 0.6852, "step": 20425 }, { "epoch": 1.4756804594794732, "grad_norm": 7.936839399106977, "learning_rate": 8.480876955902057e-07, "loss": 0.6136, "step": 20426 }, { "epoch": 1.4757527046796828, "grad_norm": 7.59219859409562, "learning_rate": 8.478681566781122e-07, "loss": 0.6204, "step": 20427 }, { "epoch": 1.4758249498798923, "grad_norm": 7.096491750147541, "learning_rate": 8.476486403824216e-07, "loss": 0.5686, "step": 20428 }, { "epoch": 1.4758971950801019, "grad_norm": 7.417554252176682, "learning_rate": 8.474291467061366e-07, "loss": 0.6013, "step": 20429 }, { "epoch": 1.4759694402803114, "grad_norm": 7.602648133149041, "learning_rate": 8.47209675652264e-07, "loss": 0.6027, "step": 20430 }, { "epoch": 1.476041685480521, "grad_norm": 7.416927575940596, "learning_rate": 8.469902272238081e-07, "loss": 0.6859, "step": 20431 }, { "epoch": 1.4761139306807305, "grad_norm": 6.927337075846847, "learning_rate": 8.46770801423773e-07, "loss": 0.6446, "step": 20432 }, { "epoch": 1.4761861758809398, "grad_norm": 7.09787472360854, "learning_rate": 8.465513982551612e-07, "loss": 0.6753, "step": 20433 }, { "epoch": 1.4762584210811494, "grad_norm": 7.043185670276365, "learning_rate": 8.463320177209769e-07, "loss": 0.6274, "step": 20434 }, { "epoch": 1.476330666281359, "grad_norm": 7.201969873316074, "learning_rate": 8.461126598242233e-07, "loss": 0.5745, "step": 20435 }, { "epoch": 1.4764029114815684, "grad_norm": 7.324496348905958, "learning_rate": 8.458933245679043e-07, "loss": 0.6097, "step": 20436 }, { "epoch": 1.476475156681778, "grad_norm": 7.860283558319477, "learning_rate": 8.456740119550203e-07, "loss": 0.6589, "step": 20437 }, { "epoch": 1.4765474018819875, "grad_norm": 6.930132553792515, "learning_rate": 8.454547219885744e-07, "loss": 0.659, "step": 20438 }, { "epoch": 1.476619647082197, "grad_norm": 5.6367175656725195, "learning_rate": 8.452354546715691e-07, "loss": 0.6265, "step": 20439 }, { "epoch": 1.4766918922824064, "grad_norm": 6.3297340137107, "learning_rate": 8.450162100070053e-07, "loss": 0.5564, "step": 20440 }, { "epoch": 1.476764137482616, "grad_norm": 6.863529971571064, "learning_rate": 8.447969879978846e-07, "loss": 0.5511, "step": 20441 }, { "epoch": 1.4768363826828255, "grad_norm": 7.043992630876177, "learning_rate": 8.445777886472082e-07, "loss": 0.6264, "step": 20442 }, { "epoch": 1.476908627883035, "grad_norm": 7.571225515763407, "learning_rate": 8.443586119579769e-07, "loss": 0.6803, "step": 20443 }, { "epoch": 1.4769808730832445, "grad_norm": 6.944292472024181, "learning_rate": 8.441394579331902e-07, "loss": 0.5799, "step": 20444 }, { "epoch": 1.477053118283454, "grad_norm": 5.64565232703484, "learning_rate": 8.439203265758486e-07, "loss": 0.5434, "step": 20445 }, { "epoch": 1.4771253634836636, "grad_norm": 6.751869507907172, "learning_rate": 8.437012178889525e-07, "loss": 0.6437, "step": 20446 }, { "epoch": 1.477197608683873, "grad_norm": 6.636980560150116, "learning_rate": 8.434821318755002e-07, "loss": 0.6048, "step": 20447 }, { "epoch": 1.4772698538840825, "grad_norm": 6.821813254398046, "learning_rate": 8.43263068538491e-07, "loss": 0.6543, "step": 20448 }, { "epoch": 1.477342099084292, "grad_norm": 7.129675151171368, "learning_rate": 8.430440278809232e-07, "loss": 0.6524, "step": 20449 }, { "epoch": 1.4774143442845016, "grad_norm": 6.350113406632956, "learning_rate": 8.428250099057981e-07, "loss": 0.6539, "step": 20450 }, { "epoch": 1.4774865894847111, "grad_norm": 7.5176839248693, "learning_rate": 8.426060146161108e-07, "loss": 0.7214, "step": 20451 }, { "epoch": 1.4775588346849207, "grad_norm": 8.28641081452107, "learning_rate": 8.423870420148602e-07, "loss": 0.6039, "step": 20452 }, { "epoch": 1.4776310798851302, "grad_norm": 6.819696107956924, "learning_rate": 8.421680921050448e-07, "loss": 0.6123, "step": 20453 }, { "epoch": 1.4777033250853395, "grad_norm": 6.754062136720367, "learning_rate": 8.419491648896602e-07, "loss": 0.725, "step": 20454 }, { "epoch": 1.4777755702855493, "grad_norm": 7.5541800138170085, "learning_rate": 8.417302603717042e-07, "loss": 0.6533, "step": 20455 }, { "epoch": 1.4778478154857586, "grad_norm": 7.883580694961815, "learning_rate": 8.415113785541734e-07, "loss": 0.6957, "step": 20456 }, { "epoch": 1.4779200606859682, "grad_norm": 6.712241919757918, "learning_rate": 8.412925194400643e-07, "loss": 0.627, "step": 20457 }, { "epoch": 1.4779923058861777, "grad_norm": 6.682117898950789, "learning_rate": 8.410736830323723e-07, "loss": 0.6282, "step": 20458 }, { "epoch": 1.4780645510863872, "grad_norm": 7.035716897080167, "learning_rate": 8.408548693340937e-07, "loss": 0.6262, "step": 20459 }, { "epoch": 1.4781367962865968, "grad_norm": 7.130484628351446, "learning_rate": 8.406360783482237e-07, "loss": 0.6495, "step": 20460 }, { "epoch": 1.478209041486806, "grad_norm": 7.350373612853645, "learning_rate": 8.404173100777582e-07, "loss": 0.6646, "step": 20461 }, { "epoch": 1.4782812866870159, "grad_norm": 7.84011160673772, "learning_rate": 8.401985645256902e-07, "loss": 0.6646, "step": 20462 }, { "epoch": 1.4783535318872252, "grad_norm": 6.645998802237757, "learning_rate": 8.399798416950152e-07, "loss": 0.6335, "step": 20463 }, { "epoch": 1.4784257770874347, "grad_norm": 6.811049840726562, "learning_rate": 8.39761141588728e-07, "loss": 0.5985, "step": 20464 }, { "epoch": 1.4784980222876443, "grad_norm": 6.073983240008351, "learning_rate": 8.39542464209821e-07, "loss": 0.6389, "step": 20465 }, { "epoch": 1.4785702674878538, "grad_norm": 6.26040058694595, "learning_rate": 8.393238095612882e-07, "loss": 0.5817, "step": 20466 }, { "epoch": 1.4786425126880633, "grad_norm": 7.328866754337401, "learning_rate": 8.391051776461232e-07, "loss": 0.5869, "step": 20467 }, { "epoch": 1.4787147578882727, "grad_norm": 7.5244165340075435, "learning_rate": 8.388865684673187e-07, "loss": 0.6416, "step": 20468 }, { "epoch": 1.4787870030884824, "grad_norm": 8.101912811857302, "learning_rate": 8.386679820278671e-07, "loss": 0.6919, "step": 20469 }, { "epoch": 1.4788592482886918, "grad_norm": 7.1994081783780315, "learning_rate": 8.38449418330761e-07, "loss": 0.6391, "step": 20470 }, { "epoch": 1.4789314934889013, "grad_norm": 7.251114595171866, "learning_rate": 8.382308773789932e-07, "loss": 0.6231, "step": 20471 }, { "epoch": 1.4790037386891108, "grad_norm": 6.821570281537604, "learning_rate": 8.380123591755534e-07, "loss": 0.5901, "step": 20472 }, { "epoch": 1.4790759838893204, "grad_norm": 5.3872188698334185, "learning_rate": 8.377938637234342e-07, "loss": 0.5764, "step": 20473 }, { "epoch": 1.47914822908953, "grad_norm": 8.871867741994654, "learning_rate": 8.375753910256263e-07, "loss": 0.6804, "step": 20474 }, { "epoch": 1.4792204742897392, "grad_norm": 7.232777099786452, "learning_rate": 8.373569410851212e-07, "loss": 0.5385, "step": 20475 }, { "epoch": 1.479292719489949, "grad_norm": 7.317830286785911, "learning_rate": 8.371385139049077e-07, "loss": 0.684, "step": 20476 }, { "epoch": 1.4793649646901583, "grad_norm": 7.6278621117261824, "learning_rate": 8.369201094879769e-07, "loss": 0.6402, "step": 20477 }, { "epoch": 1.4794372098903679, "grad_norm": 7.454078725257333, "learning_rate": 8.367017278373188e-07, "loss": 0.5917, "step": 20478 }, { "epoch": 1.4795094550905774, "grad_norm": 6.934305916330759, "learning_rate": 8.364833689559224e-07, "loss": 0.643, "step": 20479 }, { "epoch": 1.479581700290787, "grad_norm": 7.558587441454086, "learning_rate": 8.36265032846777e-07, "loss": 0.6652, "step": 20480 }, { "epoch": 1.4796539454909965, "grad_norm": 9.222120179782497, "learning_rate": 8.360467195128713e-07, "loss": 0.6721, "step": 20481 }, { "epoch": 1.479726190691206, "grad_norm": 6.665899741246606, "learning_rate": 8.35828428957195e-07, "loss": 0.5285, "step": 20482 }, { "epoch": 1.4797984358914156, "grad_norm": 5.924764044203971, "learning_rate": 8.356101611827347e-07, "loss": 0.5291, "step": 20483 }, { "epoch": 1.479870681091625, "grad_norm": 7.142296393045677, "learning_rate": 8.353919161924789e-07, "loss": 0.6524, "step": 20484 }, { "epoch": 1.4799429262918344, "grad_norm": 6.737963647406353, "learning_rate": 8.351736939894159e-07, "loss": 0.6182, "step": 20485 }, { "epoch": 1.480015171492044, "grad_norm": 7.173889276216362, "learning_rate": 8.349554945765309e-07, "loss": 0.6577, "step": 20486 }, { "epoch": 1.4800874166922535, "grad_norm": 7.030478473295865, "learning_rate": 8.347373179568133e-07, "loss": 0.6745, "step": 20487 }, { "epoch": 1.480159661892463, "grad_norm": 7.2973287633269015, "learning_rate": 8.345191641332487e-07, "loss": 0.6546, "step": 20488 }, { "epoch": 1.4802319070926726, "grad_norm": 6.7123652439336725, "learning_rate": 8.343010331088244e-07, "loss": 0.6665, "step": 20489 }, { "epoch": 1.4803041522928821, "grad_norm": 7.103402582359409, "learning_rate": 8.340829248865248e-07, "loss": 0.5735, "step": 20490 }, { "epoch": 1.4803763974930915, "grad_norm": 7.11912990825668, "learning_rate": 8.338648394693363e-07, "loss": 0.6068, "step": 20491 }, { "epoch": 1.480448642693301, "grad_norm": 8.833150705812823, "learning_rate": 8.336467768602447e-07, "loss": 0.5993, "step": 20492 }, { "epoch": 1.4805208878935106, "grad_norm": 7.211984081096292, "learning_rate": 8.334287370622357e-07, "loss": 0.5826, "step": 20493 }, { "epoch": 1.48059313309372, "grad_norm": 6.533504991157688, "learning_rate": 8.332107200782924e-07, "loss": 0.577, "step": 20494 }, { "epoch": 1.4806653782939296, "grad_norm": 10.259021417600088, "learning_rate": 8.329927259114001e-07, "loss": 0.5864, "step": 20495 }, { "epoch": 1.4807376234941392, "grad_norm": 8.715418900507773, "learning_rate": 8.327747545645431e-07, "loss": 0.649, "step": 20496 }, { "epoch": 1.4808098686943487, "grad_norm": 6.261732761798109, "learning_rate": 8.325568060407052e-07, "loss": 0.5971, "step": 20497 }, { "epoch": 1.480882113894558, "grad_norm": 7.368957889332512, "learning_rate": 8.3233888034287e-07, "loss": 0.5834, "step": 20498 }, { "epoch": 1.4809543590947676, "grad_norm": 7.544512582332968, "learning_rate": 8.321209774740207e-07, "loss": 0.6149, "step": 20499 }, { "epoch": 1.4810266042949771, "grad_norm": 7.5648736932853895, "learning_rate": 8.319030974371408e-07, "loss": 0.6547, "step": 20500 }, { "epoch": 1.4810988494951867, "grad_norm": 9.37760095756043, "learning_rate": 8.316852402352116e-07, "loss": 0.5867, "step": 20501 }, { "epoch": 1.4811710946953962, "grad_norm": 6.2327566699484676, "learning_rate": 8.314674058712163e-07, "loss": 0.5345, "step": 20502 }, { "epoch": 1.4812433398956057, "grad_norm": 7.055449080310446, "learning_rate": 8.312495943481372e-07, "loss": 0.6188, "step": 20503 }, { "epoch": 1.4813155850958153, "grad_norm": 6.76500052373846, "learning_rate": 8.310318056689548e-07, "loss": 0.5794, "step": 20504 }, { "epoch": 1.4813878302960246, "grad_norm": 8.004037315148222, "learning_rate": 8.30814039836651e-07, "loss": 0.6124, "step": 20505 }, { "epoch": 1.4814600754962342, "grad_norm": 6.717871320177463, "learning_rate": 8.305962968542061e-07, "loss": 0.6166, "step": 20506 }, { "epoch": 1.4815323206964437, "grad_norm": 8.335560869683997, "learning_rate": 8.303785767246034e-07, "loss": 0.6941, "step": 20507 }, { "epoch": 1.4816045658966532, "grad_norm": 6.810484981884665, "learning_rate": 8.301608794508209e-07, "loss": 0.6298, "step": 20508 }, { "epoch": 1.4816768110968628, "grad_norm": 6.175199052946056, "learning_rate": 8.299432050358395e-07, "loss": 0.6701, "step": 20509 }, { "epoch": 1.4817490562970723, "grad_norm": 7.125157940101168, "learning_rate": 8.297255534826393e-07, "loss": 0.6435, "step": 20510 }, { "epoch": 1.4818213014972819, "grad_norm": 7.010480392037941, "learning_rate": 8.29507924794199e-07, "loss": 0.6561, "step": 20511 }, { "epoch": 1.4818935466974912, "grad_norm": 7.73994039034499, "learning_rate": 8.292903189734977e-07, "loss": 0.5953, "step": 20512 }, { "epoch": 1.4819657918977007, "grad_norm": 6.7458503118171365, "learning_rate": 8.290727360235151e-07, "loss": 0.6724, "step": 20513 }, { "epoch": 1.4820380370979103, "grad_norm": 6.277459514431158, "learning_rate": 8.288551759472294e-07, "loss": 0.5499, "step": 20514 }, { "epoch": 1.4821102822981198, "grad_norm": 6.677529576506359, "learning_rate": 8.286376387476183e-07, "loss": 0.6099, "step": 20515 }, { "epoch": 1.4821825274983293, "grad_norm": 6.3801210067897065, "learning_rate": 8.284201244276607e-07, "loss": 0.5956, "step": 20516 }, { "epoch": 1.482254772698539, "grad_norm": 6.423355711479118, "learning_rate": 8.282026329903333e-07, "loss": 0.6613, "step": 20517 }, { "epoch": 1.4823270178987484, "grad_norm": 8.626437302399326, "learning_rate": 8.27985164438615e-07, "loss": 0.6684, "step": 20518 }, { "epoch": 1.4823992630989578, "grad_norm": 7.480527139540394, "learning_rate": 8.277677187754804e-07, "loss": 0.6802, "step": 20519 }, { "epoch": 1.4824715082991673, "grad_norm": 6.874670679701371, "learning_rate": 8.275502960039075e-07, "loss": 0.5915, "step": 20520 }, { "epoch": 1.4825437534993768, "grad_norm": 7.059802595193825, "learning_rate": 8.273328961268734e-07, "loss": 0.6466, "step": 20521 }, { "epoch": 1.4826159986995864, "grad_norm": 6.453795612949073, "learning_rate": 8.271155191473523e-07, "loss": 0.6012, "step": 20522 }, { "epoch": 1.482688243899796, "grad_norm": 6.940230923199457, "learning_rate": 8.268981650683208e-07, "loss": 0.6067, "step": 20523 }, { "epoch": 1.4827604891000055, "grad_norm": 6.6364175549664735, "learning_rate": 8.266808338927543e-07, "loss": 0.5857, "step": 20524 }, { "epoch": 1.482832734300215, "grad_norm": 7.9842039563993366, "learning_rate": 8.264635256236281e-07, "loss": 0.6556, "step": 20525 }, { "epoch": 1.4829049795004243, "grad_norm": 7.123031076466576, "learning_rate": 8.262462402639166e-07, "loss": 0.6086, "step": 20526 }, { "epoch": 1.482977224700634, "grad_norm": 7.788187021777381, "learning_rate": 8.260289778165945e-07, "loss": 0.6212, "step": 20527 }, { "epoch": 1.4830494699008434, "grad_norm": 5.82361188729283, "learning_rate": 8.25811738284637e-07, "loss": 0.5753, "step": 20528 }, { "epoch": 1.483121715101053, "grad_norm": 7.204781099868439, "learning_rate": 8.255945216710157e-07, "loss": 0.7313, "step": 20529 }, { "epoch": 1.4831939603012625, "grad_norm": 7.496030392759218, "learning_rate": 8.253773279787056e-07, "loss": 0.6196, "step": 20530 }, { "epoch": 1.483266205501472, "grad_norm": 6.395291217439545, "learning_rate": 8.251601572106796e-07, "loss": 0.6005, "step": 20531 }, { "epoch": 1.4833384507016816, "grad_norm": 6.474118276529715, "learning_rate": 8.249430093699112e-07, "loss": 0.5925, "step": 20532 }, { "epoch": 1.483410695901891, "grad_norm": 6.813166874434144, "learning_rate": 8.247258844593717e-07, "loss": 0.589, "step": 20533 }, { "epoch": 1.4834829411021007, "grad_norm": 6.838493662711926, "learning_rate": 8.245087824820333e-07, "loss": 0.5767, "step": 20534 }, { "epoch": 1.48355518630231, "grad_norm": 6.523594800685827, "learning_rate": 8.242917034408704e-07, "loss": 0.5635, "step": 20535 }, { "epoch": 1.4836274315025195, "grad_norm": 7.5391275195544685, "learning_rate": 8.240746473388523e-07, "loss": 0.646, "step": 20536 }, { "epoch": 1.483699676702729, "grad_norm": 7.221108319710364, "learning_rate": 8.238576141789506e-07, "loss": 0.6013, "step": 20537 }, { "epoch": 1.4837719219029386, "grad_norm": 6.912501754898483, "learning_rate": 8.23640603964137e-07, "loss": 0.5635, "step": 20538 }, { "epoch": 1.4838441671031481, "grad_norm": 7.101734390108958, "learning_rate": 8.234236166973827e-07, "loss": 0.5965, "step": 20539 }, { "epoch": 1.4839164123033575, "grad_norm": 6.313601558241577, "learning_rate": 8.232066523816565e-07, "loss": 0.5491, "step": 20540 }, { "epoch": 1.4839886575035672, "grad_norm": 7.043101448590438, "learning_rate": 8.229897110199295e-07, "loss": 0.5984, "step": 20541 }, { "epoch": 1.4840609027037766, "grad_norm": 7.3195302702062, "learning_rate": 8.22772792615171e-07, "loss": 0.5858, "step": 20542 }, { "epoch": 1.484133147903986, "grad_norm": 7.811830049399636, "learning_rate": 8.225558971703507e-07, "loss": 0.6434, "step": 20543 }, { "epoch": 1.4842053931041956, "grad_norm": 8.23395805099509, "learning_rate": 8.223390246884374e-07, "loss": 0.6454, "step": 20544 }, { "epoch": 1.4842776383044052, "grad_norm": 7.105409425862162, "learning_rate": 8.221221751724006e-07, "loss": 0.6254, "step": 20545 }, { "epoch": 1.4843498835046147, "grad_norm": 7.073437348996299, "learning_rate": 8.219053486252094e-07, "loss": 0.5578, "step": 20546 }, { "epoch": 1.484422128704824, "grad_norm": 7.621166829160127, "learning_rate": 8.2168854504983e-07, "loss": 0.6172, "step": 20547 }, { "epoch": 1.4844943739050338, "grad_norm": 6.949470982897811, "learning_rate": 8.214717644492312e-07, "loss": 0.6774, "step": 20548 }, { "epoch": 1.4845666191052431, "grad_norm": 6.793752369533712, "learning_rate": 8.212550068263808e-07, "loss": 0.6655, "step": 20549 }, { "epoch": 1.4846388643054527, "grad_norm": 8.921365325853648, "learning_rate": 8.210382721842467e-07, "loss": 0.6435, "step": 20550 }, { "epoch": 1.4847111095056622, "grad_norm": 7.141591613921178, "learning_rate": 8.208215605257941e-07, "loss": 0.619, "step": 20551 }, { "epoch": 1.4847833547058717, "grad_norm": 6.211389220503251, "learning_rate": 8.206048718539905e-07, "loss": 0.611, "step": 20552 }, { "epoch": 1.4848555999060813, "grad_norm": 7.175741910956403, "learning_rate": 8.203882061718024e-07, "loss": 0.6616, "step": 20553 }, { "epoch": 1.4849278451062908, "grad_norm": 7.121506754856914, "learning_rate": 8.201715634821958e-07, "loss": 0.6879, "step": 20554 }, { "epoch": 1.4850000903065004, "grad_norm": 7.915699494828719, "learning_rate": 8.199549437881357e-07, "loss": 0.6526, "step": 20555 }, { "epoch": 1.4850723355067097, "grad_norm": 6.853273497789071, "learning_rate": 8.197383470925882e-07, "loss": 0.6062, "step": 20556 }, { "epoch": 1.4851445807069192, "grad_norm": 7.879687654917706, "learning_rate": 8.195217733985192e-07, "loss": 0.6943, "step": 20557 }, { "epoch": 1.4852168259071288, "grad_norm": 7.777802833637872, "learning_rate": 8.19305222708891e-07, "loss": 0.6564, "step": 20558 }, { "epoch": 1.4852890711073383, "grad_norm": 6.682423598983381, "learning_rate": 8.190886950266697e-07, "loss": 0.5728, "step": 20559 }, { "epoch": 1.4853613163075479, "grad_norm": 7.507141527883815, "learning_rate": 8.188721903548197e-07, "loss": 0.6777, "step": 20560 }, { "epoch": 1.4854335615077574, "grad_norm": 7.494693913638559, "learning_rate": 8.186557086963032e-07, "loss": 0.6292, "step": 20561 }, { "epoch": 1.485505806707967, "grad_norm": 9.017064764667872, "learning_rate": 8.184392500540847e-07, "loss": 0.6631, "step": 20562 }, { "epoch": 1.4855780519081763, "grad_norm": 6.298674598419682, "learning_rate": 8.182228144311263e-07, "loss": 0.5918, "step": 20563 }, { "epoch": 1.4856502971083858, "grad_norm": 6.213514721778158, "learning_rate": 8.180064018303935e-07, "loss": 0.6212, "step": 20564 }, { "epoch": 1.4857225423085954, "grad_norm": 6.455284366977564, "learning_rate": 8.17790012254846e-07, "loss": 0.6546, "step": 20565 }, { "epoch": 1.485794787508805, "grad_norm": 8.935320395012338, "learning_rate": 8.175736457074473e-07, "loss": 0.6522, "step": 20566 }, { "epoch": 1.4858670327090144, "grad_norm": 6.643034087371999, "learning_rate": 8.17357302191159e-07, "loss": 0.6403, "step": 20567 }, { "epoch": 1.485939277909224, "grad_norm": 8.411375390619186, "learning_rate": 8.171409817089437e-07, "loss": 0.6891, "step": 20568 }, { "epoch": 1.4860115231094335, "grad_norm": 6.805222289906411, "learning_rate": 8.169246842637607e-07, "loss": 0.6175, "step": 20569 }, { "epoch": 1.4860837683096428, "grad_norm": 7.318334876193315, "learning_rate": 8.16708409858572e-07, "loss": 0.6057, "step": 20570 }, { "epoch": 1.4861560135098524, "grad_norm": 5.366045390275148, "learning_rate": 8.164921584963384e-07, "loss": 0.5744, "step": 20571 }, { "epoch": 1.486228258710062, "grad_norm": 6.935999527611106, "learning_rate": 8.162759301800197e-07, "loss": 0.5484, "step": 20572 }, { "epoch": 1.4863005039102715, "grad_norm": 7.552159577785025, "learning_rate": 8.160597249125763e-07, "loss": 0.6159, "step": 20573 }, { "epoch": 1.486372749110481, "grad_norm": 7.1491757538012894, "learning_rate": 8.158435426969677e-07, "loss": 0.658, "step": 20574 }, { "epoch": 1.4864449943106905, "grad_norm": 6.103752495403667, "learning_rate": 8.156273835361542e-07, "loss": 0.6226, "step": 20575 }, { "epoch": 1.4865172395109, "grad_norm": 8.06777001183519, "learning_rate": 8.154112474330933e-07, "loss": 0.6086, "step": 20576 }, { "epoch": 1.4865894847111094, "grad_norm": 7.807985512047756, "learning_rate": 8.151951343907444e-07, "loss": 0.5838, "step": 20577 }, { "epoch": 1.486661729911319, "grad_norm": 7.054334122603641, "learning_rate": 8.149790444120664e-07, "loss": 0.6972, "step": 20578 }, { "epoch": 1.4867339751115285, "grad_norm": 7.101740030182661, "learning_rate": 8.147629775000165e-07, "loss": 0.6168, "step": 20579 }, { "epoch": 1.486806220311738, "grad_norm": 6.133608004736015, "learning_rate": 8.145469336575529e-07, "loss": 0.6167, "step": 20580 }, { "epoch": 1.4868784655119476, "grad_norm": 5.890923530441039, "learning_rate": 8.143309128876331e-07, "loss": 0.6348, "step": 20581 }, { "epoch": 1.4869507107121571, "grad_norm": 6.6298790006111075, "learning_rate": 8.14114915193214e-07, "loss": 0.6448, "step": 20582 }, { "epoch": 1.4870229559123667, "grad_norm": 6.944995851453691, "learning_rate": 8.13898940577253e-07, "loss": 0.5654, "step": 20583 }, { "epoch": 1.487095201112576, "grad_norm": 6.634349343910149, "learning_rate": 8.136829890427062e-07, "loss": 0.6159, "step": 20584 }, { "epoch": 1.4871674463127855, "grad_norm": 6.266839497124116, "learning_rate": 8.13467060592531e-07, "loss": 0.608, "step": 20585 }, { "epoch": 1.487239691512995, "grad_norm": 7.444827533310119, "learning_rate": 8.132511552296812e-07, "loss": 0.6879, "step": 20586 }, { "epoch": 1.4873119367132046, "grad_norm": 6.728376722078586, "learning_rate": 8.130352729571134e-07, "loss": 0.5854, "step": 20587 }, { "epoch": 1.4873841819134141, "grad_norm": 6.440542446534704, "learning_rate": 8.128194137777828e-07, "loss": 0.5926, "step": 20588 }, { "epoch": 1.4874564271136237, "grad_norm": 6.676350649082323, "learning_rate": 8.126035776946453e-07, "loss": 0.6139, "step": 20589 }, { "epoch": 1.4875286723138332, "grad_norm": 6.838757230949728, "learning_rate": 8.12387764710654e-07, "loss": 0.6509, "step": 20590 }, { "epoch": 1.4876009175140426, "grad_norm": 6.7066922599248535, "learning_rate": 8.121719748287629e-07, "loss": 0.5817, "step": 20591 }, { "epoch": 1.487673162714252, "grad_norm": 7.242655486918957, "learning_rate": 8.119562080519278e-07, "loss": 0.5294, "step": 20592 }, { "epoch": 1.4877454079144616, "grad_norm": 6.4114584986058905, "learning_rate": 8.117404643831022e-07, "loss": 0.6759, "step": 20593 }, { "epoch": 1.4878176531146712, "grad_norm": 7.123596320836571, "learning_rate": 8.11524743825238e-07, "loss": 0.624, "step": 20594 }, { "epoch": 1.4878898983148807, "grad_norm": 5.638390095682787, "learning_rate": 8.113090463812892e-07, "loss": 0.6114, "step": 20595 }, { "epoch": 1.4879621435150903, "grad_norm": 8.811732481952461, "learning_rate": 8.110933720542091e-07, "loss": 0.5701, "step": 20596 }, { "epoch": 1.4880343887152998, "grad_norm": 6.478918661622924, "learning_rate": 8.108777208469487e-07, "loss": 0.5771, "step": 20597 }, { "epoch": 1.4881066339155091, "grad_norm": 6.763254221601878, "learning_rate": 8.106620927624606e-07, "loss": 0.6002, "step": 20598 }, { "epoch": 1.4881788791157187, "grad_norm": 6.9259822878675825, "learning_rate": 8.104464878036969e-07, "loss": 0.5786, "step": 20599 }, { "epoch": 1.4882511243159282, "grad_norm": 8.859701375987775, "learning_rate": 8.102309059736089e-07, "loss": 0.6631, "step": 20600 }, { "epoch": 1.4883233695161378, "grad_norm": 6.890290059709649, "learning_rate": 8.100153472751476e-07, "loss": 0.5952, "step": 20601 }, { "epoch": 1.4883956147163473, "grad_norm": 6.308389363949209, "learning_rate": 8.097998117112641e-07, "loss": 0.6196, "step": 20602 }, { "epoch": 1.4884678599165568, "grad_norm": 6.511112983596295, "learning_rate": 8.095842992849099e-07, "loss": 0.6322, "step": 20603 }, { "epoch": 1.4885401051167664, "grad_norm": 6.576267297254683, "learning_rate": 8.09368809999033e-07, "loss": 0.6209, "step": 20604 }, { "epoch": 1.4886123503169757, "grad_norm": 7.391777739036161, "learning_rate": 8.091533438565844e-07, "loss": 0.6731, "step": 20605 }, { "epoch": 1.4886845955171855, "grad_norm": 8.178734666476771, "learning_rate": 8.089379008605138e-07, "loss": 0.6549, "step": 20606 }, { "epoch": 1.4887568407173948, "grad_norm": 7.755105828572136, "learning_rate": 8.087224810137712e-07, "loss": 0.6229, "step": 20607 }, { "epoch": 1.4888290859176043, "grad_norm": 6.138826917486073, "learning_rate": 8.085070843193035e-07, "loss": 0.5543, "step": 20608 }, { "epoch": 1.4889013311178139, "grad_norm": 7.0820643765600035, "learning_rate": 8.082917107800605e-07, "loss": 0.6491, "step": 20609 }, { "epoch": 1.4889735763180234, "grad_norm": 6.362074970071061, "learning_rate": 8.080763603989908e-07, "loss": 0.6691, "step": 20610 }, { "epoch": 1.489045821518233, "grad_norm": 8.5345819135996, "learning_rate": 8.078610331790418e-07, "loss": 0.5926, "step": 20611 }, { "epoch": 1.4891180667184423, "grad_norm": 7.283827165961157, "learning_rate": 8.076457291231615e-07, "loss": 0.631, "step": 20612 }, { "epoch": 1.489190311918652, "grad_norm": 5.512594368401527, "learning_rate": 8.07430448234297e-07, "loss": 0.5425, "step": 20613 }, { "epoch": 1.4892625571188614, "grad_norm": 7.017266096423705, "learning_rate": 8.072151905153963e-07, "loss": 0.5688, "step": 20614 }, { "epoch": 1.489334802319071, "grad_norm": 7.09277014329832, "learning_rate": 8.069999559694048e-07, "loss": 0.6326, "step": 20615 }, { "epoch": 1.4894070475192804, "grad_norm": 7.038487486051857, "learning_rate": 8.067847445992691e-07, "loss": 0.6871, "step": 20616 }, { "epoch": 1.48947929271949, "grad_norm": 7.076404488741528, "learning_rate": 8.065695564079362e-07, "loss": 0.5205, "step": 20617 }, { "epoch": 1.4895515379196995, "grad_norm": 7.137609734167675, "learning_rate": 8.063543913983507e-07, "loss": 0.6727, "step": 20618 }, { "epoch": 1.4896237831199088, "grad_norm": 7.132618094027196, "learning_rate": 8.061392495734574e-07, "loss": 0.6556, "step": 20619 }, { "epoch": 1.4896960283201186, "grad_norm": 8.296889253018595, "learning_rate": 8.059241309362037e-07, "loss": 0.5612, "step": 20620 }, { "epoch": 1.489768273520328, "grad_norm": 6.647694995626892, "learning_rate": 8.057090354895339e-07, "loss": 0.6192, "step": 20621 }, { "epoch": 1.4898405187205375, "grad_norm": 7.06806577951319, "learning_rate": 8.054939632363912e-07, "loss": 0.5561, "step": 20622 }, { "epoch": 1.489912763920747, "grad_norm": 8.416726090123872, "learning_rate": 8.052789141797204e-07, "loss": 0.6816, "step": 20623 }, { "epoch": 1.4899850091209565, "grad_norm": 8.769450340662392, "learning_rate": 8.050638883224654e-07, "loss": 0.57, "step": 20624 }, { "epoch": 1.490057254321166, "grad_norm": 7.467764434270269, "learning_rate": 8.048488856675704e-07, "loss": 0.634, "step": 20625 }, { "epoch": 1.4901294995213754, "grad_norm": 6.241709894999132, "learning_rate": 8.046339062179775e-07, "loss": 0.6554, "step": 20626 }, { "epoch": 1.4902017447215852, "grad_norm": 5.726595806946114, "learning_rate": 8.044189499766297e-07, "loss": 0.5633, "step": 20627 }, { "epoch": 1.4902739899217945, "grad_norm": 7.773890435139834, "learning_rate": 8.042040169464702e-07, "loss": 0.6276, "step": 20628 }, { "epoch": 1.490346235122004, "grad_norm": 6.521478818038213, "learning_rate": 8.039891071304409e-07, "loss": 0.5643, "step": 20629 }, { "epoch": 1.4904184803222136, "grad_norm": 7.160040638557911, "learning_rate": 8.037742205314839e-07, "loss": 0.64, "step": 20630 }, { "epoch": 1.4904907255224231, "grad_norm": 8.052200244155175, "learning_rate": 8.035593571525404e-07, "loss": 0.652, "step": 20631 }, { "epoch": 1.4905629707226327, "grad_norm": 6.435353634238377, "learning_rate": 8.033445169965534e-07, "loss": 0.6156, "step": 20632 }, { "epoch": 1.4906352159228422, "grad_norm": 7.665917594914616, "learning_rate": 8.031297000664617e-07, "loss": 0.64, "step": 20633 }, { "epoch": 1.4907074611230517, "grad_norm": 8.105039051110088, "learning_rate": 8.029149063652067e-07, "loss": 0.6211, "step": 20634 }, { "epoch": 1.490779706323261, "grad_norm": 7.276962131030044, "learning_rate": 8.027001358957298e-07, "loss": 0.6198, "step": 20635 }, { "epoch": 1.4908519515234706, "grad_norm": 7.093847064055786, "learning_rate": 8.024853886609693e-07, "loss": 0.6014, "step": 20636 }, { "epoch": 1.4909241967236802, "grad_norm": 7.272062997391944, "learning_rate": 8.022706646638661e-07, "loss": 0.6219, "step": 20637 }, { "epoch": 1.4909964419238897, "grad_norm": 7.748971193999219, "learning_rate": 8.020559639073591e-07, "loss": 0.6359, "step": 20638 }, { "epoch": 1.4910686871240992, "grad_norm": 7.59970173501445, "learning_rate": 8.018412863943875e-07, "loss": 0.6493, "step": 20639 }, { "epoch": 1.4911409323243088, "grad_norm": 8.060838631668592, "learning_rate": 8.016266321278901e-07, "loss": 0.5951, "step": 20640 }, { "epoch": 1.4912131775245183, "grad_norm": 6.639428603164106, "learning_rate": 8.014120011108057e-07, "loss": 0.5518, "step": 20641 }, { "epoch": 1.4912854227247276, "grad_norm": 7.0138513578747395, "learning_rate": 8.011973933460726e-07, "loss": 0.5891, "step": 20642 }, { "epoch": 1.4913576679249372, "grad_norm": 6.1122487521655735, "learning_rate": 8.009828088366275e-07, "loss": 0.5709, "step": 20643 }, { "epoch": 1.4914299131251467, "grad_norm": 6.815622436578925, "learning_rate": 8.007682475854086e-07, "loss": 0.6394, "step": 20644 }, { "epoch": 1.4915021583253563, "grad_norm": 6.522422556243751, "learning_rate": 8.005537095953531e-07, "loss": 0.6002, "step": 20645 }, { "epoch": 1.4915744035255658, "grad_norm": 8.070317226825818, "learning_rate": 8.003391948693984e-07, "loss": 0.6984, "step": 20646 }, { "epoch": 1.4916466487257753, "grad_norm": 6.036955511526032, "learning_rate": 8.00124703410479e-07, "loss": 0.5559, "step": 20647 }, { "epoch": 1.491718893925985, "grad_norm": 7.392504076164807, "learning_rate": 7.999102352215332e-07, "loss": 0.6626, "step": 20648 }, { "epoch": 1.4917911391261942, "grad_norm": 7.447654908033158, "learning_rate": 7.996957903054964e-07, "loss": 0.6735, "step": 20649 }, { "epoch": 1.4918633843264038, "grad_norm": 5.946433003087235, "learning_rate": 7.994813686653047e-07, "loss": 0.6134, "step": 20650 }, { "epoch": 1.4919356295266133, "grad_norm": 6.537544685050589, "learning_rate": 7.992669703038919e-07, "loss": 0.5275, "step": 20651 }, { "epoch": 1.4920078747268228, "grad_norm": 6.887937268121009, "learning_rate": 7.990525952241937e-07, "loss": 0.6138, "step": 20652 }, { "epoch": 1.4920801199270324, "grad_norm": 7.357800876221839, "learning_rate": 7.988382434291455e-07, "loss": 0.6441, "step": 20653 }, { "epoch": 1.492152365127242, "grad_norm": 8.907715579798557, "learning_rate": 7.986239149216802e-07, "loss": 0.61, "step": 20654 }, { "epoch": 1.4922246103274515, "grad_norm": 7.498417750986574, "learning_rate": 7.984096097047325e-07, "loss": 0.6651, "step": 20655 }, { "epoch": 1.4922968555276608, "grad_norm": 6.360057297454505, "learning_rate": 7.98195327781236e-07, "loss": 0.6053, "step": 20656 }, { "epoch": 1.4923691007278703, "grad_norm": 8.301035382139228, "learning_rate": 7.97981069154124e-07, "loss": 0.6345, "step": 20657 }, { "epoch": 1.4924413459280799, "grad_norm": 8.123931814549, "learning_rate": 7.977668338263297e-07, "loss": 0.6058, "step": 20658 }, { "epoch": 1.4925135911282894, "grad_norm": 7.388557257735104, "learning_rate": 7.975526218007859e-07, "loss": 0.6233, "step": 20659 }, { "epoch": 1.492585836328499, "grad_norm": 7.249466712017, "learning_rate": 7.973384330804257e-07, "loss": 0.6405, "step": 20660 }, { "epoch": 1.4926580815287085, "grad_norm": 5.694896206600703, "learning_rate": 7.971242676681793e-07, "loss": 0.5978, "step": 20661 }, { "epoch": 1.492730326728918, "grad_norm": 7.210579876695294, "learning_rate": 7.969101255669795e-07, "loss": 0.6077, "step": 20662 }, { "epoch": 1.4928025719291274, "grad_norm": 6.923322332548249, "learning_rate": 7.966960067797577e-07, "loss": 0.6, "step": 20663 }, { "epoch": 1.492874817129337, "grad_norm": 6.674029893423504, "learning_rate": 7.964819113094457e-07, "loss": 0.6109, "step": 20664 }, { "epoch": 1.4929470623295464, "grad_norm": 6.615964666196115, "learning_rate": 7.962678391589732e-07, "loss": 0.5525, "step": 20665 }, { "epoch": 1.493019307529756, "grad_norm": 6.300548154205079, "learning_rate": 7.96053790331271e-07, "loss": 0.6205, "step": 20666 }, { "epoch": 1.4930915527299655, "grad_norm": 6.391182798184891, "learning_rate": 7.958397648292693e-07, "loss": 0.5179, "step": 20667 }, { "epoch": 1.493163797930175, "grad_norm": 7.699871745837313, "learning_rate": 7.956257626558981e-07, "loss": 0.6517, "step": 20668 }, { "epoch": 1.4932360431303846, "grad_norm": 6.7279886291262665, "learning_rate": 7.954117838140868e-07, "loss": 0.6165, "step": 20669 }, { "epoch": 1.493308288330594, "grad_norm": 8.777046473604015, "learning_rate": 7.951978283067646e-07, "loss": 0.6691, "step": 20670 }, { "epoch": 1.4933805335308035, "grad_norm": 6.696131474761747, "learning_rate": 7.949838961368611e-07, "loss": 0.5387, "step": 20671 }, { "epoch": 1.493452778731013, "grad_norm": 7.654787009457301, "learning_rate": 7.947699873073036e-07, "loss": 0.6985, "step": 20672 }, { "epoch": 1.4935250239312226, "grad_norm": 7.738754234898166, "learning_rate": 7.94556101821021e-07, "loss": 0.6377, "step": 20673 }, { "epoch": 1.493597269131432, "grad_norm": 8.750880169877691, "learning_rate": 7.943422396809411e-07, "loss": 0.6349, "step": 20674 }, { "epoch": 1.4936695143316416, "grad_norm": 7.283324114039309, "learning_rate": 7.941284008899924e-07, "loss": 0.6473, "step": 20675 }, { "epoch": 1.4937417595318512, "grad_norm": 7.261604492864578, "learning_rate": 7.939145854510996e-07, "loss": 0.6435, "step": 20676 }, { "epoch": 1.4938140047320605, "grad_norm": 6.9340457049003135, "learning_rate": 7.937007933671925e-07, "loss": 0.5915, "step": 20677 }, { "epoch": 1.4938862499322703, "grad_norm": 6.838377354767094, "learning_rate": 7.934870246411974e-07, "loss": 0.5881, "step": 20678 }, { "epoch": 1.4939584951324796, "grad_norm": 7.08504540191296, "learning_rate": 7.932732792760392e-07, "loss": 0.6097, "step": 20679 }, { "epoch": 1.4940307403326891, "grad_norm": 6.936605034914442, "learning_rate": 7.930595572746444e-07, "loss": 0.6977, "step": 20680 }, { "epoch": 1.4941029855328987, "grad_norm": 6.157522621872142, "learning_rate": 7.92845858639939e-07, "loss": 0.5696, "step": 20681 }, { "epoch": 1.4941752307331082, "grad_norm": 8.776549251981724, "learning_rate": 7.92632183374849e-07, "loss": 0.6836, "step": 20682 }, { "epoch": 1.4942474759333177, "grad_norm": 6.605722270230999, "learning_rate": 7.924185314822982e-07, "loss": 0.6096, "step": 20683 }, { "epoch": 1.494319721133527, "grad_norm": 6.937338337432291, "learning_rate": 7.922049029652115e-07, "loss": 0.5483, "step": 20684 }, { "epoch": 1.4943919663337368, "grad_norm": 6.281810223092949, "learning_rate": 7.91991297826514e-07, "loss": 0.5823, "step": 20685 }, { "epoch": 1.4944642115339462, "grad_norm": 7.266741398185001, "learning_rate": 7.917777160691293e-07, "loss": 0.6156, "step": 20686 }, { "epoch": 1.4945364567341557, "grad_norm": 7.417296850741615, "learning_rate": 7.915641576959812e-07, "loss": 0.6464, "step": 20687 }, { "epoch": 1.4946087019343652, "grad_norm": 5.939825947584462, "learning_rate": 7.913506227099932e-07, "loss": 0.5248, "step": 20688 }, { "epoch": 1.4946809471345748, "grad_norm": 6.542585110021497, "learning_rate": 7.911371111140895e-07, "loss": 0.7257, "step": 20689 }, { "epoch": 1.4947531923347843, "grad_norm": 7.67210272860839, "learning_rate": 7.909236229111911e-07, "loss": 0.5885, "step": 20690 }, { "epoch": 1.4948254375349936, "grad_norm": 8.134359749403442, "learning_rate": 7.907101581042212e-07, "loss": 0.6481, "step": 20691 }, { "epoch": 1.4948976827352034, "grad_norm": 6.509821587386967, "learning_rate": 7.90496716696103e-07, "loss": 0.5617, "step": 20692 }, { "epoch": 1.4949699279354127, "grad_norm": 5.72235394092712, "learning_rate": 7.902832986897566e-07, "loss": 0.5855, "step": 20693 }, { "epoch": 1.4950421731356223, "grad_norm": 6.473283588808896, "learning_rate": 7.900699040881041e-07, "loss": 0.6218, "step": 20694 }, { "epoch": 1.4951144183358318, "grad_norm": 7.326391604776065, "learning_rate": 7.898565328940671e-07, "loss": 0.6022, "step": 20695 }, { "epoch": 1.4951866635360413, "grad_norm": 6.962057278321606, "learning_rate": 7.896431851105663e-07, "loss": 0.6005, "step": 20696 }, { "epoch": 1.495258908736251, "grad_norm": 7.541117555077111, "learning_rate": 7.894298607405224e-07, "loss": 0.6408, "step": 20697 }, { "epoch": 1.4953311539364602, "grad_norm": 6.7093388848294415, "learning_rate": 7.892165597868553e-07, "loss": 0.6134, "step": 20698 }, { "epoch": 1.49540339913667, "grad_norm": 6.441456884181674, "learning_rate": 7.890032822524854e-07, "loss": 0.6529, "step": 20699 }, { "epoch": 1.4954756443368793, "grad_norm": 7.439554395559736, "learning_rate": 7.887900281403327e-07, "loss": 0.7059, "step": 20700 }, { "epoch": 1.4955478895370888, "grad_norm": 7.685877434917895, "learning_rate": 7.885767974533152e-07, "loss": 0.739, "step": 20701 }, { "epoch": 1.4956201347372984, "grad_norm": 5.63472517451902, "learning_rate": 7.883635901943523e-07, "loss": 0.6829, "step": 20702 }, { "epoch": 1.495692379937508, "grad_norm": 7.946527588841232, "learning_rate": 7.881504063663637e-07, "loss": 0.6616, "step": 20703 }, { "epoch": 1.4957646251377175, "grad_norm": 6.290139962535891, "learning_rate": 7.879372459722653e-07, "loss": 0.6153, "step": 20704 }, { "epoch": 1.495836870337927, "grad_norm": 6.910804593047832, "learning_rate": 7.877241090149775e-07, "loss": 0.6502, "step": 20705 }, { "epoch": 1.4959091155381365, "grad_norm": 6.960874203994631, "learning_rate": 7.875109954974172e-07, "loss": 0.677, "step": 20706 }, { "epoch": 1.4959813607383459, "grad_norm": 6.258348306298493, "learning_rate": 7.872979054225025e-07, "loss": 0.6712, "step": 20707 }, { "epoch": 1.4960536059385554, "grad_norm": 6.612625657385786, "learning_rate": 7.870848387931487e-07, "loss": 0.6332, "step": 20708 }, { "epoch": 1.496125851138765, "grad_norm": 6.904710416805429, "learning_rate": 7.868717956122737e-07, "loss": 0.5258, "step": 20709 }, { "epoch": 1.4961980963389745, "grad_norm": 6.186077840725387, "learning_rate": 7.866587758827942e-07, "loss": 0.5897, "step": 20710 }, { "epoch": 1.496270341539184, "grad_norm": 7.5135360638070345, "learning_rate": 7.86445779607625e-07, "loss": 0.6571, "step": 20711 }, { "epoch": 1.4963425867393936, "grad_norm": 7.518319707593309, "learning_rate": 7.862328067896827e-07, "loss": 0.5772, "step": 20712 }, { "epoch": 1.4964148319396031, "grad_norm": 6.901901325927646, "learning_rate": 7.860198574318825e-07, "loss": 0.6777, "step": 20713 }, { "epoch": 1.4964870771398124, "grad_norm": 6.844818449879909, "learning_rate": 7.858069315371397e-07, "loss": 0.6164, "step": 20714 }, { "epoch": 1.496559322340022, "grad_norm": 7.510251985632394, "learning_rate": 7.855940291083692e-07, "loss": 0.6084, "step": 20715 }, { "epoch": 1.4966315675402315, "grad_norm": 7.76913108965112, "learning_rate": 7.853811501484851e-07, "loss": 0.6787, "step": 20716 }, { "epoch": 1.496703812740441, "grad_norm": 8.295073405211769, "learning_rate": 7.851682946604025e-07, "loss": 0.6371, "step": 20717 }, { "epoch": 1.4967760579406506, "grad_norm": 7.338315860549303, "learning_rate": 7.849554626470338e-07, "loss": 0.5858, "step": 20718 }, { "epoch": 1.4968483031408601, "grad_norm": 5.82060120141788, "learning_rate": 7.847426541112929e-07, "loss": 0.5389, "step": 20719 }, { "epoch": 1.4969205483410697, "grad_norm": 6.947752653718744, "learning_rate": 7.845298690560935e-07, "loss": 0.6151, "step": 20720 }, { "epoch": 1.496992793541279, "grad_norm": 7.0239884921003535, "learning_rate": 7.843171074843492e-07, "loss": 0.596, "step": 20721 }, { "epoch": 1.4970650387414886, "grad_norm": 6.007532477828811, "learning_rate": 7.841043693989703e-07, "loss": 0.5975, "step": 20722 }, { "epoch": 1.497137283941698, "grad_norm": 7.2674516248060765, "learning_rate": 7.838916548028705e-07, "loss": 0.6254, "step": 20723 }, { "epoch": 1.4972095291419076, "grad_norm": 6.410066388957282, "learning_rate": 7.836789636989614e-07, "loss": 0.6726, "step": 20724 }, { "epoch": 1.4972817743421172, "grad_norm": 7.666500532036428, "learning_rate": 7.834662960901548e-07, "loss": 0.636, "step": 20725 }, { "epoch": 1.4973540195423267, "grad_norm": 6.304787333665895, "learning_rate": 7.832536519793618e-07, "loss": 0.6248, "step": 20726 }, { "epoch": 1.4974262647425363, "grad_norm": 6.510468196410655, "learning_rate": 7.830410313694934e-07, "loss": 0.6625, "step": 20727 }, { "epoch": 1.4974985099427456, "grad_norm": 7.6409892051969, "learning_rate": 7.828284342634607e-07, "loss": 0.5775, "step": 20728 }, { "epoch": 1.4975707551429551, "grad_norm": 7.423131061476645, "learning_rate": 7.826158606641726e-07, "loss": 0.6371, "step": 20729 }, { "epoch": 1.4976430003431647, "grad_norm": 8.139960029372697, "learning_rate": 7.824033105745402e-07, "loss": 0.5753, "step": 20730 }, { "epoch": 1.4977152455433742, "grad_norm": 6.651882117533834, "learning_rate": 7.821907839974727e-07, "loss": 0.5845, "step": 20731 }, { "epoch": 1.4977874907435837, "grad_norm": 7.283217528562454, "learning_rate": 7.819782809358795e-07, "loss": 0.6562, "step": 20732 }, { "epoch": 1.4978597359437933, "grad_norm": 6.926565815421184, "learning_rate": 7.817658013926699e-07, "loss": 0.6645, "step": 20733 }, { "epoch": 1.4979319811440028, "grad_norm": 5.90926481938036, "learning_rate": 7.815533453707522e-07, "loss": 0.5722, "step": 20734 }, { "epoch": 1.4980042263442122, "grad_norm": 5.6164315979136585, "learning_rate": 7.813409128730357e-07, "loss": 0.4802, "step": 20735 }, { "epoch": 1.4980764715444217, "grad_norm": 7.21917168512676, "learning_rate": 7.811285039024269e-07, "loss": 0.6004, "step": 20736 }, { "epoch": 1.4981487167446312, "grad_norm": 6.446520660914056, "learning_rate": 7.809161184618342e-07, "loss": 0.6062, "step": 20737 }, { "epoch": 1.4982209619448408, "grad_norm": 7.919695733130856, "learning_rate": 7.80703756554165e-07, "loss": 0.5663, "step": 20738 }, { "epoch": 1.4982932071450503, "grad_norm": 6.670504069576476, "learning_rate": 7.804914181823273e-07, "loss": 0.6777, "step": 20739 }, { "epoch": 1.4983654523452599, "grad_norm": 7.099637583764454, "learning_rate": 7.802791033492263e-07, "loss": 0.6286, "step": 20740 }, { "epoch": 1.4984376975454694, "grad_norm": 8.238640102278001, "learning_rate": 7.80066812057769e-07, "loss": 0.5888, "step": 20741 }, { "epoch": 1.4985099427456787, "grad_norm": 7.603235168536889, "learning_rate": 7.798545443108615e-07, "loss": 0.6193, "step": 20742 }, { "epoch": 1.4985821879458883, "grad_norm": 6.003434469813892, "learning_rate": 7.796423001114098e-07, "loss": 0.6132, "step": 20743 }, { "epoch": 1.4986544331460978, "grad_norm": 8.178778975929681, "learning_rate": 7.794300794623191e-07, "loss": 0.6126, "step": 20744 }, { "epoch": 1.4987266783463074, "grad_norm": 7.256587225859268, "learning_rate": 7.792178823664948e-07, "loss": 0.6523, "step": 20745 }, { "epoch": 1.498798923546517, "grad_norm": 7.698288952846189, "learning_rate": 7.790057088268424e-07, "loss": 0.7251, "step": 20746 }, { "epoch": 1.4988711687467264, "grad_norm": 8.82948430491953, "learning_rate": 7.787935588462647e-07, "loss": 0.6227, "step": 20747 }, { "epoch": 1.498943413946936, "grad_norm": 7.051564350123705, "learning_rate": 7.78581432427667e-07, "loss": 0.6018, "step": 20748 }, { "epoch": 1.4990156591471453, "grad_norm": 7.163336164788417, "learning_rate": 7.783693295739533e-07, "loss": 0.6407, "step": 20749 }, { "epoch": 1.499087904347355, "grad_norm": 6.628777920273736, "learning_rate": 7.781572502880263e-07, "loss": 0.5849, "step": 20750 }, { "epoch": 1.4991601495475644, "grad_norm": 8.435573781865516, "learning_rate": 7.779451945727894e-07, "loss": 0.698, "step": 20751 }, { "epoch": 1.499232394747774, "grad_norm": 7.037811608470927, "learning_rate": 7.777331624311457e-07, "loss": 0.5872, "step": 20752 }, { "epoch": 1.4993046399479835, "grad_norm": 6.716844013355964, "learning_rate": 7.775211538659977e-07, "loss": 0.6017, "step": 20753 }, { "epoch": 1.499376885148193, "grad_norm": 7.007992677275433, "learning_rate": 7.773091688802478e-07, "loss": 0.6101, "step": 20754 }, { "epoch": 1.4994491303484025, "grad_norm": 7.8946684797573, "learning_rate": 7.77097207476798e-07, "loss": 0.5992, "step": 20755 }, { "epoch": 1.4995213755486119, "grad_norm": 6.216485152607371, "learning_rate": 7.768852696585493e-07, "loss": 0.5894, "step": 20756 }, { "epoch": 1.4995936207488216, "grad_norm": 7.653542799038167, "learning_rate": 7.766733554284042e-07, "loss": 0.6596, "step": 20757 }, { "epoch": 1.499665865949031, "grad_norm": 6.622477789064331, "learning_rate": 7.764614647892621e-07, "loss": 0.5516, "step": 20758 }, { "epoch": 1.4997381111492405, "grad_norm": 7.107916713847056, "learning_rate": 7.762495977440243e-07, "loss": 0.6193, "step": 20759 }, { "epoch": 1.49981035634945, "grad_norm": 6.034171706202239, "learning_rate": 7.76037754295591e-07, "loss": 0.5958, "step": 20760 }, { "epoch": 1.4998826015496596, "grad_norm": 7.524101441936102, "learning_rate": 7.758259344468624e-07, "loss": 0.6072, "step": 20761 }, { "epoch": 1.4999548467498691, "grad_norm": 7.646470655718855, "learning_rate": 7.756141382007382e-07, "loss": 0.6426, "step": 20762 }, { "epoch": 1.5000270919500784, "grad_norm": 7.18846137210136, "learning_rate": 7.754023655601173e-07, "loss": 0.5782, "step": 20763 }, { "epoch": 1.5000993371502882, "grad_norm": 66.25330180451387, "learning_rate": 7.751906165278997e-07, "loss": 0.6687, "step": 20764 }, { "epoch": 1.5001715823504975, "grad_norm": 6.917115101199501, "learning_rate": 7.749788911069828e-07, "loss": 0.6546, "step": 20765 }, { "epoch": 1.500243827550707, "grad_norm": 6.345589807871733, "learning_rate": 7.747671893002651e-07, "loss": 0.6214, "step": 20766 }, { "epoch": 1.5003160727509166, "grad_norm": 7.69743833660269, "learning_rate": 7.745555111106462e-07, "loss": 0.5913, "step": 20767 }, { "epoch": 1.5003883179511261, "grad_norm": 7.404137676341831, "learning_rate": 7.743438565410216e-07, "loss": 0.5985, "step": 20768 }, { "epoch": 1.5004605631513357, "grad_norm": 6.382571641913638, "learning_rate": 7.741322255942896e-07, "loss": 0.5972, "step": 20769 }, { "epoch": 1.500532808351545, "grad_norm": 7.509407247219307, "learning_rate": 7.739206182733478e-07, "loss": 0.6123, "step": 20770 }, { "epoch": 1.5006050535517548, "grad_norm": 7.342531013773957, "learning_rate": 7.737090345810922e-07, "loss": 0.6038, "step": 20771 }, { "epoch": 1.500677298751964, "grad_norm": 7.5782629256134, "learning_rate": 7.734974745204196e-07, "loss": 0.5844, "step": 20772 }, { "epoch": 1.5007495439521736, "grad_norm": 7.2508699618637085, "learning_rate": 7.73285938094226e-07, "loss": 0.5707, "step": 20773 }, { "epoch": 1.5008217891523832, "grad_norm": 6.683274404721, "learning_rate": 7.730744253054081e-07, "loss": 0.5873, "step": 20774 }, { "epoch": 1.5008940343525927, "grad_norm": 7.088684155342176, "learning_rate": 7.728629361568596e-07, "loss": 0.6116, "step": 20775 }, { "epoch": 1.5009662795528023, "grad_norm": 8.31431662994315, "learning_rate": 7.726514706514765e-07, "loss": 0.6152, "step": 20776 }, { "epoch": 1.5010385247530116, "grad_norm": 6.276113659035567, "learning_rate": 7.724400287921535e-07, "loss": 0.6205, "step": 20777 }, { "epoch": 1.5011107699532213, "grad_norm": 7.1135670805079725, "learning_rate": 7.722286105817861e-07, "loss": 0.6576, "step": 20778 }, { "epoch": 1.5011830151534307, "grad_norm": 8.362059659440781, "learning_rate": 7.720172160232664e-07, "loss": 0.613, "step": 20779 }, { "epoch": 1.5012552603536402, "grad_norm": 6.686446632778733, "learning_rate": 7.718058451194896e-07, "loss": 0.5973, "step": 20780 }, { "epoch": 1.5013275055538498, "grad_norm": 7.1115177488622505, "learning_rate": 7.71594497873348e-07, "loss": 0.6629, "step": 20781 }, { "epoch": 1.5013997507540593, "grad_norm": 7.412667242675525, "learning_rate": 7.713831742877373e-07, "loss": 0.6784, "step": 20782 }, { "epoch": 1.5014719959542688, "grad_norm": 5.98201854714924, "learning_rate": 7.71171874365548e-07, "loss": 0.6464, "step": 20783 }, { "epoch": 1.5015442411544782, "grad_norm": 7.03581069537216, "learning_rate": 7.709605981096732e-07, "loss": 0.6581, "step": 20784 }, { "epoch": 1.501616486354688, "grad_norm": 7.741633421378583, "learning_rate": 7.707493455230064e-07, "loss": 0.6609, "step": 20785 }, { "epoch": 1.5016887315548972, "grad_norm": 7.866825765951343, "learning_rate": 7.705381166084375e-07, "loss": 0.6207, "step": 20786 }, { "epoch": 1.5017609767551068, "grad_norm": 6.562638926170856, "learning_rate": 7.703269113688589e-07, "loss": 0.6861, "step": 20787 }, { "epoch": 1.5018332219553163, "grad_norm": 7.823663167134894, "learning_rate": 7.701157298071618e-07, "loss": 0.6626, "step": 20788 }, { "epoch": 1.5019054671555259, "grad_norm": 6.9308569092384165, "learning_rate": 7.699045719262374e-07, "loss": 0.5866, "step": 20789 }, { "epoch": 1.5019777123557354, "grad_norm": 6.1592307063696765, "learning_rate": 7.696934377289759e-07, "loss": 0.5647, "step": 20790 }, { "epoch": 1.5020499575559447, "grad_norm": 6.780533009960937, "learning_rate": 7.694823272182677e-07, "loss": 0.547, "step": 20791 }, { "epoch": 1.5021222027561545, "grad_norm": 7.710349511760531, "learning_rate": 7.692712403970038e-07, "loss": 0.592, "step": 20792 }, { "epoch": 1.5021944479563638, "grad_norm": 7.605145980968787, "learning_rate": 7.690601772680717e-07, "loss": 0.6384, "step": 20793 }, { "epoch": 1.5022666931565736, "grad_norm": 7.211862688649958, "learning_rate": 7.688491378343621e-07, "loss": 0.641, "step": 20794 }, { "epoch": 1.502338938356783, "grad_norm": 6.442658662925784, "learning_rate": 7.686381220987635e-07, "loss": 0.5882, "step": 20795 }, { "epoch": 1.5024111835569924, "grad_norm": 7.287217473987146, "learning_rate": 7.684271300641655e-07, "loss": 0.6235, "step": 20796 }, { "epoch": 1.502483428757202, "grad_norm": 8.117492963754362, "learning_rate": 7.682161617334546e-07, "loss": 0.639, "step": 20797 }, { "epoch": 1.5025556739574113, "grad_norm": 6.895077882012895, "learning_rate": 7.680052171095201e-07, "loss": 0.5834, "step": 20798 }, { "epoch": 1.502627919157621, "grad_norm": 6.982021677779608, "learning_rate": 7.677942961952492e-07, "loss": 0.5684, "step": 20799 }, { "epoch": 1.5027001643578304, "grad_norm": 7.058266510209882, "learning_rate": 7.675833989935297e-07, "loss": 0.647, "step": 20800 }, { "epoch": 1.5027724095580401, "grad_norm": 8.077609313206283, "learning_rate": 7.673725255072481e-07, "loss": 0.5992, "step": 20801 }, { "epoch": 1.5028446547582495, "grad_norm": 8.245070400735578, "learning_rate": 7.671616757392913e-07, "loss": 0.6298, "step": 20802 }, { "epoch": 1.502916899958459, "grad_norm": 7.110689213608572, "learning_rate": 7.669508496925466e-07, "loss": 0.6439, "step": 20803 }, { "epoch": 1.5029891451586685, "grad_norm": 5.991959588641604, "learning_rate": 7.667400473698982e-07, "loss": 0.6734, "step": 20804 }, { "epoch": 1.5030613903588779, "grad_norm": 6.679335699800168, "learning_rate": 7.665292687742329e-07, "loss": 0.5864, "step": 20805 }, { "epoch": 1.5031336355590876, "grad_norm": 6.502102438489067, "learning_rate": 7.663185139084358e-07, "loss": 0.6164, "step": 20806 }, { "epoch": 1.503205880759297, "grad_norm": 7.193349829446558, "learning_rate": 7.661077827753929e-07, "loss": 0.5776, "step": 20807 }, { "epoch": 1.5032781259595067, "grad_norm": 7.435931128434836, "learning_rate": 7.658970753779876e-07, "loss": 0.6553, "step": 20808 }, { "epoch": 1.503350371159716, "grad_norm": 7.0031077434526345, "learning_rate": 7.65686391719104e-07, "loss": 0.6945, "step": 20809 }, { "epoch": 1.5034226163599256, "grad_norm": 6.702430386777591, "learning_rate": 7.654757318016287e-07, "loss": 0.66, "step": 20810 }, { "epoch": 1.5034948615601351, "grad_norm": 7.269369099984978, "learning_rate": 7.65265095628443e-07, "loss": 0.6185, "step": 20811 }, { "epoch": 1.5035671067603447, "grad_norm": 6.956079022687497, "learning_rate": 7.650544832024315e-07, "loss": 0.5984, "step": 20812 }, { "epoch": 1.5036393519605542, "grad_norm": 7.560453279823024, "learning_rate": 7.648438945264767e-07, "loss": 0.6607, "step": 20813 }, { "epoch": 1.5037115971607635, "grad_norm": 7.457797523162209, "learning_rate": 7.646333296034625e-07, "loss": 0.6429, "step": 20814 }, { "epoch": 1.5037838423609733, "grad_norm": 6.603442837331766, "learning_rate": 7.6442278843627e-07, "loss": 0.6026, "step": 20815 }, { "epoch": 1.5038560875611826, "grad_norm": 7.089988215212046, "learning_rate": 7.642122710277819e-07, "loss": 0.6804, "step": 20816 }, { "epoch": 1.5039283327613922, "grad_norm": 8.589963654517842, "learning_rate": 7.6400177738088e-07, "loss": 0.609, "step": 20817 }, { "epoch": 1.5040005779616017, "grad_norm": 7.271741429615704, "learning_rate": 7.637913074984457e-07, "loss": 0.6159, "step": 20818 }, { "epoch": 1.5040728231618112, "grad_norm": 5.743713591318059, "learning_rate": 7.635808613833607e-07, "loss": 0.6006, "step": 20819 }, { "epoch": 1.5041450683620208, "grad_norm": 7.19239600569479, "learning_rate": 7.633704390385049e-07, "loss": 0.6398, "step": 20820 }, { "epoch": 1.50421731356223, "grad_norm": 6.41098428105266, "learning_rate": 7.631600404667608e-07, "loss": 0.6349, "step": 20821 }, { "epoch": 1.5042895587624399, "grad_norm": 7.5826317039896916, "learning_rate": 7.629496656710058e-07, "loss": 0.6624, "step": 20822 }, { "epoch": 1.5043618039626492, "grad_norm": 7.304110417545672, "learning_rate": 7.627393146541215e-07, "loss": 0.6391, "step": 20823 }, { "epoch": 1.5044340491628587, "grad_norm": 6.114071184986165, "learning_rate": 7.625289874189878e-07, "loss": 0.6007, "step": 20824 }, { "epoch": 1.5045062943630683, "grad_norm": 8.58166764278657, "learning_rate": 7.623186839684826e-07, "loss": 0.6607, "step": 20825 }, { "epoch": 1.5045785395632778, "grad_norm": 7.335479104504828, "learning_rate": 7.621084043054853e-07, "loss": 0.6098, "step": 20826 }, { "epoch": 1.5046507847634873, "grad_norm": 7.653987877311107, "learning_rate": 7.618981484328744e-07, "loss": 0.642, "step": 20827 }, { "epoch": 1.5047230299636967, "grad_norm": 7.605979334106777, "learning_rate": 7.616879163535284e-07, "loss": 0.6192, "step": 20828 }, { "epoch": 1.5047952751639064, "grad_norm": 7.054221102990255, "learning_rate": 7.614777080703251e-07, "loss": 0.6226, "step": 20829 }, { "epoch": 1.5048675203641158, "grad_norm": 7.452707594853347, "learning_rate": 7.612675235861419e-07, "loss": 0.5924, "step": 20830 }, { "epoch": 1.5049397655643253, "grad_norm": 7.520827810297776, "learning_rate": 7.610573629038567e-07, "loss": 0.7037, "step": 20831 }, { "epoch": 1.5050120107645348, "grad_norm": 7.646288561110261, "learning_rate": 7.608472260263464e-07, "loss": 0.713, "step": 20832 }, { "epoch": 1.5050842559647444, "grad_norm": 7.017020377745062, "learning_rate": 7.606371129564866e-07, "loss": 0.623, "step": 20833 }, { "epoch": 1.505156501164954, "grad_norm": 7.758028363531257, "learning_rate": 7.604270236971542e-07, "loss": 0.6937, "step": 20834 }, { "epoch": 1.5052287463651632, "grad_norm": 7.955353130604397, "learning_rate": 7.602169582512259e-07, "loss": 0.6147, "step": 20835 }, { "epoch": 1.505300991565373, "grad_norm": 6.9582777496980786, "learning_rate": 7.60006916621576e-07, "loss": 0.6612, "step": 20836 }, { "epoch": 1.5053732367655823, "grad_norm": 8.291270286504185, "learning_rate": 7.597968988110793e-07, "loss": 0.6373, "step": 20837 }, { "epoch": 1.5054454819657919, "grad_norm": 6.21838255136943, "learning_rate": 7.595869048226126e-07, "loss": 0.6024, "step": 20838 }, { "epoch": 1.5055177271660014, "grad_norm": 5.703340374785996, "learning_rate": 7.59376934659051e-07, "loss": 0.6964, "step": 20839 }, { "epoch": 1.505589972366211, "grad_norm": 8.469393209545217, "learning_rate": 7.591669883232666e-07, "loss": 0.6463, "step": 20840 }, { "epoch": 1.5056622175664205, "grad_norm": 7.480004932177544, "learning_rate": 7.589570658181345e-07, "loss": 0.6293, "step": 20841 }, { "epoch": 1.5057344627666298, "grad_norm": 7.10980692798494, "learning_rate": 7.587471671465291e-07, "loss": 0.6522, "step": 20842 }, { "epoch": 1.5058067079668396, "grad_norm": 7.328968772218923, "learning_rate": 7.585372923113224e-07, "loss": 0.6045, "step": 20843 }, { "epoch": 1.505878953167049, "grad_norm": 6.987791313963931, "learning_rate": 7.583274413153877e-07, "loss": 0.5733, "step": 20844 }, { "epoch": 1.5059511983672584, "grad_norm": 7.2719415587037775, "learning_rate": 7.581176141615982e-07, "loss": 0.6383, "step": 20845 }, { "epoch": 1.506023443567468, "grad_norm": 6.712905683991692, "learning_rate": 7.579078108528263e-07, "loss": 0.5466, "step": 20846 }, { "epoch": 1.5060956887676775, "grad_norm": 7.176221938869921, "learning_rate": 7.576980313919432e-07, "loss": 0.6803, "step": 20847 }, { "epoch": 1.506167933967887, "grad_norm": 7.505438549385606, "learning_rate": 7.574882757818217e-07, "loss": 0.6064, "step": 20848 }, { "epoch": 1.5062401791680964, "grad_norm": 6.036248700174747, "learning_rate": 7.572785440253336e-07, "loss": 0.59, "step": 20849 }, { "epoch": 1.5063124243683061, "grad_norm": 6.803611065835278, "learning_rate": 7.570688361253481e-07, "loss": 0.5818, "step": 20850 }, { "epoch": 1.5063846695685155, "grad_norm": 7.459039097076043, "learning_rate": 7.568591520847368e-07, "loss": 0.6059, "step": 20851 }, { "epoch": 1.506456914768725, "grad_norm": 6.6946857386043055, "learning_rate": 7.566494919063705e-07, "loss": 0.6158, "step": 20852 }, { "epoch": 1.5065291599689346, "grad_norm": 7.004960346204987, "learning_rate": 7.564398555931199e-07, "loss": 0.6135, "step": 20853 }, { "epoch": 1.506601405169144, "grad_norm": 7.497461779561555, "learning_rate": 7.562302431478527e-07, "loss": 0.5759, "step": 20854 }, { "epoch": 1.5066736503693536, "grad_norm": 7.88425978955534, "learning_rate": 7.560206545734399e-07, "loss": 0.5772, "step": 20855 }, { "epoch": 1.506745895569563, "grad_norm": 7.356810558290609, "learning_rate": 7.5581108987275e-07, "loss": 0.6242, "step": 20856 }, { "epoch": 1.5068181407697727, "grad_norm": 6.395028162007661, "learning_rate": 7.556015490486521e-07, "loss": 0.6017, "step": 20857 }, { "epoch": 1.506890385969982, "grad_norm": 6.482215613715368, "learning_rate": 7.553920321040145e-07, "loss": 0.6452, "step": 20858 }, { "epoch": 1.5069626311701916, "grad_norm": 5.8264160923505255, "learning_rate": 7.551825390417055e-07, "loss": 0.5412, "step": 20859 }, { "epoch": 1.5070348763704011, "grad_norm": 7.33633685015255, "learning_rate": 7.549730698645935e-07, "loss": 0.6893, "step": 20860 }, { "epoch": 1.5071071215706107, "grad_norm": 7.78167429307259, "learning_rate": 7.547636245755443e-07, "loss": 0.6436, "step": 20861 }, { "epoch": 1.5071793667708202, "grad_norm": 6.748544606734257, "learning_rate": 7.545542031774263e-07, "loss": 0.6179, "step": 20862 }, { "epoch": 1.5072516119710295, "grad_norm": 7.104266869816221, "learning_rate": 7.543448056731059e-07, "loss": 0.6804, "step": 20863 }, { "epoch": 1.5073238571712393, "grad_norm": 6.852017917090761, "learning_rate": 7.541354320654503e-07, "loss": 0.6565, "step": 20864 }, { "epoch": 1.5073961023714486, "grad_norm": 7.625975499523713, "learning_rate": 7.539260823573244e-07, "loss": 0.6833, "step": 20865 }, { "epoch": 1.5074683475716584, "grad_norm": 6.954057060948925, "learning_rate": 7.537167565515941e-07, "loss": 0.5802, "step": 20866 }, { "epoch": 1.5075405927718677, "grad_norm": 6.491725055920049, "learning_rate": 7.535074546511267e-07, "loss": 0.5746, "step": 20867 }, { "epoch": 1.5076128379720772, "grad_norm": 7.396899795651828, "learning_rate": 7.532981766587857e-07, "loss": 0.5793, "step": 20868 }, { "epoch": 1.5076850831722868, "grad_norm": 6.880298029632328, "learning_rate": 7.530889225774362e-07, "loss": 0.6636, "step": 20869 }, { "epoch": 1.507757328372496, "grad_norm": 6.845620374410699, "learning_rate": 7.528796924099432e-07, "loss": 0.6095, "step": 20870 }, { "epoch": 1.5078295735727059, "grad_norm": 6.112246255737041, "learning_rate": 7.526704861591714e-07, "loss": 0.5915, "step": 20871 }, { "epoch": 1.5079018187729152, "grad_norm": 7.1540068063295665, "learning_rate": 7.52461303827983e-07, "loss": 0.6133, "step": 20872 }, { "epoch": 1.507974063973125, "grad_norm": 7.146099504531273, "learning_rate": 7.522521454192425e-07, "loss": 0.5803, "step": 20873 }, { "epoch": 1.5080463091733343, "grad_norm": 6.662691107150821, "learning_rate": 7.520430109358131e-07, "loss": 0.5623, "step": 20874 }, { "epoch": 1.5081185543735438, "grad_norm": 6.120780581391323, "learning_rate": 7.518339003805578e-07, "loss": 0.6239, "step": 20875 }, { "epoch": 1.5081907995737533, "grad_norm": 7.1976382196635935, "learning_rate": 7.51624813756339e-07, "loss": 0.6153, "step": 20876 }, { "epoch": 1.5082630447739627, "grad_norm": 7.104831190902477, "learning_rate": 7.514157510660189e-07, "loss": 0.5921, "step": 20877 }, { "epoch": 1.5083352899741724, "grad_norm": 7.6704567649144515, "learning_rate": 7.512067123124603e-07, "loss": 0.6585, "step": 20878 }, { "epoch": 1.5084075351743818, "grad_norm": 7.029364167675952, "learning_rate": 7.50997697498523e-07, "loss": 0.6259, "step": 20879 }, { "epoch": 1.5084797803745915, "grad_norm": 7.650256742270566, "learning_rate": 7.507887066270695e-07, "loss": 0.6509, "step": 20880 }, { "epoch": 1.5085520255748008, "grad_norm": 7.558222546198367, "learning_rate": 7.505797397009609e-07, "loss": 0.6148, "step": 20881 }, { "epoch": 1.5086242707750104, "grad_norm": 7.454396009533124, "learning_rate": 7.50370796723057e-07, "loss": 0.6459, "step": 20882 }, { "epoch": 1.50869651597522, "grad_norm": 7.40825822217845, "learning_rate": 7.50161877696218e-07, "loss": 0.6339, "step": 20883 }, { "epoch": 1.5087687611754295, "grad_norm": 9.06023004976626, "learning_rate": 7.499529826233043e-07, "loss": 0.6224, "step": 20884 }, { "epoch": 1.508841006375639, "grad_norm": 7.614633486087259, "learning_rate": 7.497441115071755e-07, "loss": 0.6499, "step": 20885 }, { "epoch": 1.5089132515758483, "grad_norm": 6.998402276943763, "learning_rate": 7.495352643506909e-07, "loss": 0.6075, "step": 20886 }, { "epoch": 1.508985496776058, "grad_norm": 6.849070810378308, "learning_rate": 7.493264411567094e-07, "loss": 0.6272, "step": 20887 }, { "epoch": 1.5090577419762674, "grad_norm": 7.081078054389283, "learning_rate": 7.491176419280893e-07, "loss": 0.5818, "step": 20888 }, { "epoch": 1.509129987176477, "grad_norm": 6.3762454144046865, "learning_rate": 7.489088666676903e-07, "loss": 0.5921, "step": 20889 }, { "epoch": 1.5092022323766865, "grad_norm": 6.34980822896863, "learning_rate": 7.487001153783685e-07, "loss": 0.5439, "step": 20890 }, { "epoch": 1.509274477576896, "grad_norm": 6.05281567588186, "learning_rate": 7.484913880629821e-07, "loss": 0.6231, "step": 20891 }, { "epoch": 1.5093467227771056, "grad_norm": 8.313937184852426, "learning_rate": 7.482826847243895e-07, "loss": 0.6571, "step": 20892 }, { "epoch": 1.509418967977315, "grad_norm": 6.255143757364252, "learning_rate": 7.480740053654461e-07, "loss": 0.5739, "step": 20893 }, { "epoch": 1.5094912131775247, "grad_norm": 6.813943972362562, "learning_rate": 7.478653499890087e-07, "loss": 0.6543, "step": 20894 }, { "epoch": 1.509563458377734, "grad_norm": 6.79531737270606, "learning_rate": 7.476567185979347e-07, "loss": 0.6194, "step": 20895 }, { "epoch": 1.5096357035779435, "grad_norm": 8.10090797892482, "learning_rate": 7.474481111950808e-07, "loss": 0.661, "step": 20896 }, { "epoch": 1.509707948778153, "grad_norm": 8.437632919959968, "learning_rate": 7.472395277833003e-07, "loss": 0.6787, "step": 20897 }, { "epoch": 1.5097801939783626, "grad_norm": 6.039824562303793, "learning_rate": 7.470309683654497e-07, "loss": 0.5995, "step": 20898 }, { "epoch": 1.5098524391785721, "grad_norm": 7.953366259080127, "learning_rate": 7.468224329443849e-07, "loss": 0.6053, "step": 20899 }, { "epoch": 1.5099246843787815, "grad_norm": 6.646664590379294, "learning_rate": 7.466139215229593e-07, "loss": 0.6081, "step": 20900 }, { "epoch": 1.5099969295789912, "grad_norm": 6.401097155941682, "learning_rate": 7.464054341040272e-07, "loss": 0.6401, "step": 20901 }, { "epoch": 1.5100691747792006, "grad_norm": 7.404324438174099, "learning_rate": 7.461969706904434e-07, "loss": 0.627, "step": 20902 }, { "epoch": 1.51014141997941, "grad_norm": 9.111149139118178, "learning_rate": 7.459885312850613e-07, "loss": 0.6238, "step": 20903 }, { "epoch": 1.5102136651796196, "grad_norm": 6.733830334793356, "learning_rate": 7.457801158907343e-07, "loss": 0.5489, "step": 20904 }, { "epoch": 1.5102859103798292, "grad_norm": 6.338160264872835, "learning_rate": 7.455717245103153e-07, "loss": 0.5906, "step": 20905 }, { "epoch": 1.5103581555800387, "grad_norm": 6.394968510744832, "learning_rate": 7.453633571466581e-07, "loss": 0.5822, "step": 20906 }, { "epoch": 1.510430400780248, "grad_norm": 7.555214643578757, "learning_rate": 7.451550138026134e-07, "loss": 0.5632, "step": 20907 }, { "epoch": 1.5105026459804578, "grad_norm": 6.62916980729747, "learning_rate": 7.449466944810341e-07, "loss": 0.6094, "step": 20908 }, { "epoch": 1.5105748911806671, "grad_norm": 6.650570160939299, "learning_rate": 7.447383991847718e-07, "loss": 0.5555, "step": 20909 }, { "epoch": 1.5106471363808767, "grad_norm": 7.248785509133366, "learning_rate": 7.445301279166786e-07, "loss": 0.644, "step": 20910 }, { "epoch": 1.5107193815810862, "grad_norm": 6.912821823271791, "learning_rate": 7.44321880679604e-07, "loss": 0.6846, "step": 20911 }, { "epoch": 1.5107916267812957, "grad_norm": 6.735395668375416, "learning_rate": 7.441136574763999e-07, "loss": 0.5962, "step": 20912 }, { "epoch": 1.5108638719815053, "grad_norm": 6.203740548213697, "learning_rate": 7.439054583099164e-07, "loss": 0.5513, "step": 20913 }, { "epoch": 1.5109361171817146, "grad_norm": 7.100679617578457, "learning_rate": 7.436972831830037e-07, "loss": 0.7301, "step": 20914 }, { "epoch": 1.5110083623819244, "grad_norm": 6.6437782609971885, "learning_rate": 7.434891320985113e-07, "loss": 0.6789, "step": 20915 }, { "epoch": 1.5110806075821337, "grad_norm": 6.579881592122872, "learning_rate": 7.432810050592892e-07, "loss": 0.6505, "step": 20916 }, { "epoch": 1.5111528527823432, "grad_norm": 8.63029383860843, "learning_rate": 7.430729020681868e-07, "loss": 0.6713, "step": 20917 }, { "epoch": 1.5112250979825528, "grad_norm": 6.699404379716808, "learning_rate": 7.42864823128051e-07, "loss": 0.5775, "step": 20918 }, { "epoch": 1.5112973431827623, "grad_norm": 7.797513457858748, "learning_rate": 7.426567682417321e-07, "loss": 0.6105, "step": 20919 }, { "epoch": 1.5113695883829719, "grad_norm": 7.836044356434301, "learning_rate": 7.42448737412077e-07, "loss": 0.6094, "step": 20920 }, { "epoch": 1.5114418335831812, "grad_norm": 7.414616876583286, "learning_rate": 7.422407306419352e-07, "loss": 0.6139, "step": 20921 }, { "epoch": 1.511514078783391, "grad_norm": 6.902754369329737, "learning_rate": 7.420327479341513e-07, "loss": 0.5952, "step": 20922 }, { "epoch": 1.5115863239836003, "grad_norm": 7.761415503481036, "learning_rate": 7.418247892915747e-07, "loss": 0.5898, "step": 20923 }, { "epoch": 1.5116585691838098, "grad_norm": 7.171226056560822, "learning_rate": 7.416168547170527e-07, "loss": 0.6066, "step": 20924 }, { "epoch": 1.5117308143840194, "grad_norm": 6.618176094799635, "learning_rate": 7.414089442134298e-07, "loss": 0.6273, "step": 20925 }, { "epoch": 1.511803059584229, "grad_norm": 7.548979471720209, "learning_rate": 7.412010577835529e-07, "loss": 0.586, "step": 20926 }, { "epoch": 1.5118753047844384, "grad_norm": 5.935280475368376, "learning_rate": 7.409931954302679e-07, "loss": 0.6807, "step": 20927 }, { "epoch": 1.5119475499846478, "grad_norm": 7.132917589330585, "learning_rate": 7.40785357156421e-07, "loss": 0.6689, "step": 20928 }, { "epoch": 1.5120197951848575, "grad_norm": 8.386485408811431, "learning_rate": 7.40577542964856e-07, "loss": 0.5662, "step": 20929 }, { "epoch": 1.5120920403850668, "grad_norm": 8.372129489267268, "learning_rate": 7.403697528584183e-07, "loss": 0.675, "step": 20930 }, { "epoch": 1.5121642855852764, "grad_norm": 6.713023881696718, "learning_rate": 7.401619868399524e-07, "loss": 0.5657, "step": 20931 }, { "epoch": 1.512236530785486, "grad_norm": 6.299169988300356, "learning_rate": 7.399542449123023e-07, "loss": 0.6287, "step": 20932 }, { "epoch": 1.5123087759856955, "grad_norm": 8.323210964482238, "learning_rate": 7.397465270783122e-07, "loss": 0.6931, "step": 20933 }, { "epoch": 1.512381021185905, "grad_norm": 5.927355643275929, "learning_rate": 7.395388333408254e-07, "loss": 0.5558, "step": 20934 }, { "epoch": 1.5124532663861143, "grad_norm": 8.12754382879384, "learning_rate": 7.39331163702686e-07, "loss": 0.6168, "step": 20935 }, { "epoch": 1.512525511586324, "grad_norm": 8.642792855736388, "learning_rate": 7.391235181667347e-07, "loss": 0.5722, "step": 20936 }, { "epoch": 1.5125977567865334, "grad_norm": 7.3337344435609255, "learning_rate": 7.389158967358154e-07, "loss": 0.6229, "step": 20937 }, { "epoch": 1.512670001986743, "grad_norm": 6.601179375901857, "learning_rate": 7.3870829941277e-07, "loss": 0.6395, "step": 20938 }, { "epoch": 1.5127422471869525, "grad_norm": 6.973482859791138, "learning_rate": 7.385007262004415e-07, "loss": 0.6432, "step": 20939 }, { "epoch": 1.512814492387162, "grad_norm": 5.86076415824376, "learning_rate": 7.382931771016694e-07, "loss": 0.6177, "step": 20940 }, { "epoch": 1.5128867375873716, "grad_norm": 7.7358409378599005, "learning_rate": 7.380856521192956e-07, "loss": 0.6111, "step": 20941 }, { "epoch": 1.512958982787581, "grad_norm": 7.157536153698695, "learning_rate": 7.378781512561614e-07, "loss": 0.6048, "step": 20942 }, { "epoch": 1.5130312279877907, "grad_norm": 6.564071031259333, "learning_rate": 7.376706745151072e-07, "loss": 0.6258, "step": 20943 }, { "epoch": 1.513103473188, "grad_norm": 7.924089772994041, "learning_rate": 7.37463221898973e-07, "loss": 0.6308, "step": 20944 }, { "epoch": 1.5131757183882097, "grad_norm": 6.853409312726307, "learning_rate": 7.372557934105986e-07, "loss": 0.6363, "step": 20945 }, { "epoch": 1.513247963588419, "grad_norm": 7.7599696361301636, "learning_rate": 7.370483890528247e-07, "loss": 0.6725, "step": 20946 }, { "epoch": 1.5133202087886286, "grad_norm": 6.181158033097597, "learning_rate": 7.368410088284886e-07, "loss": 0.5943, "step": 20947 }, { "epoch": 1.5133924539888381, "grad_norm": 6.683551513966489, "learning_rate": 7.366336527404302e-07, "loss": 0.5463, "step": 20948 }, { "epoch": 1.5134646991890475, "grad_norm": 6.322052026628109, "learning_rate": 7.364263207914887e-07, "loss": 0.6146, "step": 20949 }, { "epoch": 1.5135369443892572, "grad_norm": 6.950169996199677, "learning_rate": 7.362190129845007e-07, "loss": 0.6027, "step": 20950 }, { "epoch": 1.5136091895894666, "grad_norm": 7.927453599286952, "learning_rate": 7.360117293223043e-07, "loss": 0.716, "step": 20951 }, { "epoch": 1.5136814347896763, "grad_norm": 8.36208292515495, "learning_rate": 7.358044698077383e-07, "loss": 0.6356, "step": 20952 }, { "epoch": 1.5137536799898856, "grad_norm": 9.268460385099363, "learning_rate": 7.355972344436402e-07, "loss": 0.5854, "step": 20953 }, { "epoch": 1.5138259251900952, "grad_norm": 6.240618263817878, "learning_rate": 7.353900232328451e-07, "loss": 0.5733, "step": 20954 }, { "epoch": 1.5138981703903047, "grad_norm": 6.9159414378434185, "learning_rate": 7.351828361781904e-07, "loss": 0.5919, "step": 20955 }, { "epoch": 1.513970415590514, "grad_norm": 7.635358184826414, "learning_rate": 7.349756732825133e-07, "loss": 0.5995, "step": 20956 }, { "epoch": 1.5140426607907238, "grad_norm": 6.4149735764325735, "learning_rate": 7.347685345486479e-07, "loss": 0.6338, "step": 20957 }, { "epoch": 1.5141149059909331, "grad_norm": 6.292787498359667, "learning_rate": 7.345614199794307e-07, "loss": 0.5865, "step": 20958 }, { "epoch": 1.5141871511911429, "grad_norm": 6.93102670349428, "learning_rate": 7.343543295776967e-07, "loss": 0.6527, "step": 20959 }, { "epoch": 1.5142593963913522, "grad_norm": 7.688807182514891, "learning_rate": 7.341472633462813e-07, "loss": 0.6244, "step": 20960 }, { "epoch": 1.5143316415915618, "grad_norm": 5.932855013880135, "learning_rate": 7.339402212880187e-07, "loss": 0.6325, "step": 20961 }, { "epoch": 1.5144038867917713, "grad_norm": 7.580138263498233, "learning_rate": 7.337332034057432e-07, "loss": 0.6453, "step": 20962 }, { "epoch": 1.5144761319919808, "grad_norm": 6.933904592504083, "learning_rate": 7.335262097022885e-07, "loss": 0.6264, "step": 20963 }, { "epoch": 1.5145483771921904, "grad_norm": 6.604510598197206, "learning_rate": 7.333192401804895e-07, "loss": 0.6354, "step": 20964 }, { "epoch": 1.5146206223923997, "grad_norm": 6.832044650501437, "learning_rate": 7.331122948431774e-07, "loss": 0.5433, "step": 20965 }, { "epoch": 1.5146928675926095, "grad_norm": 6.534879561163025, "learning_rate": 7.329053736931859e-07, "loss": 0.599, "step": 20966 }, { "epoch": 1.5147651127928188, "grad_norm": 6.77080156367747, "learning_rate": 7.326984767333489e-07, "loss": 0.6013, "step": 20967 }, { "epoch": 1.5148373579930283, "grad_norm": 8.889883965849585, "learning_rate": 7.324916039664967e-07, "loss": 0.7192, "step": 20968 }, { "epoch": 1.5149096031932379, "grad_norm": 6.771859272233121, "learning_rate": 7.32284755395462e-07, "loss": 0.5831, "step": 20969 }, { "epoch": 1.5149818483934474, "grad_norm": 7.4551026861332375, "learning_rate": 7.320779310230763e-07, "loss": 0.5904, "step": 20970 }, { "epoch": 1.515054093593657, "grad_norm": 6.637157010341976, "learning_rate": 7.318711308521712e-07, "loss": 0.6214, "step": 20971 }, { "epoch": 1.5151263387938663, "grad_norm": 8.800271498219974, "learning_rate": 7.316643548855776e-07, "loss": 0.6603, "step": 20972 }, { "epoch": 1.515198583994076, "grad_norm": 5.962695501229361, "learning_rate": 7.314576031261256e-07, "loss": 0.5863, "step": 20973 }, { "epoch": 1.5152708291942854, "grad_norm": 5.436836991502317, "learning_rate": 7.312508755766468e-07, "loss": 0.5745, "step": 20974 }, { "epoch": 1.515343074394495, "grad_norm": 7.187130064358911, "learning_rate": 7.310441722399694e-07, "loss": 0.5814, "step": 20975 }, { "epoch": 1.5154153195947044, "grad_norm": 7.49598382866254, "learning_rate": 7.308374931189239e-07, "loss": 0.6356, "step": 20976 }, { "epoch": 1.515487564794914, "grad_norm": 6.168592753623681, "learning_rate": 7.306308382163393e-07, "loss": 0.6037, "step": 20977 }, { "epoch": 1.5155598099951235, "grad_norm": 6.489040819476784, "learning_rate": 7.304242075350454e-07, "loss": 0.678, "step": 20978 }, { "epoch": 1.5156320551953328, "grad_norm": 6.652473058692599, "learning_rate": 7.302176010778686e-07, "loss": 0.6505, "step": 20979 }, { "epoch": 1.5157043003955426, "grad_norm": 8.868533505357272, "learning_rate": 7.300110188476395e-07, "loss": 0.6305, "step": 20980 }, { "epoch": 1.515776545595752, "grad_norm": 7.849890019320633, "learning_rate": 7.29804460847186e-07, "loss": 0.6354, "step": 20981 }, { "epoch": 1.5158487907959615, "grad_norm": 7.9999277588443585, "learning_rate": 7.295979270793343e-07, "loss": 0.6374, "step": 20982 }, { "epoch": 1.515921035996171, "grad_norm": 6.242327749421779, "learning_rate": 7.293914175469125e-07, "loss": 0.6497, "step": 20983 }, { "epoch": 1.5159932811963805, "grad_norm": 5.808565838765322, "learning_rate": 7.291849322527469e-07, "loss": 0.5325, "step": 20984 }, { "epoch": 1.51606552639659, "grad_norm": 6.925473623226491, "learning_rate": 7.289784711996659e-07, "loss": 0.655, "step": 20985 }, { "epoch": 1.5161377715967994, "grad_norm": 6.2098189588316375, "learning_rate": 7.287720343904933e-07, "loss": 0.5983, "step": 20986 }, { "epoch": 1.5162100167970092, "grad_norm": 7.346471404003677, "learning_rate": 7.285656218280565e-07, "loss": 0.6336, "step": 20987 }, { "epoch": 1.5162822619972185, "grad_norm": 6.255725831777879, "learning_rate": 7.283592335151809e-07, "loss": 0.5595, "step": 20988 }, { "epoch": 1.516354507197428, "grad_norm": 6.00975832537337, "learning_rate": 7.281528694546918e-07, "loss": 0.5823, "step": 20989 }, { "epoch": 1.5164267523976376, "grad_norm": 6.996654801100432, "learning_rate": 7.279465296494143e-07, "loss": 0.58, "step": 20990 }, { "epoch": 1.5164989975978471, "grad_norm": 7.746417786654075, "learning_rate": 7.277402141021727e-07, "loss": 0.5985, "step": 20991 }, { "epoch": 1.5165712427980567, "grad_norm": 6.687687666211751, "learning_rate": 7.275339228157924e-07, "loss": 0.6031, "step": 20992 }, { "epoch": 1.516643487998266, "grad_norm": 7.201956631450169, "learning_rate": 7.273276557930959e-07, "loss": 0.6073, "step": 20993 }, { "epoch": 1.5167157331984757, "grad_norm": 7.0474734168551, "learning_rate": 7.27121413036907e-07, "loss": 0.6103, "step": 20994 }, { "epoch": 1.516787978398685, "grad_norm": 7.5206616946542315, "learning_rate": 7.269151945500497e-07, "loss": 0.6183, "step": 20995 }, { "epoch": 1.5168602235988946, "grad_norm": 6.964135528174704, "learning_rate": 7.267090003353472e-07, "loss": 0.6298, "step": 20996 }, { "epoch": 1.5169324687991042, "grad_norm": 7.62877374036865, "learning_rate": 7.265028303956209e-07, "loss": 0.579, "step": 20997 }, { "epoch": 1.5170047139993137, "grad_norm": 6.73259837353863, "learning_rate": 7.262966847336939e-07, "loss": 0.6196, "step": 20998 }, { "epoch": 1.5170769591995232, "grad_norm": 7.770346727723479, "learning_rate": 7.260905633523879e-07, "loss": 0.571, "step": 20999 }, { "epoch": 1.5171492043997326, "grad_norm": 6.567855829271209, "learning_rate": 7.25884466254525e-07, "loss": 0.6271, "step": 21000 }, { "epoch": 1.5172214495999423, "grad_norm": 6.508004175411074, "learning_rate": 7.256783934429262e-07, "loss": 0.5933, "step": 21001 }, { "epoch": 1.5172936948001516, "grad_norm": 6.381880095605644, "learning_rate": 7.254723449204124e-07, "loss": 0.539, "step": 21002 }, { "epoch": 1.5173659400003612, "grad_norm": 7.238950003678829, "learning_rate": 7.252663206898053e-07, "loss": 0.6506, "step": 21003 }, { "epoch": 1.5174381852005707, "grad_norm": 6.785847173551212, "learning_rate": 7.250603207539234e-07, "loss": 0.6894, "step": 21004 }, { "epoch": 1.5175104304007803, "grad_norm": 6.882184522820668, "learning_rate": 7.248543451155873e-07, "loss": 0.6487, "step": 21005 }, { "epoch": 1.5175826756009898, "grad_norm": 6.031026727368274, "learning_rate": 7.246483937776181e-07, "loss": 0.58, "step": 21006 }, { "epoch": 1.5176549208011991, "grad_norm": 6.492666068496204, "learning_rate": 7.244424667428321e-07, "loss": 0.543, "step": 21007 }, { "epoch": 1.517727166001409, "grad_norm": 7.391229907195123, "learning_rate": 7.242365640140512e-07, "loss": 0.6401, "step": 21008 }, { "epoch": 1.5177994112016182, "grad_norm": 6.579230208892342, "learning_rate": 7.240306855940926e-07, "loss": 0.6147, "step": 21009 }, { "epoch": 1.5178716564018278, "grad_norm": 7.046773228893713, "learning_rate": 7.23824831485776e-07, "loss": 0.6316, "step": 21010 }, { "epoch": 1.5179439016020373, "grad_norm": 7.302423038462371, "learning_rate": 7.236190016919173e-07, "loss": 0.5827, "step": 21011 }, { "epoch": 1.5180161468022468, "grad_norm": 6.868943199377145, "learning_rate": 7.234131962153354e-07, "loss": 0.6219, "step": 21012 }, { "epoch": 1.5180883920024564, "grad_norm": 7.904474473696671, "learning_rate": 7.232074150588481e-07, "loss": 0.7034, "step": 21013 }, { "epoch": 1.5181606372026657, "grad_norm": 6.477741571277586, "learning_rate": 7.230016582252708e-07, "loss": 0.6511, "step": 21014 }, { "epoch": 1.5182328824028755, "grad_norm": 7.464081522194857, "learning_rate": 7.227959257174214e-07, "loss": 0.6588, "step": 21015 }, { "epoch": 1.5183051276030848, "grad_norm": 7.19862075946657, "learning_rate": 7.225902175381155e-07, "loss": 0.6276, "step": 21016 }, { "epoch": 1.5183773728032945, "grad_norm": 7.732764983941708, "learning_rate": 7.223845336901697e-07, "loss": 0.6711, "step": 21017 }, { "epoch": 1.5184496180035039, "grad_norm": 7.257832998322775, "learning_rate": 7.221788741763993e-07, "loss": 0.5964, "step": 21018 }, { "epoch": 1.5185218632037134, "grad_norm": 7.841718798257019, "learning_rate": 7.219732389996198e-07, "loss": 0.5627, "step": 21019 }, { "epoch": 1.518594108403923, "grad_norm": 6.727351870059552, "learning_rate": 7.217676281626459e-07, "loss": 0.6148, "step": 21020 }, { "epoch": 1.5186663536041323, "grad_norm": 6.035171419883847, "learning_rate": 7.215620416682934e-07, "loss": 0.6541, "step": 21021 }, { "epoch": 1.518738598804342, "grad_norm": 7.582037790601249, "learning_rate": 7.213564795193751e-07, "loss": 0.6132, "step": 21022 }, { "epoch": 1.5188108440045514, "grad_norm": 7.77967861502179, "learning_rate": 7.211509417187051e-07, "loss": 0.7094, "step": 21023 }, { "epoch": 1.5188830892047611, "grad_norm": 8.758397895243528, "learning_rate": 7.209454282690987e-07, "loss": 0.6188, "step": 21024 }, { "epoch": 1.5189553344049704, "grad_norm": 7.1785448096892726, "learning_rate": 7.207399391733672e-07, "loss": 0.5935, "step": 21025 }, { "epoch": 1.51902757960518, "grad_norm": 6.41406844739371, "learning_rate": 7.205344744343243e-07, "loss": 0.6253, "step": 21026 }, { "epoch": 1.5190998248053895, "grad_norm": 6.596003287514075, "learning_rate": 7.203290340547819e-07, "loss": 0.5544, "step": 21027 }, { "epoch": 1.5191720700055988, "grad_norm": 6.939857151199348, "learning_rate": 7.201236180375551e-07, "loss": 0.7381, "step": 21028 }, { "epoch": 1.5192443152058086, "grad_norm": 7.257742595027988, "learning_rate": 7.199182263854529e-07, "loss": 0.6189, "step": 21029 }, { "epoch": 1.519316560406018, "grad_norm": 7.465283726731722, "learning_rate": 7.197128591012881e-07, "loss": 0.5802, "step": 21030 }, { "epoch": 1.5193888056062277, "grad_norm": 6.670981854372502, "learning_rate": 7.195075161878729e-07, "loss": 0.5555, "step": 21031 }, { "epoch": 1.519461050806437, "grad_norm": 7.049225070649943, "learning_rate": 7.193021976480164e-07, "loss": 0.5985, "step": 21032 }, { "epoch": 1.5195332960066465, "grad_norm": 8.278297214581585, "learning_rate": 7.190969034845302e-07, "loss": 0.6516, "step": 21033 }, { "epoch": 1.519605541206856, "grad_norm": 7.578373415306623, "learning_rate": 7.188916337002247e-07, "loss": 0.6636, "step": 21034 }, { "epoch": 1.5196777864070656, "grad_norm": 6.5380006791724465, "learning_rate": 7.186863882979098e-07, "loss": 0.6104, "step": 21035 }, { "epoch": 1.5197500316072752, "grad_norm": 6.2632379336939215, "learning_rate": 7.184811672803952e-07, "loss": 0.5713, "step": 21036 }, { "epoch": 1.5198222768074845, "grad_norm": 6.392512483431696, "learning_rate": 7.182759706504899e-07, "loss": 0.6067, "step": 21037 }, { "epoch": 1.5198945220076943, "grad_norm": 8.360217356799385, "learning_rate": 7.180707984110041e-07, "loss": 0.6469, "step": 21038 }, { "epoch": 1.5199667672079036, "grad_norm": 6.785582955602509, "learning_rate": 7.178656505647447e-07, "loss": 0.6227, "step": 21039 }, { "epoch": 1.5200390124081131, "grad_norm": 7.833517383550551, "learning_rate": 7.176605271145206e-07, "loss": 0.5692, "step": 21040 }, { "epoch": 1.5201112576083227, "grad_norm": 7.899149605093107, "learning_rate": 7.174554280631402e-07, "loss": 0.6162, "step": 21041 }, { "epoch": 1.5201835028085322, "grad_norm": 6.431544539784851, "learning_rate": 7.172503534134118e-07, "loss": 0.5918, "step": 21042 }, { "epoch": 1.5202557480087417, "grad_norm": 6.751116342420451, "learning_rate": 7.17045303168141e-07, "loss": 0.6311, "step": 21043 }, { "epoch": 1.520327993208951, "grad_norm": 6.876656072734464, "learning_rate": 7.168402773301356e-07, "loss": 0.5664, "step": 21044 }, { "epoch": 1.5204002384091608, "grad_norm": 6.9919239868292085, "learning_rate": 7.166352759022022e-07, "loss": 0.6303, "step": 21045 }, { "epoch": 1.5204724836093702, "grad_norm": 7.37181876154232, "learning_rate": 7.164302988871472e-07, "loss": 0.5623, "step": 21046 }, { "epoch": 1.5205447288095797, "grad_norm": 6.3890952072369895, "learning_rate": 7.162253462877763e-07, "loss": 0.6969, "step": 21047 }, { "epoch": 1.5206169740097892, "grad_norm": 8.000616049888556, "learning_rate": 7.160204181068958e-07, "loss": 0.6269, "step": 21048 }, { "epoch": 1.5206892192099988, "grad_norm": 10.217984698741281, "learning_rate": 7.158155143473114e-07, "loss": 0.6295, "step": 21049 }, { "epoch": 1.5207614644102083, "grad_norm": 6.156668866619604, "learning_rate": 7.156106350118264e-07, "loss": 0.5505, "step": 21050 }, { "epoch": 1.5208337096104176, "grad_norm": 9.411369882043774, "learning_rate": 7.15405780103246e-07, "loss": 0.6741, "step": 21051 }, { "epoch": 1.5209059548106274, "grad_norm": 5.812356926582553, "learning_rate": 7.152009496243753e-07, "loss": 0.5947, "step": 21052 }, { "epoch": 1.5209782000108367, "grad_norm": 7.6362242087314085, "learning_rate": 7.149961435780184e-07, "loss": 0.6414, "step": 21053 }, { "epoch": 1.5210504452110463, "grad_norm": 5.947731598637719, "learning_rate": 7.147913619669775e-07, "loss": 0.679, "step": 21054 }, { "epoch": 1.5211226904112558, "grad_norm": 7.641739528007744, "learning_rate": 7.145866047940567e-07, "loss": 0.6563, "step": 21055 }, { "epoch": 1.5211949356114653, "grad_norm": 7.341237259198861, "learning_rate": 7.14381872062059e-07, "loss": 0.593, "step": 21056 }, { "epoch": 1.521267180811675, "grad_norm": 7.684641484948876, "learning_rate": 7.141771637737871e-07, "loss": 0.6923, "step": 21057 }, { "epoch": 1.5213394260118842, "grad_norm": 6.475728716403063, "learning_rate": 7.139724799320433e-07, "loss": 0.5983, "step": 21058 }, { "epoch": 1.521411671212094, "grad_norm": 8.265782836099355, "learning_rate": 7.137678205396292e-07, "loss": 0.5806, "step": 21059 }, { "epoch": 1.5214839164123033, "grad_norm": 8.409406445181128, "learning_rate": 7.135631855993477e-07, "loss": 0.6597, "step": 21060 }, { "epoch": 1.5215561616125128, "grad_norm": 7.010561196668661, "learning_rate": 7.133585751139985e-07, "loss": 0.5863, "step": 21061 }, { "epoch": 1.5216284068127224, "grad_norm": 6.759368893659834, "learning_rate": 7.131539890863828e-07, "loss": 0.6104, "step": 21062 }, { "epoch": 1.521700652012932, "grad_norm": 6.645475882201946, "learning_rate": 7.129494275193027e-07, "loss": 0.593, "step": 21063 }, { "epoch": 1.5217728972131415, "grad_norm": 7.774915698413176, "learning_rate": 7.127448904155557e-07, "loss": 0.618, "step": 21064 }, { "epoch": 1.5218451424133508, "grad_norm": 7.027505513354961, "learning_rate": 7.125403777779441e-07, "loss": 0.6468, "step": 21065 }, { "epoch": 1.5219173876135605, "grad_norm": 6.625668473976854, "learning_rate": 7.123358896092669e-07, "loss": 0.5547, "step": 21066 }, { "epoch": 1.5219896328137699, "grad_norm": 6.926453464716048, "learning_rate": 7.121314259123241e-07, "loss": 0.6277, "step": 21067 }, { "epoch": 1.5220618780139794, "grad_norm": 6.912347233827729, "learning_rate": 7.119269866899134e-07, "loss": 0.6018, "step": 21068 }, { "epoch": 1.522134123214189, "grad_norm": 7.165015836326844, "learning_rate": 7.117225719448337e-07, "loss": 0.6771, "step": 21069 }, { "epoch": 1.5222063684143985, "grad_norm": 7.346371965861177, "learning_rate": 7.115181816798833e-07, "loss": 0.5997, "step": 21070 }, { "epoch": 1.522278613614608, "grad_norm": 6.934206894104339, "learning_rate": 7.113138158978614e-07, "loss": 0.6925, "step": 21071 }, { "epoch": 1.5223508588148174, "grad_norm": 6.12255468017784, "learning_rate": 7.111094746015635e-07, "loss": 0.6536, "step": 21072 }, { "epoch": 1.5224231040150271, "grad_norm": 6.696240853721207, "learning_rate": 7.109051577937882e-07, "loss": 0.6433, "step": 21073 }, { "epoch": 1.5224953492152364, "grad_norm": 5.980711450517045, "learning_rate": 7.107008654773321e-07, "loss": 0.5083, "step": 21074 }, { "epoch": 1.522567594415446, "grad_norm": 6.240091633246149, "learning_rate": 7.104965976549916e-07, "loss": 0.5958, "step": 21075 }, { "epoch": 1.5226398396156555, "grad_norm": 7.835251052622193, "learning_rate": 7.102923543295634e-07, "loss": 0.6028, "step": 21076 }, { "epoch": 1.522712084815865, "grad_norm": 7.760026414177369, "learning_rate": 7.100881355038431e-07, "loss": 0.6688, "step": 21077 }, { "epoch": 1.5227843300160746, "grad_norm": 7.3865163080403, "learning_rate": 7.098839411806274e-07, "loss": 0.553, "step": 21078 }, { "epoch": 1.522856575216284, "grad_norm": 7.456978303652397, "learning_rate": 7.096797713627096e-07, "loss": 0.5691, "step": 21079 }, { "epoch": 1.5229288204164937, "grad_norm": 6.6330054516803685, "learning_rate": 7.094756260528859e-07, "loss": 0.606, "step": 21080 }, { "epoch": 1.523001065616703, "grad_norm": 6.74180699199658, "learning_rate": 7.092715052539512e-07, "loss": 0.5451, "step": 21081 }, { "epoch": 1.5230733108169126, "grad_norm": 7.4880002983451845, "learning_rate": 7.090674089686986e-07, "loss": 0.5389, "step": 21082 }, { "epoch": 1.523145556017122, "grad_norm": 8.871412384881943, "learning_rate": 7.088633371999226e-07, "loss": 0.6236, "step": 21083 }, { "epoch": 1.5232178012173316, "grad_norm": 6.427576212065794, "learning_rate": 7.086592899504158e-07, "loss": 0.5651, "step": 21084 }, { "epoch": 1.5232900464175412, "grad_norm": 6.359382198245312, "learning_rate": 7.084552672229741e-07, "loss": 0.6745, "step": 21085 }, { "epoch": 1.5233622916177505, "grad_norm": 8.094703883220884, "learning_rate": 7.08251269020388e-07, "loss": 0.6495, "step": 21086 }, { "epoch": 1.5234345368179603, "grad_norm": 6.845090133931794, "learning_rate": 7.080472953454509e-07, "loss": 0.5948, "step": 21087 }, { "epoch": 1.5235067820181696, "grad_norm": 6.513095867991238, "learning_rate": 7.078433462009556e-07, "loss": 0.5987, "step": 21088 }, { "epoch": 1.5235790272183793, "grad_norm": 7.583676285027846, "learning_rate": 7.076394215896926e-07, "loss": 0.7108, "step": 21089 }, { "epoch": 1.5236512724185887, "grad_norm": 6.569176465680904, "learning_rate": 7.074355215144543e-07, "loss": 0.5253, "step": 21090 }, { "epoch": 1.5237235176187982, "grad_norm": 6.186486671778085, "learning_rate": 7.072316459780318e-07, "loss": 0.6203, "step": 21091 }, { "epoch": 1.5237957628190077, "grad_norm": 6.625826226302071, "learning_rate": 7.07027794983216e-07, "loss": 0.5978, "step": 21092 }, { "epoch": 1.523868008019217, "grad_norm": 6.914265809599094, "learning_rate": 7.06823968532798e-07, "loss": 0.639, "step": 21093 }, { "epoch": 1.5239402532194268, "grad_norm": 6.405628006676013, "learning_rate": 7.066201666295669e-07, "loss": 0.5775, "step": 21094 }, { "epoch": 1.5240124984196362, "grad_norm": 8.149515111809855, "learning_rate": 7.064163892763146e-07, "loss": 0.5903, "step": 21095 }, { "epoch": 1.524084743619846, "grad_norm": 7.385252727918294, "learning_rate": 7.062126364758282e-07, "loss": 0.6624, "step": 21096 }, { "epoch": 1.5241569888200552, "grad_norm": 7.022151230937443, "learning_rate": 7.060089082308979e-07, "loss": 0.6303, "step": 21097 }, { "epoch": 1.5242292340202648, "grad_norm": 5.920662869123224, "learning_rate": 7.058052045443131e-07, "loss": 0.6085, "step": 21098 }, { "epoch": 1.5243014792204743, "grad_norm": 7.133370819468652, "learning_rate": 7.056015254188625e-07, "loss": 0.63, "step": 21099 }, { "epoch": 1.5243737244206836, "grad_norm": 7.322245828027386, "learning_rate": 7.05397870857333e-07, "loss": 0.6642, "step": 21100 }, { "epoch": 1.5244459696208934, "grad_norm": 7.046599456745685, "learning_rate": 7.051942408625131e-07, "loss": 0.6674, "step": 21101 }, { "epoch": 1.5245182148211027, "grad_norm": 8.191210655747778, "learning_rate": 7.049906354371908e-07, "loss": 0.6116, "step": 21102 }, { "epoch": 1.5245904600213125, "grad_norm": 7.443567702001329, "learning_rate": 7.047870545841526e-07, "loss": 0.6095, "step": 21103 }, { "epoch": 1.5246627052215218, "grad_norm": 8.48332148802804, "learning_rate": 7.045834983061859e-07, "loss": 0.5748, "step": 21104 }, { "epoch": 1.5247349504217313, "grad_norm": 7.491210364624049, "learning_rate": 7.043799666060771e-07, "loss": 0.6244, "step": 21105 }, { "epoch": 1.524807195621941, "grad_norm": 6.509588065760281, "learning_rate": 7.041764594866129e-07, "loss": 0.5715, "step": 21106 }, { "epoch": 1.5248794408221504, "grad_norm": 7.102557524918429, "learning_rate": 7.039729769505782e-07, "loss": 0.6188, "step": 21107 }, { "epoch": 1.52495168602236, "grad_norm": 7.973878892884483, "learning_rate": 7.037695190007587e-07, "loss": 0.6482, "step": 21108 }, { "epoch": 1.5250239312225693, "grad_norm": 26.490994056722016, "learning_rate": 7.035660856399399e-07, "loss": 0.6809, "step": 21109 }, { "epoch": 1.525096176422779, "grad_norm": 7.026088600388399, "learning_rate": 7.03362676870907e-07, "loss": 0.6509, "step": 21110 }, { "epoch": 1.5251684216229884, "grad_norm": 8.937716821561414, "learning_rate": 7.031592926964436e-07, "loss": 0.5844, "step": 21111 }, { "epoch": 1.525240666823198, "grad_norm": 7.629831503111268, "learning_rate": 7.029559331193336e-07, "loss": 0.6852, "step": 21112 }, { "epoch": 1.5253129120234075, "grad_norm": 6.555583978939759, "learning_rate": 7.02752598142363e-07, "loss": 0.5477, "step": 21113 }, { "epoch": 1.525385157223617, "grad_norm": 7.8891278650088426, "learning_rate": 7.025492877683129e-07, "loss": 0.684, "step": 21114 }, { "epoch": 1.5254574024238265, "grad_norm": 6.9324473634159824, "learning_rate": 7.023460019999675e-07, "loss": 0.6491, "step": 21115 }, { "epoch": 1.5255296476240359, "grad_norm": 5.884962036217788, "learning_rate": 7.021427408401097e-07, "loss": 0.552, "step": 21116 }, { "epoch": 1.5256018928242456, "grad_norm": 6.391185484093686, "learning_rate": 7.019395042915225e-07, "loss": 0.5338, "step": 21117 }, { "epoch": 1.525674138024455, "grad_norm": 7.276496612663374, "learning_rate": 7.017362923569867e-07, "loss": 0.6145, "step": 21118 }, { "epoch": 1.5257463832246645, "grad_norm": 7.98434633373034, "learning_rate": 7.015331050392849e-07, "loss": 0.5852, "step": 21119 }, { "epoch": 1.525818628424874, "grad_norm": 7.541876043571109, "learning_rate": 7.013299423411982e-07, "loss": 0.5644, "step": 21120 }, { "epoch": 1.5258908736250836, "grad_norm": 6.195660690778964, "learning_rate": 7.011268042655081e-07, "loss": 0.599, "step": 21121 }, { "epoch": 1.5259631188252931, "grad_norm": 6.4461620533853985, "learning_rate": 7.009236908149955e-07, "loss": 0.5324, "step": 21122 }, { "epoch": 1.5260353640255024, "grad_norm": 7.0906386757353514, "learning_rate": 7.007206019924403e-07, "loss": 0.7129, "step": 21123 }, { "epoch": 1.5261076092257122, "grad_norm": 6.660788265008827, "learning_rate": 7.00517537800624e-07, "loss": 0.5835, "step": 21124 }, { "epoch": 1.5261798544259215, "grad_norm": 7.444452450243439, "learning_rate": 7.003144982423247e-07, "loss": 0.6537, "step": 21125 }, { "epoch": 1.526252099626131, "grad_norm": 7.6178643028650255, "learning_rate": 7.001114833203227e-07, "loss": 0.5993, "step": 21126 }, { "epoch": 1.5263243448263406, "grad_norm": 5.6722227047778775, "learning_rate": 6.999084930373967e-07, "loss": 0.6022, "step": 21127 }, { "epoch": 1.5263965900265501, "grad_norm": 7.114502518633786, "learning_rate": 6.997055273963266e-07, "loss": 0.6045, "step": 21128 }, { "epoch": 1.5264688352267597, "grad_norm": 7.593820579420588, "learning_rate": 6.995025863998891e-07, "loss": 0.6063, "step": 21129 }, { "epoch": 1.526541080426969, "grad_norm": 7.488638091800852, "learning_rate": 6.992996700508634e-07, "loss": 0.6503, "step": 21130 }, { "epoch": 1.5266133256271788, "grad_norm": 7.733860314709055, "learning_rate": 6.99096778352027e-07, "loss": 0.6125, "step": 21131 }, { "epoch": 1.526685570827388, "grad_norm": 6.097595879245508, "learning_rate": 6.988939113061571e-07, "loss": 0.5627, "step": 21132 }, { "epoch": 1.5267578160275976, "grad_norm": 7.549594681325278, "learning_rate": 6.986910689160315e-07, "loss": 0.6076, "step": 21133 }, { "epoch": 1.5268300612278072, "grad_norm": 7.849454105129551, "learning_rate": 6.984882511844263e-07, "loss": 0.7027, "step": 21134 }, { "epoch": 1.5269023064280167, "grad_norm": 6.68651823163683, "learning_rate": 6.982854581141191e-07, "loss": 0.6607, "step": 21135 }, { "epoch": 1.5269745516282263, "grad_norm": 7.802992750337297, "learning_rate": 6.980826897078841e-07, "loss": 0.6769, "step": 21136 }, { "epoch": 1.5270467968284356, "grad_norm": 8.913031529310354, "learning_rate": 6.978799459684979e-07, "loss": 0.6577, "step": 21137 }, { "epoch": 1.5271190420286453, "grad_norm": 6.555096619197288, "learning_rate": 6.97677226898737e-07, "loss": 0.6164, "step": 21138 }, { "epoch": 1.5271912872288547, "grad_norm": 6.851517402039436, "learning_rate": 6.974745325013746e-07, "loss": 0.6394, "step": 21139 }, { "epoch": 1.5272635324290642, "grad_norm": 7.3888283092083675, "learning_rate": 6.972718627791863e-07, "loss": 0.6262, "step": 21140 }, { "epoch": 1.5273357776292737, "grad_norm": 8.784714636382663, "learning_rate": 6.970692177349456e-07, "loss": 0.6017, "step": 21141 }, { "epoch": 1.5274080228294833, "grad_norm": 7.722859759584589, "learning_rate": 6.968665973714289e-07, "loss": 0.6277, "step": 21142 }, { "epoch": 1.5274802680296928, "grad_norm": 7.278950995438432, "learning_rate": 6.966640016914081e-07, "loss": 0.6287, "step": 21143 }, { "epoch": 1.5275525132299022, "grad_norm": 6.89854173592328, "learning_rate": 6.964614306976563e-07, "loss": 0.6064, "step": 21144 }, { "epoch": 1.527624758430112, "grad_norm": 5.9038365191230735, "learning_rate": 6.962588843929486e-07, "loss": 0.6121, "step": 21145 }, { "epoch": 1.5276970036303212, "grad_norm": 8.94716692363972, "learning_rate": 6.960563627800549e-07, "loss": 0.6686, "step": 21146 }, { "epoch": 1.5277692488305308, "grad_norm": 6.209252240407839, "learning_rate": 6.958538658617492e-07, "loss": 0.6011, "step": 21147 }, { "epoch": 1.5278414940307403, "grad_norm": 7.4753307256607595, "learning_rate": 6.956513936408032e-07, "loss": 0.5689, "step": 21148 }, { "epoch": 1.5279137392309499, "grad_norm": 7.638469118857281, "learning_rate": 6.954489461199887e-07, "loss": 0.6315, "step": 21149 }, { "epoch": 1.5279859844311594, "grad_norm": 8.986246408357538, "learning_rate": 6.95246523302077e-07, "loss": 0.5998, "step": 21150 }, { "epoch": 1.5280582296313687, "grad_norm": 6.324757374569239, "learning_rate": 6.950441251898388e-07, "loss": 0.5865, "step": 21151 }, { "epoch": 1.5281304748315785, "grad_norm": 7.0036990065371825, "learning_rate": 6.948417517860454e-07, "loss": 0.5918, "step": 21152 }, { "epoch": 1.5282027200317878, "grad_norm": 6.855792872056522, "learning_rate": 6.946394030934675e-07, "loss": 0.7035, "step": 21153 }, { "epoch": 1.5282749652319974, "grad_norm": 6.825767546591201, "learning_rate": 6.944370791148736e-07, "loss": 0.5826, "step": 21154 }, { "epoch": 1.528347210432207, "grad_norm": 7.173113946549681, "learning_rate": 6.942347798530341e-07, "loss": 0.6802, "step": 21155 }, { "epoch": 1.5284194556324164, "grad_norm": 7.544048656755738, "learning_rate": 6.940325053107191e-07, "loss": 0.5939, "step": 21156 }, { "epoch": 1.528491700832626, "grad_norm": 7.014478424106518, "learning_rate": 6.938302554906961e-07, "loss": 0.6045, "step": 21157 }, { "epoch": 1.5285639460328353, "grad_norm": 7.40757849001233, "learning_rate": 6.936280303957346e-07, "loss": 0.5783, "step": 21158 }, { "epoch": 1.528636191233045, "grad_norm": 7.1067034386309125, "learning_rate": 6.934258300286028e-07, "loss": 0.6022, "step": 21159 }, { "epoch": 1.5287084364332544, "grad_norm": 6.154995398314403, "learning_rate": 6.932236543920687e-07, "loss": 0.5566, "step": 21160 }, { "epoch": 1.528780681633464, "grad_norm": 7.86839768497413, "learning_rate": 6.930215034888998e-07, "loss": 0.6322, "step": 21161 }, { "epoch": 1.5288529268336735, "grad_norm": 5.808018423088006, "learning_rate": 6.928193773218636e-07, "loss": 0.6265, "step": 21162 }, { "epoch": 1.528925172033883, "grad_norm": 7.385754777049464, "learning_rate": 6.926172758937278e-07, "loss": 0.5836, "step": 21163 }, { "epoch": 1.5289974172340925, "grad_norm": 6.743969236527731, "learning_rate": 6.924151992072572e-07, "loss": 0.5747, "step": 21164 }, { "epoch": 1.5290696624343019, "grad_norm": 7.503294411954705, "learning_rate": 6.922131472652191e-07, "loss": 0.617, "step": 21165 }, { "epoch": 1.5291419076345116, "grad_norm": 6.671119379141102, "learning_rate": 6.920111200703791e-07, "loss": 0.5789, "step": 21166 }, { "epoch": 1.529214152834721, "grad_norm": 8.747848682240841, "learning_rate": 6.918091176255043e-07, "loss": 0.6556, "step": 21167 }, { "epoch": 1.5292863980349307, "grad_norm": 6.8949984902759915, "learning_rate": 6.916071399333576e-07, "loss": 0.6209, "step": 21168 }, { "epoch": 1.52935864323514, "grad_norm": 7.399656813628092, "learning_rate": 6.914051869967042e-07, "loss": 0.6582, "step": 21169 }, { "epoch": 1.5294308884353496, "grad_norm": 7.352412402718474, "learning_rate": 6.912032588183112e-07, "loss": 0.6339, "step": 21170 }, { "epoch": 1.5295031336355591, "grad_norm": 7.863400348002064, "learning_rate": 6.910013554009404e-07, "loss": 0.6352, "step": 21171 }, { "epoch": 1.5295753788357684, "grad_norm": 6.567108861038061, "learning_rate": 6.907994767473564e-07, "loss": 0.6041, "step": 21172 }, { "epoch": 1.5296476240359782, "grad_norm": 9.08472322335259, "learning_rate": 6.905976228603228e-07, "loss": 0.6942, "step": 21173 }, { "epoch": 1.5297198692361875, "grad_norm": 6.809713091078985, "learning_rate": 6.903957937426037e-07, "loss": 0.6165, "step": 21174 }, { "epoch": 1.5297921144363973, "grad_norm": 7.444531362731529, "learning_rate": 6.901939893969603e-07, "loss": 0.5595, "step": 21175 }, { "epoch": 1.5298643596366066, "grad_norm": 8.063041727954673, "learning_rate": 6.899922098261561e-07, "loss": 0.6595, "step": 21176 }, { "epoch": 1.5299366048368161, "grad_norm": 8.102120449235853, "learning_rate": 6.897904550329529e-07, "loss": 0.6756, "step": 21177 }, { "epoch": 1.5300088500370257, "grad_norm": 6.408705226919621, "learning_rate": 6.895887250201128e-07, "loss": 0.5947, "step": 21178 }, { "epoch": 1.530081095237235, "grad_norm": 6.959707645984716, "learning_rate": 6.893870197903976e-07, "loss": 0.5322, "step": 21179 }, { "epoch": 1.5301533404374448, "grad_norm": 6.888878702682566, "learning_rate": 6.891853393465681e-07, "loss": 0.7053, "step": 21180 }, { "epoch": 1.530225585637654, "grad_norm": 6.793818345555196, "learning_rate": 6.88983683691386e-07, "loss": 0.5853, "step": 21181 }, { "epoch": 1.5302978308378639, "grad_norm": 7.475112822328701, "learning_rate": 6.887820528276107e-07, "loss": 0.6422, "step": 21182 }, { "epoch": 1.5303700760380732, "grad_norm": 8.119953172670836, "learning_rate": 6.885804467580023e-07, "loss": 0.6377, "step": 21183 }, { "epoch": 1.5304423212382827, "grad_norm": 7.505412628195896, "learning_rate": 6.883788654853215e-07, "loss": 0.6471, "step": 21184 }, { "epoch": 1.5305145664384923, "grad_norm": 7.645442886846723, "learning_rate": 6.881773090123281e-07, "loss": 0.6357, "step": 21185 }, { "epoch": 1.5305868116387018, "grad_norm": 6.4795221392723334, "learning_rate": 6.879757773417794e-07, "loss": 0.5934, "step": 21186 }, { "epoch": 1.5306590568389113, "grad_norm": 7.045958736423374, "learning_rate": 6.877742704764359e-07, "loss": 0.5397, "step": 21187 }, { "epoch": 1.5307313020391207, "grad_norm": 7.1241609848998735, "learning_rate": 6.875727884190553e-07, "loss": 0.5999, "step": 21188 }, { "epoch": 1.5308035472393304, "grad_norm": 6.555049772560995, "learning_rate": 6.873713311723959e-07, "loss": 0.6685, "step": 21189 }, { "epoch": 1.5308757924395398, "grad_norm": 5.88302551127052, "learning_rate": 6.871698987392155e-07, "loss": 0.6304, "step": 21190 }, { "epoch": 1.5309480376397493, "grad_norm": 6.449025031634052, "learning_rate": 6.869684911222718e-07, "loss": 0.5272, "step": 21191 }, { "epoch": 1.5310202828399588, "grad_norm": 7.341612419537085, "learning_rate": 6.867671083243224e-07, "loss": 0.6315, "step": 21192 }, { "epoch": 1.5310925280401684, "grad_norm": 7.561967310784608, "learning_rate": 6.865657503481227e-07, "loss": 0.655, "step": 21193 }, { "epoch": 1.531164773240378, "grad_norm": 5.606609034444287, "learning_rate": 6.863644171964298e-07, "loss": 0.5724, "step": 21194 }, { "epoch": 1.5312370184405872, "grad_norm": 7.381712622684284, "learning_rate": 6.861631088720005e-07, "loss": 0.6146, "step": 21195 }, { "epoch": 1.531309263640797, "grad_norm": 6.156707901554093, "learning_rate": 6.859618253775893e-07, "loss": 0.5454, "step": 21196 }, { "epoch": 1.5313815088410063, "grad_norm": 7.606723832704919, "learning_rate": 6.857605667159514e-07, "loss": 0.6233, "step": 21197 }, { "epoch": 1.5314537540412159, "grad_norm": 7.988505211560966, "learning_rate": 6.855593328898433e-07, "loss": 0.6126, "step": 21198 }, { "epoch": 1.5315259992414254, "grad_norm": 7.218195642100366, "learning_rate": 6.853581239020198e-07, "loss": 0.6687, "step": 21199 }, { "epoch": 1.531598244441635, "grad_norm": 8.40769339156132, "learning_rate": 6.85156939755234e-07, "loss": 0.6115, "step": 21200 }, { "epoch": 1.5316704896418445, "grad_norm": 8.66991505511953, "learning_rate": 6.849557804522408e-07, "loss": 0.685, "step": 21201 }, { "epoch": 1.5317427348420538, "grad_norm": 6.197603242359665, "learning_rate": 6.847546459957932e-07, "loss": 0.5604, "step": 21202 }, { "epoch": 1.5318149800422636, "grad_norm": 6.046806551917992, "learning_rate": 6.845535363886463e-07, "loss": 0.5265, "step": 21203 }, { "epoch": 1.531887225242473, "grad_norm": 6.756746664503654, "learning_rate": 6.84352451633551e-07, "loss": 0.6156, "step": 21204 }, { "epoch": 1.5319594704426824, "grad_norm": 8.172464090561881, "learning_rate": 6.841513917332607e-07, "loss": 0.6048, "step": 21205 }, { "epoch": 1.532031715642892, "grad_norm": 6.434614997971451, "learning_rate": 6.839503566905279e-07, "loss": 0.5881, "step": 21206 }, { "epoch": 1.5321039608431015, "grad_norm": 6.978072017019245, "learning_rate": 6.837493465081047e-07, "loss": 0.6398, "step": 21207 }, { "epoch": 1.532176206043311, "grad_norm": 7.543667129276332, "learning_rate": 6.835483611887428e-07, "loss": 0.6186, "step": 21208 }, { "epoch": 1.5322484512435204, "grad_norm": 6.984702151699041, "learning_rate": 6.833474007351934e-07, "loss": 0.6243, "step": 21209 }, { "epoch": 1.5323206964437301, "grad_norm": 5.935843146192559, "learning_rate": 6.831464651502081e-07, "loss": 0.5525, "step": 21210 }, { "epoch": 1.5323929416439395, "grad_norm": 6.759918837201145, "learning_rate": 6.829455544365366e-07, "loss": 0.6349, "step": 21211 }, { "epoch": 1.532465186844149, "grad_norm": 7.962855174528642, "learning_rate": 6.827446685969294e-07, "loss": 0.5768, "step": 21212 }, { "epoch": 1.5325374320443585, "grad_norm": 6.798008335378889, "learning_rate": 6.825438076341376e-07, "loss": 0.6223, "step": 21213 }, { "epoch": 1.532609677244568, "grad_norm": 8.134080712838344, "learning_rate": 6.823429715509089e-07, "loss": 0.5978, "step": 21214 }, { "epoch": 1.5326819224447776, "grad_norm": 7.257051130257919, "learning_rate": 6.821421603499936e-07, "loss": 0.6512, "step": 21215 }, { "epoch": 1.532754167644987, "grad_norm": 7.033470786440429, "learning_rate": 6.81941374034141e-07, "loss": 0.5644, "step": 21216 }, { "epoch": 1.5328264128451967, "grad_norm": 6.207786211658566, "learning_rate": 6.817406126060991e-07, "loss": 0.6125, "step": 21217 }, { "epoch": 1.532898658045406, "grad_norm": 6.157336763610369, "learning_rate": 6.815398760686165e-07, "loss": 0.6924, "step": 21218 }, { "epoch": 1.5329709032456156, "grad_norm": 8.115790209228848, "learning_rate": 6.81339164424441e-07, "loss": 0.5647, "step": 21219 }, { "epoch": 1.5330431484458251, "grad_norm": 7.248382124758164, "learning_rate": 6.81138477676321e-07, "loss": 0.5635, "step": 21220 }, { "epoch": 1.5331153936460347, "grad_norm": 6.352934404132699, "learning_rate": 6.809378158270022e-07, "loss": 0.6896, "step": 21221 }, { "epoch": 1.5331876388462442, "grad_norm": 6.456538814499202, "learning_rate": 6.807371788792325e-07, "loss": 0.6055, "step": 21222 }, { "epoch": 1.5332598840464535, "grad_norm": 6.724975540513721, "learning_rate": 6.805365668357581e-07, "loss": 0.5625, "step": 21223 }, { "epoch": 1.5333321292466633, "grad_norm": 6.639168900561692, "learning_rate": 6.803359796993261e-07, "loss": 0.628, "step": 21224 }, { "epoch": 1.5334043744468726, "grad_norm": 6.513215641818269, "learning_rate": 6.801354174726802e-07, "loss": 0.5968, "step": 21225 }, { "epoch": 1.5334766196470822, "grad_norm": 7.081374881245889, "learning_rate": 6.799348801585681e-07, "loss": 0.5575, "step": 21226 }, { "epoch": 1.5335488648472917, "grad_norm": 7.432912229005638, "learning_rate": 6.797343677597352e-07, "loss": 0.6638, "step": 21227 }, { "epoch": 1.5336211100475012, "grad_norm": 6.845012670244705, "learning_rate": 6.795338802789244e-07, "loss": 0.5732, "step": 21228 }, { "epoch": 1.5336933552477108, "grad_norm": 6.120652816461509, "learning_rate": 6.793334177188815e-07, "loss": 0.6008, "step": 21229 }, { "epoch": 1.53376560044792, "grad_norm": 7.611039686525309, "learning_rate": 6.791329800823504e-07, "loss": 0.6668, "step": 21230 }, { "epoch": 1.5338378456481299, "grad_norm": 8.259963521438111, "learning_rate": 6.789325673720759e-07, "loss": 0.6257, "step": 21231 }, { "epoch": 1.5339100908483392, "grad_norm": 6.640164563541193, "learning_rate": 6.787321795907998e-07, "loss": 0.5429, "step": 21232 }, { "epoch": 1.5339823360485487, "grad_norm": 6.809181735135258, "learning_rate": 6.785318167412658e-07, "loss": 0.608, "step": 21233 }, { "epoch": 1.5340545812487583, "grad_norm": 6.456111337034965, "learning_rate": 6.783314788262171e-07, "loss": 0.6541, "step": 21234 }, { "epoch": 1.5341268264489678, "grad_norm": 7.1893377193773365, "learning_rate": 6.78131165848396e-07, "loss": 0.6017, "step": 21235 }, { "epoch": 1.5341990716491773, "grad_norm": 8.644946047436383, "learning_rate": 6.779308778105446e-07, "loss": 0.7319, "step": 21236 }, { "epoch": 1.5342713168493867, "grad_norm": 8.113463207366207, "learning_rate": 6.777306147154047e-07, "loss": 0.6017, "step": 21237 }, { "epoch": 1.5343435620495964, "grad_norm": 6.247337995110116, "learning_rate": 6.775303765657187e-07, "loss": 0.5725, "step": 21238 }, { "epoch": 1.5344158072498058, "grad_norm": 8.094397088183369, "learning_rate": 6.77330163364226e-07, "loss": 0.5919, "step": 21239 }, { "epoch": 1.5344880524500155, "grad_norm": 6.970573973481336, "learning_rate": 6.771299751136682e-07, "loss": 0.571, "step": 21240 }, { "epoch": 1.5345602976502248, "grad_norm": 6.209971917867514, "learning_rate": 6.769298118167855e-07, "loss": 0.6649, "step": 21241 }, { "epoch": 1.5346325428504344, "grad_norm": 6.929583457329152, "learning_rate": 6.76729673476319e-07, "loss": 0.6338, "step": 21242 }, { "epoch": 1.534704788050644, "grad_norm": 7.7847385170008225, "learning_rate": 6.76529560095007e-07, "loss": 0.6854, "step": 21243 }, { "epoch": 1.5347770332508532, "grad_norm": 6.837102858256907, "learning_rate": 6.763294716755892e-07, "loss": 0.5667, "step": 21244 }, { "epoch": 1.534849278451063, "grad_norm": 7.408908029012983, "learning_rate": 6.761294082208053e-07, "loss": 0.5706, "step": 21245 }, { "epoch": 1.5349215236512723, "grad_norm": 7.607205498768144, "learning_rate": 6.759293697333932e-07, "loss": 0.6538, "step": 21246 }, { "epoch": 1.534993768851482, "grad_norm": 7.738274102012726, "learning_rate": 6.757293562160921e-07, "loss": 0.6087, "step": 21247 }, { "epoch": 1.5350660140516914, "grad_norm": 7.020876951299118, "learning_rate": 6.755293676716396e-07, "loss": 0.5764, "step": 21248 }, { "epoch": 1.535138259251901, "grad_norm": 7.289032669251659, "learning_rate": 6.753294041027742e-07, "loss": 0.5378, "step": 21249 }, { "epoch": 1.5352105044521105, "grad_norm": 6.22703085254324, "learning_rate": 6.751294655122315e-07, "loss": 0.5912, "step": 21250 }, { "epoch": 1.5352827496523198, "grad_norm": 6.838013635622339, "learning_rate": 6.749295519027496e-07, "loss": 0.6203, "step": 21251 }, { "epoch": 1.5353549948525296, "grad_norm": 7.380964550471198, "learning_rate": 6.747296632770659e-07, "loss": 0.5592, "step": 21252 }, { "epoch": 1.535427240052739, "grad_norm": 7.766104669934907, "learning_rate": 6.745297996379152e-07, "loss": 0.5993, "step": 21253 }, { "epoch": 1.5354994852529487, "grad_norm": 7.345796600209564, "learning_rate": 6.743299609880332e-07, "loss": 0.5946, "step": 21254 }, { "epoch": 1.535571730453158, "grad_norm": 6.535177847580869, "learning_rate": 6.741301473301573e-07, "loss": 0.5667, "step": 21255 }, { "epoch": 1.5356439756533675, "grad_norm": 7.358160935222388, "learning_rate": 6.739303586670229e-07, "loss": 0.597, "step": 21256 }, { "epoch": 1.535716220853577, "grad_norm": 7.0428430902544825, "learning_rate": 6.737305950013631e-07, "loss": 0.6979, "step": 21257 }, { "epoch": 1.5357884660537866, "grad_norm": 8.113532792050314, "learning_rate": 6.735308563359136e-07, "loss": 0.6232, "step": 21258 }, { "epoch": 1.5358607112539961, "grad_norm": 7.947930878859412, "learning_rate": 6.733311426734085e-07, "loss": 0.6483, "step": 21259 }, { "epoch": 1.5359329564542055, "grad_norm": 6.455063055485436, "learning_rate": 6.731314540165823e-07, "loss": 0.6398, "step": 21260 }, { "epoch": 1.5360052016544152, "grad_norm": 6.976795700977192, "learning_rate": 6.729317903681673e-07, "loss": 0.6568, "step": 21261 }, { "epoch": 1.5360774468546246, "grad_norm": 7.257149426872447, "learning_rate": 6.727321517308977e-07, "loss": 0.6295, "step": 21262 }, { "epoch": 1.536149692054834, "grad_norm": 6.668862871181931, "learning_rate": 6.72532538107506e-07, "loss": 0.6831, "step": 21263 }, { "epoch": 1.5362219372550436, "grad_norm": 7.097636363999385, "learning_rate": 6.72332949500725e-07, "loss": 0.622, "step": 21264 }, { "epoch": 1.5362941824552532, "grad_norm": 7.338557838727153, "learning_rate": 6.721333859132867e-07, "loss": 0.6194, "step": 21265 }, { "epoch": 1.5363664276554627, "grad_norm": 6.85330327711429, "learning_rate": 6.719338473479231e-07, "loss": 0.5755, "step": 21266 }, { "epoch": 1.536438672855672, "grad_norm": 6.092333506742145, "learning_rate": 6.717343338073667e-07, "loss": 0.5507, "step": 21267 }, { "epoch": 1.5365109180558818, "grad_norm": 6.340869569080399, "learning_rate": 6.715348452943468e-07, "loss": 0.5862, "step": 21268 }, { "epoch": 1.5365831632560911, "grad_norm": 7.759834694428953, "learning_rate": 6.71335381811595e-07, "loss": 0.5645, "step": 21269 }, { "epoch": 1.5366554084563007, "grad_norm": 7.012347910926621, "learning_rate": 6.711359433618431e-07, "loss": 0.6167, "step": 21270 }, { "epoch": 1.5367276536565102, "grad_norm": 8.329379504335607, "learning_rate": 6.70936529947819e-07, "loss": 0.691, "step": 21271 }, { "epoch": 1.5367998988567197, "grad_norm": 7.018073863125909, "learning_rate": 6.707371415722538e-07, "loss": 0.642, "step": 21272 }, { "epoch": 1.5368721440569293, "grad_norm": 6.672408142944, "learning_rate": 6.705377782378769e-07, "loss": 0.6342, "step": 21273 }, { "epoch": 1.5369443892571386, "grad_norm": 6.860002840892326, "learning_rate": 6.703384399474172e-07, "loss": 0.5897, "step": 21274 }, { "epoch": 1.5370166344573484, "grad_norm": 7.114148358908365, "learning_rate": 6.701391267036034e-07, "loss": 0.591, "step": 21275 }, { "epoch": 1.5370888796575577, "grad_norm": 7.193701680780926, "learning_rate": 6.699398385091647e-07, "loss": 0.6133, "step": 21276 }, { "epoch": 1.5371611248577672, "grad_norm": 6.640641658986457, "learning_rate": 6.697405753668287e-07, "loss": 0.6038, "step": 21277 }, { "epoch": 1.5372333700579768, "grad_norm": 5.7427106346523855, "learning_rate": 6.69541337279323e-07, "loss": 0.553, "step": 21278 }, { "epoch": 1.5373056152581863, "grad_norm": 7.663372603696491, "learning_rate": 6.693421242493748e-07, "loss": 0.5906, "step": 21279 }, { "epoch": 1.5373778604583959, "grad_norm": 6.401961216836213, "learning_rate": 6.691429362797116e-07, "loss": 0.5607, "step": 21280 }, { "epoch": 1.5374501056586052, "grad_norm": 7.245297189202336, "learning_rate": 6.689437733730609e-07, "loss": 0.6179, "step": 21281 }, { "epoch": 1.537522350858815, "grad_norm": 5.938291798047631, "learning_rate": 6.687446355321464e-07, "loss": 0.5301, "step": 21282 }, { "epoch": 1.5375945960590243, "grad_norm": 7.451871946552256, "learning_rate": 6.685455227596971e-07, "loss": 0.6227, "step": 21283 }, { "epoch": 1.5376668412592338, "grad_norm": 6.5187685036328045, "learning_rate": 6.683464350584373e-07, "loss": 0.6387, "step": 21284 }, { "epoch": 1.5377390864594433, "grad_norm": 6.359467076940138, "learning_rate": 6.681473724310933e-07, "loss": 0.5974, "step": 21285 }, { "epoch": 1.537811331659653, "grad_norm": 7.255737927987661, "learning_rate": 6.679483348803889e-07, "loss": 0.6292, "step": 21286 }, { "epoch": 1.5378835768598624, "grad_norm": 7.645337607574244, "learning_rate": 6.677493224090492e-07, "loss": 0.6966, "step": 21287 }, { "epoch": 1.5379558220600718, "grad_norm": 7.093918852960105, "learning_rate": 6.675503350197995e-07, "loss": 0.5815, "step": 21288 }, { "epoch": 1.5380280672602815, "grad_norm": 6.383398171569503, "learning_rate": 6.673513727153618e-07, "loss": 0.5503, "step": 21289 }, { "epoch": 1.5381003124604908, "grad_norm": 7.420268637840199, "learning_rate": 6.671524354984613e-07, "loss": 0.5556, "step": 21290 }, { "epoch": 1.5381725576607004, "grad_norm": 6.398159370385941, "learning_rate": 6.669535233718205e-07, "loss": 0.6799, "step": 21291 }, { "epoch": 1.53824480286091, "grad_norm": 6.992228144522233, "learning_rate": 6.66754636338163e-07, "loss": 0.6982, "step": 21292 }, { "epoch": 1.5383170480611195, "grad_norm": 8.606264857637411, "learning_rate": 6.66555774400211e-07, "loss": 0.578, "step": 21293 }, { "epoch": 1.538389293261329, "grad_norm": 6.436749164533981, "learning_rate": 6.663569375606868e-07, "loss": 0.6266, "step": 21294 }, { "epoch": 1.5384615384615383, "grad_norm": 6.3017653035607495, "learning_rate": 6.661581258223129e-07, "loss": 0.6029, "step": 21295 }, { "epoch": 1.538533783661748, "grad_norm": 7.307594153482507, "learning_rate": 6.6595933918781e-07, "loss": 0.6491, "step": 21296 }, { "epoch": 1.5386060288619574, "grad_norm": 7.4415884881720595, "learning_rate": 6.657605776598996e-07, "loss": 0.681, "step": 21297 }, { "epoch": 1.538678274062167, "grad_norm": 7.189490266645436, "learning_rate": 6.655618412413025e-07, "loss": 0.5961, "step": 21298 }, { "epoch": 1.5387505192623765, "grad_norm": 7.2070640666597034, "learning_rate": 6.653631299347404e-07, "loss": 0.5902, "step": 21299 }, { "epoch": 1.538822764462586, "grad_norm": 6.568634653575901, "learning_rate": 6.651644437429319e-07, "loss": 0.5989, "step": 21300 }, { "epoch": 1.5388950096627956, "grad_norm": 7.488275646352885, "learning_rate": 6.649657826685974e-07, "loss": 0.6024, "step": 21301 }, { "epoch": 1.538967254863005, "grad_norm": 5.814164118300347, "learning_rate": 6.647671467144564e-07, "loss": 0.5868, "step": 21302 }, { "epoch": 1.5390395000632147, "grad_norm": 8.138091353861117, "learning_rate": 6.645685358832285e-07, "loss": 0.6196, "step": 21303 }, { "epoch": 1.539111745263424, "grad_norm": 6.338203899711369, "learning_rate": 6.643699501776318e-07, "loss": 0.5956, "step": 21304 }, { "epoch": 1.5391839904636335, "grad_norm": 7.168919438402599, "learning_rate": 6.641713896003856e-07, "loss": 0.661, "step": 21305 }, { "epoch": 1.539256235663843, "grad_norm": 7.085412591672998, "learning_rate": 6.639728541542082e-07, "loss": 0.6161, "step": 21306 }, { "epoch": 1.5393284808640526, "grad_norm": 6.842193844642668, "learning_rate": 6.637743438418162e-07, "loss": 0.571, "step": 21307 }, { "epoch": 1.5394007260642621, "grad_norm": 7.161208121812689, "learning_rate": 6.635758586659274e-07, "loss": 0.6199, "step": 21308 }, { "epoch": 1.5394729712644715, "grad_norm": 6.872860939869168, "learning_rate": 6.633773986292597e-07, "loss": 0.5454, "step": 21309 }, { "epoch": 1.5395452164646812, "grad_norm": 7.328906052209916, "learning_rate": 6.63178963734529e-07, "loss": 0.6524, "step": 21310 }, { "epoch": 1.5396174616648906, "grad_norm": 5.938371453831567, "learning_rate": 6.629805539844522e-07, "loss": 0.669, "step": 21311 }, { "epoch": 1.5396897068651003, "grad_norm": 7.601481955749417, "learning_rate": 6.627821693817452e-07, "loss": 0.624, "step": 21312 }, { "epoch": 1.5397619520653096, "grad_norm": 7.628847245995582, "learning_rate": 6.625838099291246e-07, "loss": 0.5764, "step": 21313 }, { "epoch": 1.5398341972655192, "grad_norm": 6.989890291283525, "learning_rate": 6.623854756293041e-07, "loss": 0.5596, "step": 21314 }, { "epoch": 1.5399064424657287, "grad_norm": 7.191634075611066, "learning_rate": 6.62187166485e-07, "loss": 0.6742, "step": 21315 }, { "epoch": 1.539978687665938, "grad_norm": 7.254319318391861, "learning_rate": 6.619888824989262e-07, "loss": 0.6807, "step": 21316 }, { "epoch": 1.5400509328661478, "grad_norm": 8.061927479369755, "learning_rate": 6.617906236737983e-07, "loss": 0.6519, "step": 21317 }, { "epoch": 1.5401231780663571, "grad_norm": 7.325988067542253, "learning_rate": 6.615923900123289e-07, "loss": 0.6088, "step": 21318 }, { "epoch": 1.5401954232665669, "grad_norm": 7.759460581485352, "learning_rate": 6.61394181517232e-07, "loss": 0.5874, "step": 21319 }, { "epoch": 1.5402676684667762, "grad_norm": 6.502942812932579, "learning_rate": 6.611959981912214e-07, "loss": 0.6397, "step": 21320 }, { "epoch": 1.5403399136669857, "grad_norm": 7.338735873430158, "learning_rate": 6.609978400370098e-07, "loss": 0.5903, "step": 21321 }, { "epoch": 1.5404121588671953, "grad_norm": 7.200886078458776, "learning_rate": 6.607997070573099e-07, "loss": 0.6525, "step": 21322 }, { "epoch": 1.5404844040674046, "grad_norm": 6.61875650119192, "learning_rate": 6.606015992548339e-07, "loss": 0.5895, "step": 21323 }, { "epoch": 1.5405566492676144, "grad_norm": 7.191876215292763, "learning_rate": 6.604035166322944e-07, "loss": 0.731, "step": 21324 }, { "epoch": 1.5406288944678237, "grad_norm": 8.431172124391322, "learning_rate": 6.60205459192402e-07, "loss": 0.6072, "step": 21325 }, { "epoch": 1.5407011396680335, "grad_norm": 6.982860289995056, "learning_rate": 6.600074269378681e-07, "loss": 0.5735, "step": 21326 }, { "epoch": 1.5407733848682428, "grad_norm": 7.5441060485505735, "learning_rate": 6.598094198714047e-07, "loss": 0.6072, "step": 21327 }, { "epoch": 1.5408456300684523, "grad_norm": 8.145314335854867, "learning_rate": 6.596114379957208e-07, "loss": 0.6238, "step": 21328 }, { "epoch": 1.5409178752686619, "grad_norm": 8.265655459832818, "learning_rate": 6.594134813135275e-07, "loss": 0.6352, "step": 21329 }, { "epoch": 1.5409901204688712, "grad_norm": 5.937966739981536, "learning_rate": 6.592155498275343e-07, "loss": 0.5676, "step": 21330 }, { "epoch": 1.541062365669081, "grad_norm": 6.838496451846987, "learning_rate": 6.590176435404508e-07, "loss": 0.5852, "step": 21331 }, { "epoch": 1.5411346108692903, "grad_norm": 7.043494655039244, "learning_rate": 6.588197624549867e-07, "loss": 0.567, "step": 21332 }, { "epoch": 1.5412068560695, "grad_norm": 7.496534691385773, "learning_rate": 6.586219065738503e-07, "loss": 0.6319, "step": 21333 }, { "epoch": 1.5412791012697094, "grad_norm": 8.05159666919176, "learning_rate": 6.584240758997501e-07, "loss": 0.6061, "step": 21334 }, { "epoch": 1.541351346469919, "grad_norm": 5.93280518284955, "learning_rate": 6.582262704353954e-07, "loss": 0.642, "step": 21335 }, { "epoch": 1.5414235916701284, "grad_norm": 6.298436276571296, "learning_rate": 6.580284901834921e-07, "loss": 0.6073, "step": 21336 }, { "epoch": 1.541495836870338, "grad_norm": 6.332379152129029, "learning_rate": 6.578307351467486e-07, "loss": 0.6483, "step": 21337 }, { "epoch": 1.5415680820705475, "grad_norm": 7.7790406549099735, "learning_rate": 6.576330053278721e-07, "loss": 0.6146, "step": 21338 }, { "epoch": 1.5416403272707568, "grad_norm": 6.237136726274031, "learning_rate": 6.574353007295692e-07, "loss": 0.6278, "step": 21339 }, { "epoch": 1.5417125724709666, "grad_norm": 7.603629761296574, "learning_rate": 6.572376213545462e-07, "loss": 0.6383, "step": 21340 }, { "epoch": 1.541784817671176, "grad_norm": 7.653652201975773, "learning_rate": 6.570399672055094e-07, "loss": 0.6451, "step": 21341 }, { "epoch": 1.5418570628713855, "grad_norm": 6.516403042013057, "learning_rate": 6.568423382851655e-07, "loss": 0.6192, "step": 21342 }, { "epoch": 1.541929308071595, "grad_norm": 6.697597118327127, "learning_rate": 6.566447345962179e-07, "loss": 0.6385, "step": 21343 }, { "epoch": 1.5420015532718045, "grad_norm": 8.171381102127775, "learning_rate": 6.564471561413727e-07, "loss": 0.6599, "step": 21344 }, { "epoch": 1.542073798472014, "grad_norm": 6.311656933375649, "learning_rate": 6.562496029233351e-07, "loss": 0.6368, "step": 21345 }, { "epoch": 1.5421460436722234, "grad_norm": 6.655717568750981, "learning_rate": 6.560520749448082e-07, "loss": 0.637, "step": 21346 }, { "epoch": 1.5422182888724332, "grad_norm": 6.703970048896691, "learning_rate": 6.55854572208497e-07, "loss": 0.6472, "step": 21347 }, { "epoch": 1.5422905340726425, "grad_norm": 7.620049292137132, "learning_rate": 6.556570947171043e-07, "loss": 0.5871, "step": 21348 }, { "epoch": 1.542362779272852, "grad_norm": 6.980927779395287, "learning_rate": 6.554596424733342e-07, "loss": 0.5821, "step": 21349 }, { "epoch": 1.5424350244730616, "grad_norm": 8.127144867886463, "learning_rate": 6.552622154798893e-07, "loss": 0.6667, "step": 21350 }, { "epoch": 1.5425072696732711, "grad_norm": 6.3748997699113685, "learning_rate": 6.550648137394725e-07, "loss": 0.581, "step": 21351 }, { "epoch": 1.5425795148734807, "grad_norm": 6.308517559524434, "learning_rate": 6.548674372547865e-07, "loss": 0.5303, "step": 21352 }, { "epoch": 1.54265176007369, "grad_norm": 6.549709680544174, "learning_rate": 6.546700860285318e-07, "loss": 0.622, "step": 21353 }, { "epoch": 1.5427240052738997, "grad_norm": 8.185785711067663, "learning_rate": 6.544727600634107e-07, "loss": 0.5974, "step": 21354 }, { "epoch": 1.542796250474109, "grad_norm": 6.899782768921119, "learning_rate": 6.542754593621247e-07, "loss": 0.6523, "step": 21355 }, { "epoch": 1.5428684956743186, "grad_norm": 6.874536394613759, "learning_rate": 6.540781839273755e-07, "loss": 0.7182, "step": 21356 }, { "epoch": 1.5429407408745281, "grad_norm": 8.10359635753048, "learning_rate": 6.538809337618617e-07, "loss": 0.653, "step": 21357 }, { "epoch": 1.5430129860747377, "grad_norm": 7.014611117839783, "learning_rate": 6.536837088682849e-07, "loss": 0.5995, "step": 21358 }, { "epoch": 1.5430852312749472, "grad_norm": 6.3661074980331325, "learning_rate": 6.534865092493442e-07, "loss": 0.6101, "step": 21359 }, { "epoch": 1.5431574764751566, "grad_norm": 7.6825478583253215, "learning_rate": 6.532893349077393e-07, "loss": 0.6173, "step": 21360 }, { "epoch": 1.5432297216753663, "grad_norm": 9.331891856823107, "learning_rate": 6.5309218584617e-07, "loss": 0.6579, "step": 21361 }, { "epoch": 1.5433019668755756, "grad_norm": 6.406984463899869, "learning_rate": 6.528950620673344e-07, "loss": 0.5237, "step": 21362 }, { "epoch": 1.5433742120757852, "grad_norm": 7.641911497808126, "learning_rate": 6.52697963573932e-07, "loss": 0.6544, "step": 21363 }, { "epoch": 1.5434464572759947, "grad_norm": 7.7618239251178105, "learning_rate": 6.525008903686592e-07, "loss": 0.6078, "step": 21364 }, { "epoch": 1.5435187024762043, "grad_norm": 6.5920558398205715, "learning_rate": 6.52303842454215e-07, "loss": 0.561, "step": 21365 }, { "epoch": 1.5435909476764138, "grad_norm": 6.9651397796209515, "learning_rate": 6.521068198332963e-07, "loss": 0.6518, "step": 21366 }, { "epoch": 1.5436631928766231, "grad_norm": 8.109284681577963, "learning_rate": 6.519098225086007e-07, "loss": 0.5869, "step": 21367 }, { "epoch": 1.5437354380768329, "grad_norm": 7.123686284538194, "learning_rate": 6.517128504828243e-07, "loss": 0.6747, "step": 21368 }, { "epoch": 1.5438076832770422, "grad_norm": 7.15009532944945, "learning_rate": 6.515159037586643e-07, "loss": 0.6199, "step": 21369 }, { "epoch": 1.5438799284772518, "grad_norm": 7.070760032020598, "learning_rate": 6.513189823388169e-07, "loss": 0.644, "step": 21370 }, { "epoch": 1.5439521736774613, "grad_norm": 7.0441686334911475, "learning_rate": 6.511220862259765e-07, "loss": 0.6615, "step": 21371 }, { "epoch": 1.5440244188776708, "grad_norm": 7.048616790351605, "learning_rate": 6.50925215422839e-07, "loss": 0.5923, "step": 21372 }, { "epoch": 1.5440966640778804, "grad_norm": 6.774874876774276, "learning_rate": 6.507283699320999e-07, "loss": 0.5406, "step": 21373 }, { "epoch": 1.5441689092780897, "grad_norm": 7.275007854863806, "learning_rate": 6.50531549756454e-07, "loss": 0.6844, "step": 21374 }, { "epoch": 1.5442411544782995, "grad_norm": 7.9360936638626915, "learning_rate": 6.503347548985944e-07, "loss": 0.611, "step": 21375 }, { "epoch": 1.5443133996785088, "grad_norm": 8.045136436125798, "learning_rate": 6.501379853612161e-07, "loss": 0.602, "step": 21376 }, { "epoch": 1.5443856448787183, "grad_norm": 6.689415773162143, "learning_rate": 6.499412411470124e-07, "loss": 0.6689, "step": 21377 }, { "epoch": 1.5444578900789279, "grad_norm": 7.382339446236734, "learning_rate": 6.497445222586765e-07, "loss": 0.6602, "step": 21378 }, { "epoch": 1.5445301352791374, "grad_norm": 7.409465108039572, "learning_rate": 6.495478286989015e-07, "loss": 0.5877, "step": 21379 }, { "epoch": 1.544602380479347, "grad_norm": 8.349525081811352, "learning_rate": 6.4935116047038e-07, "loss": 0.5607, "step": 21380 }, { "epoch": 1.5446746256795563, "grad_norm": 7.111968051748531, "learning_rate": 6.491545175758049e-07, "loss": 0.6823, "step": 21381 }, { "epoch": 1.544746870879766, "grad_norm": 6.029028606965098, "learning_rate": 6.489579000178667e-07, "loss": 0.6352, "step": 21382 }, { "epoch": 1.5448191160799754, "grad_norm": 6.20409379979937, "learning_rate": 6.487613077992574e-07, "loss": 0.5971, "step": 21383 }, { "epoch": 1.544891361280185, "grad_norm": 6.91957401926985, "learning_rate": 6.485647409226692e-07, "loss": 0.6133, "step": 21384 }, { "epoch": 1.5449636064803944, "grad_norm": 8.2253832139368, "learning_rate": 6.483681993907914e-07, "loss": 0.6655, "step": 21385 }, { "epoch": 1.545035851680604, "grad_norm": 8.032210355701734, "learning_rate": 6.481716832063154e-07, "loss": 0.5975, "step": 21386 }, { "epoch": 1.5451080968808135, "grad_norm": 7.232582216187799, "learning_rate": 6.479751923719305e-07, "loss": 0.583, "step": 21387 }, { "epoch": 1.5451803420810228, "grad_norm": 6.193090514209162, "learning_rate": 6.477787268903285e-07, "loss": 0.6263, "step": 21388 }, { "epoch": 1.5452525872812326, "grad_norm": 7.929521530744649, "learning_rate": 6.47582286764197e-07, "loss": 0.7335, "step": 21389 }, { "epoch": 1.545324832481442, "grad_norm": 6.951652593836979, "learning_rate": 6.473858719962256e-07, "loss": 0.6501, "step": 21390 }, { "epoch": 1.5453970776816517, "grad_norm": 7.6258476911772375, "learning_rate": 6.471894825891031e-07, "loss": 0.5389, "step": 21391 }, { "epoch": 1.545469322881861, "grad_norm": 7.036618501735393, "learning_rate": 6.469931185455186e-07, "loss": 0.6459, "step": 21392 }, { "epoch": 1.5455415680820705, "grad_norm": 6.0854498260162275, "learning_rate": 6.467967798681591e-07, "loss": 0.628, "step": 21393 }, { "epoch": 1.54561381328228, "grad_norm": 7.992383669238396, "learning_rate": 6.466004665597126e-07, "loss": 0.6527, "step": 21394 }, { "epoch": 1.5456860584824894, "grad_norm": 7.284931087338138, "learning_rate": 6.464041786228667e-07, "loss": 0.6276, "step": 21395 }, { "epoch": 1.5457583036826992, "grad_norm": 7.6903001601195236, "learning_rate": 6.462079160603083e-07, "loss": 0.6008, "step": 21396 }, { "epoch": 1.5458305488829085, "grad_norm": 7.328610402117539, "learning_rate": 6.460116788747242e-07, "loss": 0.5371, "step": 21397 }, { "epoch": 1.5459027940831183, "grad_norm": 6.81383984190363, "learning_rate": 6.458154670688008e-07, "loss": 0.5869, "step": 21398 }, { "epoch": 1.5459750392833276, "grad_norm": 7.697540425426437, "learning_rate": 6.456192806452243e-07, "loss": 0.7043, "step": 21399 }, { "epoch": 1.5460472844835371, "grad_norm": 7.641604994508016, "learning_rate": 6.454231196066796e-07, "loss": 0.5892, "step": 21400 }, { "epoch": 1.5461195296837467, "grad_norm": 7.7972514763034795, "learning_rate": 6.452269839558526e-07, "loss": 0.6424, "step": 21401 }, { "epoch": 1.546191774883956, "grad_norm": 7.899862853199746, "learning_rate": 6.450308736954283e-07, "loss": 0.5573, "step": 21402 }, { "epoch": 1.5462640200841657, "grad_norm": 6.5132724530834505, "learning_rate": 6.448347888280907e-07, "loss": 0.5905, "step": 21403 }, { "epoch": 1.546336265284375, "grad_norm": 7.177123698511838, "learning_rate": 6.446387293565242e-07, "loss": 0.6131, "step": 21404 }, { "epoch": 1.5464085104845848, "grad_norm": 6.409339420363063, "learning_rate": 6.444426952834129e-07, "loss": 0.5836, "step": 21405 }, { "epoch": 1.5464807556847942, "grad_norm": 7.083142655741997, "learning_rate": 6.442466866114405e-07, "loss": 0.5893, "step": 21406 }, { "epoch": 1.5465530008850037, "grad_norm": 7.4886587223914285, "learning_rate": 6.4405070334329e-07, "loss": 0.6121, "step": 21407 }, { "epoch": 1.5466252460852132, "grad_norm": 6.43597156935174, "learning_rate": 6.438547454816444e-07, "loss": 0.5858, "step": 21408 }, { "epoch": 1.5466974912854228, "grad_norm": 6.559869729586147, "learning_rate": 6.436588130291868e-07, "loss": 0.6439, "step": 21409 }, { "epoch": 1.5467697364856323, "grad_norm": 7.420872927829065, "learning_rate": 6.434629059885977e-07, "loss": 0.6837, "step": 21410 }, { "epoch": 1.5468419816858416, "grad_norm": 7.0018697693783185, "learning_rate": 6.432670243625602e-07, "loss": 0.5943, "step": 21411 }, { "epoch": 1.5469142268860514, "grad_norm": 6.932698280643653, "learning_rate": 6.430711681537555e-07, "loss": 0.6312, "step": 21412 }, { "epoch": 1.5469864720862607, "grad_norm": 7.572919745009547, "learning_rate": 6.428753373648653e-07, "loss": 0.6113, "step": 21413 }, { "epoch": 1.5470587172864703, "grad_norm": 6.738207936648272, "learning_rate": 6.42679531998569e-07, "loss": 0.5871, "step": 21414 }, { "epoch": 1.5471309624866798, "grad_norm": 7.026528634025153, "learning_rate": 6.424837520575472e-07, "loss": 0.5376, "step": 21415 }, { "epoch": 1.5472032076868893, "grad_norm": 6.586898353507028, "learning_rate": 6.422879975444812e-07, "loss": 0.6271, "step": 21416 }, { "epoch": 1.547275452887099, "grad_norm": 6.561935255182947, "learning_rate": 6.420922684620509e-07, "loss": 0.5915, "step": 21417 }, { "epoch": 1.5473476980873082, "grad_norm": 7.626844589496628, "learning_rate": 6.418965648129341e-07, "loss": 0.6218, "step": 21418 }, { "epoch": 1.547419943287518, "grad_norm": 6.646486096826107, "learning_rate": 6.417008865998106e-07, "loss": 0.6325, "step": 21419 }, { "epoch": 1.5474921884877273, "grad_norm": 7.135055941260174, "learning_rate": 6.415052338253596e-07, "loss": 0.6296, "step": 21420 }, { "epoch": 1.5475644336879368, "grad_norm": 6.1874873613941315, "learning_rate": 6.413096064922586e-07, "loss": 0.5367, "step": 21421 }, { "epoch": 1.5476366788881464, "grad_norm": 6.796342920984003, "learning_rate": 6.411140046031858e-07, "loss": 0.6619, "step": 21422 }, { "epoch": 1.547708924088356, "grad_norm": 7.314597847783888, "learning_rate": 6.409184281608189e-07, "loss": 0.6259, "step": 21423 }, { "epoch": 1.5477811692885655, "grad_norm": 7.089400113100053, "learning_rate": 6.407228771678351e-07, "loss": 0.5783, "step": 21424 }, { "epoch": 1.5478534144887748, "grad_norm": 6.829660622695055, "learning_rate": 6.405273516269115e-07, "loss": 0.5945, "step": 21425 }, { "epoch": 1.5479256596889845, "grad_norm": 8.071399597038802, "learning_rate": 6.403318515407247e-07, "loss": 0.577, "step": 21426 }, { "epoch": 1.5479979048891939, "grad_norm": 7.0156119501549, "learning_rate": 6.401363769119517e-07, "loss": 0.6517, "step": 21427 }, { "epoch": 1.5480701500894034, "grad_norm": 6.410107153891051, "learning_rate": 6.39940927743267e-07, "loss": 0.6288, "step": 21428 }, { "epoch": 1.548142395289613, "grad_norm": 7.46181149781053, "learning_rate": 6.397455040373465e-07, "loss": 0.5831, "step": 21429 }, { "epoch": 1.5482146404898225, "grad_norm": 6.860506629141619, "learning_rate": 6.395501057968659e-07, "loss": 0.6232, "step": 21430 }, { "epoch": 1.548286885690032, "grad_norm": 6.87881835326794, "learning_rate": 6.393547330245003e-07, "loss": 0.6336, "step": 21431 }, { "epoch": 1.5483591308902414, "grad_norm": 8.150376349810767, "learning_rate": 6.39159385722923e-07, "loss": 0.6996, "step": 21432 }, { "epoch": 1.5484313760904511, "grad_norm": 6.63033727464058, "learning_rate": 6.389640638948091e-07, "loss": 0.6508, "step": 21433 }, { "epoch": 1.5485036212906604, "grad_norm": 7.362742437182566, "learning_rate": 6.38768767542832e-07, "loss": 0.5785, "step": 21434 }, { "epoch": 1.54857586649087, "grad_norm": 8.935486039751927, "learning_rate": 6.385734966696652e-07, "loss": 0.6269, "step": 21435 }, { "epoch": 1.5486481116910795, "grad_norm": 6.817858067542209, "learning_rate": 6.383782512779821e-07, "loss": 0.5438, "step": 21436 }, { "epoch": 1.548720356891289, "grad_norm": 6.816875487520182, "learning_rate": 6.381830313704554e-07, "loss": 0.6708, "step": 21437 }, { "epoch": 1.5487926020914986, "grad_norm": 5.894086635194939, "learning_rate": 6.379878369497577e-07, "loss": 0.5743, "step": 21438 }, { "epoch": 1.548864847291708, "grad_norm": 7.907243146074527, "learning_rate": 6.377926680185606e-07, "loss": 0.6328, "step": 21439 }, { "epoch": 1.5489370924919177, "grad_norm": 7.2074784964546, "learning_rate": 6.375975245795355e-07, "loss": 0.6141, "step": 21440 }, { "epoch": 1.549009337692127, "grad_norm": 6.260018520632515, "learning_rate": 6.374024066353543e-07, "loss": 0.6059, "step": 21441 }, { "epoch": 1.5490815828923366, "grad_norm": 6.558813312938142, "learning_rate": 6.37207314188689e-07, "loss": 0.5831, "step": 21442 }, { "epoch": 1.549153828092546, "grad_norm": 7.038352803516429, "learning_rate": 6.37012247242208e-07, "loss": 0.5764, "step": 21443 }, { "epoch": 1.5492260732927556, "grad_norm": 7.658548313758288, "learning_rate": 6.368172057985825e-07, "loss": 0.6014, "step": 21444 }, { "epoch": 1.5492983184929652, "grad_norm": 7.577222129285455, "learning_rate": 6.366221898604838e-07, "loss": 0.6083, "step": 21445 }, { "epoch": 1.5493705636931745, "grad_norm": 6.679012152902781, "learning_rate": 6.364271994305801e-07, "loss": 0.6802, "step": 21446 }, { "epoch": 1.5494428088933843, "grad_norm": 6.929346465413902, "learning_rate": 6.36232234511541e-07, "loss": 0.6003, "step": 21447 }, { "epoch": 1.5495150540935936, "grad_norm": 6.848797891765814, "learning_rate": 6.360372951060356e-07, "loss": 0.5701, "step": 21448 }, { "epoch": 1.5495872992938031, "grad_norm": 9.449736285692877, "learning_rate": 6.35842381216733e-07, "loss": 0.7646, "step": 21449 }, { "epoch": 1.5496595444940127, "grad_norm": 7.9945750916478415, "learning_rate": 6.356474928463e-07, "loss": 0.572, "step": 21450 }, { "epoch": 1.5497317896942222, "grad_norm": 6.7047586651067625, "learning_rate": 6.354526299974051e-07, "loss": 0.5908, "step": 21451 }, { "epoch": 1.5498040348944317, "grad_norm": 7.074083669248017, "learning_rate": 6.352577926727163e-07, "loss": 0.6338, "step": 21452 }, { "epoch": 1.549876280094641, "grad_norm": 7.607215026472196, "learning_rate": 6.350629808749e-07, "loss": 0.5927, "step": 21453 }, { "epoch": 1.5499485252948508, "grad_norm": 6.455895962752557, "learning_rate": 6.34868194606624e-07, "loss": 0.662, "step": 21454 }, { "epoch": 1.5500207704950602, "grad_norm": 6.801638562867963, "learning_rate": 6.346734338705537e-07, "loss": 0.6234, "step": 21455 }, { "epoch": 1.5500930156952697, "grad_norm": 6.170737632543414, "learning_rate": 6.344786986693568e-07, "loss": 0.6399, "step": 21456 }, { "epoch": 1.5501652608954792, "grad_norm": 7.940445481109769, "learning_rate": 6.342839890056973e-07, "loss": 0.6314, "step": 21457 }, { "epoch": 1.5502375060956888, "grad_norm": 7.344691179715498, "learning_rate": 6.340893048822413e-07, "loss": 0.6105, "step": 21458 }, { "epoch": 1.5503097512958983, "grad_norm": 7.00562360211019, "learning_rate": 6.338946463016548e-07, "loss": 0.6261, "step": 21459 }, { "epoch": 1.5503819964961076, "grad_norm": 8.775128277224116, "learning_rate": 6.337000132666008e-07, "loss": 0.6405, "step": 21460 }, { "epoch": 1.5504542416963174, "grad_norm": 7.498923669512304, "learning_rate": 6.335054057797446e-07, "loss": 0.5703, "step": 21461 }, { "epoch": 1.5505264868965267, "grad_norm": 7.3912526160573755, "learning_rate": 6.3331082384375e-07, "loss": 0.6495, "step": 21462 }, { "epoch": 1.5505987320967365, "grad_norm": 6.309740426457144, "learning_rate": 6.331162674612809e-07, "loss": 0.6106, "step": 21463 }, { "epoch": 1.5506709772969458, "grad_norm": 6.176089159089706, "learning_rate": 6.329217366350005e-07, "loss": 0.5718, "step": 21464 }, { "epoch": 1.5507432224971553, "grad_norm": 6.854115061773423, "learning_rate": 6.32727231367572e-07, "loss": 0.6198, "step": 21465 }, { "epoch": 1.550815467697365, "grad_norm": 6.375218107194972, "learning_rate": 6.325327516616583e-07, "loss": 0.5869, "step": 21466 }, { "epoch": 1.5508877128975742, "grad_norm": 6.280123633364378, "learning_rate": 6.323382975199208e-07, "loss": 0.5569, "step": 21467 }, { "epoch": 1.550959958097784, "grad_norm": 6.601416591860198, "learning_rate": 6.321438689450218e-07, "loss": 0.5658, "step": 21468 }, { "epoch": 1.5510322032979933, "grad_norm": 7.126155023482241, "learning_rate": 6.319494659396231e-07, "loss": 0.6698, "step": 21469 }, { "epoch": 1.551104448498203, "grad_norm": 6.683996120449472, "learning_rate": 6.317550885063861e-07, "loss": 0.5853, "step": 21470 }, { "epoch": 1.5511766936984124, "grad_norm": 8.37379059670825, "learning_rate": 6.315607366479709e-07, "loss": 0.5976, "step": 21471 }, { "epoch": 1.551248938898622, "grad_norm": 6.37267036506574, "learning_rate": 6.313664103670375e-07, "loss": 0.6194, "step": 21472 }, { "epoch": 1.5513211840988315, "grad_norm": 6.305012407404091, "learning_rate": 6.311721096662479e-07, "loss": 0.6519, "step": 21473 }, { "epoch": 1.5513934292990408, "grad_norm": 6.57909627163677, "learning_rate": 6.309778345482617e-07, "loss": 0.6023, "step": 21474 }, { "epoch": 1.5514656744992505, "grad_norm": 7.104195453962095, "learning_rate": 6.307835850157374e-07, "loss": 0.6226, "step": 21475 }, { "epoch": 1.5515379196994599, "grad_norm": 7.922377711490133, "learning_rate": 6.30589361071334e-07, "loss": 0.636, "step": 21476 }, { "epoch": 1.5516101648996696, "grad_norm": 5.61409074631297, "learning_rate": 6.303951627177115e-07, "loss": 0.6454, "step": 21477 }, { "epoch": 1.551682410099879, "grad_norm": 6.591137987903601, "learning_rate": 6.302009899575273e-07, "loss": 0.5773, "step": 21478 }, { "epoch": 1.5517546553000885, "grad_norm": 7.627686824504832, "learning_rate": 6.300068427934394e-07, "loss": 0.5662, "step": 21479 }, { "epoch": 1.551826900500298, "grad_norm": 6.405552970368848, "learning_rate": 6.298127212281057e-07, "loss": 0.5318, "step": 21480 }, { "epoch": 1.5518991457005076, "grad_norm": 7.535418737733061, "learning_rate": 6.296186252641842e-07, "loss": 0.6326, "step": 21481 }, { "epoch": 1.5519713909007171, "grad_norm": 6.0217634316419435, "learning_rate": 6.29424554904331e-07, "loss": 0.5782, "step": 21482 }, { "epoch": 1.5520436361009264, "grad_norm": 7.606725838665355, "learning_rate": 6.292305101512036e-07, "loss": 0.6432, "step": 21483 }, { "epoch": 1.5521158813011362, "grad_norm": 7.767637794040995, "learning_rate": 6.290364910074584e-07, "loss": 0.6043, "step": 21484 }, { "epoch": 1.5521881265013455, "grad_norm": 6.592026327025053, "learning_rate": 6.288424974757504e-07, "loss": 0.6449, "step": 21485 }, { "epoch": 1.552260371701555, "grad_norm": 9.532122862824034, "learning_rate": 6.286485295587358e-07, "loss": 0.5939, "step": 21486 }, { "epoch": 1.5523326169017646, "grad_norm": 7.110038173052633, "learning_rate": 6.284545872590695e-07, "loss": 0.5684, "step": 21487 }, { "epoch": 1.5524048621019741, "grad_norm": 7.712908430119991, "learning_rate": 6.282606705794078e-07, "loss": 0.6243, "step": 21488 }, { "epoch": 1.5524771073021837, "grad_norm": 7.396741984967942, "learning_rate": 6.280667795224033e-07, "loss": 0.6417, "step": 21489 }, { "epoch": 1.552549352502393, "grad_norm": 7.340571588197685, "learning_rate": 6.278729140907111e-07, "loss": 0.6432, "step": 21490 }, { "epoch": 1.5526215977026028, "grad_norm": 7.674018016707541, "learning_rate": 6.276790742869851e-07, "loss": 0.6279, "step": 21491 }, { "epoch": 1.552693842902812, "grad_norm": 5.636389158979784, "learning_rate": 6.27485260113879e-07, "loss": 0.5196, "step": 21492 }, { "epoch": 1.5527660881030216, "grad_norm": 8.028178655638866, "learning_rate": 6.272914715740455e-07, "loss": 0.6095, "step": 21493 }, { "epoch": 1.5528383333032312, "grad_norm": 6.417292015352172, "learning_rate": 6.27097708670138e-07, "loss": 0.5919, "step": 21494 }, { "epoch": 1.5529105785034407, "grad_norm": 6.702418434588701, "learning_rate": 6.269039714048092e-07, "loss": 0.6039, "step": 21495 }, { "epoch": 1.5529828237036503, "grad_norm": 7.433451857142224, "learning_rate": 6.267102597807101e-07, "loss": 0.6029, "step": 21496 }, { "epoch": 1.5530550689038596, "grad_norm": 6.852047423510944, "learning_rate": 6.265165738004928e-07, "loss": 0.6283, "step": 21497 }, { "epoch": 1.5531273141040693, "grad_norm": 6.387832591137807, "learning_rate": 6.263229134668089e-07, "loss": 0.5362, "step": 21498 }, { "epoch": 1.5531995593042787, "grad_norm": 6.468688964555803, "learning_rate": 6.261292787823103e-07, "loss": 0.5262, "step": 21499 }, { "epoch": 1.5532718045044882, "grad_norm": 7.580632186333962, "learning_rate": 6.259356697496458e-07, "loss": 0.6223, "step": 21500 }, { "epoch": 1.5533440497046977, "grad_norm": 8.080112353006426, "learning_rate": 6.257420863714672e-07, "loss": 0.6736, "step": 21501 }, { "epoch": 1.5534162949049073, "grad_norm": 7.339552178715131, "learning_rate": 6.255485286504248e-07, "loss": 0.6876, "step": 21502 }, { "epoch": 1.5534885401051168, "grad_norm": 7.341747514133083, "learning_rate": 6.253549965891673e-07, "loss": 0.5588, "step": 21503 }, { "epoch": 1.5535607853053262, "grad_norm": 7.568408265971116, "learning_rate": 6.251614901903441e-07, "loss": 0.6061, "step": 21504 }, { "epoch": 1.553633030505536, "grad_norm": 8.006412796878728, "learning_rate": 6.249680094566044e-07, "loss": 0.656, "step": 21505 }, { "epoch": 1.5537052757057452, "grad_norm": 8.612203642199075, "learning_rate": 6.247745543905975e-07, "loss": 0.6186, "step": 21506 }, { "epoch": 1.5537775209059548, "grad_norm": 6.75974925958724, "learning_rate": 6.245811249949702e-07, "loss": 0.6091, "step": 21507 }, { "epoch": 1.5538497661061643, "grad_norm": 6.714264263826096, "learning_rate": 6.243877212723712e-07, "loss": 0.5868, "step": 21508 }, { "epoch": 1.5539220113063739, "grad_norm": 7.398877816556774, "learning_rate": 6.24194343225448e-07, "loss": 0.6547, "step": 21509 }, { "epoch": 1.5539942565065834, "grad_norm": 7.427563609412914, "learning_rate": 6.240009908568476e-07, "loss": 0.6746, "step": 21510 }, { "epoch": 1.5540665017067927, "grad_norm": 7.220170578988925, "learning_rate": 6.23807664169217e-07, "loss": 0.6685, "step": 21511 }, { "epoch": 1.5541387469070025, "grad_norm": 7.696532362863009, "learning_rate": 6.236143631652028e-07, "loss": 0.5883, "step": 21512 }, { "epoch": 1.5542109921072118, "grad_norm": 7.113433015095729, "learning_rate": 6.23421087847452e-07, "loss": 0.6146, "step": 21513 }, { "epoch": 1.5542832373074214, "grad_norm": 5.92986441085733, "learning_rate": 6.232278382186083e-07, "loss": 0.6587, "step": 21514 }, { "epoch": 1.554355482507631, "grad_norm": 7.353048468311885, "learning_rate": 6.230346142813185e-07, "loss": 0.6298, "step": 21515 }, { "epoch": 1.5544277277078404, "grad_norm": 6.194014077662975, "learning_rate": 6.228414160382282e-07, "loss": 0.5857, "step": 21516 }, { "epoch": 1.55449997290805, "grad_norm": 7.245935287182404, "learning_rate": 6.226482434919806e-07, "loss": 0.638, "step": 21517 }, { "epoch": 1.5545722181082593, "grad_norm": 6.927490987231512, "learning_rate": 6.224550966452208e-07, "loss": 0.6497, "step": 21518 }, { "epoch": 1.554644463308469, "grad_norm": 6.732976002543754, "learning_rate": 6.222619755005932e-07, "loss": 0.5618, "step": 21519 }, { "epoch": 1.5547167085086784, "grad_norm": 6.127409791815457, "learning_rate": 6.220688800607411e-07, "loss": 0.6414, "step": 21520 }, { "epoch": 1.554788953708888, "grad_norm": 8.23907487138745, "learning_rate": 6.218758103283076e-07, "loss": 0.6154, "step": 21521 }, { "epoch": 1.5548611989090975, "grad_norm": 8.357780860702638, "learning_rate": 6.216827663059363e-07, "loss": 0.5892, "step": 21522 }, { "epoch": 1.554933444109307, "grad_norm": 5.9010212677089315, "learning_rate": 6.214897479962695e-07, "loss": 0.5661, "step": 21523 }, { "epoch": 1.5550056893095165, "grad_norm": 7.481463859643666, "learning_rate": 6.2129675540195e-07, "loss": 0.6139, "step": 21524 }, { "epoch": 1.5550779345097259, "grad_norm": 7.9903394066978475, "learning_rate": 6.211037885256188e-07, "loss": 0.5253, "step": 21525 }, { "epoch": 1.5551501797099356, "grad_norm": 8.229163009707126, "learning_rate": 6.209108473699177e-07, "loss": 0.5818, "step": 21526 }, { "epoch": 1.555222424910145, "grad_norm": 6.534604028147957, "learning_rate": 6.207179319374887e-07, "loss": 0.5681, "step": 21527 }, { "epoch": 1.5552946701103545, "grad_norm": 8.595307476054021, "learning_rate": 6.205250422309714e-07, "loss": 0.6316, "step": 21528 }, { "epoch": 1.555366915310564, "grad_norm": 6.676571481717865, "learning_rate": 6.203321782530062e-07, "loss": 0.5818, "step": 21529 }, { "epoch": 1.5554391605107736, "grad_norm": 6.2199810094022165, "learning_rate": 6.201393400062347e-07, "loss": 0.619, "step": 21530 }, { "epoch": 1.5555114057109831, "grad_norm": 6.266677882137137, "learning_rate": 6.199465274932967e-07, "loss": 0.624, "step": 21531 }, { "epoch": 1.5555836509111924, "grad_norm": 6.941847255352886, "learning_rate": 6.197537407168305e-07, "loss": 0.607, "step": 21532 }, { "epoch": 1.5556558961114022, "grad_norm": 7.839081244838811, "learning_rate": 6.195609796794755e-07, "loss": 0.5878, "step": 21533 }, { "epoch": 1.5557281413116115, "grad_norm": 6.808812254454241, "learning_rate": 6.193682443838714e-07, "loss": 0.5529, "step": 21534 }, { "epoch": 1.5558003865118213, "grad_norm": 6.606696702187657, "learning_rate": 6.19175534832655e-07, "loss": 0.5959, "step": 21535 }, { "epoch": 1.5558726317120306, "grad_norm": 5.95762900433314, "learning_rate": 6.189828510284651e-07, "loss": 0.5636, "step": 21536 }, { "epoch": 1.5559448769122401, "grad_norm": 8.115651077974803, "learning_rate": 6.187901929739396e-07, "loss": 0.636, "step": 21537 }, { "epoch": 1.5560171221124497, "grad_norm": 7.065176397606565, "learning_rate": 6.185975606717157e-07, "loss": 0.6028, "step": 21538 }, { "epoch": 1.556089367312659, "grad_norm": 6.409581950774278, "learning_rate": 6.184049541244306e-07, "loss": 0.5506, "step": 21539 }, { "epoch": 1.5561616125128688, "grad_norm": 7.477126527755201, "learning_rate": 6.182123733347204e-07, "loss": 0.6371, "step": 21540 }, { "epoch": 1.556233857713078, "grad_norm": 7.952974629057951, "learning_rate": 6.180198183052224e-07, "loss": 0.6322, "step": 21541 }, { "epoch": 1.5563061029132879, "grad_norm": 8.275602437473964, "learning_rate": 6.178272890385712e-07, "loss": 0.6053, "step": 21542 }, { "epoch": 1.5563783481134972, "grad_norm": 6.667178356878691, "learning_rate": 6.176347855374029e-07, "loss": 0.6303, "step": 21543 }, { "epoch": 1.5564505933137067, "grad_norm": 7.208470543527545, "learning_rate": 6.174423078043529e-07, "loss": 0.6172, "step": 21544 }, { "epoch": 1.5565228385139163, "grad_norm": 6.17929365250959, "learning_rate": 6.172498558420567e-07, "loss": 0.5577, "step": 21545 }, { "epoch": 1.5565950837141256, "grad_norm": 7.638780492148374, "learning_rate": 6.170574296531476e-07, "loss": 0.6571, "step": 21546 }, { "epoch": 1.5566673289143353, "grad_norm": 8.058536945388742, "learning_rate": 6.1686502924026e-07, "loss": 0.6069, "step": 21547 }, { "epoch": 1.5567395741145447, "grad_norm": 6.079976170141803, "learning_rate": 6.166726546060281e-07, "loss": 0.6489, "step": 21548 }, { "epoch": 1.5568118193147544, "grad_norm": 6.965354742366803, "learning_rate": 6.164803057530852e-07, "loss": 0.5428, "step": 21549 }, { "epoch": 1.5568840645149638, "grad_norm": 6.348202500534539, "learning_rate": 6.162879826840645e-07, "loss": 0.6101, "step": 21550 }, { "epoch": 1.5569563097151733, "grad_norm": 6.7413461100501495, "learning_rate": 6.160956854015987e-07, "loss": 0.6085, "step": 21551 }, { "epoch": 1.5570285549153828, "grad_norm": 6.4134269892039475, "learning_rate": 6.159034139083209e-07, "loss": 0.6277, "step": 21552 }, { "epoch": 1.5571008001155922, "grad_norm": 7.243660095025555, "learning_rate": 6.157111682068617e-07, "loss": 0.5856, "step": 21553 }, { "epoch": 1.557173045315802, "grad_norm": 8.597059909464052, "learning_rate": 6.155189482998536e-07, "loss": 0.6091, "step": 21554 }, { "epoch": 1.5572452905160112, "grad_norm": 7.407718561136476, "learning_rate": 6.15326754189928e-07, "loss": 0.5542, "step": 21555 }, { "epoch": 1.557317535716221, "grad_norm": 7.640911822405962, "learning_rate": 6.151345858797164e-07, "loss": 0.5988, "step": 21556 }, { "epoch": 1.5573897809164303, "grad_norm": 8.526457505910509, "learning_rate": 6.149424433718473e-07, "loss": 0.6029, "step": 21557 }, { "epoch": 1.5574620261166399, "grad_norm": 8.514429298938426, "learning_rate": 6.147503266689533e-07, "loss": 0.574, "step": 21558 }, { "epoch": 1.5575342713168494, "grad_norm": 8.305370973756606, "learning_rate": 6.145582357736644e-07, "loss": 0.5805, "step": 21559 }, { "epoch": 1.557606516517059, "grad_norm": 7.419130866052788, "learning_rate": 6.143661706886083e-07, "loss": 0.6852, "step": 21560 }, { "epoch": 1.5576787617172685, "grad_norm": 7.292480190853066, "learning_rate": 6.141741314164154e-07, "loss": 0.5706, "step": 21561 }, { "epoch": 1.5577510069174778, "grad_norm": 6.324332450047254, "learning_rate": 6.139821179597144e-07, "loss": 0.6, "step": 21562 }, { "epoch": 1.5578232521176876, "grad_norm": 7.335312951815636, "learning_rate": 6.137901303211344e-07, "loss": 0.5895, "step": 21563 }, { "epoch": 1.557895497317897, "grad_norm": 7.770124578667808, "learning_rate": 6.135981685033024e-07, "loss": 0.5991, "step": 21564 }, { "epoch": 1.5579677425181064, "grad_norm": 6.371946314047347, "learning_rate": 6.134062325088467e-07, "loss": 0.6228, "step": 21565 }, { "epoch": 1.558039987718316, "grad_norm": 6.701658288507254, "learning_rate": 6.132143223403947e-07, "loss": 0.5606, "step": 21566 }, { "epoch": 1.5581122329185255, "grad_norm": 6.308955339869569, "learning_rate": 6.130224380005736e-07, "loss": 0.542, "step": 21567 }, { "epoch": 1.558184478118735, "grad_norm": 6.908713889444343, "learning_rate": 6.128305794920106e-07, "loss": 0.6386, "step": 21568 }, { "epoch": 1.5582567233189444, "grad_norm": 7.725844115523124, "learning_rate": 6.126387468173314e-07, "loss": 0.6462, "step": 21569 }, { "epoch": 1.5583289685191541, "grad_norm": 6.955655098400178, "learning_rate": 6.12446939979163e-07, "loss": 0.6176, "step": 21570 }, { "epoch": 1.5584012137193635, "grad_norm": 7.571858566083719, "learning_rate": 6.122551589801296e-07, "loss": 0.6043, "step": 21571 }, { "epoch": 1.558473458919573, "grad_norm": 6.625714533438132, "learning_rate": 6.120634038228576e-07, "loss": 0.6462, "step": 21572 }, { "epoch": 1.5585457041197825, "grad_norm": 9.377309285896429, "learning_rate": 6.118716745099715e-07, "loss": 0.6481, "step": 21573 }, { "epoch": 1.558617949319992, "grad_norm": 7.5730804327704675, "learning_rate": 6.116799710440968e-07, "loss": 0.5968, "step": 21574 }, { "epoch": 1.5586901945202016, "grad_norm": 6.8622418019790095, "learning_rate": 6.114882934278569e-07, "loss": 0.6151, "step": 21575 }, { "epoch": 1.558762439720411, "grad_norm": 7.390885652941958, "learning_rate": 6.112966416638752e-07, "loss": 0.5871, "step": 21576 }, { "epoch": 1.5588346849206207, "grad_norm": 8.049728805996986, "learning_rate": 6.111050157547765e-07, "loss": 0.589, "step": 21577 }, { "epoch": 1.55890693012083, "grad_norm": 6.341819431839311, "learning_rate": 6.109134157031835e-07, "loss": 0.5354, "step": 21578 }, { "epoch": 1.5589791753210396, "grad_norm": 6.9984649609969996, "learning_rate": 6.107218415117188e-07, "loss": 0.617, "step": 21579 }, { "epoch": 1.5590514205212491, "grad_norm": 7.144727200531839, "learning_rate": 6.105302931830051e-07, "loss": 0.6184, "step": 21580 }, { "epoch": 1.5591236657214587, "grad_norm": 7.636493213181735, "learning_rate": 6.103387707196656e-07, "loss": 0.5764, "step": 21581 }, { "epoch": 1.5591959109216682, "grad_norm": 6.57254625375984, "learning_rate": 6.101472741243202e-07, "loss": 0.6714, "step": 21582 }, { "epoch": 1.5592681561218775, "grad_norm": 6.15210751034535, "learning_rate": 6.099558033995915e-07, "loss": 0.5827, "step": 21583 }, { "epoch": 1.5593404013220873, "grad_norm": 6.322311179651295, "learning_rate": 6.097643585481009e-07, "loss": 0.5833, "step": 21584 }, { "epoch": 1.5594126465222966, "grad_norm": 6.866501427794955, "learning_rate": 6.095729395724669e-07, "loss": 0.6246, "step": 21585 }, { "epoch": 1.5594848917225062, "grad_norm": 6.4230596558818895, "learning_rate": 6.093815464753125e-07, "loss": 0.5216, "step": 21586 }, { "epoch": 1.5595571369227157, "grad_norm": 7.156179635980491, "learning_rate": 6.091901792592566e-07, "loss": 0.6004, "step": 21587 }, { "epoch": 1.5596293821229252, "grad_norm": 7.262618035360187, "learning_rate": 6.089988379269198e-07, "loss": 0.6927, "step": 21588 }, { "epoch": 1.5597016273231348, "grad_norm": 6.155291332598333, "learning_rate": 6.088075224809201e-07, "loss": 0.6437, "step": 21589 }, { "epoch": 1.559773872523344, "grad_norm": 8.141711142453314, "learning_rate": 6.086162329238768e-07, "loss": 0.6192, "step": 21590 }, { "epoch": 1.5598461177235539, "grad_norm": 7.673618840999556, "learning_rate": 6.084249692584097e-07, "loss": 0.5922, "step": 21591 }, { "epoch": 1.5599183629237632, "grad_norm": 6.70821359973779, "learning_rate": 6.08233731487135e-07, "loss": 0.5684, "step": 21592 }, { "epoch": 1.5599906081239727, "grad_norm": 6.797973824612535, "learning_rate": 6.080425196126719e-07, "loss": 0.5695, "step": 21593 }, { "epoch": 1.5600628533241823, "grad_norm": 6.711854030563231, "learning_rate": 6.078513336376374e-07, "loss": 0.58, "step": 21594 }, { "epoch": 1.5601350985243918, "grad_norm": 7.0745700557269675, "learning_rate": 6.076601735646493e-07, "loss": 0.6612, "step": 21595 }, { "epoch": 1.5602073437246013, "grad_norm": 6.739413118251974, "learning_rate": 6.07469039396324e-07, "loss": 0.5906, "step": 21596 }, { "epoch": 1.5602795889248107, "grad_norm": 6.282715280155858, "learning_rate": 6.07277931135278e-07, "loss": 0.6103, "step": 21597 }, { "epoch": 1.5603518341250204, "grad_norm": 7.203822964760946, "learning_rate": 6.070868487841283e-07, "loss": 0.5879, "step": 21598 }, { "epoch": 1.5604240793252298, "grad_norm": 7.152958408613972, "learning_rate": 6.06895792345489e-07, "loss": 0.534, "step": 21599 }, { "epoch": 1.5604963245254393, "grad_norm": 5.821164799698093, "learning_rate": 6.067047618219768e-07, "loss": 0.5713, "step": 21600 }, { "epoch": 1.5605685697256488, "grad_norm": 7.268780554250459, "learning_rate": 6.065137572162061e-07, "loss": 0.6461, "step": 21601 }, { "epoch": 1.5606408149258584, "grad_norm": 7.149730127750152, "learning_rate": 6.063227785307926e-07, "loss": 0.6455, "step": 21602 }, { "epoch": 1.560713060126068, "grad_norm": 9.19969644460357, "learning_rate": 6.061318257683493e-07, "loss": 0.5901, "step": 21603 }, { "epoch": 1.5607853053262772, "grad_norm": 6.788881850390714, "learning_rate": 6.059408989314907e-07, "loss": 0.5982, "step": 21604 }, { "epoch": 1.560857550526487, "grad_norm": 5.709275365167851, "learning_rate": 6.057499980228298e-07, "loss": 0.5272, "step": 21605 }, { "epoch": 1.5609297957266963, "grad_norm": 6.693424345869381, "learning_rate": 6.055591230449822e-07, "loss": 0.6338, "step": 21606 }, { "epoch": 1.5610020409269059, "grad_norm": 6.150279291525672, "learning_rate": 6.053682740005585e-07, "loss": 0.652, "step": 21607 }, { "epoch": 1.5610742861271154, "grad_norm": 11.611252398039436, "learning_rate": 6.051774508921721e-07, "loss": 0.6384, "step": 21608 }, { "epoch": 1.561146531327325, "grad_norm": 6.823258051206155, "learning_rate": 6.049866537224358e-07, "loss": 0.5576, "step": 21609 }, { "epoch": 1.5612187765275345, "grad_norm": 7.370429481611517, "learning_rate": 6.047958824939601e-07, "loss": 0.6447, "step": 21610 }, { "epoch": 1.5612910217277438, "grad_norm": 7.392573480843984, "learning_rate": 6.046051372093572e-07, "loss": 0.577, "step": 21611 }, { "epoch": 1.5613632669279536, "grad_norm": 7.0607246260474605, "learning_rate": 6.044144178712386e-07, "loss": 0.5668, "step": 21612 }, { "epoch": 1.561435512128163, "grad_norm": 7.287941669670359, "learning_rate": 6.042237244822144e-07, "loss": 0.613, "step": 21613 }, { "epoch": 1.5615077573283727, "grad_norm": 7.825488470754283, "learning_rate": 6.040330570448957e-07, "loss": 0.6082, "step": 21614 }, { "epoch": 1.561580002528582, "grad_norm": 6.533804508008864, "learning_rate": 6.038424155618922e-07, "loss": 0.5563, "step": 21615 }, { "epoch": 1.5616522477287915, "grad_norm": 6.673124459904299, "learning_rate": 6.036518000358144e-07, "loss": 0.5835, "step": 21616 }, { "epoch": 1.561724492929001, "grad_norm": 6.624837909371025, "learning_rate": 6.034612104692706e-07, "loss": 0.5995, "step": 21617 }, { "epoch": 1.5617967381292104, "grad_norm": 5.951306162847389, "learning_rate": 6.0327064686487e-07, "loss": 0.5423, "step": 21618 }, { "epoch": 1.5618689833294201, "grad_norm": 6.6862911664829765, "learning_rate": 6.030801092252215e-07, "loss": 0.6276, "step": 21619 }, { "epoch": 1.5619412285296295, "grad_norm": 8.994730784451841, "learning_rate": 6.028895975529341e-07, "loss": 0.6741, "step": 21620 }, { "epoch": 1.5620134737298392, "grad_norm": 6.4935558359822805, "learning_rate": 6.026991118506148e-07, "loss": 0.5598, "step": 21621 }, { "epoch": 1.5620857189300486, "grad_norm": 6.997890699572555, "learning_rate": 6.025086521208712e-07, "loss": 0.6281, "step": 21622 }, { "epoch": 1.562157964130258, "grad_norm": 8.69004817339118, "learning_rate": 6.023182183663109e-07, "loss": 0.5725, "step": 21623 }, { "epoch": 1.5622302093304676, "grad_norm": 5.281485106522589, "learning_rate": 6.021278105895407e-07, "loss": 0.5159, "step": 21624 }, { "epoch": 1.562302454530677, "grad_norm": 7.4501130677930245, "learning_rate": 6.019374287931673e-07, "loss": 0.6122, "step": 21625 }, { "epoch": 1.5623746997308867, "grad_norm": 7.24125301582936, "learning_rate": 6.017470729797966e-07, "loss": 0.6775, "step": 21626 }, { "epoch": 1.562446944931096, "grad_norm": 6.362564824042009, "learning_rate": 6.015567431520353e-07, "loss": 0.6131, "step": 21627 }, { "epoch": 1.5625191901313058, "grad_norm": 7.55186534347744, "learning_rate": 6.013664393124874e-07, "loss": 0.5986, "step": 21628 }, { "epoch": 1.5625914353315151, "grad_norm": 7.284416852558339, "learning_rate": 6.01176161463759e-07, "loss": 0.5713, "step": 21629 }, { "epoch": 1.5626636805317247, "grad_norm": 7.287638338061702, "learning_rate": 6.009859096084544e-07, "loss": 0.6913, "step": 21630 }, { "epoch": 1.5627359257319342, "grad_norm": 7.641656412043567, "learning_rate": 6.007956837491788e-07, "loss": 0.6148, "step": 21631 }, { "epoch": 1.5628081709321437, "grad_norm": 8.238978103677123, "learning_rate": 6.00605483888535e-07, "loss": 0.6671, "step": 21632 }, { "epoch": 1.5628804161323533, "grad_norm": 7.424332446777517, "learning_rate": 6.004153100291277e-07, "loss": 0.6336, "step": 21633 }, { "epoch": 1.5629526613325626, "grad_norm": 7.351405532247595, "learning_rate": 6.002251621735594e-07, "loss": 0.6021, "step": 21634 }, { "epoch": 1.5630249065327724, "grad_norm": 6.7520135772532415, "learning_rate": 6.000350403244338e-07, "loss": 0.6211, "step": 21635 }, { "epoch": 1.5630971517329817, "grad_norm": 6.377841690103591, "learning_rate": 5.998449444843532e-07, "loss": 0.5521, "step": 21636 }, { "epoch": 1.5631693969331912, "grad_norm": 7.019896160781461, "learning_rate": 5.9965487465592e-07, "loss": 0.6859, "step": 21637 }, { "epoch": 1.5632416421334008, "grad_norm": 7.459677321177102, "learning_rate": 5.994648308417367e-07, "loss": 0.6053, "step": 21638 }, { "epoch": 1.5633138873336103, "grad_norm": 6.4675035243251475, "learning_rate": 5.992748130444034e-07, "loss": 0.5986, "step": 21639 }, { "epoch": 1.5633861325338199, "grad_norm": 6.505617208792259, "learning_rate": 5.990848212665223e-07, "loss": 0.5692, "step": 21640 }, { "epoch": 1.5634583777340292, "grad_norm": 6.120669020941748, "learning_rate": 5.988948555106947e-07, "loss": 0.5814, "step": 21641 }, { "epoch": 1.563530622934239, "grad_norm": 6.898615557229241, "learning_rate": 5.987049157795188e-07, "loss": 0.6668, "step": 21642 }, { "epoch": 1.5636028681344483, "grad_norm": 7.490948874005148, "learning_rate": 5.985150020755972e-07, "loss": 0.6275, "step": 21643 }, { "epoch": 1.5636751133346578, "grad_norm": 6.758201420347597, "learning_rate": 5.983251144015287e-07, "loss": 0.576, "step": 21644 }, { "epoch": 1.5637473585348673, "grad_norm": 6.868954028777809, "learning_rate": 5.981352527599138e-07, "loss": 0.6249, "step": 21645 }, { "epoch": 1.563819603735077, "grad_norm": 8.001647779520166, "learning_rate": 5.979454171533497e-07, "loss": 0.5427, "step": 21646 }, { "epoch": 1.5638918489352864, "grad_norm": 7.1206761675596475, "learning_rate": 5.977556075844359e-07, "loss": 0.5526, "step": 21647 }, { "epoch": 1.5639640941354958, "grad_norm": 6.147212331773183, "learning_rate": 5.975658240557716e-07, "loss": 0.6833, "step": 21648 }, { "epoch": 1.5640363393357055, "grad_norm": 6.101159545252924, "learning_rate": 5.973760665699535e-07, "loss": 0.5685, "step": 21649 }, { "epoch": 1.5641085845359148, "grad_norm": 6.9142641544568875, "learning_rate": 5.971863351295795e-07, "loss": 0.6529, "step": 21650 }, { "epoch": 1.5641808297361244, "grad_norm": 7.454006310904748, "learning_rate": 5.969966297372473e-07, "loss": 0.6932, "step": 21651 }, { "epoch": 1.564253074936334, "grad_norm": 7.570227088092087, "learning_rate": 5.968069503955534e-07, "loss": 0.6146, "step": 21652 }, { "epoch": 1.5643253201365435, "grad_norm": 8.369032370580497, "learning_rate": 5.966172971070949e-07, "loss": 0.6426, "step": 21653 }, { "epoch": 1.564397565336753, "grad_norm": 8.327201061992888, "learning_rate": 5.964276698744675e-07, "loss": 0.6175, "step": 21654 }, { "epoch": 1.5644698105369623, "grad_norm": 7.471326567123634, "learning_rate": 5.962380687002675e-07, "loss": 0.6268, "step": 21655 }, { "epoch": 1.564542055737172, "grad_norm": 6.181316021295704, "learning_rate": 5.960484935870909e-07, "loss": 0.6106, "step": 21656 }, { "epoch": 1.5646143009373814, "grad_norm": 6.258687809413657, "learning_rate": 5.958589445375313e-07, "loss": 0.6015, "step": 21657 }, { "epoch": 1.564686546137591, "grad_norm": 7.422122477370729, "learning_rate": 5.956694215541841e-07, "loss": 0.5959, "step": 21658 }, { "epoch": 1.5647587913378005, "grad_norm": 5.730112929371428, "learning_rate": 5.954799246396447e-07, "loss": 0.5635, "step": 21659 }, { "epoch": 1.56483103653801, "grad_norm": 6.970075951576995, "learning_rate": 5.952904537965057e-07, "loss": 0.5834, "step": 21660 }, { "epoch": 1.5649032817382196, "grad_norm": 6.505620433825194, "learning_rate": 5.951010090273615e-07, "loss": 0.6455, "step": 21661 }, { "epoch": 1.564975526938429, "grad_norm": 7.9285126515766455, "learning_rate": 5.949115903348049e-07, "loss": 0.5925, "step": 21662 }, { "epoch": 1.5650477721386387, "grad_norm": 7.373422744972258, "learning_rate": 5.947221977214304e-07, "loss": 0.6265, "step": 21663 }, { "epoch": 1.565120017338848, "grad_norm": 7.684905816259895, "learning_rate": 5.945328311898294e-07, "loss": 0.6344, "step": 21664 }, { "epoch": 1.5651922625390575, "grad_norm": 7.753688795754331, "learning_rate": 5.943434907425941e-07, "loss": 0.6807, "step": 21665 }, { "epoch": 1.565264507739267, "grad_norm": 8.453964783984356, "learning_rate": 5.941541763823177e-07, "loss": 0.6194, "step": 21666 }, { "epoch": 1.5653367529394766, "grad_norm": 7.281227210013288, "learning_rate": 5.939648881115901e-07, "loss": 0.5678, "step": 21667 }, { "epoch": 1.5654089981396861, "grad_norm": 6.97217150801021, "learning_rate": 5.93775625933003e-07, "loss": 0.5642, "step": 21668 }, { "epoch": 1.5654812433398955, "grad_norm": 6.731444958313679, "learning_rate": 5.935863898491479e-07, "loss": 0.5619, "step": 21669 }, { "epoch": 1.5655534885401052, "grad_norm": 6.6092664921648545, "learning_rate": 5.933971798626145e-07, "loss": 0.609, "step": 21670 }, { "epoch": 1.5656257337403146, "grad_norm": 6.222073534294022, "learning_rate": 5.932079959759935e-07, "loss": 0.6033, "step": 21671 }, { "epoch": 1.565697978940524, "grad_norm": 6.632193061291725, "learning_rate": 5.930188381918745e-07, "loss": 0.5764, "step": 21672 }, { "epoch": 1.5657702241407336, "grad_norm": 7.506110690155794, "learning_rate": 5.928297065128477e-07, "loss": 0.6323, "step": 21673 }, { "epoch": 1.5658424693409432, "grad_norm": 7.961094668468181, "learning_rate": 5.926406009415006e-07, "loss": 0.6362, "step": 21674 }, { "epoch": 1.5659147145411527, "grad_norm": 6.332883350381643, "learning_rate": 5.924515214804227e-07, "loss": 0.557, "step": 21675 }, { "epoch": 1.565986959741362, "grad_norm": 7.021637580908527, "learning_rate": 5.922624681322023e-07, "loss": 0.5667, "step": 21676 }, { "epoch": 1.5660592049415718, "grad_norm": 7.138777679619474, "learning_rate": 5.920734408994283e-07, "loss": 0.6438, "step": 21677 }, { "epoch": 1.5661314501417811, "grad_norm": 7.156045302749278, "learning_rate": 5.918844397846868e-07, "loss": 0.5689, "step": 21678 }, { "epoch": 1.5662036953419907, "grad_norm": 5.734351716786859, "learning_rate": 5.916954647905656e-07, "loss": 0.6151, "step": 21679 }, { "epoch": 1.5662759405422002, "grad_norm": 7.535030698150494, "learning_rate": 5.915065159196517e-07, "loss": 0.6074, "step": 21680 }, { "epoch": 1.5663481857424097, "grad_norm": 6.009360958406307, "learning_rate": 5.913175931745321e-07, "loss": 0.5943, "step": 21681 }, { "epoch": 1.5664204309426193, "grad_norm": 7.303137891282215, "learning_rate": 5.911286965577922e-07, "loss": 0.6611, "step": 21682 }, { "epoch": 1.5664926761428286, "grad_norm": 5.95679622999508, "learning_rate": 5.909398260720186e-07, "loss": 0.5857, "step": 21683 }, { "epoch": 1.5665649213430384, "grad_norm": 8.558100328799199, "learning_rate": 5.907509817197974e-07, "loss": 0.6514, "step": 21684 }, { "epoch": 1.5666371665432477, "grad_norm": 6.3748575830446255, "learning_rate": 5.905621635037117e-07, "loss": 0.586, "step": 21685 }, { "epoch": 1.5667094117434575, "grad_norm": 6.428082438445327, "learning_rate": 5.903733714263476e-07, "loss": 0.5519, "step": 21686 }, { "epoch": 1.5667816569436668, "grad_norm": 5.89549511410033, "learning_rate": 5.901846054902893e-07, "loss": 0.5391, "step": 21687 }, { "epoch": 1.5668539021438763, "grad_norm": 7.448377588766159, "learning_rate": 5.899958656981217e-07, "loss": 0.6689, "step": 21688 }, { "epoch": 1.5669261473440859, "grad_norm": 6.951286295683036, "learning_rate": 5.898071520524267e-07, "loss": 0.5333, "step": 21689 }, { "epoch": 1.5669983925442952, "grad_norm": 7.062152145056199, "learning_rate": 5.896184645557882e-07, "loss": 0.6227, "step": 21690 }, { "epoch": 1.567070637744505, "grad_norm": 7.632522374459891, "learning_rate": 5.89429803210791e-07, "loss": 0.6274, "step": 21691 }, { "epoch": 1.5671428829447143, "grad_norm": 6.589939553152895, "learning_rate": 5.892411680200152e-07, "loss": 0.5997, "step": 21692 }, { "epoch": 1.567215128144924, "grad_norm": 6.097503288740497, "learning_rate": 5.890525589860447e-07, "loss": 0.5539, "step": 21693 }, { "epoch": 1.5672873733451333, "grad_norm": 7.167337282726775, "learning_rate": 5.888639761114606e-07, "loss": 0.6186, "step": 21694 }, { "epoch": 1.567359618545343, "grad_norm": 7.45362938667704, "learning_rate": 5.886754193988456e-07, "loss": 0.5704, "step": 21695 }, { "epoch": 1.5674318637455524, "grad_norm": 6.210542870226396, "learning_rate": 5.884868888507794e-07, "loss": 0.5401, "step": 21696 }, { "epoch": 1.5675041089457618, "grad_norm": 6.620886047166236, "learning_rate": 5.882983844698434e-07, "loss": 0.6452, "step": 21697 }, { "epoch": 1.5675763541459715, "grad_norm": 7.117801710942014, "learning_rate": 5.881099062586182e-07, "loss": 0.589, "step": 21698 }, { "epoch": 1.5676485993461808, "grad_norm": 7.601567016474763, "learning_rate": 5.879214542196839e-07, "loss": 0.706, "step": 21699 }, { "epoch": 1.5677208445463906, "grad_norm": 7.134491604801591, "learning_rate": 5.877330283556204e-07, "loss": 0.6249, "step": 21700 }, { "epoch": 1.5677930897466, "grad_norm": 6.368810903091403, "learning_rate": 5.875446286690067e-07, "loss": 0.5901, "step": 21701 }, { "epoch": 1.5678653349468095, "grad_norm": 6.866710867631492, "learning_rate": 5.873562551624229e-07, "loss": 0.6013, "step": 21702 }, { "epoch": 1.567937580147019, "grad_norm": 7.900848352778549, "learning_rate": 5.871679078384463e-07, "loss": 0.5857, "step": 21703 }, { "epoch": 1.5680098253472285, "grad_norm": 6.662896934265202, "learning_rate": 5.869795866996558e-07, "loss": 0.6189, "step": 21704 }, { "epoch": 1.568082070547438, "grad_norm": 6.53475230371423, "learning_rate": 5.867912917486293e-07, "loss": 0.6036, "step": 21705 }, { "epoch": 1.5681543157476474, "grad_norm": 7.384507855525007, "learning_rate": 5.866030229879452e-07, "loss": 0.6215, "step": 21706 }, { "epoch": 1.5682265609478572, "grad_norm": 7.1275598043543935, "learning_rate": 5.864147804201794e-07, "loss": 0.6974, "step": 21707 }, { "epoch": 1.5682988061480665, "grad_norm": 6.743041796615118, "learning_rate": 5.862265640479095e-07, "loss": 0.6009, "step": 21708 }, { "epoch": 1.568371051348276, "grad_norm": 6.787191995899997, "learning_rate": 5.860383738737119e-07, "loss": 0.6123, "step": 21709 }, { "epoch": 1.5684432965484856, "grad_norm": 6.666901711453063, "learning_rate": 5.858502099001631e-07, "loss": 0.6195, "step": 21710 }, { "epoch": 1.5685155417486951, "grad_norm": 7.1956491464067085, "learning_rate": 5.856620721298384e-07, "loss": 0.6538, "step": 21711 }, { "epoch": 1.5685877869489047, "grad_norm": 6.815138839460503, "learning_rate": 5.854739605653137e-07, "loss": 0.5893, "step": 21712 }, { "epoch": 1.568660032149114, "grad_norm": 7.406413273681561, "learning_rate": 5.852858752091645e-07, "loss": 0.5901, "step": 21713 }, { "epoch": 1.5687322773493237, "grad_norm": 8.534821485854883, "learning_rate": 5.850978160639645e-07, "loss": 0.6602, "step": 21714 }, { "epoch": 1.568804522549533, "grad_norm": 8.462345418264805, "learning_rate": 5.849097831322884e-07, "loss": 0.7048, "step": 21715 }, { "epoch": 1.5688767677497426, "grad_norm": 7.232355944203943, "learning_rate": 5.847217764167112e-07, "loss": 0.6379, "step": 21716 }, { "epoch": 1.5689490129499521, "grad_norm": 6.069005562835522, "learning_rate": 5.84533795919805e-07, "loss": 0.5721, "step": 21717 }, { "epoch": 1.5690212581501617, "grad_norm": 6.698125651072009, "learning_rate": 5.843458416441439e-07, "loss": 0.5962, "step": 21718 }, { "epoch": 1.5690935033503712, "grad_norm": 6.631490443092425, "learning_rate": 5.841579135923e-07, "loss": 0.5726, "step": 21719 }, { "epoch": 1.5691657485505806, "grad_norm": 8.81523273924576, "learning_rate": 5.839700117668485e-07, "loss": 0.6914, "step": 21720 }, { "epoch": 1.5692379937507903, "grad_norm": 6.747961796225606, "learning_rate": 5.837821361703589e-07, "loss": 0.6051, "step": 21721 }, { "epoch": 1.5693102389509996, "grad_norm": 6.879889674078374, "learning_rate": 5.835942868054037e-07, "loss": 0.6345, "step": 21722 }, { "epoch": 1.5693824841512092, "grad_norm": 6.926583714262291, "learning_rate": 5.834064636745557e-07, "loss": 0.6448, "step": 21723 }, { "epoch": 1.5694547293514187, "grad_norm": 6.857128705282594, "learning_rate": 5.832186667803844e-07, "loss": 0.6349, "step": 21724 }, { "epoch": 1.5695269745516283, "grad_norm": 7.584885937833304, "learning_rate": 5.830308961254613e-07, "loss": 0.7168, "step": 21725 }, { "epoch": 1.5695992197518378, "grad_norm": 6.311106008578344, "learning_rate": 5.828431517123567e-07, "loss": 0.6894, "step": 21726 }, { "epoch": 1.5696714649520471, "grad_norm": 6.209808515707857, "learning_rate": 5.826554335436407e-07, "loss": 0.6301, "step": 21727 }, { "epoch": 1.5697437101522569, "grad_norm": 9.791566715677252, "learning_rate": 5.824677416218832e-07, "loss": 0.6489, "step": 21728 }, { "epoch": 1.5698159553524662, "grad_norm": 6.752726251567827, "learning_rate": 5.822800759496536e-07, "loss": 0.622, "step": 21729 }, { "epoch": 1.5698882005526757, "grad_norm": 6.036042676103619, "learning_rate": 5.820924365295214e-07, "loss": 0.6613, "step": 21730 }, { "epoch": 1.5699604457528853, "grad_norm": 7.295766350049972, "learning_rate": 5.819048233640537e-07, "loss": 0.6132, "step": 21731 }, { "epoch": 1.5700326909530948, "grad_norm": 6.420014271943103, "learning_rate": 5.817172364558196e-07, "loss": 0.5604, "step": 21732 }, { "epoch": 1.5701049361533044, "grad_norm": 6.504443777055572, "learning_rate": 5.815296758073874e-07, "loss": 0.6682, "step": 21733 }, { "epoch": 1.5701771813535137, "grad_norm": 7.01850407238341, "learning_rate": 5.813421414213247e-07, "loss": 0.5182, "step": 21734 }, { "epoch": 1.5702494265537235, "grad_norm": 7.077260754097884, "learning_rate": 5.81154633300198e-07, "loss": 0.5663, "step": 21735 }, { "epoch": 1.5703216717539328, "grad_norm": 6.2246850604317725, "learning_rate": 5.809671514465743e-07, "loss": 0.595, "step": 21736 }, { "epoch": 1.5703939169541423, "grad_norm": 8.572508816540696, "learning_rate": 5.807796958630202e-07, "loss": 0.6782, "step": 21737 }, { "epoch": 1.5704661621543519, "grad_norm": 8.406243193102496, "learning_rate": 5.805922665521019e-07, "loss": 0.6399, "step": 21738 }, { "epoch": 1.5705384073545614, "grad_norm": 8.457667232148923, "learning_rate": 5.804048635163853e-07, "loss": 0.666, "step": 21739 }, { "epoch": 1.570610652554771, "grad_norm": 7.37851540785973, "learning_rate": 5.802174867584354e-07, "loss": 0.6328, "step": 21740 }, { "epoch": 1.5706828977549803, "grad_norm": 7.0886093536292, "learning_rate": 5.800301362808183e-07, "loss": 0.5523, "step": 21741 }, { "epoch": 1.57075514295519, "grad_norm": 6.525138660579936, "learning_rate": 5.798428120860972e-07, "loss": 0.5758, "step": 21742 }, { "epoch": 1.5708273881553994, "grad_norm": 6.6309957184067985, "learning_rate": 5.796555141768373e-07, "loss": 0.6748, "step": 21743 }, { "epoch": 1.570899633355609, "grad_norm": 6.825675332888315, "learning_rate": 5.79468242555602e-07, "loss": 0.5316, "step": 21744 }, { "epoch": 1.5709718785558184, "grad_norm": 6.279112798623847, "learning_rate": 5.792809972249561e-07, "loss": 0.6623, "step": 21745 }, { "epoch": 1.571044123756028, "grad_norm": 6.9100389400549815, "learning_rate": 5.790937781874614e-07, "loss": 0.5842, "step": 21746 }, { "epoch": 1.5711163689562375, "grad_norm": 7.36177869284733, "learning_rate": 5.789065854456805e-07, "loss": 0.5868, "step": 21747 }, { "epoch": 1.5711886141564468, "grad_norm": 7.471702343307208, "learning_rate": 5.787194190021784e-07, "loss": 0.6222, "step": 21748 }, { "epoch": 1.5712608593566566, "grad_norm": 7.0202594224429085, "learning_rate": 5.785322788595149e-07, "loss": 0.5934, "step": 21749 }, { "epoch": 1.571333104556866, "grad_norm": 7.665654848198488, "learning_rate": 5.783451650202526e-07, "loss": 0.5841, "step": 21750 }, { "epoch": 1.5714053497570755, "grad_norm": 7.938705713205327, "learning_rate": 5.78158077486953e-07, "loss": 0.5835, "step": 21751 }, { "epoch": 1.571477594957285, "grad_norm": 6.670394268593599, "learning_rate": 5.779710162621777e-07, "loss": 0.5944, "step": 21752 }, { "epoch": 1.5715498401574945, "grad_norm": 7.410617490257749, "learning_rate": 5.777839813484862e-07, "loss": 0.5545, "step": 21753 }, { "epoch": 1.571622085357704, "grad_norm": 6.887712966201855, "learning_rate": 5.775969727484393e-07, "loss": 0.5934, "step": 21754 }, { "epoch": 1.5716943305579134, "grad_norm": 7.661959516123077, "learning_rate": 5.774099904645975e-07, "loss": 0.6918, "step": 21755 }, { "epoch": 1.5717665757581232, "grad_norm": 6.575999879414726, "learning_rate": 5.772230344995198e-07, "loss": 0.6094, "step": 21756 }, { "epoch": 1.5718388209583325, "grad_norm": 7.144956514561212, "learning_rate": 5.77036104855766e-07, "loss": 0.6042, "step": 21757 }, { "epoch": 1.5719110661585423, "grad_norm": 7.740008404552224, "learning_rate": 5.768492015358948e-07, "loss": 0.5708, "step": 21758 }, { "epoch": 1.5719833113587516, "grad_norm": 6.709407396598107, "learning_rate": 5.766623245424655e-07, "loss": 0.5291, "step": 21759 }, { "epoch": 1.5720555565589611, "grad_norm": 7.102350743090689, "learning_rate": 5.764754738780351e-07, "loss": 0.6159, "step": 21760 }, { "epoch": 1.5721278017591707, "grad_norm": 6.507595905853171, "learning_rate": 5.762886495451616e-07, "loss": 0.5525, "step": 21761 }, { "epoch": 1.57220004695938, "grad_norm": 6.010282606440178, "learning_rate": 5.761018515464031e-07, "loss": 0.5751, "step": 21762 }, { "epoch": 1.5722722921595897, "grad_norm": 6.492931630883701, "learning_rate": 5.759150798843169e-07, "loss": 0.5726, "step": 21763 }, { "epoch": 1.572344537359799, "grad_norm": 7.7625090029260395, "learning_rate": 5.757283345614589e-07, "loss": 0.6394, "step": 21764 }, { "epoch": 1.5724167825600088, "grad_norm": 8.677259378528777, "learning_rate": 5.755416155803856e-07, "loss": 0.6873, "step": 21765 }, { "epoch": 1.5724890277602181, "grad_norm": 8.948022157153073, "learning_rate": 5.753549229436533e-07, "loss": 0.7436, "step": 21766 }, { "epoch": 1.5725612729604277, "grad_norm": 6.7695549173706295, "learning_rate": 5.75168256653818e-07, "loss": 0.6499, "step": 21767 }, { "epoch": 1.5726335181606372, "grad_norm": 6.0345276144080024, "learning_rate": 5.749816167134348e-07, "loss": 0.6216, "step": 21768 }, { "epoch": 1.5727057633608466, "grad_norm": 7.394949359630135, "learning_rate": 5.747950031250582e-07, "loss": 0.5768, "step": 21769 }, { "epoch": 1.5727780085610563, "grad_norm": 6.6832704092410555, "learning_rate": 5.74608415891244e-07, "loss": 0.5787, "step": 21770 }, { "epoch": 1.5728502537612656, "grad_norm": 9.265195170784821, "learning_rate": 5.744218550145453e-07, "loss": 0.61, "step": 21771 }, { "epoch": 1.5729224989614754, "grad_norm": 7.441101740816846, "learning_rate": 5.74235320497516e-07, "loss": 0.6874, "step": 21772 }, { "epoch": 1.5729947441616847, "grad_norm": 6.312989168759364, "learning_rate": 5.740488123427104e-07, "loss": 0.6523, "step": 21773 }, { "epoch": 1.5730669893618943, "grad_norm": 7.073556263413045, "learning_rate": 5.738623305526808e-07, "loss": 0.6883, "step": 21774 }, { "epoch": 1.5731392345621038, "grad_norm": 7.282510934776323, "learning_rate": 5.736758751299795e-07, "loss": 0.6885, "step": 21775 }, { "epoch": 1.5732114797623131, "grad_norm": 6.687954896090334, "learning_rate": 5.734894460771603e-07, "loss": 0.5809, "step": 21776 }, { "epoch": 1.573283724962523, "grad_norm": 6.572424659108555, "learning_rate": 5.733030433967756e-07, "loss": 0.5443, "step": 21777 }, { "epoch": 1.5733559701627322, "grad_norm": 7.077458567119069, "learning_rate": 5.731166670913757e-07, "loss": 0.6414, "step": 21778 }, { "epoch": 1.573428215362942, "grad_norm": 8.295332310822314, "learning_rate": 5.729303171635123e-07, "loss": 0.6898, "step": 21779 }, { "epoch": 1.5735004605631513, "grad_norm": 6.417998468814333, "learning_rate": 5.727439936157372e-07, "loss": 0.5918, "step": 21780 }, { "epoch": 1.5735727057633608, "grad_norm": 8.850681136477002, "learning_rate": 5.725576964505999e-07, "loss": 0.6137, "step": 21781 }, { "epoch": 1.5736449509635704, "grad_norm": 8.40104338205836, "learning_rate": 5.723714256706512e-07, "loss": 0.6285, "step": 21782 }, { "epoch": 1.57371719616378, "grad_norm": 6.70794006837699, "learning_rate": 5.721851812784409e-07, "loss": 0.5141, "step": 21783 }, { "epoch": 1.5737894413639895, "grad_norm": 7.544877887847443, "learning_rate": 5.719989632765186e-07, "loss": 0.6003, "step": 21784 }, { "epoch": 1.5738616865641988, "grad_norm": 6.47679691787767, "learning_rate": 5.718127716674335e-07, "loss": 0.6514, "step": 21785 }, { "epoch": 1.5739339317644085, "grad_norm": 8.3974018256765, "learning_rate": 5.716266064537346e-07, "loss": 0.6244, "step": 21786 }, { "epoch": 1.5740061769646179, "grad_norm": 7.928171999577382, "learning_rate": 5.714404676379701e-07, "loss": 0.5477, "step": 21787 }, { "epoch": 1.5740784221648274, "grad_norm": 6.8544375782525115, "learning_rate": 5.712543552226887e-07, "loss": 0.6002, "step": 21788 }, { "epoch": 1.574150667365037, "grad_norm": 8.026994936156253, "learning_rate": 5.710682692104369e-07, "loss": 0.6066, "step": 21789 }, { "epoch": 1.5742229125652465, "grad_norm": 8.810768769027453, "learning_rate": 5.708822096037628e-07, "loss": 0.6243, "step": 21790 }, { "epoch": 1.574295157765456, "grad_norm": 5.906207695057651, "learning_rate": 5.706961764052141e-07, "loss": 0.6155, "step": 21791 }, { "epoch": 1.5743674029656654, "grad_norm": 6.112130482743798, "learning_rate": 5.705101696173363e-07, "loss": 0.607, "step": 21792 }, { "epoch": 1.5744396481658751, "grad_norm": 6.65809770468758, "learning_rate": 5.703241892426758e-07, "loss": 0.6234, "step": 21793 }, { "epoch": 1.5745118933660844, "grad_norm": 8.778486692664131, "learning_rate": 5.70138235283779e-07, "loss": 0.6644, "step": 21794 }, { "epoch": 1.574584138566294, "grad_norm": 6.974148562293846, "learning_rate": 5.699523077431912e-07, "loss": 0.5791, "step": 21795 }, { "epoch": 1.5746563837665035, "grad_norm": 6.453854424952314, "learning_rate": 5.697664066234579e-07, "loss": 0.6157, "step": 21796 }, { "epoch": 1.574728628966713, "grad_norm": 6.582232644823196, "learning_rate": 5.695805319271236e-07, "loss": 0.542, "step": 21797 }, { "epoch": 1.5748008741669226, "grad_norm": 5.551087748839186, "learning_rate": 5.693946836567337e-07, "loss": 0.494, "step": 21798 }, { "epoch": 1.574873119367132, "grad_norm": 7.676220317833505, "learning_rate": 5.692088618148309e-07, "loss": 0.6866, "step": 21799 }, { "epoch": 1.5749453645673417, "grad_norm": 6.659935732236425, "learning_rate": 5.690230664039595e-07, "loss": 0.595, "step": 21800 }, { "epoch": 1.575017609767551, "grad_norm": 7.241913593795497, "learning_rate": 5.688372974266631e-07, "loss": 0.6022, "step": 21801 }, { "epoch": 1.5750898549677605, "grad_norm": 6.440924465097218, "learning_rate": 5.686515548854854e-07, "loss": 0.6325, "step": 21802 }, { "epoch": 1.57516210016797, "grad_norm": 6.194651160713547, "learning_rate": 5.68465838782967e-07, "loss": 0.6524, "step": 21803 }, { "epoch": 1.5752343453681796, "grad_norm": 7.527476271456057, "learning_rate": 5.682801491216519e-07, "loss": 0.5912, "step": 21804 }, { "epoch": 1.5753065905683892, "grad_norm": 6.771215653018129, "learning_rate": 5.680944859040826e-07, "loss": 0.6234, "step": 21805 }, { "epoch": 1.5753788357685985, "grad_norm": 8.59473760305293, "learning_rate": 5.679088491327991e-07, "loss": 0.6048, "step": 21806 }, { "epoch": 1.5754510809688083, "grad_norm": 7.410677459677823, "learning_rate": 5.677232388103432e-07, "loss": 0.6139, "step": 21807 }, { "epoch": 1.5755233261690176, "grad_norm": 6.039981194776351, "learning_rate": 5.675376549392559e-07, "loss": 0.5751, "step": 21808 }, { "epoch": 1.5755955713692271, "grad_norm": 7.3308587089372, "learning_rate": 5.673520975220781e-07, "loss": 0.5135, "step": 21809 }, { "epoch": 1.5756678165694367, "grad_norm": 7.38531445287681, "learning_rate": 5.671665665613491e-07, "loss": 0.5587, "step": 21810 }, { "epoch": 1.5757400617696462, "grad_norm": 7.686425855073157, "learning_rate": 5.669810620596089e-07, "loss": 0.6158, "step": 21811 }, { "epoch": 1.5758123069698557, "grad_norm": 7.750602452642391, "learning_rate": 5.667955840193973e-07, "loss": 0.6704, "step": 21812 }, { "epoch": 1.575884552170065, "grad_norm": 7.886896744524368, "learning_rate": 5.666101324432527e-07, "loss": 0.56, "step": 21813 }, { "epoch": 1.5759567973702748, "grad_norm": 6.2858866568871905, "learning_rate": 5.664247073337145e-07, "loss": 0.6446, "step": 21814 }, { "epoch": 1.5760290425704842, "grad_norm": 7.698374430493123, "learning_rate": 5.662393086933208e-07, "loss": 0.6589, "step": 21815 }, { "epoch": 1.5761012877706937, "grad_norm": 7.979573876039, "learning_rate": 5.660539365246098e-07, "loss": 0.6122, "step": 21816 }, { "epoch": 1.5761735329709032, "grad_norm": 5.734499397279796, "learning_rate": 5.658685908301184e-07, "loss": 0.5916, "step": 21817 }, { "epoch": 1.5762457781711128, "grad_norm": 7.172045470894078, "learning_rate": 5.656832716123842e-07, "loss": 0.5615, "step": 21818 }, { "epoch": 1.5763180233713223, "grad_norm": 5.88628877462957, "learning_rate": 5.654979788739442e-07, "loss": 0.6412, "step": 21819 }, { "epoch": 1.5763902685715316, "grad_norm": 6.686956936186014, "learning_rate": 5.653127126173355e-07, "loss": 0.5866, "step": 21820 }, { "epoch": 1.5764625137717414, "grad_norm": 7.601074955861121, "learning_rate": 5.651274728450928e-07, "loss": 0.5837, "step": 21821 }, { "epoch": 1.5765347589719507, "grad_norm": 8.317981260850457, "learning_rate": 5.649422595597526e-07, "loss": 0.6367, "step": 21822 }, { "epoch": 1.5766070041721603, "grad_norm": 7.847871829651569, "learning_rate": 5.647570727638504e-07, "loss": 0.7095, "step": 21823 }, { "epoch": 1.5766792493723698, "grad_norm": 8.750915043532368, "learning_rate": 5.645719124599213e-07, "loss": 0.6836, "step": 21824 }, { "epoch": 1.5767514945725793, "grad_norm": 6.427426057536381, "learning_rate": 5.643867786505e-07, "loss": 0.5862, "step": 21825 }, { "epoch": 1.576823739772789, "grad_norm": 6.765880694414063, "learning_rate": 5.642016713381207e-07, "loss": 0.6408, "step": 21826 }, { "epoch": 1.5768959849729982, "grad_norm": 7.09205882881667, "learning_rate": 5.640165905253184e-07, "loss": 0.6114, "step": 21827 }, { "epoch": 1.576968230173208, "grad_norm": 6.226084615444274, "learning_rate": 5.638315362146249e-07, "loss": 0.5711, "step": 21828 }, { "epoch": 1.5770404753734173, "grad_norm": 7.317866516213663, "learning_rate": 5.636465084085743e-07, "loss": 0.6094, "step": 21829 }, { "epoch": 1.5771127205736268, "grad_norm": 8.48562843238956, "learning_rate": 5.634615071097002e-07, "loss": 0.6806, "step": 21830 }, { "epoch": 1.5771849657738364, "grad_norm": 7.504489063861652, "learning_rate": 5.632765323205339e-07, "loss": 0.6008, "step": 21831 }, { "epoch": 1.577257210974046, "grad_norm": 7.169937836232575, "learning_rate": 5.630915840436074e-07, "loss": 0.5887, "step": 21832 }, { "epoch": 1.5773294561742555, "grad_norm": 7.27175533129, "learning_rate": 5.629066622814539e-07, "loss": 0.6304, "step": 21833 }, { "epoch": 1.5774017013744648, "grad_norm": 7.9154117861231, "learning_rate": 5.627217670366044e-07, "loss": 0.6121, "step": 21834 }, { "epoch": 1.5774739465746745, "grad_norm": 6.864675086126505, "learning_rate": 5.625368983115895e-07, "loss": 0.5743, "step": 21835 }, { "epoch": 1.5775461917748839, "grad_norm": 6.14456756833275, "learning_rate": 5.623520561089401e-07, "loss": 0.5558, "step": 21836 }, { "epoch": 1.5776184369750936, "grad_norm": 6.852431552103934, "learning_rate": 5.62167240431187e-07, "loss": 0.5935, "step": 21837 }, { "epoch": 1.577690682175303, "grad_norm": 7.2219516540447275, "learning_rate": 5.619824512808591e-07, "loss": 0.6107, "step": 21838 }, { "epoch": 1.5777629273755125, "grad_norm": 6.82584578754399, "learning_rate": 5.61797688660487e-07, "loss": 0.6225, "step": 21839 }, { "epoch": 1.577835172575722, "grad_norm": 8.057079770228674, "learning_rate": 5.616129525725994e-07, "loss": 0.5633, "step": 21840 }, { "epoch": 1.5779074177759314, "grad_norm": 7.119861556861003, "learning_rate": 5.614282430197254e-07, "loss": 0.6726, "step": 21841 }, { "epoch": 1.5779796629761411, "grad_norm": 6.116890657708324, "learning_rate": 5.612435600043936e-07, "loss": 0.5957, "step": 21842 }, { "epoch": 1.5780519081763504, "grad_norm": 7.001991261228084, "learning_rate": 5.61058903529132e-07, "loss": 0.6696, "step": 21843 }, { "epoch": 1.5781241533765602, "grad_norm": 7.391009524385067, "learning_rate": 5.608742735964687e-07, "loss": 0.5411, "step": 21844 }, { "epoch": 1.5781963985767695, "grad_norm": 6.6753878134320255, "learning_rate": 5.606896702089315e-07, "loss": 0.668, "step": 21845 }, { "epoch": 1.578268643776979, "grad_norm": 7.298419927707785, "learning_rate": 5.605050933690461e-07, "loss": 0.6027, "step": 21846 }, { "epoch": 1.5783408889771886, "grad_norm": 7.105164071000055, "learning_rate": 5.603205430793405e-07, "loss": 0.6238, "step": 21847 }, { "epoch": 1.578413134177398, "grad_norm": 9.037106631253636, "learning_rate": 5.601360193423408e-07, "loss": 0.612, "step": 21848 }, { "epoch": 1.5784853793776077, "grad_norm": 6.2446070482857445, "learning_rate": 5.599515221605725e-07, "loss": 0.6345, "step": 21849 }, { "epoch": 1.578557624577817, "grad_norm": 6.208888837637901, "learning_rate": 5.597670515365613e-07, "loss": 0.673, "step": 21850 }, { "epoch": 1.5786298697780268, "grad_norm": 7.164712092277984, "learning_rate": 5.595826074728328e-07, "loss": 0.6057, "step": 21851 }, { "epoch": 1.578702114978236, "grad_norm": 7.205253634095099, "learning_rate": 5.593981899719117e-07, "loss": 0.5206, "step": 21852 }, { "epoch": 1.5787743601784456, "grad_norm": 6.788497778404464, "learning_rate": 5.592137990363225e-07, "loss": 0.5769, "step": 21853 }, { "epoch": 1.5788466053786552, "grad_norm": 7.7022967096361254, "learning_rate": 5.590294346685896e-07, "loss": 0.6074, "step": 21854 }, { "epoch": 1.5789188505788647, "grad_norm": 7.7220780460550404, "learning_rate": 5.588450968712372e-07, "loss": 0.6607, "step": 21855 }, { "epoch": 1.5789910957790743, "grad_norm": 7.025158765634628, "learning_rate": 5.586607856467877e-07, "loss": 0.5623, "step": 21856 }, { "epoch": 1.5790633409792836, "grad_norm": 7.071082917279847, "learning_rate": 5.584765009977647e-07, "loss": 0.6084, "step": 21857 }, { "epoch": 1.5791355861794933, "grad_norm": 6.4367316815283075, "learning_rate": 5.582922429266907e-07, "loss": 0.6256, "step": 21858 }, { "epoch": 1.5792078313797027, "grad_norm": 6.576749608012619, "learning_rate": 5.581080114360892e-07, "loss": 0.5929, "step": 21859 }, { "epoch": 1.5792800765799122, "grad_norm": 7.044124227092321, "learning_rate": 5.579238065284798e-07, "loss": 0.5857, "step": 21860 }, { "epoch": 1.5793523217801217, "grad_norm": 7.5380863802994105, "learning_rate": 5.57739628206386e-07, "loss": 0.7635, "step": 21861 }, { "epoch": 1.5794245669803313, "grad_norm": 8.995050976933745, "learning_rate": 5.575554764723295e-07, "loss": 0.6071, "step": 21862 }, { "epoch": 1.5794968121805408, "grad_norm": 7.853193335761425, "learning_rate": 5.573713513288298e-07, "loss": 0.6305, "step": 21863 }, { "epoch": 1.5795690573807502, "grad_norm": 6.8095427927926995, "learning_rate": 5.571872527784078e-07, "loss": 0.5738, "step": 21864 }, { "epoch": 1.57964130258096, "grad_norm": 6.858192289747663, "learning_rate": 5.570031808235837e-07, "loss": 0.568, "step": 21865 }, { "epoch": 1.5797135477811692, "grad_norm": 7.150701112317944, "learning_rate": 5.568191354668784e-07, "loss": 0.6042, "step": 21866 }, { "epoch": 1.5797857929813788, "grad_norm": 6.768790475824705, "learning_rate": 5.566351167108094e-07, "loss": 0.5591, "step": 21867 }, { "epoch": 1.5798580381815883, "grad_norm": 6.497827900578555, "learning_rate": 5.564511245578966e-07, "loss": 0.6249, "step": 21868 }, { "epoch": 1.5799302833817979, "grad_norm": 7.113646446041393, "learning_rate": 5.562671590106591e-07, "loss": 0.5974, "step": 21869 }, { "epoch": 1.5800025285820074, "grad_norm": 7.648472163401405, "learning_rate": 5.560832200716149e-07, "loss": 0.5997, "step": 21870 }, { "epoch": 1.5800747737822167, "grad_norm": 6.948143569771864, "learning_rate": 5.55899307743282e-07, "loss": 0.5439, "step": 21871 }, { "epoch": 1.5801470189824265, "grad_norm": 6.89818312455692, "learning_rate": 5.557154220281782e-07, "loss": 0.5422, "step": 21872 }, { "epoch": 1.5802192641826358, "grad_norm": 5.299896548719043, "learning_rate": 5.555315629288213e-07, "loss": 0.6252, "step": 21873 }, { "epoch": 1.5802915093828453, "grad_norm": 7.357338398783148, "learning_rate": 5.55347730447727e-07, "loss": 0.629, "step": 21874 }, { "epoch": 1.580363754583055, "grad_norm": 8.553338042823087, "learning_rate": 5.551639245874122e-07, "loss": 0.634, "step": 21875 }, { "epoch": 1.5804359997832644, "grad_norm": 6.335610264842568, "learning_rate": 5.54980145350393e-07, "loss": 0.6211, "step": 21876 }, { "epoch": 1.580508244983474, "grad_norm": 6.649748730035401, "learning_rate": 5.547963927391861e-07, "loss": 0.6276, "step": 21877 }, { "epoch": 1.5805804901836833, "grad_norm": 6.877462050271865, "learning_rate": 5.546126667563059e-07, "loss": 0.6735, "step": 21878 }, { "epoch": 1.580652735383893, "grad_norm": 7.255276831426542, "learning_rate": 5.544289674042674e-07, "loss": 0.5881, "step": 21879 }, { "epoch": 1.5807249805841024, "grad_norm": 7.183910501869922, "learning_rate": 5.542452946855858e-07, "loss": 0.6253, "step": 21880 }, { "epoch": 1.580797225784312, "grad_norm": 6.575780310216607, "learning_rate": 5.540616486027756e-07, "loss": 0.5672, "step": 21881 }, { "epoch": 1.5808694709845215, "grad_norm": 6.7447924836107935, "learning_rate": 5.538780291583504e-07, "loss": 0.6047, "step": 21882 }, { "epoch": 1.580941716184731, "grad_norm": 8.264573605448911, "learning_rate": 5.536944363548238e-07, "loss": 0.6534, "step": 21883 }, { "epoch": 1.5810139613849405, "grad_norm": 6.97692063662933, "learning_rate": 5.535108701947098e-07, "loss": 0.6363, "step": 21884 }, { "epoch": 1.5810862065851499, "grad_norm": 7.622199122570292, "learning_rate": 5.5332733068052e-07, "loss": 0.6248, "step": 21885 }, { "epoch": 1.5811584517853596, "grad_norm": 8.195842191235311, "learning_rate": 5.531438178147677e-07, "loss": 0.6036, "step": 21886 }, { "epoch": 1.581230696985569, "grad_norm": 7.438507268320666, "learning_rate": 5.529603315999654e-07, "loss": 0.6766, "step": 21887 }, { "epoch": 1.5813029421857785, "grad_norm": 8.68776082257975, "learning_rate": 5.527768720386231e-07, "loss": 0.6948, "step": 21888 }, { "epoch": 1.581375187385988, "grad_norm": 6.387639698509047, "learning_rate": 5.525934391332541e-07, "loss": 0.5452, "step": 21889 }, { "epoch": 1.5814474325861976, "grad_norm": 9.454043408382368, "learning_rate": 5.524100328863691e-07, "loss": 0.5853, "step": 21890 }, { "epoch": 1.5815196777864071, "grad_norm": 6.158772372992363, "learning_rate": 5.522266533004789e-07, "loss": 0.5718, "step": 21891 }, { "epoch": 1.5815919229866164, "grad_norm": 6.391727417711445, "learning_rate": 5.520433003780929e-07, "loss": 0.5581, "step": 21892 }, { "epoch": 1.5816641681868262, "grad_norm": 7.949989885047859, "learning_rate": 5.518599741217217e-07, "loss": 0.6693, "step": 21893 }, { "epoch": 1.5817364133870355, "grad_norm": 6.457978499314427, "learning_rate": 5.516766745338747e-07, "loss": 0.6697, "step": 21894 }, { "epoch": 1.581808658587245, "grad_norm": 7.359833202959876, "learning_rate": 5.51493401617062e-07, "loss": 0.6059, "step": 21895 }, { "epoch": 1.5818809037874546, "grad_norm": 7.003785608362123, "learning_rate": 5.513101553737912e-07, "loss": 0.6138, "step": 21896 }, { "epoch": 1.5819531489876641, "grad_norm": 8.263431596970625, "learning_rate": 5.51126935806571e-07, "loss": 0.6263, "step": 21897 }, { "epoch": 1.5820253941878737, "grad_norm": 6.3052888982837505, "learning_rate": 5.509437429179102e-07, "loss": 0.6286, "step": 21898 }, { "epoch": 1.582097639388083, "grad_norm": 6.345326495536769, "learning_rate": 5.507605767103161e-07, "loss": 0.5636, "step": 21899 }, { "epoch": 1.5821698845882928, "grad_norm": 8.162975593019164, "learning_rate": 5.50577437186296e-07, "loss": 0.5687, "step": 21900 }, { "epoch": 1.582242129788502, "grad_norm": 6.767167759022955, "learning_rate": 5.503943243483572e-07, "loss": 0.6057, "step": 21901 }, { "epoch": 1.5823143749887116, "grad_norm": 6.348654368642261, "learning_rate": 5.502112381990072e-07, "loss": 0.6206, "step": 21902 }, { "epoch": 1.5823866201889212, "grad_norm": 6.725967008702052, "learning_rate": 5.500281787407507e-07, "loss": 0.578, "step": 21903 }, { "epoch": 1.5824588653891307, "grad_norm": 7.003713984916827, "learning_rate": 5.498451459760943e-07, "loss": 0.5733, "step": 21904 }, { "epoch": 1.5825311105893403, "grad_norm": 6.391474361917051, "learning_rate": 5.496621399075444e-07, "loss": 0.5666, "step": 21905 }, { "epoch": 1.5826033557895496, "grad_norm": 5.99107714614774, "learning_rate": 5.494791605376046e-07, "loss": 0.5316, "step": 21906 }, { "epoch": 1.5826756009897593, "grad_norm": 6.378200251328342, "learning_rate": 5.492962078687808e-07, "loss": 0.547, "step": 21907 }, { "epoch": 1.5827478461899687, "grad_norm": 7.116283773921369, "learning_rate": 5.491132819035774e-07, "loss": 0.681, "step": 21908 }, { "epoch": 1.5828200913901784, "grad_norm": 8.280978677011618, "learning_rate": 5.489303826444981e-07, "loss": 0.6379, "step": 21909 }, { "epoch": 1.5828923365903877, "grad_norm": 6.976333665672459, "learning_rate": 5.487475100940473e-07, "loss": 0.5181, "step": 21910 }, { "epoch": 1.5829645817905973, "grad_norm": 5.8394031371196435, "learning_rate": 5.485646642547277e-07, "loss": 0.6306, "step": 21911 }, { "epoch": 1.5830368269908068, "grad_norm": 7.481727721243551, "learning_rate": 5.483818451290435e-07, "loss": 0.6803, "step": 21912 }, { "epoch": 1.5831090721910162, "grad_norm": 8.097728797009806, "learning_rate": 5.481990527194958e-07, "loss": 0.6648, "step": 21913 }, { "epoch": 1.583181317391226, "grad_norm": 7.234083132695667, "learning_rate": 5.480162870285877e-07, "loss": 0.6452, "step": 21914 }, { "epoch": 1.5832535625914352, "grad_norm": 8.285792994007924, "learning_rate": 5.478335480588209e-07, "loss": 0.6405, "step": 21915 }, { "epoch": 1.583325807791645, "grad_norm": 7.3140976945518865, "learning_rate": 5.47650835812697e-07, "loss": 0.5968, "step": 21916 }, { "epoch": 1.5833980529918543, "grad_norm": 8.239680454020958, "learning_rate": 5.474681502927174e-07, "loss": 0.6657, "step": 21917 }, { "epoch": 1.5834702981920639, "grad_norm": 6.768324104358524, "learning_rate": 5.472854915013829e-07, "loss": 0.5676, "step": 21918 }, { "epoch": 1.5835425433922734, "grad_norm": 7.085072322629779, "learning_rate": 5.471028594411934e-07, "loss": 0.6395, "step": 21919 }, { "epoch": 1.5836147885924827, "grad_norm": 8.221882850356144, "learning_rate": 5.469202541146504e-07, "loss": 0.6226, "step": 21920 }, { "epoch": 1.5836870337926925, "grad_norm": 6.460517989611343, "learning_rate": 5.467376755242521e-07, "loss": 0.6716, "step": 21921 }, { "epoch": 1.5837592789929018, "grad_norm": 6.968035879452256, "learning_rate": 5.46555123672498e-07, "loss": 0.6457, "step": 21922 }, { "epoch": 1.5838315241931116, "grad_norm": 6.799987220752152, "learning_rate": 5.463725985618887e-07, "loss": 0.5878, "step": 21923 }, { "epoch": 1.583903769393321, "grad_norm": 6.413108169240626, "learning_rate": 5.461901001949204e-07, "loss": 0.6468, "step": 21924 }, { "epoch": 1.5839760145935304, "grad_norm": 7.577886141129327, "learning_rate": 5.460076285740931e-07, "loss": 0.6254, "step": 21925 }, { "epoch": 1.58404825979374, "grad_norm": 6.956686072378079, "learning_rate": 5.458251837019038e-07, "loss": 0.5697, "step": 21926 }, { "epoch": 1.5841205049939495, "grad_norm": 7.620274314448359, "learning_rate": 5.456427655808508e-07, "loss": 0.6531, "step": 21927 }, { "epoch": 1.584192750194159, "grad_norm": 7.433780028374119, "learning_rate": 5.454603742134306e-07, "loss": 0.6523, "step": 21928 }, { "epoch": 1.5842649953943684, "grad_norm": 7.418820300648905, "learning_rate": 5.452780096021404e-07, "loss": 0.5748, "step": 21929 }, { "epoch": 1.5843372405945781, "grad_norm": 7.8518063939109535, "learning_rate": 5.45095671749477e-07, "loss": 0.664, "step": 21930 }, { "epoch": 1.5844094857947875, "grad_norm": 7.138451443672631, "learning_rate": 5.449133606579354e-07, "loss": 0.6219, "step": 21931 }, { "epoch": 1.584481730994997, "grad_norm": 6.795455468683131, "learning_rate": 5.44731076330012e-07, "loss": 0.6066, "step": 21932 }, { "epoch": 1.5845539761952065, "grad_norm": 7.776888808658404, "learning_rate": 5.445488187682019e-07, "loss": 0.5972, "step": 21933 }, { "epoch": 1.584626221395416, "grad_norm": 7.320178573181239, "learning_rate": 5.443665879750007e-07, "loss": 0.6211, "step": 21934 }, { "epoch": 1.5846984665956256, "grad_norm": 7.453261400170235, "learning_rate": 5.441843839529018e-07, "loss": 0.5303, "step": 21935 }, { "epoch": 1.584770711795835, "grad_norm": 6.121485421917784, "learning_rate": 5.440022067044004e-07, "loss": 0.584, "step": 21936 }, { "epoch": 1.5848429569960447, "grad_norm": 6.51907367140479, "learning_rate": 5.438200562319898e-07, "loss": 0.5912, "step": 21937 }, { "epoch": 1.584915202196254, "grad_norm": 8.848723721476642, "learning_rate": 5.436379325381638e-07, "loss": 0.6216, "step": 21938 }, { "epoch": 1.5849874473964636, "grad_norm": 7.066667525573294, "learning_rate": 5.434558356254155e-07, "loss": 0.6748, "step": 21939 }, { "epoch": 1.5850596925966731, "grad_norm": 7.652804101374454, "learning_rate": 5.432737654962375e-07, "loss": 0.6122, "step": 21940 }, { "epoch": 1.5851319377968827, "grad_norm": 7.157392785391549, "learning_rate": 5.430917221531232e-07, "loss": 0.6082, "step": 21941 }, { "epoch": 1.5852041829970922, "grad_norm": 7.4457359553798526, "learning_rate": 5.42909705598563e-07, "loss": 0.6918, "step": 21942 }, { "epoch": 1.5852764281973015, "grad_norm": 7.172158229446203, "learning_rate": 5.427277158350494e-07, "loss": 0.6364, "step": 21943 }, { "epoch": 1.5853486733975113, "grad_norm": 8.146715459516203, "learning_rate": 5.425457528650737e-07, "loss": 0.5776, "step": 21944 }, { "epoch": 1.5854209185977206, "grad_norm": 9.530074050151299, "learning_rate": 5.423638166911269e-07, "loss": 0.6515, "step": 21945 }, { "epoch": 1.5854931637979301, "grad_norm": 7.5248844581427585, "learning_rate": 5.421819073156992e-07, "loss": 0.6535, "step": 21946 }, { "epoch": 1.5855654089981397, "grad_norm": 7.20423493346535, "learning_rate": 5.42000024741281e-07, "loss": 0.6234, "step": 21947 }, { "epoch": 1.5856376541983492, "grad_norm": 7.10306505479201, "learning_rate": 5.41818168970363e-07, "loss": 0.5848, "step": 21948 }, { "epoch": 1.5857098993985588, "grad_norm": 6.087352974935418, "learning_rate": 5.416363400054333e-07, "loss": 0.6353, "step": 21949 }, { "epoch": 1.585782144598768, "grad_norm": 5.877117019046533, "learning_rate": 5.414545378489813e-07, "loss": 0.624, "step": 21950 }, { "epoch": 1.5858543897989779, "grad_norm": 6.876414482952392, "learning_rate": 5.412727625034961e-07, "loss": 0.571, "step": 21951 }, { "epoch": 1.5859266349991872, "grad_norm": 8.2793491701138, "learning_rate": 5.410910139714664e-07, "loss": 0.6576, "step": 21952 }, { "epoch": 1.5859988801993967, "grad_norm": 7.285208874223474, "learning_rate": 5.409092922553794e-07, "loss": 0.6203, "step": 21953 }, { "epoch": 1.5860711253996063, "grad_norm": 7.679208393228488, "learning_rate": 5.407275973577228e-07, "loss": 0.6657, "step": 21954 }, { "epoch": 1.5861433705998158, "grad_norm": 8.292807751322265, "learning_rate": 5.405459292809842e-07, "loss": 0.6198, "step": 21955 }, { "epoch": 1.5862156158000253, "grad_norm": 7.880483020366032, "learning_rate": 5.403642880276502e-07, "loss": 0.5956, "step": 21956 }, { "epoch": 1.5862878610002347, "grad_norm": 7.463697184725758, "learning_rate": 5.401826736002077e-07, "loss": 0.6273, "step": 21957 }, { "epoch": 1.5863601062004444, "grad_norm": 6.346721986534878, "learning_rate": 5.400010860011429e-07, "loss": 0.6377, "step": 21958 }, { "epoch": 1.5864323514006538, "grad_norm": 8.207060068071726, "learning_rate": 5.398195252329416e-07, "loss": 0.6073, "step": 21959 }, { "epoch": 1.5865045966008633, "grad_norm": 8.265109934920897, "learning_rate": 5.396379912980887e-07, "loss": 0.6576, "step": 21960 }, { "epoch": 1.5865768418010728, "grad_norm": 7.077316002160254, "learning_rate": 5.394564841990696e-07, "loss": 0.6, "step": 21961 }, { "epoch": 1.5866490870012824, "grad_norm": 7.46319884537557, "learning_rate": 5.392750039383696e-07, "loss": 0.6175, "step": 21962 }, { "epoch": 1.586721332201492, "grad_norm": 8.405186241941266, "learning_rate": 5.390935505184716e-07, "loss": 0.7035, "step": 21963 }, { "epoch": 1.5867935774017012, "grad_norm": 6.784997423209458, "learning_rate": 5.389121239418605e-07, "loss": 0.6137, "step": 21964 }, { "epoch": 1.586865822601911, "grad_norm": 7.159758261750128, "learning_rate": 5.387307242110188e-07, "loss": 0.5658, "step": 21965 }, { "epoch": 1.5869380678021203, "grad_norm": 7.08717370737075, "learning_rate": 5.38549351328432e-07, "loss": 0.6685, "step": 21966 }, { "epoch": 1.5870103130023299, "grad_norm": 7.488207383405893, "learning_rate": 5.383680052965812e-07, "loss": 0.5885, "step": 21967 }, { "epoch": 1.5870825582025394, "grad_norm": 6.14203811964592, "learning_rate": 5.381866861179491e-07, "loss": 0.6138, "step": 21968 }, { "epoch": 1.587154803402749, "grad_norm": 7.086916686582296, "learning_rate": 5.380053937950188e-07, "loss": 0.5859, "step": 21969 }, { "epoch": 1.5872270486029585, "grad_norm": 8.197085295178145, "learning_rate": 5.378241283302707e-07, "loss": 0.6469, "step": 21970 }, { "epoch": 1.5872992938031678, "grad_norm": 7.44485161586686, "learning_rate": 5.376428897261867e-07, "loss": 0.6354, "step": 21971 }, { "epoch": 1.5873715390033776, "grad_norm": 6.4910471955662405, "learning_rate": 5.374616779852476e-07, "loss": 0.614, "step": 21972 }, { "epoch": 1.587443784203587, "grad_norm": 7.521460284222001, "learning_rate": 5.372804931099346e-07, "loss": 0.6406, "step": 21973 }, { "epoch": 1.5875160294037964, "grad_norm": 5.8921657338292945, "learning_rate": 5.370993351027276e-07, "loss": 0.5237, "step": 21974 }, { "epoch": 1.587588274604006, "grad_norm": 7.947393304301226, "learning_rate": 5.369182039661067e-07, "loss": 0.5997, "step": 21975 }, { "epoch": 1.5876605198042155, "grad_norm": 7.460779254293671, "learning_rate": 5.367370997025511e-07, "loss": 0.6574, "step": 21976 }, { "epoch": 1.587732765004425, "grad_norm": 7.138014568746871, "learning_rate": 5.365560223145413e-07, "loss": 0.5673, "step": 21977 }, { "epoch": 1.5878050102046344, "grad_norm": 7.949212031631068, "learning_rate": 5.363749718045542e-07, "loss": 0.607, "step": 21978 }, { "epoch": 1.5878772554048441, "grad_norm": 7.369430766800117, "learning_rate": 5.361939481750689e-07, "loss": 0.6096, "step": 21979 }, { "epoch": 1.5879495006050535, "grad_norm": 7.044425860172203, "learning_rate": 5.360129514285645e-07, "loss": 0.6244, "step": 21980 }, { "epoch": 1.588021745805263, "grad_norm": 6.7088456363020095, "learning_rate": 5.358319815675173e-07, "loss": 0.6192, "step": 21981 }, { "epoch": 1.5880939910054725, "grad_norm": 7.2904463555220085, "learning_rate": 5.35651038594405e-07, "loss": 0.7151, "step": 21982 }, { "epoch": 1.588166236205682, "grad_norm": 7.957684422212619, "learning_rate": 5.354701225117048e-07, "loss": 0.5259, "step": 21983 }, { "epoch": 1.5882384814058916, "grad_norm": 7.674477565020051, "learning_rate": 5.352892333218932e-07, "loss": 0.6299, "step": 21984 }, { "epoch": 1.588310726606101, "grad_norm": 8.47782608669522, "learning_rate": 5.351083710274466e-07, "loss": 0.6632, "step": 21985 }, { "epoch": 1.5883829718063107, "grad_norm": 7.764335909102272, "learning_rate": 5.349275356308406e-07, "loss": 0.7202, "step": 21986 }, { "epoch": 1.58845521700652, "grad_norm": 6.936190094873883, "learning_rate": 5.347467271345516e-07, "loss": 0.5549, "step": 21987 }, { "epoch": 1.5885274622067298, "grad_norm": 7.6936742287692255, "learning_rate": 5.345659455410535e-07, "loss": 0.5673, "step": 21988 }, { "epoch": 1.5885997074069391, "grad_norm": 7.234763610816495, "learning_rate": 5.343851908528213e-07, "loss": 0.6887, "step": 21989 }, { "epoch": 1.5886719526071487, "grad_norm": 8.628760361204485, "learning_rate": 5.342044630723297e-07, "loss": 0.5781, "step": 21990 }, { "epoch": 1.5887441978073582, "grad_norm": 7.878173430920775, "learning_rate": 5.340237622020536e-07, "loss": 0.6024, "step": 21991 }, { "epoch": 1.5888164430075675, "grad_norm": 7.828598160444489, "learning_rate": 5.338430882444648e-07, "loss": 0.6228, "step": 21992 }, { "epoch": 1.5888886882077773, "grad_norm": 8.378381302757385, "learning_rate": 5.336624412020366e-07, "loss": 0.5742, "step": 21993 }, { "epoch": 1.5889609334079866, "grad_norm": 8.200128265866343, "learning_rate": 5.334818210772447e-07, "loss": 0.5574, "step": 21994 }, { "epoch": 1.5890331786081964, "grad_norm": 8.737451328911519, "learning_rate": 5.333012278725585e-07, "loss": 0.7413, "step": 21995 }, { "epoch": 1.5891054238084057, "grad_norm": 6.877265972896707, "learning_rate": 5.331206615904519e-07, "loss": 0.6186, "step": 21996 }, { "epoch": 1.5891776690086152, "grad_norm": 7.573840505157116, "learning_rate": 5.32940122233396e-07, "loss": 0.6679, "step": 21997 }, { "epoch": 1.5892499142088248, "grad_norm": 7.61243641894117, "learning_rate": 5.327596098038634e-07, "loss": 0.6396, "step": 21998 }, { "epoch": 1.589322159409034, "grad_norm": 6.846530018711606, "learning_rate": 5.325791243043236e-07, "loss": 0.5744, "step": 21999 }, { "epoch": 1.5893944046092439, "grad_norm": 6.6182118313210765, "learning_rate": 5.323986657372476e-07, "loss": 0.5932, "step": 22000 }, { "epoch": 1.5894666498094532, "grad_norm": 6.929114970711899, "learning_rate": 5.322182341051066e-07, "loss": 0.6197, "step": 22001 }, { "epoch": 1.589538895009663, "grad_norm": 9.70782111348805, "learning_rate": 5.320378294103698e-07, "loss": 0.6062, "step": 22002 }, { "epoch": 1.5896111402098723, "grad_norm": 7.587351674070828, "learning_rate": 5.318574516555072e-07, "loss": 0.5741, "step": 22003 }, { "epoch": 1.5896833854100818, "grad_norm": 8.518695242825435, "learning_rate": 5.316771008429877e-07, "loss": 0.5998, "step": 22004 }, { "epoch": 1.5897556306102913, "grad_norm": 7.691972625477404, "learning_rate": 5.314967769752813e-07, "loss": 0.5861, "step": 22005 }, { "epoch": 1.589827875810501, "grad_norm": 6.6662339387961635, "learning_rate": 5.313164800548551e-07, "loss": 0.6859, "step": 22006 }, { "epoch": 1.5899001210107104, "grad_norm": 5.529003139550843, "learning_rate": 5.311362100841774e-07, "loss": 0.5918, "step": 22007 }, { "epoch": 1.5899723662109198, "grad_norm": 7.090983788334717, "learning_rate": 5.309559670657163e-07, "loss": 0.6276, "step": 22008 }, { "epoch": 1.5900446114111295, "grad_norm": 6.81203494103207, "learning_rate": 5.307757510019398e-07, "loss": 0.6679, "step": 22009 }, { "epoch": 1.5901168566113388, "grad_norm": 7.000966414088928, "learning_rate": 5.305955618953138e-07, "loss": 0.5204, "step": 22010 }, { "epoch": 1.5901891018115484, "grad_norm": 6.936769189231249, "learning_rate": 5.304153997483052e-07, "loss": 0.5479, "step": 22011 }, { "epoch": 1.590261347011758, "grad_norm": 7.905220537674301, "learning_rate": 5.302352645633804e-07, "loss": 0.624, "step": 22012 }, { "epoch": 1.5903335922119675, "grad_norm": 6.881516021658865, "learning_rate": 5.300551563430056e-07, "loss": 0.5825, "step": 22013 }, { "epoch": 1.590405837412177, "grad_norm": 6.4548893103058536, "learning_rate": 5.298750750896462e-07, "loss": 0.6546, "step": 22014 }, { "epoch": 1.5904780826123863, "grad_norm": 6.858439805358119, "learning_rate": 5.296950208057672e-07, "loss": 0.5685, "step": 22015 }, { "epoch": 1.590550327812596, "grad_norm": 6.36187260200567, "learning_rate": 5.295149934938343e-07, "loss": 0.6195, "step": 22016 }, { "epoch": 1.5906225730128054, "grad_norm": 7.101106971088645, "learning_rate": 5.293349931563102e-07, "loss": 0.5835, "step": 22017 }, { "epoch": 1.590694818213015, "grad_norm": 7.255050478548655, "learning_rate": 5.291550197956602e-07, "loss": 0.5973, "step": 22018 }, { "epoch": 1.5907670634132245, "grad_norm": 6.417843037893121, "learning_rate": 5.289750734143484e-07, "loss": 0.494, "step": 22019 }, { "epoch": 1.590839308613434, "grad_norm": 7.265577484303634, "learning_rate": 5.287951540148364e-07, "loss": 0.6191, "step": 22020 }, { "epoch": 1.5909115538136436, "grad_norm": 8.012448638894227, "learning_rate": 5.286152615995888e-07, "loss": 0.6004, "step": 22021 }, { "epoch": 1.590983799013853, "grad_norm": 6.7628255430827044, "learning_rate": 5.284353961710664e-07, "loss": 0.531, "step": 22022 }, { "epoch": 1.5910560442140627, "grad_norm": 7.9369999284988175, "learning_rate": 5.282555577317341e-07, "loss": 0.5594, "step": 22023 }, { "epoch": 1.591128289414272, "grad_norm": 8.831670160685828, "learning_rate": 5.280757462840516e-07, "loss": 0.668, "step": 22024 }, { "epoch": 1.5912005346144815, "grad_norm": 6.895860408795112, "learning_rate": 5.27895961830481e-07, "loss": 0.5508, "step": 22025 }, { "epoch": 1.591272779814691, "grad_norm": 7.637383832531918, "learning_rate": 5.277162043734838e-07, "loss": 0.6204, "step": 22026 }, { "epoch": 1.5913450250149006, "grad_norm": 6.918133070818086, "learning_rate": 5.275364739155206e-07, "loss": 0.5439, "step": 22027 }, { "epoch": 1.5914172702151101, "grad_norm": 7.242491681769337, "learning_rate": 5.27356770459051e-07, "loss": 0.632, "step": 22028 }, { "epoch": 1.5914895154153195, "grad_norm": 6.570272438752719, "learning_rate": 5.271770940065357e-07, "loss": 0.5659, "step": 22029 }, { "epoch": 1.5915617606155292, "grad_norm": 6.492022387152787, "learning_rate": 5.26997444560434e-07, "loss": 0.6009, "step": 22030 }, { "epoch": 1.5916340058157386, "grad_norm": 7.5265217101084865, "learning_rate": 5.268178221232053e-07, "loss": 0.5539, "step": 22031 }, { "epoch": 1.591706251015948, "grad_norm": 6.077725826384662, "learning_rate": 5.266382266973089e-07, "loss": 0.5455, "step": 22032 }, { "epoch": 1.5917784962161576, "grad_norm": 6.466649400878089, "learning_rate": 5.264586582852025e-07, "loss": 0.5864, "step": 22033 }, { "epoch": 1.5918507414163672, "grad_norm": 7.170948375936865, "learning_rate": 5.262791168893458e-07, "loss": 0.6339, "step": 22034 }, { "epoch": 1.5919229866165767, "grad_norm": 6.986148208037672, "learning_rate": 5.260996025121945e-07, "loss": 0.6804, "step": 22035 }, { "epoch": 1.591995231816786, "grad_norm": 6.404496413481917, "learning_rate": 5.259201151562071e-07, "loss": 0.6297, "step": 22036 }, { "epoch": 1.5920674770169958, "grad_norm": 7.317470849868271, "learning_rate": 5.257406548238414e-07, "loss": 0.5706, "step": 22037 }, { "epoch": 1.5921397222172051, "grad_norm": 8.030150341940066, "learning_rate": 5.255612215175523e-07, "loss": 0.5637, "step": 22038 }, { "epoch": 1.5922119674174147, "grad_norm": 7.752074179421399, "learning_rate": 5.253818152397971e-07, "loss": 0.6083, "step": 22039 }, { "epoch": 1.5922842126176242, "grad_norm": 7.024731136077714, "learning_rate": 5.252024359930316e-07, "loss": 0.6607, "step": 22040 }, { "epoch": 1.5923564578178337, "grad_norm": 7.718426284488441, "learning_rate": 5.250230837797115e-07, "loss": 0.5616, "step": 22041 }, { "epoch": 1.5924287030180433, "grad_norm": 6.478397859383359, "learning_rate": 5.248437586022919e-07, "loss": 0.5792, "step": 22042 }, { "epoch": 1.5925009482182526, "grad_norm": 7.605441665013405, "learning_rate": 5.246644604632278e-07, "loss": 0.6649, "step": 22043 }, { "epoch": 1.5925731934184624, "grad_norm": 7.27387725451294, "learning_rate": 5.244851893649741e-07, "loss": 0.5883, "step": 22044 }, { "epoch": 1.5926454386186717, "grad_norm": 7.460425937550599, "learning_rate": 5.243059453099833e-07, "loss": 0.6732, "step": 22045 }, { "epoch": 1.5927176838188812, "grad_norm": 7.383749217195923, "learning_rate": 5.241267283007104e-07, "loss": 0.5383, "step": 22046 }, { "epoch": 1.5927899290190908, "grad_norm": 7.656354381861794, "learning_rate": 5.239475383396084e-07, "loss": 0.6515, "step": 22047 }, { "epoch": 1.5928621742193003, "grad_norm": 6.831141174510577, "learning_rate": 5.237683754291309e-07, "loss": 0.5696, "step": 22048 }, { "epoch": 1.5929344194195099, "grad_norm": 5.949741313023158, "learning_rate": 5.23589239571729e-07, "loss": 0.5444, "step": 22049 }, { "epoch": 1.5930066646197192, "grad_norm": 6.691253749448003, "learning_rate": 5.234101307698555e-07, "loss": 0.6248, "step": 22050 }, { "epoch": 1.593078909819929, "grad_norm": 6.769114521669042, "learning_rate": 5.232310490259631e-07, "loss": 0.5707, "step": 22051 }, { "epoch": 1.5931511550201383, "grad_norm": 7.475940770175344, "learning_rate": 5.230519943425033e-07, "loss": 0.5743, "step": 22052 }, { "epoch": 1.5932234002203478, "grad_norm": 7.692080489984767, "learning_rate": 5.228729667219263e-07, "loss": 0.5713, "step": 22053 }, { "epoch": 1.5932956454205573, "grad_norm": 7.8925914164853435, "learning_rate": 5.226939661666833e-07, "loss": 0.5869, "step": 22054 }, { "epoch": 1.593367890620767, "grad_norm": 6.948444428385769, "learning_rate": 5.22514992679225e-07, "loss": 0.5853, "step": 22055 }, { "epoch": 1.5934401358209764, "grad_norm": 6.7280728265039045, "learning_rate": 5.223360462620006e-07, "loss": 0.5749, "step": 22056 }, { "epoch": 1.5935123810211858, "grad_norm": 8.767618805945386, "learning_rate": 5.2215712691746e-07, "loss": 0.6526, "step": 22057 }, { "epoch": 1.5935846262213955, "grad_norm": 7.108972559007846, "learning_rate": 5.219782346480526e-07, "loss": 0.5931, "step": 22058 }, { "epoch": 1.5936568714216048, "grad_norm": 7.3068275295194, "learning_rate": 5.217993694562274e-07, "loss": 0.5877, "step": 22059 }, { "epoch": 1.5937291166218146, "grad_norm": 6.558870892465654, "learning_rate": 5.216205313444331e-07, "loss": 0.5897, "step": 22060 }, { "epoch": 1.593801361822024, "grad_norm": 7.308458564523402, "learning_rate": 5.214417203151173e-07, "loss": 0.5461, "step": 22061 }, { "epoch": 1.5938736070222335, "grad_norm": 8.125292493984015, "learning_rate": 5.212629363707289e-07, "loss": 0.7094, "step": 22062 }, { "epoch": 1.593945852222443, "grad_norm": 6.124920591501351, "learning_rate": 5.210841795137137e-07, "loss": 0.6323, "step": 22063 }, { "epoch": 1.5940180974226523, "grad_norm": 9.81748379046659, "learning_rate": 5.209054497465196e-07, "loss": 0.6144, "step": 22064 }, { "epoch": 1.594090342622862, "grad_norm": 8.661633081254212, "learning_rate": 5.207267470715935e-07, "loss": 0.6577, "step": 22065 }, { "epoch": 1.5941625878230714, "grad_norm": 6.9835922860847, "learning_rate": 5.205480714913818e-07, "loss": 0.6048, "step": 22066 }, { "epoch": 1.5942348330232812, "grad_norm": 7.058100857860177, "learning_rate": 5.203694230083295e-07, "loss": 0.5948, "step": 22067 }, { "epoch": 1.5943070782234905, "grad_norm": 6.1717387123995175, "learning_rate": 5.201908016248828e-07, "loss": 0.611, "step": 22068 }, { "epoch": 1.5943793234237, "grad_norm": 7.922365914507459, "learning_rate": 5.200122073434866e-07, "loss": 0.6291, "step": 22069 }, { "epoch": 1.5944515686239096, "grad_norm": 7.368105235538027, "learning_rate": 5.19833640166586e-07, "loss": 0.6376, "step": 22070 }, { "epoch": 1.594523813824119, "grad_norm": 7.728258955334823, "learning_rate": 5.196551000966254e-07, "loss": 0.5813, "step": 22071 }, { "epoch": 1.5945960590243287, "grad_norm": 7.5430982154865625, "learning_rate": 5.194765871360488e-07, "loss": 0.6034, "step": 22072 }, { "epoch": 1.594668304224538, "grad_norm": 7.115408347361874, "learning_rate": 5.192981012873005e-07, "loss": 0.6573, "step": 22073 }, { "epoch": 1.5947405494247477, "grad_norm": 6.010143606593356, "learning_rate": 5.191196425528228e-07, "loss": 0.6254, "step": 22074 }, { "epoch": 1.594812794624957, "grad_norm": 7.4443253686342725, "learning_rate": 5.189412109350592e-07, "loss": 0.7364, "step": 22075 }, { "epoch": 1.5948850398251666, "grad_norm": 6.859798479150867, "learning_rate": 5.187628064364519e-07, "loss": 0.5542, "step": 22076 }, { "epoch": 1.5949572850253761, "grad_norm": 7.374973103102436, "learning_rate": 5.185844290594444e-07, "loss": 0.5314, "step": 22077 }, { "epoch": 1.5950295302255857, "grad_norm": 7.428229958158222, "learning_rate": 5.184060788064763e-07, "loss": 0.5926, "step": 22078 }, { "epoch": 1.5951017754257952, "grad_norm": 6.640801066140201, "learning_rate": 5.182277556799908e-07, "loss": 0.6009, "step": 22079 }, { "epoch": 1.5951740206260046, "grad_norm": 6.870820040561764, "learning_rate": 5.180494596824296e-07, "loss": 0.6036, "step": 22080 }, { "epoch": 1.5952462658262143, "grad_norm": 7.666951823805514, "learning_rate": 5.178711908162315e-07, "loss": 0.611, "step": 22081 }, { "epoch": 1.5953185110264236, "grad_norm": 6.2753960959359905, "learning_rate": 5.176929490838375e-07, "loss": 0.5993, "step": 22082 }, { "epoch": 1.5953907562266332, "grad_norm": 7.327475317682755, "learning_rate": 5.175147344876882e-07, "loss": 0.6087, "step": 22083 }, { "epoch": 1.5954630014268427, "grad_norm": 6.31180863324105, "learning_rate": 5.173365470302235e-07, "loss": 0.5727, "step": 22084 }, { "epoch": 1.5955352466270523, "grad_norm": 7.740185090841079, "learning_rate": 5.171583867138816e-07, "loss": 0.6843, "step": 22085 }, { "epoch": 1.5956074918272618, "grad_norm": 6.852612474778666, "learning_rate": 5.169802535411014e-07, "loss": 0.5789, "step": 22086 }, { "epoch": 1.5956797370274711, "grad_norm": 8.675435642879844, "learning_rate": 5.16802147514322e-07, "loss": 0.5536, "step": 22087 }, { "epoch": 1.5957519822276809, "grad_norm": 8.23681233940949, "learning_rate": 5.166240686359814e-07, "loss": 0.5764, "step": 22088 }, { "epoch": 1.5958242274278902, "grad_norm": 7.444565694526911, "learning_rate": 5.164460169085173e-07, "loss": 0.6106, "step": 22089 }, { "epoch": 1.5958964726280997, "grad_norm": 6.827274644918603, "learning_rate": 5.16267992334367e-07, "loss": 0.6017, "step": 22090 }, { "epoch": 1.5959687178283093, "grad_norm": 6.53980042225971, "learning_rate": 5.160899949159684e-07, "loss": 0.6527, "step": 22091 }, { "epoch": 1.5960409630285188, "grad_norm": 8.486223613064736, "learning_rate": 5.159120246557567e-07, "loss": 0.6075, "step": 22092 }, { "epoch": 1.5961132082287284, "grad_norm": 8.823389994697909, "learning_rate": 5.157340815561687e-07, "loss": 0.6063, "step": 22093 }, { "epoch": 1.5961854534289377, "grad_norm": 7.745595603387819, "learning_rate": 5.155561656196411e-07, "loss": 0.6354, "step": 22094 }, { "epoch": 1.5962576986291475, "grad_norm": 7.411166462587495, "learning_rate": 5.153782768486082e-07, "loss": 0.6094, "step": 22095 }, { "epoch": 1.5963299438293568, "grad_norm": 7.026914624804197, "learning_rate": 5.152004152455056e-07, "loss": 0.6337, "step": 22096 }, { "epoch": 1.5964021890295663, "grad_norm": 7.6162872564975075, "learning_rate": 5.150225808127685e-07, "loss": 0.6031, "step": 22097 }, { "epoch": 1.5964744342297759, "grad_norm": 8.235028177663636, "learning_rate": 5.148447735528309e-07, "loss": 0.6964, "step": 22098 }, { "epoch": 1.5965466794299854, "grad_norm": 8.005343559941403, "learning_rate": 5.14666993468127e-07, "loss": 0.549, "step": 22099 }, { "epoch": 1.596618924630195, "grad_norm": 8.141773457706933, "learning_rate": 5.144892405610901e-07, "loss": 0.5897, "step": 22100 }, { "epoch": 1.5966911698304043, "grad_norm": 6.723812023868917, "learning_rate": 5.143115148341549e-07, "loss": 0.6513, "step": 22101 }, { "epoch": 1.596763415030614, "grad_norm": 6.933096373005557, "learning_rate": 5.141338162897528e-07, "loss": 0.5301, "step": 22102 }, { "epoch": 1.5968356602308234, "grad_norm": 7.782909538063869, "learning_rate": 5.139561449303162e-07, "loss": 0.5287, "step": 22103 }, { "epoch": 1.596907905431033, "grad_norm": 9.204776220714846, "learning_rate": 5.137785007582785e-07, "loss": 0.6232, "step": 22104 }, { "epoch": 1.5969801506312424, "grad_norm": 6.721010759269822, "learning_rate": 5.136008837760711e-07, "loss": 0.5593, "step": 22105 }, { "epoch": 1.597052395831452, "grad_norm": 8.069418134735807, "learning_rate": 5.13423293986125e-07, "loss": 0.6024, "step": 22106 }, { "epoch": 1.5971246410316615, "grad_norm": 7.056035687120631, "learning_rate": 5.132457313908707e-07, "loss": 0.5856, "step": 22107 }, { "epoch": 1.5971968862318708, "grad_norm": 8.186976758238746, "learning_rate": 5.130681959927402e-07, "loss": 0.528, "step": 22108 }, { "epoch": 1.5972691314320806, "grad_norm": 6.341449188767557, "learning_rate": 5.128906877941644e-07, "loss": 0.6282, "step": 22109 }, { "epoch": 1.59734137663229, "grad_norm": 6.827477745262928, "learning_rate": 5.127132067975712e-07, "loss": 0.5943, "step": 22110 }, { "epoch": 1.5974136218324995, "grad_norm": 6.448350668882466, "learning_rate": 5.125357530053915e-07, "loss": 0.6415, "step": 22111 }, { "epoch": 1.597485867032709, "grad_norm": 7.533519355480181, "learning_rate": 5.123583264200546e-07, "loss": 0.5622, "step": 22112 }, { "epoch": 1.5975581122329185, "grad_norm": 8.22326257785293, "learning_rate": 5.121809270439881e-07, "loss": 0.6395, "step": 22113 }, { "epoch": 1.597630357433128, "grad_norm": 8.282691686123709, "learning_rate": 5.120035548796215e-07, "loss": 0.6443, "step": 22114 }, { "epoch": 1.5977026026333374, "grad_norm": 6.81642527755224, "learning_rate": 5.118262099293825e-07, "loss": 0.5549, "step": 22115 }, { "epoch": 1.5977748478335472, "grad_norm": 6.98539190548692, "learning_rate": 5.116488921956991e-07, "loss": 0.6232, "step": 22116 }, { "epoch": 1.5978470930337565, "grad_norm": 6.645675641364436, "learning_rate": 5.114716016809987e-07, "loss": 0.6518, "step": 22117 }, { "epoch": 1.597919338233966, "grad_norm": 6.819887128326629, "learning_rate": 5.112943383877078e-07, "loss": 0.6057, "step": 22118 }, { "epoch": 1.5979915834341756, "grad_norm": 7.608497396693913, "learning_rate": 5.111171023182541e-07, "loss": 0.5641, "step": 22119 }, { "epoch": 1.5980638286343851, "grad_norm": 7.9674892250948135, "learning_rate": 5.109398934750621e-07, "loss": 0.6772, "step": 22120 }, { "epoch": 1.5981360738345947, "grad_norm": 7.687259577271319, "learning_rate": 5.107627118605588e-07, "loss": 0.6762, "step": 22121 }, { "epoch": 1.598208319034804, "grad_norm": 7.621003150890236, "learning_rate": 5.105855574771693e-07, "loss": 0.5951, "step": 22122 }, { "epoch": 1.5982805642350137, "grad_norm": 6.70848996313248, "learning_rate": 5.104084303273194e-07, "loss": 0.6397, "step": 22123 }, { "epoch": 1.598352809435223, "grad_norm": 7.171078174348484, "learning_rate": 5.102313304134327e-07, "loss": 0.5116, "step": 22124 }, { "epoch": 1.5984250546354326, "grad_norm": 8.162882129084084, "learning_rate": 5.100542577379341e-07, "loss": 0.6478, "step": 22125 }, { "epoch": 1.5984972998356421, "grad_norm": 7.5287362489886664, "learning_rate": 5.098772123032478e-07, "loss": 0.6013, "step": 22126 }, { "epoch": 1.5985695450358517, "grad_norm": 6.685830166270082, "learning_rate": 5.097001941117972e-07, "loss": 0.6194, "step": 22127 }, { "epoch": 1.5986417902360612, "grad_norm": 7.0034860377463595, "learning_rate": 5.095232031660053e-07, "loss": 0.5356, "step": 22128 }, { "epoch": 1.5987140354362706, "grad_norm": 6.317291556309703, "learning_rate": 5.093462394682955e-07, "loss": 0.5807, "step": 22129 }, { "epoch": 1.5987862806364803, "grad_norm": 6.280643870397191, "learning_rate": 5.091693030210907e-07, "loss": 0.5589, "step": 22130 }, { "epoch": 1.5988585258366896, "grad_norm": 7.6309171136956895, "learning_rate": 5.089923938268116e-07, "loss": 0.5737, "step": 22131 }, { "epoch": 1.5989307710368994, "grad_norm": 7.352984137914475, "learning_rate": 5.088155118878807e-07, "loss": 0.567, "step": 22132 }, { "epoch": 1.5990030162371087, "grad_norm": 7.140267594823292, "learning_rate": 5.086386572067195e-07, "loss": 0.5939, "step": 22133 }, { "epoch": 1.5990752614373183, "grad_norm": 7.601363521531819, "learning_rate": 5.084618297857494e-07, "loss": 0.655, "step": 22134 }, { "epoch": 1.5991475066375278, "grad_norm": 6.382107531161367, "learning_rate": 5.082850296273892e-07, "loss": 0.6002, "step": 22135 }, { "epoch": 1.5992197518377371, "grad_norm": 7.017369654484206, "learning_rate": 5.081082567340611e-07, "loss": 0.6453, "step": 22136 }, { "epoch": 1.5992919970379469, "grad_norm": 6.740056095512433, "learning_rate": 5.079315111081854e-07, "loss": 0.5072, "step": 22137 }, { "epoch": 1.5993642422381562, "grad_norm": 7.351730360978775, "learning_rate": 5.077547927521795e-07, "loss": 0.6505, "step": 22138 }, { "epoch": 1.599436487438366, "grad_norm": 7.955296068322534, "learning_rate": 5.075781016684639e-07, "loss": 0.5813, "step": 22139 }, { "epoch": 1.5995087326385753, "grad_norm": 8.030268152683565, "learning_rate": 5.074014378594569e-07, "loss": 0.5765, "step": 22140 }, { "epoch": 1.5995809778387848, "grad_norm": 7.56612366171769, "learning_rate": 5.072248013275777e-07, "loss": 0.6068, "step": 22141 }, { "epoch": 1.5996532230389944, "grad_norm": 6.946768952322592, "learning_rate": 5.070481920752432e-07, "loss": 0.6477, "step": 22142 }, { "epoch": 1.5997254682392037, "grad_norm": 5.916608174352799, "learning_rate": 5.068716101048713e-07, "loss": 0.5688, "step": 22143 }, { "epoch": 1.5997977134394135, "grad_norm": 8.278003675321292, "learning_rate": 5.066950554188796e-07, "loss": 0.6258, "step": 22144 }, { "epoch": 1.5998699586396228, "grad_norm": 6.509954898988567, "learning_rate": 5.06518528019685e-07, "loss": 0.5696, "step": 22145 }, { "epoch": 1.5999422038398325, "grad_norm": 7.894315736481891, "learning_rate": 5.063420279097037e-07, "loss": 0.6334, "step": 22146 }, { "epoch": 1.6000144490400419, "grad_norm": 8.292234110624841, "learning_rate": 5.061655550913522e-07, "loss": 0.637, "step": 22147 }, { "epoch": 1.6000866942402514, "grad_norm": 7.182578616165129, "learning_rate": 5.059891095670466e-07, "loss": 0.6332, "step": 22148 }, { "epoch": 1.600158939440461, "grad_norm": 7.026154566471147, "learning_rate": 5.058126913392014e-07, "loss": 0.5981, "step": 22149 }, { "epoch": 1.6002311846406705, "grad_norm": 7.270595892788325, "learning_rate": 5.056363004102322e-07, "loss": 0.6394, "step": 22150 }, { "epoch": 1.60030342984088, "grad_norm": 6.62863980478809, "learning_rate": 5.054599367825538e-07, "loss": 0.5765, "step": 22151 }, { "epoch": 1.6003756750410894, "grad_norm": 6.320228906180034, "learning_rate": 5.052836004585798e-07, "loss": 0.6087, "step": 22152 }, { "epoch": 1.6004479202412991, "grad_norm": 8.15544504453998, "learning_rate": 5.051072914407246e-07, "loss": 0.6071, "step": 22153 }, { "epoch": 1.6005201654415084, "grad_norm": 6.863073372990185, "learning_rate": 5.049310097314014e-07, "loss": 0.6063, "step": 22154 }, { "epoch": 1.600592410641718, "grad_norm": 6.756357942719323, "learning_rate": 5.047547553330237e-07, "loss": 0.6017, "step": 22155 }, { "epoch": 1.6006646558419275, "grad_norm": 6.376004607386821, "learning_rate": 5.045785282480042e-07, "loss": 0.5535, "step": 22156 }, { "epoch": 1.600736901042137, "grad_norm": 7.024899475898881, "learning_rate": 5.044023284787553e-07, "loss": 0.5976, "step": 22157 }, { "epoch": 1.6008091462423466, "grad_norm": 6.824224898931116, "learning_rate": 5.04226156027689e-07, "loss": 0.6205, "step": 22158 }, { "epoch": 1.600881391442556, "grad_norm": 7.4673672597177045, "learning_rate": 5.040500108972176e-07, "loss": 0.5855, "step": 22159 }, { "epoch": 1.6009536366427657, "grad_norm": 9.706814321016722, "learning_rate": 5.038738930897513e-07, "loss": 0.6076, "step": 22160 }, { "epoch": 1.601025881842975, "grad_norm": 7.0086874867456626, "learning_rate": 5.036978026077014e-07, "loss": 0.5868, "step": 22161 }, { "epoch": 1.6010981270431845, "grad_norm": 7.388057206209665, "learning_rate": 5.035217394534794e-07, "loss": 0.6271, "step": 22162 }, { "epoch": 1.601170372243394, "grad_norm": 7.460727868455493, "learning_rate": 5.033457036294931e-07, "loss": 0.666, "step": 22163 }, { "epoch": 1.6012426174436036, "grad_norm": 8.024206731930441, "learning_rate": 5.031696951381545e-07, "loss": 0.5908, "step": 22164 }, { "epoch": 1.6013148626438132, "grad_norm": 6.4651198959149, "learning_rate": 5.029937139818725e-07, "loss": 0.6107, "step": 22165 }, { "epoch": 1.6013871078440225, "grad_norm": 7.448759387875073, "learning_rate": 5.028177601630563e-07, "loss": 0.7167, "step": 22166 }, { "epoch": 1.6014593530442323, "grad_norm": 6.545909728493358, "learning_rate": 5.026418336841138e-07, "loss": 0.6164, "step": 22167 }, { "epoch": 1.6015315982444416, "grad_norm": 7.286440329612392, "learning_rate": 5.02465934547454e-07, "loss": 0.6001, "step": 22168 }, { "epoch": 1.6016038434446511, "grad_norm": 7.070923769328881, "learning_rate": 5.02290062755485e-07, "loss": 0.7039, "step": 22169 }, { "epoch": 1.6016760886448607, "grad_norm": 7.823537613106453, "learning_rate": 5.021142183106132e-07, "loss": 0.5341, "step": 22170 }, { "epoch": 1.6017483338450702, "grad_norm": 6.854572813606349, "learning_rate": 5.019384012152464e-07, "loss": 0.6711, "step": 22171 }, { "epoch": 1.6018205790452797, "grad_norm": 8.512819832550484, "learning_rate": 5.017626114717916e-07, "loss": 0.6439, "step": 22172 }, { "epoch": 1.601892824245489, "grad_norm": 7.285868870139724, "learning_rate": 5.015868490826553e-07, "loss": 0.6354, "step": 22173 }, { "epoch": 1.6019650694456988, "grad_norm": 6.955738185208805, "learning_rate": 5.014111140502431e-07, "loss": 0.575, "step": 22174 }, { "epoch": 1.6020373146459082, "grad_norm": 6.428879382311095, "learning_rate": 5.012354063769612e-07, "loss": 0.5987, "step": 22175 }, { "epoch": 1.6021095598461177, "grad_norm": 8.437166334548344, "learning_rate": 5.01059726065215e-07, "loss": 0.6034, "step": 22176 }, { "epoch": 1.6021818050463272, "grad_norm": 6.907293318570562, "learning_rate": 5.008840731174086e-07, "loss": 0.6275, "step": 22177 }, { "epoch": 1.6022540502465368, "grad_norm": 7.183071463901607, "learning_rate": 5.007084475359469e-07, "loss": 0.5973, "step": 22178 }, { "epoch": 1.6023262954467463, "grad_norm": 7.048848419287078, "learning_rate": 5.005328493232345e-07, "loss": 0.5453, "step": 22179 }, { "epoch": 1.6023985406469556, "grad_norm": 8.291823751472789, "learning_rate": 5.003572784816754e-07, "loss": 0.6362, "step": 22180 }, { "epoch": 1.6024707858471654, "grad_norm": 7.985302776025758, "learning_rate": 5.001817350136718e-07, "loss": 0.6401, "step": 22181 }, { "epoch": 1.6025430310473747, "grad_norm": 7.15511689523126, "learning_rate": 5.000062189216276e-07, "loss": 0.6716, "step": 22182 }, { "epoch": 1.6026152762475843, "grad_norm": 8.264315120985241, "learning_rate": 4.998307302079453e-07, "loss": 0.5952, "step": 22183 }, { "epoch": 1.6026875214477938, "grad_norm": 8.409750284123103, "learning_rate": 4.996552688750273e-07, "loss": 0.5905, "step": 22184 }, { "epoch": 1.6027597666480033, "grad_norm": 7.058755878049505, "learning_rate": 4.994798349252756e-07, "loss": 0.6312, "step": 22185 }, { "epoch": 1.602832011848213, "grad_norm": 8.489829317675458, "learning_rate": 4.993044283610915e-07, "loss": 0.6501, "step": 22186 }, { "epoch": 1.6029042570484222, "grad_norm": 6.548303853343467, "learning_rate": 4.991290491848768e-07, "loss": 0.6155, "step": 22187 }, { "epoch": 1.602976502248632, "grad_norm": 7.177320087958982, "learning_rate": 4.989536973990317e-07, "loss": 0.6393, "step": 22188 }, { "epoch": 1.6030487474488413, "grad_norm": 6.7525505615962675, "learning_rate": 4.987783730059564e-07, "loss": 0.5934, "step": 22189 }, { "epoch": 1.6031209926490508, "grad_norm": 7.159798487805808, "learning_rate": 4.986030760080513e-07, "loss": 0.6021, "step": 22190 }, { "epoch": 1.6031932378492604, "grad_norm": 6.9050189686044385, "learning_rate": 4.984278064077164e-07, "loss": 0.6011, "step": 22191 }, { "epoch": 1.60326548304947, "grad_norm": 6.962782147647283, "learning_rate": 4.982525642073504e-07, "loss": 0.5938, "step": 22192 }, { "epoch": 1.6033377282496795, "grad_norm": 6.1053840890043585, "learning_rate": 4.980773494093527e-07, "loss": 0.579, "step": 22193 }, { "epoch": 1.6034099734498888, "grad_norm": 6.625517626999689, "learning_rate": 4.979021620161223e-07, "loss": 0.6029, "step": 22194 }, { "epoch": 1.6034822186500985, "grad_norm": 8.746975185178972, "learning_rate": 4.977270020300561e-07, "loss": 0.672, "step": 22195 }, { "epoch": 1.6035544638503079, "grad_norm": 7.031603723741432, "learning_rate": 4.975518694535525e-07, "loss": 0.6805, "step": 22196 }, { "epoch": 1.6036267090505174, "grad_norm": 6.775505199106927, "learning_rate": 4.973767642890093e-07, "loss": 0.6283, "step": 22197 }, { "epoch": 1.603698954250727, "grad_norm": 6.8595468417042795, "learning_rate": 4.972016865388238e-07, "loss": 0.5955, "step": 22198 }, { "epoch": 1.6037711994509365, "grad_norm": 6.707098363452801, "learning_rate": 4.970266362053913e-07, "loss": 0.5776, "step": 22199 }, { "epoch": 1.603843444651146, "grad_norm": 7.5890955848278985, "learning_rate": 4.968516132911091e-07, "loss": 0.5956, "step": 22200 }, { "epoch": 1.6039156898513554, "grad_norm": 6.685033040161199, "learning_rate": 4.966766177983728e-07, "loss": 0.6324, "step": 22201 }, { "epoch": 1.6039879350515651, "grad_norm": 7.986334095666025, "learning_rate": 4.965016497295783e-07, "loss": 0.5578, "step": 22202 }, { "epoch": 1.6040601802517744, "grad_norm": 8.528600260294906, "learning_rate": 4.963267090871208e-07, "loss": 0.5346, "step": 22203 }, { "epoch": 1.604132425451984, "grad_norm": 6.067908008438901, "learning_rate": 4.961517958733944e-07, "loss": 0.5766, "step": 22204 }, { "epoch": 1.6042046706521935, "grad_norm": 6.6635770632104006, "learning_rate": 4.959769100907949e-07, "loss": 0.6477, "step": 22205 }, { "epoch": 1.604276915852403, "grad_norm": 6.597309616134639, "learning_rate": 4.95802051741715e-07, "loss": 0.6721, "step": 22206 }, { "epoch": 1.6043491610526126, "grad_norm": 6.755850058265475, "learning_rate": 4.956272208285487e-07, "loss": 0.58, "step": 22207 }, { "epoch": 1.604421406252822, "grad_norm": 6.346890578821353, "learning_rate": 4.954524173536901e-07, "loss": 0.5854, "step": 22208 }, { "epoch": 1.6044936514530317, "grad_norm": 6.928696829288843, "learning_rate": 4.952776413195309e-07, "loss": 0.5987, "step": 22209 }, { "epoch": 1.604565896653241, "grad_norm": 6.832961684874494, "learning_rate": 4.95102892728464e-07, "loss": 0.5523, "step": 22210 }, { "epoch": 1.6046381418534508, "grad_norm": 8.291553694627952, "learning_rate": 4.949281715828822e-07, "loss": 0.6116, "step": 22211 }, { "epoch": 1.60471038705366, "grad_norm": 7.1317994985076565, "learning_rate": 4.947534778851764e-07, "loss": 0.5967, "step": 22212 }, { "epoch": 1.6047826322538696, "grad_norm": 7.7370682159883675, "learning_rate": 4.945788116377389e-07, "loss": 0.6692, "step": 22213 }, { "epoch": 1.6048548774540792, "grad_norm": 7.9719303740788, "learning_rate": 4.944041728429602e-07, "loss": 0.6222, "step": 22214 }, { "epoch": 1.6049271226542885, "grad_norm": 7.151823182175407, "learning_rate": 4.942295615032311e-07, "loss": 0.5835, "step": 22215 }, { "epoch": 1.6049993678544983, "grad_norm": 6.3744431794776135, "learning_rate": 4.940549776209427e-07, "loss": 0.5531, "step": 22216 }, { "epoch": 1.6050716130547076, "grad_norm": 7.279421073195908, "learning_rate": 4.938804211984835e-07, "loss": 0.6638, "step": 22217 }, { "epoch": 1.6051438582549173, "grad_norm": 6.686637466280698, "learning_rate": 4.937058922382435e-07, "loss": 0.5291, "step": 22218 }, { "epoch": 1.6052161034551267, "grad_norm": 8.062741298169682, "learning_rate": 4.935313907426129e-07, "loss": 0.63, "step": 22219 }, { "epoch": 1.6052883486553362, "grad_norm": 6.773037063747291, "learning_rate": 4.933569167139782e-07, "loss": 0.6297, "step": 22220 }, { "epoch": 1.6053605938555457, "grad_norm": 8.113663496803122, "learning_rate": 4.9318247015473e-07, "loss": 0.5834, "step": 22221 }, { "epoch": 1.605432839055755, "grad_norm": 6.409163542876398, "learning_rate": 4.930080510672555e-07, "loss": 0.6091, "step": 22222 }, { "epoch": 1.6055050842559648, "grad_norm": 8.516272731959516, "learning_rate": 4.928336594539432e-07, "loss": 0.6312, "step": 22223 }, { "epoch": 1.6055773294561742, "grad_norm": 7.213400704192044, "learning_rate": 4.926592953171788e-07, "loss": 0.6319, "step": 22224 }, { "epoch": 1.605649574656384, "grad_norm": 7.989941472494446, "learning_rate": 4.924849586593503e-07, "loss": 0.6166, "step": 22225 }, { "epoch": 1.6057218198565932, "grad_norm": 9.63273606962278, "learning_rate": 4.923106494828445e-07, "loss": 0.623, "step": 22226 }, { "epoch": 1.6057940650568028, "grad_norm": 7.433187051653755, "learning_rate": 4.921363677900462e-07, "loss": 0.6569, "step": 22227 }, { "epoch": 1.6058663102570123, "grad_norm": 5.8184088749363, "learning_rate": 4.919621135833422e-07, "loss": 0.5705, "step": 22228 }, { "epoch": 1.6059385554572219, "grad_norm": 7.641638440905421, "learning_rate": 4.917878868651177e-07, "loss": 0.5241, "step": 22229 }, { "epoch": 1.6060108006574314, "grad_norm": 6.73388698429342, "learning_rate": 4.916136876377578e-07, "loss": 0.6261, "step": 22230 }, { "epoch": 1.6060830458576407, "grad_norm": 6.883513294789298, "learning_rate": 4.914395159036472e-07, "loss": 0.6105, "step": 22231 }, { "epoch": 1.6061552910578505, "grad_norm": 6.538063984863257, "learning_rate": 4.9126537166517e-07, "loss": 0.5431, "step": 22232 }, { "epoch": 1.6062275362580598, "grad_norm": 7.254863554646166, "learning_rate": 4.910912549247107e-07, "loss": 0.6121, "step": 22233 }, { "epoch": 1.6062997814582693, "grad_norm": 6.967093916694605, "learning_rate": 4.909171656846518e-07, "loss": 0.5902, "step": 22234 }, { "epoch": 1.606372026658479, "grad_norm": 6.4519763136616355, "learning_rate": 4.90743103947377e-07, "loss": 0.642, "step": 22235 }, { "epoch": 1.6064442718586884, "grad_norm": 8.091713196148914, "learning_rate": 4.905690697152688e-07, "loss": 0.6108, "step": 22236 }, { "epoch": 1.606516517058898, "grad_norm": 6.770867199935334, "learning_rate": 4.903950629907106e-07, "loss": 0.65, "step": 22237 }, { "epoch": 1.6065887622591073, "grad_norm": 6.465552677802831, "learning_rate": 4.902210837760829e-07, "loss": 0.5688, "step": 22238 }, { "epoch": 1.606661007459317, "grad_norm": 7.419601831163706, "learning_rate": 4.900471320737681e-07, "loss": 0.6385, "step": 22239 }, { "epoch": 1.6067332526595264, "grad_norm": 8.207675449193692, "learning_rate": 4.898732078861471e-07, "loss": 0.6256, "step": 22240 }, { "epoch": 1.606805497859736, "grad_norm": 7.06454951162819, "learning_rate": 4.896993112156021e-07, "loss": 0.5364, "step": 22241 }, { "epoch": 1.6068777430599455, "grad_norm": 8.04783538716749, "learning_rate": 4.895254420645118e-07, "loss": 0.6065, "step": 22242 }, { "epoch": 1.606949988260155, "grad_norm": 7.280435803123219, "learning_rate": 4.893516004352575e-07, "loss": 0.6246, "step": 22243 }, { "epoch": 1.6070222334603645, "grad_norm": 6.980645807771276, "learning_rate": 4.891777863302191e-07, "loss": 0.6176, "step": 22244 }, { "epoch": 1.6070944786605739, "grad_norm": 6.866984463099875, "learning_rate": 4.890039997517751e-07, "loss": 0.5802, "step": 22245 }, { "epoch": 1.6071667238607836, "grad_norm": 8.087022496650304, "learning_rate": 4.888302407023047e-07, "loss": 0.6389, "step": 22246 }, { "epoch": 1.607238969060993, "grad_norm": 7.329223290232461, "learning_rate": 4.886565091841867e-07, "loss": 0.6597, "step": 22247 }, { "epoch": 1.6073112142612025, "grad_norm": 7.4487547787469985, "learning_rate": 4.884828051997995e-07, "loss": 0.6055, "step": 22248 }, { "epoch": 1.607383459461412, "grad_norm": 8.385731667867077, "learning_rate": 4.883091287515207e-07, "loss": 0.6476, "step": 22249 }, { "epoch": 1.6074557046616216, "grad_norm": 7.220690445789517, "learning_rate": 4.881354798417282e-07, "loss": 0.6502, "step": 22250 }, { "epoch": 1.6075279498618311, "grad_norm": 8.959095294100266, "learning_rate": 4.879618584727993e-07, "loss": 0.6032, "step": 22251 }, { "epoch": 1.6076001950620404, "grad_norm": 9.009892007771192, "learning_rate": 4.877882646471097e-07, "loss": 0.6277, "step": 22252 }, { "epoch": 1.6076724402622502, "grad_norm": 7.509529163621202, "learning_rate": 4.876146983670363e-07, "loss": 0.583, "step": 22253 }, { "epoch": 1.6077446854624595, "grad_norm": 6.971015048884027, "learning_rate": 4.874411596349554e-07, "loss": 0.5817, "step": 22254 }, { "epoch": 1.607816930662669, "grad_norm": 6.347842244871204, "learning_rate": 4.872676484532429e-07, "loss": 0.641, "step": 22255 }, { "epoch": 1.6078891758628786, "grad_norm": 7.130114677116904, "learning_rate": 4.870941648242727e-07, "loss": 0.6511, "step": 22256 }, { "epoch": 1.6079614210630881, "grad_norm": 5.580575783942215, "learning_rate": 4.869207087504208e-07, "loss": 0.6026, "step": 22257 }, { "epoch": 1.6080336662632977, "grad_norm": 7.547669047092194, "learning_rate": 4.867472802340608e-07, "loss": 0.657, "step": 22258 }, { "epoch": 1.608105911463507, "grad_norm": 6.6226284172766565, "learning_rate": 4.865738792775679e-07, "loss": 0.5911, "step": 22259 }, { "epoch": 1.6081781566637168, "grad_norm": 7.245356947462463, "learning_rate": 4.864005058833151e-07, "loss": 0.5984, "step": 22260 }, { "epoch": 1.608250401863926, "grad_norm": 7.046956740622098, "learning_rate": 4.862271600536758e-07, "loss": 0.587, "step": 22261 }, { "epoch": 1.6083226470641356, "grad_norm": 7.376608204472766, "learning_rate": 4.860538417910238e-07, "loss": 0.5806, "step": 22262 }, { "epoch": 1.6083948922643452, "grad_norm": 6.355005060969753, "learning_rate": 4.858805510977305e-07, "loss": 0.5577, "step": 22263 }, { "epoch": 1.6084671374645547, "grad_norm": 7.798563497263429, "learning_rate": 4.857072879761681e-07, "loss": 0.5753, "step": 22264 }, { "epoch": 1.6085393826647643, "grad_norm": 6.103667185618247, "learning_rate": 4.855340524287094e-07, "loss": 0.5996, "step": 22265 }, { "epoch": 1.6086116278649736, "grad_norm": 6.798947914863257, "learning_rate": 4.85360844457726e-07, "loss": 0.5828, "step": 22266 }, { "epoch": 1.6086838730651833, "grad_norm": 7.55291190677373, "learning_rate": 4.851876640655875e-07, "loss": 0.623, "step": 22267 }, { "epoch": 1.6087561182653927, "grad_norm": 5.904042956829158, "learning_rate": 4.85014511254665e-07, "loss": 0.5438, "step": 22268 }, { "epoch": 1.6088283634656022, "grad_norm": 6.9059036651352805, "learning_rate": 4.848413860273307e-07, "loss": 0.6281, "step": 22269 }, { "epoch": 1.6089006086658117, "grad_norm": 7.795155796258305, "learning_rate": 4.846682883859522e-07, "loss": 0.5511, "step": 22270 }, { "epoch": 1.6089728538660213, "grad_norm": 7.974531641872553, "learning_rate": 4.844952183329002e-07, "loss": 0.6069, "step": 22271 }, { "epoch": 1.6090450990662308, "grad_norm": 6.981982066568265, "learning_rate": 4.843221758705438e-07, "loss": 0.565, "step": 22272 }, { "epoch": 1.6091173442664402, "grad_norm": 7.23429089491135, "learning_rate": 4.841491610012522e-07, "loss": 0.5356, "step": 22273 }, { "epoch": 1.60918958946665, "grad_norm": 6.964060758094493, "learning_rate": 4.839761737273929e-07, "loss": 0.5621, "step": 22274 }, { "epoch": 1.6092618346668592, "grad_norm": 7.679308240651084, "learning_rate": 4.838032140513344e-07, "loss": 0.6527, "step": 22275 }, { "epoch": 1.6093340798670688, "grad_norm": 6.97471055828313, "learning_rate": 4.836302819754443e-07, "loss": 0.6098, "step": 22276 }, { "epoch": 1.6094063250672783, "grad_norm": 6.401335827178141, "learning_rate": 4.834573775020901e-07, "loss": 0.5648, "step": 22277 }, { "epoch": 1.6094785702674879, "grad_norm": 7.270361359442528, "learning_rate": 4.832845006336387e-07, "loss": 0.6168, "step": 22278 }, { "epoch": 1.6095508154676974, "grad_norm": 7.615499863126727, "learning_rate": 4.831116513724568e-07, "loss": 0.6149, "step": 22279 }, { "epoch": 1.6096230606679067, "grad_norm": 7.306454498848946, "learning_rate": 4.829388297209106e-07, "loss": 0.5746, "step": 22280 }, { "epoch": 1.6096953058681165, "grad_norm": 6.893735291784372, "learning_rate": 4.827660356813655e-07, "loss": 0.6416, "step": 22281 }, { "epoch": 1.6097675510683258, "grad_norm": 6.192273081078105, "learning_rate": 4.825932692561866e-07, "loss": 0.5917, "step": 22282 }, { "epoch": 1.6098397962685356, "grad_norm": 7.884105202091515, "learning_rate": 4.824205304477405e-07, "loss": 0.73, "step": 22283 }, { "epoch": 1.609912041468745, "grad_norm": 7.5249511210226006, "learning_rate": 4.822478192583899e-07, "loss": 0.5799, "step": 22284 }, { "epoch": 1.6099842866689544, "grad_norm": 6.0065053006314475, "learning_rate": 4.820751356905001e-07, "loss": 0.6262, "step": 22285 }, { "epoch": 1.610056531869164, "grad_norm": 7.524878121336371, "learning_rate": 4.819024797464347e-07, "loss": 0.5814, "step": 22286 }, { "epoch": 1.6101287770693733, "grad_norm": 7.5506248872357675, "learning_rate": 4.817298514285576e-07, "loss": 0.6563, "step": 22287 }, { "epoch": 1.610201022269583, "grad_norm": 6.872934204461543, "learning_rate": 4.815572507392316e-07, "loss": 0.5876, "step": 22288 }, { "epoch": 1.6102732674697924, "grad_norm": 8.431935828093216, "learning_rate": 4.813846776808195e-07, "loss": 0.6872, "step": 22289 }, { "epoch": 1.6103455126700021, "grad_norm": 7.316021454443323, "learning_rate": 4.81212132255684e-07, "loss": 0.5439, "step": 22290 }, { "epoch": 1.6104177578702115, "grad_norm": 7.426422075436414, "learning_rate": 4.810396144661877e-07, "loss": 0.5537, "step": 22291 }, { "epoch": 1.610490003070421, "grad_norm": 8.060013261883524, "learning_rate": 4.808671243146906e-07, "loss": 0.5782, "step": 22292 }, { "epoch": 1.6105622482706305, "grad_norm": 7.749759301170212, "learning_rate": 4.806946618035549e-07, "loss": 0.579, "step": 22293 }, { "epoch": 1.6106344934708399, "grad_norm": 5.655229676672986, "learning_rate": 4.80522226935142e-07, "loss": 0.649, "step": 22294 }, { "epoch": 1.6107067386710496, "grad_norm": 7.442669009902751, "learning_rate": 4.803498197118112e-07, "loss": 0.591, "step": 22295 }, { "epoch": 1.610778983871259, "grad_norm": 7.094309511679749, "learning_rate": 4.80177440135923e-07, "loss": 0.5718, "step": 22296 }, { "epoch": 1.6108512290714687, "grad_norm": 7.8720232469168945, "learning_rate": 4.800050882098369e-07, "loss": 0.6331, "step": 22297 }, { "epoch": 1.610923474271678, "grad_norm": 6.843876807026545, "learning_rate": 4.79832763935914e-07, "loss": 0.5908, "step": 22298 }, { "epoch": 1.6109957194718876, "grad_norm": 7.109662060653743, "learning_rate": 4.796604673165114e-07, "loss": 0.5876, "step": 22299 }, { "epoch": 1.6110679646720971, "grad_norm": 7.508189498534146, "learning_rate": 4.794881983539881e-07, "loss": 0.685, "step": 22300 }, { "epoch": 1.6111402098723067, "grad_norm": 8.628182971045291, "learning_rate": 4.793159570507034e-07, "loss": 0.5845, "step": 22301 }, { "epoch": 1.6112124550725162, "grad_norm": 6.533868438190206, "learning_rate": 4.791437434090137e-07, "loss": 0.6057, "step": 22302 }, { "epoch": 1.6112847002727255, "grad_norm": 6.320963709305367, "learning_rate": 4.78971557431277e-07, "loss": 0.6484, "step": 22303 }, { "epoch": 1.6113569454729353, "grad_norm": 7.313521876215775, "learning_rate": 4.787993991198503e-07, "loss": 0.5982, "step": 22304 }, { "epoch": 1.6114291906731446, "grad_norm": 6.714054329778162, "learning_rate": 4.786272684770904e-07, "loss": 0.6188, "step": 22305 }, { "epoch": 1.6115014358733541, "grad_norm": 7.955271612933582, "learning_rate": 4.78455165505354e-07, "loss": 0.649, "step": 22306 }, { "epoch": 1.6115736810735637, "grad_norm": 6.456035410295868, "learning_rate": 4.782830902069965e-07, "loss": 0.6495, "step": 22307 }, { "epoch": 1.6116459262737732, "grad_norm": 6.601919600178918, "learning_rate": 4.781110425843747e-07, "loss": 0.5574, "step": 22308 }, { "epoch": 1.6117181714739828, "grad_norm": 7.5481287070875895, "learning_rate": 4.779390226398417e-07, "loss": 0.5641, "step": 22309 }, { "epoch": 1.611790416674192, "grad_norm": 8.176224493778058, "learning_rate": 4.777670303757539e-07, "loss": 0.5752, "step": 22310 }, { "epoch": 1.6118626618744019, "grad_norm": 7.110653001546232, "learning_rate": 4.77595065794465e-07, "loss": 0.6661, "step": 22311 }, { "epoch": 1.6119349070746112, "grad_norm": 7.795125455403429, "learning_rate": 4.774231288983302e-07, "loss": 0.6442, "step": 22312 }, { "epoch": 1.6120071522748207, "grad_norm": 8.492540564475263, "learning_rate": 4.772512196897017e-07, "loss": 0.602, "step": 22313 }, { "epoch": 1.6120793974750303, "grad_norm": 6.230998642729218, "learning_rate": 4.770793381709332e-07, "loss": 0.5552, "step": 22314 }, { "epoch": 1.6121516426752398, "grad_norm": 8.870141647222827, "learning_rate": 4.76907484344378e-07, "loss": 0.6228, "step": 22315 }, { "epoch": 1.6122238878754493, "grad_norm": 7.053930974974665, "learning_rate": 4.767356582123886e-07, "loss": 0.6067, "step": 22316 }, { "epoch": 1.6122961330756587, "grad_norm": 7.3261393315226275, "learning_rate": 4.76563859777317e-07, "loss": 0.6757, "step": 22317 }, { "epoch": 1.6123683782758684, "grad_norm": 6.294653110051332, "learning_rate": 4.7639208904151494e-07, "loss": 0.5606, "step": 22318 }, { "epoch": 1.6124406234760778, "grad_norm": 7.469599591698523, "learning_rate": 4.762203460073347e-07, "loss": 0.6586, "step": 22319 }, { "epoch": 1.6125128686762873, "grad_norm": 6.6335894485691504, "learning_rate": 4.7604863067712585e-07, "loss": 0.6146, "step": 22320 }, { "epoch": 1.6125851138764968, "grad_norm": 7.154193699348164, "learning_rate": 4.758769430532398e-07, "loss": 0.582, "step": 22321 }, { "epoch": 1.6126573590767064, "grad_norm": 6.306601913205747, "learning_rate": 4.7570528313802683e-07, "loss": 0.6154, "step": 22322 }, { "epoch": 1.612729604276916, "grad_norm": 7.3491443058078465, "learning_rate": 4.7553365093383734e-07, "loss": 0.6076, "step": 22323 }, { "epoch": 1.6128018494771252, "grad_norm": 7.587227740108599, "learning_rate": 4.753620464430198e-07, "loss": 0.497, "step": 22324 }, { "epoch": 1.612874094677335, "grad_norm": 6.963567474553273, "learning_rate": 4.75190469667923e-07, "loss": 0.6049, "step": 22325 }, { "epoch": 1.6129463398775443, "grad_norm": 7.263987235608418, "learning_rate": 4.750189206108979e-07, "loss": 0.562, "step": 22326 }, { "epoch": 1.6130185850777539, "grad_norm": 7.987843098084685, "learning_rate": 4.748473992742908e-07, "loss": 0.6126, "step": 22327 }, { "epoch": 1.6130908302779634, "grad_norm": 7.480952171063667, "learning_rate": 4.7467590566045026e-07, "loss": 0.5755, "step": 22328 }, { "epoch": 1.613163075478173, "grad_norm": 9.806565743901029, "learning_rate": 4.745044397717241e-07, "loss": 0.6076, "step": 22329 }, { "epoch": 1.6132353206783825, "grad_norm": 7.309780038845806, "learning_rate": 4.7433300161046e-07, "loss": 0.6095, "step": 22330 }, { "epoch": 1.6133075658785918, "grad_norm": 7.916338729758684, "learning_rate": 4.7416159117900374e-07, "loss": 0.5868, "step": 22331 }, { "epoch": 1.6133798110788016, "grad_norm": 8.18625357655313, "learning_rate": 4.739902084797024e-07, "loss": 0.682, "step": 22332 }, { "epoch": 1.613452056279011, "grad_norm": 6.84983046941432, "learning_rate": 4.7381885351490186e-07, "loss": 0.6022, "step": 22333 }, { "epoch": 1.6135243014792204, "grad_norm": 7.415795207170132, "learning_rate": 4.7364752628694806e-07, "loss": 0.582, "step": 22334 }, { "epoch": 1.61359654667943, "grad_norm": 6.153044664934722, "learning_rate": 4.734762267981863e-07, "loss": 0.5989, "step": 22335 }, { "epoch": 1.6136687918796395, "grad_norm": 7.72241766275986, "learning_rate": 4.7330495505096164e-07, "loss": 0.5776, "step": 22336 }, { "epoch": 1.613741037079849, "grad_norm": 7.751930734645795, "learning_rate": 4.731337110476189e-07, "loss": 0.6425, "step": 22337 }, { "epoch": 1.6138132822800584, "grad_norm": 6.888024494672546, "learning_rate": 4.729624947905012e-07, "loss": 0.6118, "step": 22338 }, { "epoch": 1.6138855274802681, "grad_norm": 7.271607395128447, "learning_rate": 4.7279130628195335e-07, "loss": 0.6355, "step": 22339 }, { "epoch": 1.6139577726804775, "grad_norm": 7.227992890532332, "learning_rate": 4.72620145524319e-07, "loss": 0.5615, "step": 22340 }, { "epoch": 1.614030017880687, "grad_norm": 7.767114016811295, "learning_rate": 4.724490125199399e-07, "loss": 0.6064, "step": 22341 }, { "epoch": 1.6141022630808965, "grad_norm": 6.944483635685852, "learning_rate": 4.7227790727115973e-07, "loss": 0.6073, "step": 22342 }, { "epoch": 1.614174508281106, "grad_norm": 8.30689480749416, "learning_rate": 4.721068297803205e-07, "loss": 0.6613, "step": 22343 }, { "epoch": 1.6142467534813156, "grad_norm": 8.167964546716997, "learning_rate": 4.7193578004976427e-07, "loss": 0.6288, "step": 22344 }, { "epoch": 1.614318998681525, "grad_norm": 7.439715143718725, "learning_rate": 4.717647580818324e-07, "loss": 0.5925, "step": 22345 }, { "epoch": 1.6143912438817347, "grad_norm": 6.623266317140182, "learning_rate": 4.7159376387886645e-07, "loss": 0.6037, "step": 22346 }, { "epoch": 1.614463489081944, "grad_norm": 7.855697226367212, "learning_rate": 4.714227974432067e-07, "loss": 0.5576, "step": 22347 }, { "epoch": 1.6145357342821536, "grad_norm": 7.154533346914345, "learning_rate": 4.7125185877719445e-07, "loss": 0.5572, "step": 22348 }, { "epoch": 1.6146079794823631, "grad_norm": 7.765867417541081, "learning_rate": 4.710809478831682e-07, "loss": 0.6146, "step": 22349 }, { "epoch": 1.6146802246825727, "grad_norm": 6.668841992559766, "learning_rate": 4.7091006476346875e-07, "loss": 0.5976, "step": 22350 }, { "epoch": 1.6147524698827822, "grad_norm": 7.714667719510733, "learning_rate": 4.707392094204355e-07, "loss": 0.6398, "step": 22351 }, { "epoch": 1.6148247150829915, "grad_norm": 8.117937980375476, "learning_rate": 4.7056838185640607e-07, "loss": 0.6392, "step": 22352 }, { "epoch": 1.6148969602832013, "grad_norm": 5.807373082797654, "learning_rate": 4.7039758207371944e-07, "loss": 0.5934, "step": 22353 }, { "epoch": 1.6149692054834106, "grad_norm": 8.474724383078227, "learning_rate": 4.7022681007471425e-07, "loss": 0.6537, "step": 22354 }, { "epoch": 1.6150414506836204, "grad_norm": 7.852308733194731, "learning_rate": 4.7005606586172895e-07, "loss": 0.6996, "step": 22355 }, { "epoch": 1.6151136958838297, "grad_norm": 7.131979217824219, "learning_rate": 4.698853494370992e-07, "loss": 0.6465, "step": 22356 }, { "epoch": 1.6151859410840392, "grad_norm": 6.905671384574314, "learning_rate": 4.6971466080316305e-07, "loss": 0.6266, "step": 22357 }, { "epoch": 1.6152581862842488, "grad_norm": 6.139038191524157, "learning_rate": 4.6954399996225697e-07, "loss": 0.5971, "step": 22358 }, { "epoch": 1.615330431484458, "grad_norm": 6.6137409624994365, "learning_rate": 4.6937336691671666e-07, "loss": 0.5069, "step": 22359 }, { "epoch": 1.6154026766846679, "grad_norm": 6.993814732965334, "learning_rate": 4.6920276166887794e-07, "loss": 0.5517, "step": 22360 }, { "epoch": 1.6154749218848772, "grad_norm": 6.530133749744394, "learning_rate": 4.690321842210768e-07, "loss": 0.5959, "step": 22361 }, { "epoch": 1.615547167085087, "grad_norm": 7.0510812457820355, "learning_rate": 4.6886163457564833e-07, "loss": 0.6765, "step": 22362 }, { "epoch": 1.6156194122852963, "grad_norm": 7.935348579620062, "learning_rate": 4.6869111273492666e-07, "loss": 0.5771, "step": 22363 }, { "epoch": 1.6156916574855058, "grad_norm": 7.227942488593016, "learning_rate": 4.685206187012467e-07, "loss": 0.6612, "step": 22364 }, { "epoch": 1.6157639026857153, "grad_norm": 9.470604642520026, "learning_rate": 4.6835015247694266e-07, "loss": 0.6619, "step": 22365 }, { "epoch": 1.6158361478859247, "grad_norm": 5.727523327583885, "learning_rate": 4.6817971406434684e-07, "loss": 0.5718, "step": 22366 }, { "epoch": 1.6159083930861344, "grad_norm": 8.001678290755633, "learning_rate": 4.6800930346579314e-07, "loss": 0.6011, "step": 22367 }, { "epoch": 1.6159806382863438, "grad_norm": 6.47388552939085, "learning_rate": 4.6783892068361423e-07, "loss": 0.6033, "step": 22368 }, { "epoch": 1.6160528834865535, "grad_norm": 6.7755446098997965, "learning_rate": 4.6766856572014353e-07, "loss": 0.6031, "step": 22369 }, { "epoch": 1.6161251286867628, "grad_norm": 8.192475881671996, "learning_rate": 4.674982385777116e-07, "loss": 0.5805, "step": 22370 }, { "epoch": 1.6161973738869724, "grad_norm": 7.180825227370802, "learning_rate": 4.673279392586502e-07, "loss": 0.6298, "step": 22371 }, { "epoch": 1.616269619087182, "grad_norm": 7.887528399191148, "learning_rate": 4.671576677652914e-07, "loss": 0.6519, "step": 22372 }, { "epoch": 1.6163418642873915, "grad_norm": 7.521207453192279, "learning_rate": 4.6698742409996556e-07, "loss": 0.6247, "step": 22373 }, { "epoch": 1.616414109487601, "grad_norm": 8.386357591490107, "learning_rate": 4.6681720826500355e-07, "loss": 0.724, "step": 22374 }, { "epoch": 1.6164863546878103, "grad_norm": 7.61364876734475, "learning_rate": 4.6664702026273517e-07, "loss": 0.6027, "step": 22375 }, { "epoch": 1.61655859988802, "grad_norm": 8.112646953268637, "learning_rate": 4.664768600954908e-07, "loss": 0.6203, "step": 22376 }, { "epoch": 1.6166308450882294, "grad_norm": 9.048744443345528, "learning_rate": 4.6630672776559887e-07, "loss": 0.6425, "step": 22377 }, { "epoch": 1.616703090288439, "grad_norm": 7.4976365498074, "learning_rate": 4.661366232753886e-07, "loss": 0.5817, "step": 22378 }, { "epoch": 1.6167753354886485, "grad_norm": 6.471813121667459, "learning_rate": 4.65966546627189e-07, "loss": 0.5667, "step": 22379 }, { "epoch": 1.616847580688858, "grad_norm": 7.304350656504902, "learning_rate": 4.657964978233284e-07, "loss": 0.6247, "step": 22380 }, { "epoch": 1.6169198258890676, "grad_norm": 6.47626828766674, "learning_rate": 4.656264768661331e-07, "loss": 0.6151, "step": 22381 }, { "epoch": 1.616992071089277, "grad_norm": 8.071162811418064, "learning_rate": 4.654564837579326e-07, "loss": 0.5804, "step": 22382 }, { "epoch": 1.6170643162894867, "grad_norm": 6.80547873871024, "learning_rate": 4.6528651850105353e-07, "loss": 0.6132, "step": 22383 }, { "epoch": 1.617136561489696, "grad_norm": 6.185284458733423, "learning_rate": 4.6511658109782173e-07, "loss": 0.5658, "step": 22384 }, { "epoch": 1.6172088066899055, "grad_norm": 6.43125538580162, "learning_rate": 4.649466715505638e-07, "loss": 0.5399, "step": 22385 }, { "epoch": 1.617281051890115, "grad_norm": 6.275200962936895, "learning_rate": 4.647767898616057e-07, "loss": 0.6502, "step": 22386 }, { "epoch": 1.6173532970903246, "grad_norm": 7.058266780438924, "learning_rate": 4.6460693603327387e-07, "loss": 0.603, "step": 22387 }, { "epoch": 1.6174255422905341, "grad_norm": 6.726507773713934, "learning_rate": 4.644371100678921e-07, "loss": 0.531, "step": 22388 }, { "epoch": 1.6174977874907435, "grad_norm": 6.9598693370612095, "learning_rate": 4.642673119677857e-07, "loss": 0.6233, "step": 22389 }, { "epoch": 1.6175700326909532, "grad_norm": 6.186325424144608, "learning_rate": 4.6409754173527913e-07, "loss": 0.5396, "step": 22390 }, { "epoch": 1.6176422778911625, "grad_norm": 8.069287658790737, "learning_rate": 4.639277993726965e-07, "loss": 0.6465, "step": 22391 }, { "epoch": 1.617714523091372, "grad_norm": 6.737907598335773, "learning_rate": 4.637580848823614e-07, "loss": 0.6321, "step": 22392 }, { "epoch": 1.6177867682915816, "grad_norm": 6.956277539797658, "learning_rate": 4.63588398266597e-07, "loss": 0.5865, "step": 22393 }, { "epoch": 1.6178590134917912, "grad_norm": 6.107476527277858, "learning_rate": 4.63418739527727e-07, "loss": 0.6127, "step": 22394 }, { "epoch": 1.6179312586920007, "grad_norm": 7.319840618545475, "learning_rate": 4.6324910866807237e-07, "loss": 0.6303, "step": 22395 }, { "epoch": 1.61800350389221, "grad_norm": 7.137388735570347, "learning_rate": 4.630795056899562e-07, "loss": 0.5458, "step": 22396 }, { "epoch": 1.6180757490924198, "grad_norm": 8.041654860202906, "learning_rate": 4.629099305956999e-07, "loss": 0.6282, "step": 22397 }, { "epoch": 1.6181479942926291, "grad_norm": 6.960651430359154, "learning_rate": 4.6274038338762566e-07, "loss": 0.589, "step": 22398 }, { "epoch": 1.6182202394928387, "grad_norm": 6.970029157580299, "learning_rate": 4.6257086406805317e-07, "loss": 0.6242, "step": 22399 }, { "epoch": 1.6182924846930482, "grad_norm": 6.066533585370779, "learning_rate": 4.624013726393034e-07, "loss": 0.6365, "step": 22400 }, { "epoch": 1.6183647298932577, "grad_norm": 6.827562950623492, "learning_rate": 4.6223190910369697e-07, "loss": 0.5653, "step": 22401 }, { "epoch": 1.6184369750934673, "grad_norm": 8.222710065144563, "learning_rate": 4.620624734635534e-07, "loss": 0.5685, "step": 22402 }, { "epoch": 1.6185092202936766, "grad_norm": 8.955166089739722, "learning_rate": 4.6189306572119203e-07, "loss": 0.6004, "step": 22403 }, { "epoch": 1.6185814654938864, "grad_norm": 6.822034690480197, "learning_rate": 4.6172368587893227e-07, "loss": 0.5945, "step": 22404 }, { "epoch": 1.6186537106940957, "grad_norm": 6.633547181687179, "learning_rate": 4.615543339390932e-07, "loss": 0.5519, "step": 22405 }, { "epoch": 1.6187259558943052, "grad_norm": 6.786204413589203, "learning_rate": 4.613850099039921e-07, "loss": 0.562, "step": 22406 }, { "epoch": 1.6187982010945148, "grad_norm": 6.746756728152939, "learning_rate": 4.612157137759471e-07, "loss": 0.6619, "step": 22407 }, { "epoch": 1.6188704462947243, "grad_norm": 7.4710054064662055, "learning_rate": 4.610464455572766e-07, "loss": 0.657, "step": 22408 }, { "epoch": 1.6189426914949339, "grad_norm": 7.9777011040014, "learning_rate": 4.608772052502966e-07, "loss": 0.6136, "step": 22409 }, { "epoch": 1.6190149366951432, "grad_norm": 7.048775359580146, "learning_rate": 4.607079928573235e-07, "loss": 0.523, "step": 22410 }, { "epoch": 1.619087181895353, "grad_norm": 6.790918445522685, "learning_rate": 4.6053880838067554e-07, "loss": 0.6128, "step": 22411 }, { "epoch": 1.6191594270955623, "grad_norm": 9.827275711019338, "learning_rate": 4.603696518226683e-07, "loss": 0.6536, "step": 22412 }, { "epoch": 1.6192316722957718, "grad_norm": 8.672126175275723, "learning_rate": 4.602005231856163e-07, "loss": 0.6701, "step": 22413 }, { "epoch": 1.6193039174959813, "grad_norm": 6.503800968085563, "learning_rate": 4.60031422471835e-07, "loss": 0.6124, "step": 22414 }, { "epoch": 1.619376162696191, "grad_norm": 6.400634984464096, "learning_rate": 4.5986234968364044e-07, "loss": 0.5592, "step": 22415 }, { "epoch": 1.6194484078964004, "grad_norm": 6.974985386549432, "learning_rate": 4.596933048233457e-07, "loss": 0.6067, "step": 22416 }, { "epoch": 1.6195206530966098, "grad_norm": 6.3392287806327925, "learning_rate": 4.595242878932654e-07, "loss": 0.5883, "step": 22417 }, { "epoch": 1.6195928982968195, "grad_norm": 8.740671034832518, "learning_rate": 4.5935529889571314e-07, "loss": 0.6846, "step": 22418 }, { "epoch": 1.6196651434970288, "grad_norm": 7.287422936164547, "learning_rate": 4.591863378330025e-07, "loss": 0.544, "step": 22419 }, { "epoch": 1.6197373886972384, "grad_norm": 6.760558453578229, "learning_rate": 4.590174047074461e-07, "loss": 0.6377, "step": 22420 }, { "epoch": 1.619809633897448, "grad_norm": 7.691967666153339, "learning_rate": 4.5884849952135684e-07, "loss": 0.5859, "step": 22421 }, { "epoch": 1.6198818790976575, "grad_norm": 8.4032916567822, "learning_rate": 4.5867962227704704e-07, "loss": 0.6267, "step": 22422 }, { "epoch": 1.619954124297867, "grad_norm": 7.243857313649792, "learning_rate": 4.5851077297682845e-07, "loss": 0.5278, "step": 22423 }, { "epoch": 1.6200263694980763, "grad_norm": 6.817885483746586, "learning_rate": 4.58341951623012e-07, "loss": 0.5802, "step": 22424 }, { "epoch": 1.620098614698286, "grad_norm": 7.150544536635665, "learning_rate": 4.581731582179086e-07, "loss": 0.6155, "step": 22425 }, { "epoch": 1.6201708598984954, "grad_norm": 7.100509582285274, "learning_rate": 4.5800439276383e-07, "loss": 0.6647, "step": 22426 }, { "epoch": 1.620243105098705, "grad_norm": 7.738343856227057, "learning_rate": 4.5783565526308525e-07, "loss": 0.6499, "step": 22427 }, { "epoch": 1.6203153502989145, "grad_norm": 7.029353585402664, "learning_rate": 4.576669457179847e-07, "loss": 0.6536, "step": 22428 }, { "epoch": 1.620387595499124, "grad_norm": 6.328294690529633, "learning_rate": 4.574982641308381e-07, "loss": 0.5767, "step": 22429 }, { "epoch": 1.6204598406993336, "grad_norm": 7.008026698144371, "learning_rate": 4.57329610503954e-07, "loss": 0.589, "step": 22430 }, { "epoch": 1.620532085899543, "grad_norm": 7.527601189162849, "learning_rate": 4.5716098483964186e-07, "loss": 0.596, "step": 22431 }, { "epoch": 1.6206043310997527, "grad_norm": 7.912265825949497, "learning_rate": 4.569923871402093e-07, "loss": 0.5597, "step": 22432 }, { "epoch": 1.620676576299962, "grad_norm": 7.465924738171751, "learning_rate": 4.568238174079656e-07, "loss": 0.6366, "step": 22433 }, { "epoch": 1.6207488215001717, "grad_norm": 6.903204475107664, "learning_rate": 4.566552756452167e-07, "loss": 0.5738, "step": 22434 }, { "epoch": 1.620821066700381, "grad_norm": 7.940349157617255, "learning_rate": 4.564867618542704e-07, "loss": 0.6099, "step": 22435 }, { "epoch": 1.6208933119005906, "grad_norm": 7.479121329365837, "learning_rate": 4.5631827603743377e-07, "loss": 0.5519, "step": 22436 }, { "epoch": 1.6209655571008001, "grad_norm": 7.167684290592018, "learning_rate": 4.5614981819701393e-07, "loss": 0.5573, "step": 22437 }, { "epoch": 1.6210378023010095, "grad_norm": 7.249462765487081, "learning_rate": 4.559813883353145e-07, "loss": 0.6089, "step": 22438 }, { "epoch": 1.6211100475012192, "grad_norm": 7.170575458412327, "learning_rate": 4.558129864546437e-07, "loss": 0.6934, "step": 22439 }, { "epoch": 1.6211822927014286, "grad_norm": 7.452318575754865, "learning_rate": 4.5564461255730636e-07, "loss": 0.6363, "step": 22440 }, { "epoch": 1.6212545379016383, "grad_norm": 7.591374029499648, "learning_rate": 4.554762666456064e-07, "loss": 0.6333, "step": 22441 }, { "epoch": 1.6213267831018476, "grad_norm": 6.611155890020174, "learning_rate": 4.553079487218487e-07, "loss": 0.6512, "step": 22442 }, { "epoch": 1.6213990283020572, "grad_norm": 8.01483305054603, "learning_rate": 4.5513965878833776e-07, "loss": 0.6024, "step": 22443 }, { "epoch": 1.6214712735022667, "grad_norm": 6.990738600318175, "learning_rate": 4.549713968473779e-07, "loss": 0.5883, "step": 22444 }, { "epoch": 1.621543518702476, "grad_norm": 7.414995012236632, "learning_rate": 4.5480316290127085e-07, "loss": 0.6234, "step": 22445 }, { "epoch": 1.6216157639026858, "grad_norm": 7.110121333593258, "learning_rate": 4.5463495695232064e-07, "loss": 0.6048, "step": 22446 }, { "epoch": 1.6216880091028951, "grad_norm": 6.6971676547587835, "learning_rate": 4.544667790028298e-07, "loss": 0.6411, "step": 22447 }, { "epoch": 1.6217602543031049, "grad_norm": 8.429195584146852, "learning_rate": 4.542986290551005e-07, "loss": 0.6439, "step": 22448 }, { "epoch": 1.6218324995033142, "grad_norm": 6.503904783689775, "learning_rate": 4.541305071114344e-07, "loss": 0.6076, "step": 22449 }, { "epoch": 1.6219047447035237, "grad_norm": 5.93457858844762, "learning_rate": 4.5396241317413323e-07, "loss": 0.5413, "step": 22450 }, { "epoch": 1.6219769899037333, "grad_norm": 6.8399750910969095, "learning_rate": 4.5379434724549866e-07, "loss": 0.6237, "step": 22451 }, { "epoch": 1.6220492351039428, "grad_norm": 8.488828161309787, "learning_rate": 4.536263093278301e-07, "loss": 0.7034, "step": 22452 }, { "epoch": 1.6221214803041524, "grad_norm": 7.141929991358418, "learning_rate": 4.534582994234285e-07, "loss": 0.571, "step": 22453 }, { "epoch": 1.6221937255043617, "grad_norm": 7.489162498680384, "learning_rate": 4.532903175345937e-07, "loss": 0.5342, "step": 22454 }, { "epoch": 1.6222659707045715, "grad_norm": 7.711906333591955, "learning_rate": 4.531223636636259e-07, "loss": 0.6016, "step": 22455 }, { "epoch": 1.6223382159047808, "grad_norm": 5.893252001987998, "learning_rate": 4.529544378128228e-07, "loss": 0.5068, "step": 22456 }, { "epoch": 1.6224104611049903, "grad_norm": 6.112216298515129, "learning_rate": 4.527865399844844e-07, "loss": 0.5838, "step": 22457 }, { "epoch": 1.6224827063051999, "grad_norm": 6.604062951012619, "learning_rate": 4.526186701809085e-07, "loss": 0.5883, "step": 22458 }, { "epoch": 1.6225549515054094, "grad_norm": 6.989452317246921, "learning_rate": 4.5245082840439327e-07, "loss": 0.6261, "step": 22459 }, { "epoch": 1.622627196705619, "grad_norm": 6.777346845730561, "learning_rate": 4.522830146572366e-07, "loss": 0.6188, "step": 22460 }, { "epoch": 1.6226994419058283, "grad_norm": 6.311716767622538, "learning_rate": 4.521152289417352e-07, "loss": 0.6031, "step": 22461 }, { "epoch": 1.622771687106038, "grad_norm": 6.529959081080519, "learning_rate": 4.519474712601871e-07, "loss": 0.5593, "step": 22462 }, { "epoch": 1.6228439323062473, "grad_norm": 7.128783710419429, "learning_rate": 4.5177974161488694e-07, "loss": 0.5284, "step": 22463 }, { "epoch": 1.622916177506457, "grad_norm": 6.840632037581321, "learning_rate": 4.5161204000813225e-07, "loss": 0.6229, "step": 22464 }, { "epoch": 1.6229884227066664, "grad_norm": 8.187618342119455, "learning_rate": 4.5144436644221857e-07, "loss": 0.5971, "step": 22465 }, { "epoch": 1.623060667906876, "grad_norm": 5.478169726435622, "learning_rate": 4.5127672091943993e-07, "loss": 0.5424, "step": 22466 }, { "epoch": 1.6231329131070855, "grad_norm": 7.555770780577125, "learning_rate": 4.5110910344209254e-07, "loss": 0.6347, "step": 22467 }, { "epoch": 1.6232051583072948, "grad_norm": 7.153095466282822, "learning_rate": 4.5094151401247095e-07, "loss": 0.5775, "step": 22468 }, { "epoch": 1.6232774035075046, "grad_norm": 6.6543587876852675, "learning_rate": 4.5077395263286945e-07, "loss": 0.5704, "step": 22469 }, { "epoch": 1.623349648707714, "grad_norm": 7.163646357273653, "learning_rate": 4.506064193055809e-07, "loss": 0.6018, "step": 22470 }, { "epoch": 1.6234218939079235, "grad_norm": 6.52914088020353, "learning_rate": 4.504389140328994e-07, "loss": 0.5864, "step": 22471 }, { "epoch": 1.623494139108133, "grad_norm": 7.705847949502353, "learning_rate": 4.502714368171182e-07, "loss": 0.6293, "step": 22472 }, { "epoch": 1.6235663843083425, "grad_norm": 8.636813269779037, "learning_rate": 4.5010398766052873e-07, "loss": 0.6624, "step": 22473 }, { "epoch": 1.623638629508552, "grad_norm": 7.310137766571829, "learning_rate": 4.499365665654243e-07, "loss": 0.5511, "step": 22474 }, { "epoch": 1.6237108747087614, "grad_norm": 7.152914144292187, "learning_rate": 4.497691735340964e-07, "loss": 0.6301, "step": 22475 }, { "epoch": 1.6237831199089712, "grad_norm": 5.895547201629432, "learning_rate": 4.4960180856883667e-07, "loss": 0.573, "step": 22476 }, { "epoch": 1.6238553651091805, "grad_norm": 7.548512030493939, "learning_rate": 4.494344716719359e-07, "loss": 0.5641, "step": 22477 }, { "epoch": 1.62392761030939, "grad_norm": 7.225868579437003, "learning_rate": 4.4926716284568545e-07, "loss": 0.5498, "step": 22478 }, { "epoch": 1.6239998555095996, "grad_norm": 6.992269334367487, "learning_rate": 4.4909988209237504e-07, "loss": 0.6035, "step": 22479 }, { "epoch": 1.6240721007098091, "grad_norm": 6.720931865476755, "learning_rate": 4.489326294142954e-07, "loss": 0.6255, "step": 22480 }, { "epoch": 1.6241443459100187, "grad_norm": 7.628693983462081, "learning_rate": 4.4876540481373494e-07, "loss": 0.6112, "step": 22481 }, { "epoch": 1.624216591110228, "grad_norm": 6.565545528898647, "learning_rate": 4.485982082929835e-07, "loss": 0.6669, "step": 22482 }, { "epoch": 1.6242888363104377, "grad_norm": 5.916428288058949, "learning_rate": 4.484310398543304e-07, "loss": 0.5721, "step": 22483 }, { "epoch": 1.624361081510647, "grad_norm": 6.963306165282757, "learning_rate": 4.4826389950006266e-07, "loss": 0.5812, "step": 22484 }, { "epoch": 1.6244333267108566, "grad_norm": 7.1207768824352184, "learning_rate": 4.4809678723246903e-07, "loss": 0.7039, "step": 22485 }, { "epoch": 1.6245055719110661, "grad_norm": 7.227793655684312, "learning_rate": 4.4792970305383736e-07, "loss": 0.5999, "step": 22486 }, { "epoch": 1.6245778171112757, "grad_norm": 6.566528827492129, "learning_rate": 4.477626469664548e-07, "loss": 0.625, "step": 22487 }, { "epoch": 1.6246500623114852, "grad_norm": 6.112107078233415, "learning_rate": 4.4759561897260775e-07, "loss": 0.5512, "step": 22488 }, { "epoch": 1.6247223075116946, "grad_norm": 6.841919257844204, "learning_rate": 4.474286190745833e-07, "loss": 0.5877, "step": 22489 }, { "epoch": 1.6247945527119043, "grad_norm": 6.681788206770454, "learning_rate": 4.47261647274668e-07, "loss": 0.5888, "step": 22490 }, { "epoch": 1.6248667979121136, "grad_norm": 6.7413506369750955, "learning_rate": 4.4709470357514636e-07, "loss": 0.5892, "step": 22491 }, { "epoch": 1.6249390431123232, "grad_norm": 7.771481184420682, "learning_rate": 4.4692778797830406e-07, "loss": 0.6017, "step": 22492 }, { "epoch": 1.6250112883125327, "grad_norm": 6.737228461687395, "learning_rate": 4.467609004864265e-07, "loss": 0.6227, "step": 22493 }, { "epoch": 1.6250835335127423, "grad_norm": 6.536320666399707, "learning_rate": 4.465940411017977e-07, "loss": 0.5593, "step": 22494 }, { "epoch": 1.6251557787129518, "grad_norm": 6.722898542082047, "learning_rate": 4.4642720982670225e-07, "loss": 0.6265, "step": 22495 }, { "epoch": 1.6252280239131611, "grad_norm": 7.453889883527389, "learning_rate": 4.462604066634238e-07, "loss": 0.6232, "step": 22496 }, { "epoch": 1.6253002691133709, "grad_norm": 7.161506954143663, "learning_rate": 4.460936316142464e-07, "loss": 0.6294, "step": 22497 }, { "epoch": 1.6253725143135802, "grad_norm": 8.653282786232959, "learning_rate": 4.4592688468145155e-07, "loss": 0.6525, "step": 22498 }, { "epoch": 1.6254447595137897, "grad_norm": 6.6430019298426055, "learning_rate": 4.45760165867323e-07, "loss": 0.5984, "step": 22499 }, { "epoch": 1.6255170047139993, "grad_norm": 7.944490253283812, "learning_rate": 4.455934751741428e-07, "loss": 0.649, "step": 22500 }, { "epoch": 1.6255892499142088, "grad_norm": 7.646106711618599, "learning_rate": 4.454268126041933e-07, "loss": 0.6152, "step": 22501 }, { "epoch": 1.6256614951144184, "grad_norm": 6.734250379502393, "learning_rate": 4.452601781597549e-07, "loss": 0.6771, "step": 22502 }, { "epoch": 1.6257337403146277, "grad_norm": 7.129911637153246, "learning_rate": 4.450935718431093e-07, "loss": 0.6179, "step": 22503 }, { "epoch": 1.6258059855148375, "grad_norm": 7.882925979810659, "learning_rate": 4.4492699365653704e-07, "loss": 0.6433, "step": 22504 }, { "epoch": 1.6258782307150468, "grad_norm": 7.683488248751421, "learning_rate": 4.4476044360231866e-07, "loss": 0.6362, "step": 22505 }, { "epoch": 1.6259504759152565, "grad_norm": 7.090266107426511, "learning_rate": 4.4459392168273383e-07, "loss": 0.6262, "step": 22506 }, { "epoch": 1.6260227211154659, "grad_norm": 6.667554541909345, "learning_rate": 4.4442742790006266e-07, "loss": 0.5605, "step": 22507 }, { "epoch": 1.6260949663156754, "grad_norm": 6.032480381199806, "learning_rate": 4.4426096225658436e-07, "loss": 0.5843, "step": 22508 }, { "epoch": 1.626167211515885, "grad_norm": 6.719885158011648, "learning_rate": 4.4409452475457666e-07, "loss": 0.6082, "step": 22509 }, { "epoch": 1.6262394567160943, "grad_norm": 8.48770464059147, "learning_rate": 4.4392811539631875e-07, "loss": 0.5811, "step": 22510 }, { "epoch": 1.626311701916304, "grad_norm": 7.953374892470226, "learning_rate": 4.437617341840883e-07, "loss": 0.5803, "step": 22511 }, { "epoch": 1.6263839471165134, "grad_norm": 7.762732844154615, "learning_rate": 4.4359538112016375e-07, "loss": 0.5908, "step": 22512 }, { "epoch": 1.6264561923167231, "grad_norm": 6.263119470106424, "learning_rate": 4.4342905620682137e-07, "loss": 0.588, "step": 22513 }, { "epoch": 1.6265284375169324, "grad_norm": 9.332371344263288, "learning_rate": 4.4326275944633815e-07, "loss": 0.691, "step": 22514 }, { "epoch": 1.626600682717142, "grad_norm": 6.60939895228742, "learning_rate": 4.4309649084099097e-07, "loss": 0.5952, "step": 22515 }, { "epoch": 1.6266729279173515, "grad_norm": 8.512226063357982, "learning_rate": 4.4293025039305576e-07, "loss": 0.6734, "step": 22516 }, { "epoch": 1.6267451731175608, "grad_norm": 6.18307614404249, "learning_rate": 4.4276403810480816e-07, "loss": 0.5556, "step": 22517 }, { "epoch": 1.6268174183177706, "grad_norm": 7.140043740018334, "learning_rate": 4.425978539785233e-07, "loss": 0.6134, "step": 22518 }, { "epoch": 1.62688966351798, "grad_norm": 8.314127597794151, "learning_rate": 4.424316980164772e-07, "loss": 0.6227, "step": 22519 }, { "epoch": 1.6269619087181897, "grad_norm": 6.710908510014509, "learning_rate": 4.42265570220943e-07, "loss": 0.6139, "step": 22520 }, { "epoch": 1.627034153918399, "grad_norm": 7.369671723341173, "learning_rate": 4.4209947059419526e-07, "loss": 0.6394, "step": 22521 }, { "epoch": 1.6271063991186085, "grad_norm": 7.280979135083917, "learning_rate": 4.4193339913850856e-07, "loss": 0.5938, "step": 22522 }, { "epoch": 1.627178644318818, "grad_norm": 6.886346505112603, "learning_rate": 4.4176735585615444e-07, "loss": 0.5462, "step": 22523 }, { "epoch": 1.6272508895190276, "grad_norm": 6.655215474000154, "learning_rate": 4.416013407494077e-07, "loss": 0.6523, "step": 22524 }, { "epoch": 1.6273231347192372, "grad_norm": 7.39310908760321, "learning_rate": 4.4143535382054045e-07, "loss": 0.609, "step": 22525 }, { "epoch": 1.6273953799194465, "grad_norm": 8.58904345550008, "learning_rate": 4.4126939507182554e-07, "loss": 0.6777, "step": 22526 }, { "epoch": 1.6274676251196563, "grad_norm": 8.492536072653003, "learning_rate": 4.4110346450553316e-07, "loss": 0.5202, "step": 22527 }, { "epoch": 1.6275398703198656, "grad_norm": 6.325294445557301, "learning_rate": 4.4093756212393593e-07, "loss": 0.5481, "step": 22528 }, { "epoch": 1.6276121155200751, "grad_norm": 7.860372612738151, "learning_rate": 4.4077168792930476e-07, "loss": 0.8134, "step": 22529 }, { "epoch": 1.6276843607202847, "grad_norm": 8.523423625819367, "learning_rate": 4.406058419239109e-07, "loss": 0.5906, "step": 22530 }, { "epoch": 1.6277566059204942, "grad_norm": 8.20011849666668, "learning_rate": 4.404400241100232e-07, "loss": 0.6815, "step": 22531 }, { "epoch": 1.6278288511207037, "grad_norm": 7.122719650764634, "learning_rate": 4.4027423448991276e-07, "loss": 0.6943, "step": 22532 }, { "epoch": 1.627901096320913, "grad_norm": 9.341626388458435, "learning_rate": 4.4010847306584867e-07, "loss": 0.6108, "step": 22533 }, { "epoch": 1.6279733415211228, "grad_norm": 7.866257674151195, "learning_rate": 4.3994273984009987e-07, "loss": 0.6623, "step": 22534 }, { "epoch": 1.6280455867213321, "grad_norm": 8.06340080965764, "learning_rate": 4.3977703481493577e-07, "loss": 0.6393, "step": 22535 }, { "epoch": 1.6281178319215417, "grad_norm": 8.369376500832981, "learning_rate": 4.396113579926242e-07, "loss": 0.641, "step": 22536 }, { "epoch": 1.6281900771217512, "grad_norm": 7.120828042903547, "learning_rate": 4.3944570937543426e-07, "loss": 0.6717, "step": 22537 }, { "epoch": 1.6282623223219608, "grad_norm": 6.549366624799314, "learning_rate": 4.392800889656318e-07, "loss": 0.5759, "step": 22538 }, { "epoch": 1.6283345675221703, "grad_norm": 6.542545170524206, "learning_rate": 4.3911449676548477e-07, "loss": 0.6184, "step": 22539 }, { "epoch": 1.6284068127223796, "grad_norm": 6.566986294372672, "learning_rate": 4.3894893277726114e-07, "loss": 0.6112, "step": 22540 }, { "epoch": 1.6284790579225894, "grad_norm": 7.633491165772689, "learning_rate": 4.387833970032254e-07, "loss": 0.5804, "step": 22541 }, { "epoch": 1.6285513031227987, "grad_norm": 8.099591713493524, "learning_rate": 4.3861788944564443e-07, "loss": 0.5676, "step": 22542 }, { "epoch": 1.6286235483230083, "grad_norm": 7.252313442099092, "learning_rate": 4.3845241010678356e-07, "loss": 0.6055, "step": 22543 }, { "epoch": 1.6286957935232178, "grad_norm": 7.314768121555129, "learning_rate": 4.3828695898890966e-07, "loss": 0.5919, "step": 22544 }, { "epoch": 1.6287680387234273, "grad_norm": 7.619014704724016, "learning_rate": 4.3812153609428585e-07, "loss": 0.7499, "step": 22545 }, { "epoch": 1.628840283923637, "grad_norm": 6.568143615899551, "learning_rate": 4.3795614142517736e-07, "loss": 0.5762, "step": 22546 }, { "epoch": 1.6289125291238462, "grad_norm": 6.997576975397364, "learning_rate": 4.3779077498384863e-07, "loss": 0.5744, "step": 22547 }, { "epoch": 1.628984774324056, "grad_norm": 7.795446475237388, "learning_rate": 4.376254367725627e-07, "loss": 0.6439, "step": 22548 }, { "epoch": 1.6290570195242653, "grad_norm": 7.239898221053989, "learning_rate": 4.37460126793583e-07, "loss": 0.5944, "step": 22549 }, { "epoch": 1.6291292647244748, "grad_norm": 7.826968779617024, "learning_rate": 4.372948450491726e-07, "loss": 0.5786, "step": 22550 }, { "epoch": 1.6292015099246844, "grad_norm": 6.9253954061831315, "learning_rate": 4.3712959154159434e-07, "loss": 0.6482, "step": 22551 }, { "epoch": 1.629273755124894, "grad_norm": 6.770795647931983, "learning_rate": 4.3696436627311045e-07, "loss": 0.6086, "step": 22552 }, { "epoch": 1.6293460003251035, "grad_norm": 6.355657217165615, "learning_rate": 4.367991692459822e-07, "loss": 0.5311, "step": 22553 }, { "epoch": 1.6294182455253128, "grad_norm": 6.212186944392456, "learning_rate": 4.366340004624714e-07, "loss": 0.5345, "step": 22554 }, { "epoch": 1.6294904907255225, "grad_norm": 9.673473022770345, "learning_rate": 4.3646885992483984e-07, "loss": 0.6218, "step": 22555 }, { "epoch": 1.6295627359257319, "grad_norm": 7.945902064532423, "learning_rate": 4.3630374763534655e-07, "loss": 0.59, "step": 22556 }, { "epoch": 1.6296349811259414, "grad_norm": 7.256370113669954, "learning_rate": 4.361386635962528e-07, "loss": 0.5632, "step": 22557 }, { "epoch": 1.629707226326151, "grad_norm": 7.449583863790806, "learning_rate": 4.359736078098187e-07, "loss": 0.709, "step": 22558 }, { "epoch": 1.6297794715263605, "grad_norm": 6.900648788688929, "learning_rate": 4.358085802783027e-07, "loss": 0.5452, "step": 22559 }, { "epoch": 1.62985171672657, "grad_norm": 8.198845144686109, "learning_rate": 4.356435810039644e-07, "loss": 0.6198, "step": 22560 }, { "epoch": 1.6299239619267794, "grad_norm": 7.768637857525407, "learning_rate": 4.3547860998906287e-07, "loss": 0.6098, "step": 22561 }, { "epoch": 1.6299962071269891, "grad_norm": 9.197389431304636, "learning_rate": 4.35313667235856e-07, "loss": 0.6602, "step": 22562 }, { "epoch": 1.6300684523271984, "grad_norm": 7.295406871846234, "learning_rate": 4.3514875274660195e-07, "loss": 0.6223, "step": 22563 }, { "epoch": 1.630140697527408, "grad_norm": 6.606769454007624, "learning_rate": 4.349838665235581e-07, "loss": 0.6598, "step": 22564 }, { "epoch": 1.6302129427276175, "grad_norm": 8.12316920854925, "learning_rate": 4.3481900856898266e-07, "loss": 0.6306, "step": 22565 }, { "epoch": 1.630285187927827, "grad_norm": 7.596913373466561, "learning_rate": 4.346541788851305e-07, "loss": 0.6136, "step": 22566 }, { "epoch": 1.6303574331280366, "grad_norm": 6.109398414976709, "learning_rate": 4.3448937747425924e-07, "loss": 0.5938, "step": 22567 }, { "epoch": 1.630429678328246, "grad_norm": 7.366255408957797, "learning_rate": 4.3432460433862457e-07, "loss": 0.6169, "step": 22568 }, { "epoch": 1.6305019235284557, "grad_norm": 7.567754008047458, "learning_rate": 4.341598594804827e-07, "loss": 0.5855, "step": 22569 }, { "epoch": 1.630574168728665, "grad_norm": 5.918730613088629, "learning_rate": 4.33995142902088e-07, "loss": 0.5712, "step": 22570 }, { "epoch": 1.6306464139288745, "grad_norm": 6.951410043510423, "learning_rate": 4.3383045460569486e-07, "loss": 0.5393, "step": 22571 }, { "epoch": 1.630718659129084, "grad_norm": 7.029978726220534, "learning_rate": 4.3366579459355964e-07, "loss": 0.6562, "step": 22572 }, { "epoch": 1.6307909043292936, "grad_norm": 8.731311567959155, "learning_rate": 4.3350116286793475e-07, "loss": 0.6226, "step": 22573 }, { "epoch": 1.6308631495295032, "grad_norm": 6.942268451130264, "learning_rate": 4.333365594310743e-07, "loss": 0.6302, "step": 22574 }, { "epoch": 1.6309353947297125, "grad_norm": 8.013809682659229, "learning_rate": 4.3317198428523194e-07, "loss": 0.586, "step": 22575 }, { "epoch": 1.6310076399299223, "grad_norm": 8.978970011411127, "learning_rate": 4.330074374326607e-07, "loss": 0.5961, "step": 22576 }, { "epoch": 1.6310798851301316, "grad_norm": 8.20836002971613, "learning_rate": 4.3284291887561226e-07, "loss": 0.6406, "step": 22577 }, { "epoch": 1.6311521303303413, "grad_norm": 6.604270316512962, "learning_rate": 4.3267842861633907e-07, "loss": 0.603, "step": 22578 }, { "epoch": 1.6312243755305507, "grad_norm": 7.851518287344423, "learning_rate": 4.325139666570932e-07, "loss": 0.6189, "step": 22579 }, { "epoch": 1.6312966207307602, "grad_norm": 6.540558382980893, "learning_rate": 4.3234953300012566e-07, "loss": 0.5671, "step": 22580 }, { "epoch": 1.6313688659309697, "grad_norm": 6.488677507089994, "learning_rate": 4.3218512764768767e-07, "loss": 0.6308, "step": 22581 }, { "epoch": 1.631441111131179, "grad_norm": 7.94280564212079, "learning_rate": 4.3202075060202993e-07, "loss": 0.5574, "step": 22582 }, { "epoch": 1.6315133563313888, "grad_norm": 6.668912350395367, "learning_rate": 4.3185640186540284e-07, "loss": 0.5661, "step": 22583 }, { "epoch": 1.6315856015315982, "grad_norm": 7.879472945736148, "learning_rate": 4.316920814400549e-07, "loss": 0.6464, "step": 22584 }, { "epoch": 1.631657846731808, "grad_norm": 7.3180176874297835, "learning_rate": 4.3152778932823683e-07, "loss": 0.5654, "step": 22585 }, { "epoch": 1.6317300919320172, "grad_norm": 7.054185412209831, "learning_rate": 4.3136352553219715e-07, "loss": 0.6264, "step": 22586 }, { "epoch": 1.6318023371322268, "grad_norm": 6.885962883487223, "learning_rate": 4.311992900541853e-07, "loss": 0.5466, "step": 22587 }, { "epoch": 1.6318745823324363, "grad_norm": 6.841558793673772, "learning_rate": 4.3103508289644824e-07, "loss": 0.5499, "step": 22588 }, { "epoch": 1.6319468275326456, "grad_norm": 6.323500613083081, "learning_rate": 4.308709040612341e-07, "loss": 0.6053, "step": 22589 }, { "epoch": 1.6320190727328554, "grad_norm": 6.967530832431164, "learning_rate": 4.307067535507911e-07, "loss": 0.6346, "step": 22590 }, { "epoch": 1.6320913179330647, "grad_norm": 7.247794506435701, "learning_rate": 4.305426313673658e-07, "loss": 0.6143, "step": 22591 }, { "epoch": 1.6321635631332745, "grad_norm": 8.72479497892738, "learning_rate": 4.30378537513205e-07, "loss": 0.6659, "step": 22592 }, { "epoch": 1.6322358083334838, "grad_norm": 7.299114528933913, "learning_rate": 4.3021447199055517e-07, "loss": 0.6511, "step": 22593 }, { "epoch": 1.6323080535336933, "grad_norm": 6.817488777310878, "learning_rate": 4.3005043480166264e-07, "loss": 0.5536, "step": 22594 }, { "epoch": 1.632380298733903, "grad_norm": 5.809192988277187, "learning_rate": 4.29886425948772e-07, "loss": 0.5705, "step": 22595 }, { "epoch": 1.6324525439341124, "grad_norm": 7.519138839192733, "learning_rate": 4.2972244543412867e-07, "loss": 0.6259, "step": 22596 }, { "epoch": 1.632524789134322, "grad_norm": 6.338091050374852, "learning_rate": 4.295584932599783e-07, "loss": 0.5672, "step": 22597 }, { "epoch": 1.6325970343345313, "grad_norm": 7.764778075673066, "learning_rate": 4.2939456942856396e-07, "loss": 0.6666, "step": 22598 }, { "epoch": 1.632669279534741, "grad_norm": 6.741278771682644, "learning_rate": 4.2923067394213033e-07, "loss": 0.5447, "step": 22599 }, { "epoch": 1.6327415247349504, "grad_norm": 7.0075424975856375, "learning_rate": 4.2906680680292024e-07, "loss": 0.6107, "step": 22600 }, { "epoch": 1.63281376993516, "grad_norm": 6.919897895015605, "learning_rate": 4.289029680131787e-07, "loss": 0.6297, "step": 22601 }, { "epoch": 1.6328860151353695, "grad_norm": 7.697657627673554, "learning_rate": 4.287391575751468e-07, "loss": 0.6494, "step": 22602 }, { "epoch": 1.632958260335579, "grad_norm": 7.221036474588256, "learning_rate": 4.2857537549106766e-07, "loss": 0.5569, "step": 22603 }, { "epoch": 1.6330305055357885, "grad_norm": 7.611943306948833, "learning_rate": 4.2841162176318395e-07, "loss": 0.6668, "step": 22604 }, { "epoch": 1.6331027507359979, "grad_norm": 7.659911731978327, "learning_rate": 4.2824789639373615e-07, "loss": 0.6277, "step": 22605 }, { "epoch": 1.6331749959362076, "grad_norm": 7.12805565345008, "learning_rate": 4.2808419938496577e-07, "loss": 0.5559, "step": 22606 }, { "epoch": 1.633247241136417, "grad_norm": 7.404152875067024, "learning_rate": 4.2792053073911407e-07, "loss": 0.602, "step": 22607 }, { "epoch": 1.6333194863366265, "grad_norm": 8.280906814085895, "learning_rate": 4.2775689045842144e-07, "loss": 0.6385, "step": 22608 }, { "epoch": 1.633391731536836, "grad_norm": 7.021167358542321, "learning_rate": 4.2759327854512813e-07, "loss": 0.548, "step": 22609 }, { "epoch": 1.6334639767370456, "grad_norm": 7.032708182130125, "learning_rate": 4.274296950014734e-07, "loss": 0.6544, "step": 22610 }, { "epoch": 1.6335362219372551, "grad_norm": 6.603256531906956, "learning_rate": 4.2726613982969705e-07, "loss": 0.6386, "step": 22611 }, { "epoch": 1.6336084671374644, "grad_norm": 6.819458653507938, "learning_rate": 4.2710261303203855e-07, "loss": 0.5781, "step": 22612 }, { "epoch": 1.6336807123376742, "grad_norm": 7.064559771191846, "learning_rate": 4.2693911461073516e-07, "loss": 0.6617, "step": 22613 }, { "epoch": 1.6337529575378835, "grad_norm": 8.031996636621752, "learning_rate": 4.2677564456802574e-07, "loss": 0.5847, "step": 22614 }, { "epoch": 1.633825202738093, "grad_norm": 7.245545959331104, "learning_rate": 4.266122029061484e-07, "loss": 0.6727, "step": 22615 }, { "epoch": 1.6338974479383026, "grad_norm": 6.613359120739118, "learning_rate": 4.2644878962733977e-07, "loss": 0.6537, "step": 22616 }, { "epoch": 1.6339696931385121, "grad_norm": 7.686929821175916, "learning_rate": 4.262854047338369e-07, "loss": 0.5441, "step": 22617 }, { "epoch": 1.6340419383387217, "grad_norm": 7.591362220641973, "learning_rate": 4.261220482278769e-07, "loss": 0.598, "step": 22618 }, { "epoch": 1.634114183538931, "grad_norm": 6.614277349488531, "learning_rate": 4.2595872011169575e-07, "loss": 0.583, "step": 22619 }, { "epoch": 1.6341864287391408, "grad_norm": 7.0371727984236, "learning_rate": 4.257954203875292e-07, "loss": 0.5789, "step": 22620 }, { "epoch": 1.63425867393935, "grad_norm": 6.893106096065208, "learning_rate": 4.256321490576129e-07, "loss": 0.6051, "step": 22621 }, { "epoch": 1.6343309191395596, "grad_norm": 7.518331631177054, "learning_rate": 4.254689061241826e-07, "loss": 0.6515, "step": 22622 }, { "epoch": 1.6344031643397692, "grad_norm": 7.758870862237931, "learning_rate": 4.2530569158947154e-07, "loss": 0.5791, "step": 22623 }, { "epoch": 1.6344754095399787, "grad_norm": 6.829608118926405, "learning_rate": 4.2514250545571457e-07, "loss": 0.5594, "step": 22624 }, { "epoch": 1.6345476547401883, "grad_norm": 7.1621706262720375, "learning_rate": 4.249793477251457e-07, "loss": 0.6328, "step": 22625 }, { "epoch": 1.6346198999403976, "grad_norm": 8.285406717689789, "learning_rate": 4.2481621839999903e-07, "loss": 0.624, "step": 22626 }, { "epoch": 1.6346921451406073, "grad_norm": 7.105069040691867, "learning_rate": 4.2465311748250667e-07, "loss": 0.5616, "step": 22627 }, { "epoch": 1.6347643903408167, "grad_norm": 6.560570714827183, "learning_rate": 4.2449004497490095e-07, "loss": 0.6214, "step": 22628 }, { "epoch": 1.6348366355410262, "grad_norm": 7.0273991190842615, "learning_rate": 4.2432700087941593e-07, "loss": 0.5944, "step": 22629 }, { "epoch": 1.6349088807412357, "grad_norm": 8.56749807793557, "learning_rate": 4.241639851982823e-07, "loss": 0.6405, "step": 22630 }, { "epoch": 1.6349811259414453, "grad_norm": 6.174339092268772, "learning_rate": 4.24000997933732e-07, "loss": 0.6271, "step": 22631 }, { "epoch": 1.6350533711416548, "grad_norm": 6.293921599184382, "learning_rate": 4.238380390879959e-07, "loss": 0.6058, "step": 22632 }, { "epoch": 1.6351256163418642, "grad_norm": 6.703713984722562, "learning_rate": 4.2367510866330566e-07, "loss": 0.6585, "step": 22633 }, { "epoch": 1.635197861542074, "grad_norm": 7.553542704271546, "learning_rate": 4.2351220666189046e-07, "loss": 0.6033, "step": 22634 }, { "epoch": 1.6352701067422832, "grad_norm": 7.434515346182103, "learning_rate": 4.2334933308598084e-07, "loss": 0.5682, "step": 22635 }, { "epoch": 1.6353423519424928, "grad_norm": 7.076550036903907, "learning_rate": 4.2318648793780643e-07, "loss": 0.6453, "step": 22636 }, { "epoch": 1.6354145971427023, "grad_norm": 8.958309250436985, "learning_rate": 4.230236712195965e-07, "loss": 0.7079, "step": 22637 }, { "epoch": 1.6354868423429119, "grad_norm": 6.618080988600826, "learning_rate": 4.2286088293358006e-07, "loss": 0.5706, "step": 22638 }, { "epoch": 1.6355590875431214, "grad_norm": 6.983286932864692, "learning_rate": 4.2269812308198486e-07, "loss": 0.611, "step": 22639 }, { "epoch": 1.6356313327433307, "grad_norm": 6.230628855211686, "learning_rate": 4.225353916670405e-07, "loss": 0.6472, "step": 22640 }, { "epoch": 1.6357035779435405, "grad_norm": 7.241708947494687, "learning_rate": 4.2237268869097263e-07, "loss": 0.5852, "step": 22641 }, { "epoch": 1.6357758231437498, "grad_norm": 7.365398816862894, "learning_rate": 4.2221001415600983e-07, "loss": 0.6122, "step": 22642 }, { "epoch": 1.6358480683439593, "grad_norm": 7.2391030863549, "learning_rate": 4.220473680643783e-07, "loss": 0.5783, "step": 22643 }, { "epoch": 1.635920313544169, "grad_norm": 6.990192900303057, "learning_rate": 4.2188475041830573e-07, "loss": 0.6018, "step": 22644 }, { "epoch": 1.6359925587443784, "grad_norm": 6.794068487119188, "learning_rate": 4.217221612200165e-07, "loss": 0.5587, "step": 22645 }, { "epoch": 1.636064803944588, "grad_norm": 7.188138717798813, "learning_rate": 4.215596004717373e-07, "loss": 0.621, "step": 22646 }, { "epoch": 1.6361370491447973, "grad_norm": 6.60722788752333, "learning_rate": 4.213970681756935e-07, "loss": 0.5493, "step": 22647 }, { "epoch": 1.636209294345007, "grad_norm": 6.3258649392968085, "learning_rate": 4.212345643341098e-07, "loss": 0.6356, "step": 22648 }, { "epoch": 1.6362815395452164, "grad_norm": 6.368489849553597, "learning_rate": 4.210720889492109e-07, "loss": 0.5936, "step": 22649 }, { "epoch": 1.636353784745426, "grad_norm": 6.965615700963258, "learning_rate": 4.209096420232209e-07, "loss": 0.6623, "step": 22650 }, { "epoch": 1.6364260299456355, "grad_norm": 7.082098580243922, "learning_rate": 4.2074722355836427e-07, "loss": 0.5752, "step": 22651 }, { "epoch": 1.636498275145845, "grad_norm": 6.8122085368951, "learning_rate": 4.2058483355686295e-07, "loss": 0.6496, "step": 22652 }, { "epoch": 1.6365705203460545, "grad_norm": 7.867604249373239, "learning_rate": 4.2042247202094074e-07, "loss": 0.6532, "step": 22653 }, { "epoch": 1.6366427655462639, "grad_norm": 8.200212001386049, "learning_rate": 4.2026013895282075e-07, "loss": 0.6184, "step": 22654 }, { "epoch": 1.6367150107464736, "grad_norm": 6.390470395631405, "learning_rate": 4.2009783435472436e-07, "loss": 0.6231, "step": 22655 }, { "epoch": 1.636787255946683, "grad_norm": 9.23191095774004, "learning_rate": 4.1993555822887277e-07, "loss": 0.5747, "step": 22656 }, { "epoch": 1.6368595011468927, "grad_norm": 7.96124871930618, "learning_rate": 4.19773310577489e-07, "loss": 0.6101, "step": 22657 }, { "epoch": 1.636931746347102, "grad_norm": 6.721265313143647, "learning_rate": 4.19611091402794e-07, "loss": 0.6408, "step": 22658 }, { "epoch": 1.6370039915473116, "grad_norm": 7.169658509532414, "learning_rate": 4.194489007070071e-07, "loss": 0.5261, "step": 22659 }, { "epoch": 1.6370762367475211, "grad_norm": 7.316786854422293, "learning_rate": 4.192867384923496e-07, "loss": 0.5846, "step": 22660 }, { "epoch": 1.6371484819477304, "grad_norm": 6.764093168369346, "learning_rate": 4.191246047610409e-07, "loss": 0.6057, "step": 22661 }, { "epoch": 1.6372207271479402, "grad_norm": 6.495316799034842, "learning_rate": 4.1896249951530133e-07, "loss": 0.5886, "step": 22662 }, { "epoch": 1.6372929723481495, "grad_norm": 7.319007521088498, "learning_rate": 4.1880042275734864e-07, "loss": 0.641, "step": 22663 }, { "epoch": 1.6373652175483593, "grad_norm": 6.627162652338981, "learning_rate": 4.186383744894024e-07, "loss": 0.5534, "step": 22664 }, { "epoch": 1.6374374627485686, "grad_norm": 6.6668522173173175, "learning_rate": 4.184763547136805e-07, "loss": 0.5762, "step": 22665 }, { "epoch": 1.6375097079487781, "grad_norm": 7.537711383139302, "learning_rate": 4.1831436343240136e-07, "loss": 0.6112, "step": 22666 }, { "epoch": 1.6375819531489877, "grad_norm": 6.506619823839596, "learning_rate": 4.1815240064778215e-07, "loss": 0.6023, "step": 22667 }, { "epoch": 1.637654198349197, "grad_norm": 6.95397313120854, "learning_rate": 4.1799046636204014e-07, "loss": 0.6138, "step": 22668 }, { "epoch": 1.6377264435494068, "grad_norm": 7.049800020452293, "learning_rate": 4.178285605773924e-07, "loss": 0.6113, "step": 22669 }, { "epoch": 1.637798688749616, "grad_norm": 6.899526784398537, "learning_rate": 4.1766668329605463e-07, "loss": 0.5997, "step": 22670 }, { "epoch": 1.6378709339498259, "grad_norm": 7.834080058549921, "learning_rate": 4.175048345202432e-07, "loss": 0.629, "step": 22671 }, { "epoch": 1.6379431791500352, "grad_norm": 6.8143338876068515, "learning_rate": 4.1734301425217423e-07, "loss": 0.6229, "step": 22672 }, { "epoch": 1.6380154243502447, "grad_norm": 6.886835349356799, "learning_rate": 4.1718122249406163e-07, "loss": 0.5424, "step": 22673 }, { "epoch": 1.6380876695504543, "grad_norm": 7.0741216862474126, "learning_rate": 4.1701945924812084e-07, "loss": 0.589, "step": 22674 }, { "epoch": 1.6381599147506638, "grad_norm": 6.547236539558628, "learning_rate": 4.168577245165664e-07, "loss": 0.5452, "step": 22675 }, { "epoch": 1.6382321599508733, "grad_norm": 7.701470806793377, "learning_rate": 4.166960183016122e-07, "loss": 0.615, "step": 22676 }, { "epoch": 1.6383044051510827, "grad_norm": 5.661586878164452, "learning_rate": 4.1653434060547197e-07, "loss": 0.5292, "step": 22677 }, { "epoch": 1.6383766503512924, "grad_norm": 6.21026216110231, "learning_rate": 4.163726914303587e-07, "loss": 0.6089, "step": 22678 }, { "epoch": 1.6384488955515017, "grad_norm": 6.432479530298938, "learning_rate": 4.162110707784864e-07, "loss": 0.5713, "step": 22679 }, { "epoch": 1.6385211407517113, "grad_norm": 7.358047657220476, "learning_rate": 4.160494786520658e-07, "loss": 0.65, "step": 22680 }, { "epoch": 1.6385933859519208, "grad_norm": 6.681389985433508, "learning_rate": 4.158879150533096e-07, "loss": 0.6199, "step": 22681 }, { "epoch": 1.6386656311521304, "grad_norm": 6.595242733598809, "learning_rate": 4.157263799844299e-07, "loss": 0.5397, "step": 22682 }, { "epoch": 1.63873787635234, "grad_norm": 5.884328699968041, "learning_rate": 4.155648734476384e-07, "loss": 0.5863, "step": 22683 }, { "epoch": 1.6388101215525492, "grad_norm": 9.114468284244133, "learning_rate": 4.1540339544514446e-07, "loss": 0.5954, "step": 22684 }, { "epoch": 1.638882366752759, "grad_norm": 7.584713680971382, "learning_rate": 4.15241945979159e-07, "loss": 0.6269, "step": 22685 }, { "epoch": 1.6389546119529683, "grad_norm": 8.328467614317436, "learning_rate": 4.15080525051893e-07, "loss": 0.5806, "step": 22686 }, { "epoch": 1.6390268571531779, "grad_norm": 6.856923701553473, "learning_rate": 4.149191326655566e-07, "loss": 0.5268, "step": 22687 }, { "epoch": 1.6390991023533874, "grad_norm": 6.928769503474292, "learning_rate": 4.1475776882235755e-07, "loss": 0.578, "step": 22688 }, { "epoch": 1.639171347553597, "grad_norm": 7.663353439023708, "learning_rate": 4.145964335245056e-07, "loss": 0.6023, "step": 22689 }, { "epoch": 1.6392435927538065, "grad_norm": 6.945767811460635, "learning_rate": 4.1443512677421e-07, "loss": 0.6274, "step": 22690 }, { "epoch": 1.6393158379540158, "grad_norm": 7.502709217300457, "learning_rate": 4.1427384857367747e-07, "loss": 0.5946, "step": 22691 }, { "epoch": 1.6393880831542256, "grad_norm": 6.431284153498204, "learning_rate": 4.1411259892511657e-07, "loss": 0.6118, "step": 22692 }, { "epoch": 1.639460328354435, "grad_norm": 6.616733215663574, "learning_rate": 4.1395137783073444e-07, "loss": 0.5625, "step": 22693 }, { "epoch": 1.6395325735546444, "grad_norm": 7.890586082438062, "learning_rate": 4.137901852927384e-07, "loss": 0.6262, "step": 22694 }, { "epoch": 1.639604818754854, "grad_norm": 7.709605866289495, "learning_rate": 4.136290213133348e-07, "loss": 0.5669, "step": 22695 }, { "epoch": 1.6396770639550635, "grad_norm": 6.723991301377188, "learning_rate": 4.1346788589472997e-07, "loss": 0.589, "step": 22696 }, { "epoch": 1.639749309155273, "grad_norm": 6.564164886002999, "learning_rate": 4.133067790391301e-07, "loss": 0.6309, "step": 22697 }, { "epoch": 1.6398215543554824, "grad_norm": 7.838527688724732, "learning_rate": 4.1314570074873994e-07, "loss": 0.626, "step": 22698 }, { "epoch": 1.6398937995556921, "grad_norm": 6.34962169091008, "learning_rate": 4.129846510257646e-07, "loss": 0.5612, "step": 22699 }, { "epoch": 1.6399660447559015, "grad_norm": 7.97600557211908, "learning_rate": 4.1282362987240894e-07, "loss": 0.6699, "step": 22700 }, { "epoch": 1.640038289956111, "grad_norm": 6.858776301018434, "learning_rate": 4.126626372908779e-07, "loss": 0.6297, "step": 22701 }, { "epoch": 1.6401105351563205, "grad_norm": 7.028568553370155, "learning_rate": 4.125016732833739e-07, "loss": 0.6967, "step": 22702 }, { "epoch": 1.64018278035653, "grad_norm": 6.603976594935439, "learning_rate": 4.123407378521013e-07, "loss": 0.6679, "step": 22703 }, { "epoch": 1.6402550255567396, "grad_norm": 7.614299332262231, "learning_rate": 4.121798309992631e-07, "loss": 0.5765, "step": 22704 }, { "epoch": 1.640327270756949, "grad_norm": 6.8917686568635315, "learning_rate": 4.1201895272706155e-07, "loss": 0.6244, "step": 22705 }, { "epoch": 1.6403995159571587, "grad_norm": 7.081827639447584, "learning_rate": 4.1185810303769973e-07, "loss": 0.5944, "step": 22706 }, { "epoch": 1.640471761157368, "grad_norm": 7.365610384693948, "learning_rate": 4.1169728193337895e-07, "loss": 0.6669, "step": 22707 }, { "epoch": 1.6405440063575776, "grad_norm": 7.29379697652168, "learning_rate": 4.115364894163015e-07, "loss": 0.5726, "step": 22708 }, { "epoch": 1.6406162515577871, "grad_norm": 6.884873669364982, "learning_rate": 4.1137572548866747e-07, "loss": 0.5948, "step": 22709 }, { "epoch": 1.6406884967579967, "grad_norm": 5.853454365503604, "learning_rate": 4.1121499015267794e-07, "loss": 0.5513, "step": 22710 }, { "epoch": 1.6407607419582062, "grad_norm": 7.2016001512698224, "learning_rate": 4.1105428341053383e-07, "loss": 0.5296, "step": 22711 }, { "epoch": 1.6408329871584155, "grad_norm": 7.810407434598632, "learning_rate": 4.108936052644341e-07, "loss": 0.5724, "step": 22712 }, { "epoch": 1.6409052323586253, "grad_norm": 8.272326228189138, "learning_rate": 4.1073295571657844e-07, "loss": 0.6527, "step": 22713 }, { "epoch": 1.6409774775588346, "grad_norm": 7.450936369967646, "learning_rate": 4.1057233476916674e-07, "loss": 0.5927, "step": 22714 }, { "epoch": 1.6410497227590441, "grad_norm": 7.454860141396565, "learning_rate": 4.104117424243981e-07, "loss": 0.5959, "step": 22715 }, { "epoch": 1.6411219679592537, "grad_norm": 7.064711233016407, "learning_rate": 4.1025117868446985e-07, "loss": 0.63, "step": 22716 }, { "epoch": 1.6411942131594632, "grad_norm": 6.371386831759009, "learning_rate": 4.100906435515803e-07, "loss": 0.5838, "step": 22717 }, { "epoch": 1.6412664583596728, "grad_norm": 7.20939234486359, "learning_rate": 4.099301370279268e-07, "loss": 0.5834, "step": 22718 }, { "epoch": 1.641338703559882, "grad_norm": 6.626335153455337, "learning_rate": 4.0976965911570796e-07, "loss": 0.6133, "step": 22719 }, { "epoch": 1.6414109487600919, "grad_norm": 8.658887550844165, "learning_rate": 4.0960920981711867e-07, "loss": 0.6507, "step": 22720 }, { "epoch": 1.6414831939603012, "grad_norm": 9.33656795400044, "learning_rate": 4.0944878913435626e-07, "loss": 0.6136, "step": 22721 }, { "epoch": 1.6415554391605107, "grad_norm": 7.61132662143942, "learning_rate": 4.092883970696165e-07, "loss": 0.5669, "step": 22722 }, { "epoch": 1.6416276843607203, "grad_norm": 6.6058279489118545, "learning_rate": 4.091280336250955e-07, "loss": 0.5856, "step": 22723 }, { "epoch": 1.6416999295609298, "grad_norm": 7.568722521529651, "learning_rate": 4.0896769880298835e-07, "loss": 0.579, "step": 22724 }, { "epoch": 1.6417721747611393, "grad_norm": 6.95975478343223, "learning_rate": 4.0880739260548973e-07, "loss": 0.629, "step": 22725 }, { "epoch": 1.6418444199613487, "grad_norm": 7.61281399885753, "learning_rate": 4.086471150347948e-07, "loss": 0.562, "step": 22726 }, { "epoch": 1.6419166651615584, "grad_norm": 7.239166320938952, "learning_rate": 4.0848686609309627e-07, "loss": 0.6203, "step": 22727 }, { "epoch": 1.6419889103617678, "grad_norm": 6.532828838306973, "learning_rate": 4.083266457825888e-07, "loss": 0.6239, "step": 22728 }, { "epoch": 1.6420611555619775, "grad_norm": 6.972035270842445, "learning_rate": 4.0816645410546586e-07, "loss": 0.5657, "step": 22729 }, { "epoch": 1.6421334007621868, "grad_norm": 8.48141602119654, "learning_rate": 4.080062910639196e-07, "loss": 0.5895, "step": 22730 }, { "epoch": 1.6422056459623964, "grad_norm": 7.332541365196275, "learning_rate": 4.078461566601427e-07, "loss": 0.5744, "step": 22731 }, { "epoch": 1.642277891162606, "grad_norm": 7.048516668011667, "learning_rate": 4.076860508963276e-07, "loss": 0.5751, "step": 22732 }, { "epoch": 1.6423501363628152, "grad_norm": 7.019606244382364, "learning_rate": 4.075259737746659e-07, "loss": 0.6718, "step": 22733 }, { "epoch": 1.642422381563025, "grad_norm": 8.110726833675747, "learning_rate": 4.0736592529734887e-07, "loss": 0.6223, "step": 22734 }, { "epoch": 1.6424946267632343, "grad_norm": 6.5981841161315815, "learning_rate": 4.0720590546656727e-07, "loss": 0.5954, "step": 22735 }, { "epoch": 1.642566871963444, "grad_norm": 7.95327512827975, "learning_rate": 4.0704591428451263e-07, "loss": 0.609, "step": 22736 }, { "epoch": 1.6426391171636534, "grad_norm": 7.598700271697246, "learning_rate": 4.068859517533738e-07, "loss": 0.6219, "step": 22737 }, { "epoch": 1.642711362363863, "grad_norm": 8.30157073451502, "learning_rate": 4.0672601787534126e-07, "loss": 0.6486, "step": 22738 }, { "epoch": 1.6427836075640725, "grad_norm": 8.538664449923003, "learning_rate": 4.065661126526041e-07, "loss": 0.6068, "step": 22739 }, { "epoch": 1.6428558527642818, "grad_norm": 7.18216832649564, "learning_rate": 4.064062360873519e-07, "loss": 0.6706, "step": 22740 }, { "epoch": 1.6429280979644916, "grad_norm": 6.874734491943438, "learning_rate": 4.062463881817716e-07, "loss": 0.6353, "step": 22741 }, { "epoch": 1.643000343164701, "grad_norm": 6.99789887638297, "learning_rate": 4.0608656893805304e-07, "loss": 0.5725, "step": 22742 }, { "epoch": 1.6430725883649107, "grad_norm": 6.7017001258200155, "learning_rate": 4.0592677835838366e-07, "loss": 0.6438, "step": 22743 }, { "epoch": 1.64314483356512, "grad_norm": 6.071745439483178, "learning_rate": 4.057670164449515e-07, "loss": 0.5501, "step": 22744 }, { "epoch": 1.6432170787653295, "grad_norm": 8.268035594846955, "learning_rate": 4.056072831999419e-07, "loss": 0.6068, "step": 22745 }, { "epoch": 1.643289323965539, "grad_norm": 6.352239331058219, "learning_rate": 4.054475786255427e-07, "loss": 0.6013, "step": 22746 }, { "epoch": 1.6433615691657486, "grad_norm": 6.1507056971139304, "learning_rate": 4.0528790272394007e-07, "loss": 0.5535, "step": 22747 }, { "epoch": 1.6434338143659581, "grad_norm": 7.405387989211095, "learning_rate": 4.051282554973193e-07, "loss": 0.5823, "step": 22748 }, { "epoch": 1.6435060595661675, "grad_norm": 6.694686878223726, "learning_rate": 4.0496863694786617e-07, "loss": 0.6144, "step": 22749 }, { "epoch": 1.6435783047663772, "grad_norm": 8.254281522467503, "learning_rate": 4.048090470777655e-07, "loss": 0.6098, "step": 22750 }, { "epoch": 1.6436505499665865, "grad_norm": 7.098688633125036, "learning_rate": 4.046494858892022e-07, "loss": 0.6188, "step": 22751 }, { "epoch": 1.643722795166796, "grad_norm": 6.584381535132613, "learning_rate": 4.0448995338436077e-07, "loss": 0.5861, "step": 22752 }, { "epoch": 1.6437950403670056, "grad_norm": 7.481338171570801, "learning_rate": 4.043304495654246e-07, "loss": 0.6675, "step": 22753 }, { "epoch": 1.6438672855672152, "grad_norm": 7.793534597251409, "learning_rate": 4.0417097443457813e-07, "loss": 0.5857, "step": 22754 }, { "epoch": 1.6439395307674247, "grad_norm": 7.151633560197677, "learning_rate": 4.0401152799400297e-07, "loss": 0.6319, "step": 22755 }, { "epoch": 1.644011775967634, "grad_norm": 9.216344694155291, "learning_rate": 4.0385211024588253e-07, "loss": 0.6487, "step": 22756 }, { "epoch": 1.6440840211678438, "grad_norm": 7.85359309873811, "learning_rate": 4.036927211923991e-07, "loss": 0.6556, "step": 22757 }, { "epoch": 1.6441562663680531, "grad_norm": 7.021196969083128, "learning_rate": 4.0353336083573557e-07, "loss": 0.5932, "step": 22758 }, { "epoch": 1.6442285115682627, "grad_norm": 6.761731725688012, "learning_rate": 4.033740291780716e-07, "loss": 0.6258, "step": 22759 }, { "epoch": 1.6443007567684722, "grad_norm": 7.46672049684648, "learning_rate": 4.0321472622158934e-07, "loss": 0.5706, "step": 22760 }, { "epoch": 1.6443730019686817, "grad_norm": 7.153947884693742, "learning_rate": 4.0305545196846955e-07, "loss": 0.6065, "step": 22761 }, { "epoch": 1.6444452471688913, "grad_norm": 6.953189189486013, "learning_rate": 4.0289620642089246e-07, "loss": 0.5926, "step": 22762 }, { "epoch": 1.6445174923691006, "grad_norm": 8.501733042249661, "learning_rate": 4.027369895810379e-07, "loss": 0.6811, "step": 22763 }, { "epoch": 1.6445897375693104, "grad_norm": 7.743609839549508, "learning_rate": 4.0257780145108534e-07, "loss": 0.5932, "step": 22764 }, { "epoch": 1.6446619827695197, "grad_norm": 7.506729413047529, "learning_rate": 4.024186420332149e-07, "loss": 0.6378, "step": 22765 }, { "epoch": 1.6447342279697292, "grad_norm": 6.15442394148418, "learning_rate": 4.0225951132960405e-07, "loss": 0.6004, "step": 22766 }, { "epoch": 1.6448064731699388, "grad_norm": 6.400748519040871, "learning_rate": 4.021004093424319e-07, "loss": 0.5709, "step": 22767 }, { "epoch": 1.6448787183701483, "grad_norm": 8.277820265601893, "learning_rate": 4.019413360738758e-07, "loss": 0.615, "step": 22768 }, { "epoch": 1.6449509635703579, "grad_norm": 6.4105885770762905, "learning_rate": 4.0178229152611407e-07, "loss": 0.5693, "step": 22769 }, { "epoch": 1.6450232087705672, "grad_norm": 7.425037874581169, "learning_rate": 4.016232757013236e-07, "loss": 0.6352, "step": 22770 }, { "epoch": 1.645095453970777, "grad_norm": 7.519933785897793, "learning_rate": 4.014642886016809e-07, "loss": 0.6026, "step": 22771 }, { "epoch": 1.6451676991709863, "grad_norm": 6.660072053170143, "learning_rate": 4.013053302293635e-07, "loss": 0.5727, "step": 22772 }, { "epoch": 1.6452399443711958, "grad_norm": 7.651020365963017, "learning_rate": 4.011464005865462e-07, "loss": 0.6536, "step": 22773 }, { "epoch": 1.6453121895714053, "grad_norm": 6.601443751243366, "learning_rate": 4.0098749967540456e-07, "loss": 0.6067, "step": 22774 }, { "epoch": 1.645384434771615, "grad_norm": 8.768578560445423, "learning_rate": 4.0082862749811463e-07, "loss": 0.6753, "step": 22775 }, { "epoch": 1.6454566799718244, "grad_norm": 6.588745242706764, "learning_rate": 4.0066978405685133e-07, "loss": 0.578, "step": 22776 }, { "epoch": 1.6455289251720338, "grad_norm": 9.64467482616399, "learning_rate": 4.0051096935378814e-07, "loss": 0.7025, "step": 22777 }, { "epoch": 1.6456011703722435, "grad_norm": 6.764010829291989, "learning_rate": 4.0035218339109977e-07, "loss": 0.5671, "step": 22778 }, { "epoch": 1.6456734155724528, "grad_norm": 6.6595098545377605, "learning_rate": 4.001934261709595e-07, "loss": 0.6057, "step": 22779 }, { "epoch": 1.6457456607726624, "grad_norm": 8.420890679809862, "learning_rate": 4.0003469769554074e-07, "loss": 0.6501, "step": 22780 }, { "epoch": 1.645817905972872, "grad_norm": 7.289757207901131, "learning_rate": 3.9987599796701664e-07, "loss": 0.5427, "step": 22781 }, { "epoch": 1.6458901511730815, "grad_norm": 8.000733818730854, "learning_rate": 3.9971732698755955e-07, "loss": 0.5524, "step": 22782 }, { "epoch": 1.645962396373291, "grad_norm": 7.367147111832558, "learning_rate": 3.995586847593419e-07, "loss": 0.6543, "step": 22783 }, { "epoch": 1.6460346415735003, "grad_norm": 6.486578362475787, "learning_rate": 3.994000712845347e-07, "loss": 0.623, "step": 22784 }, { "epoch": 1.64610688677371, "grad_norm": 8.764629232656537, "learning_rate": 3.992414865653091e-07, "loss": 0.5823, "step": 22785 }, { "epoch": 1.6461791319739194, "grad_norm": 8.213935863555635, "learning_rate": 3.990829306038374e-07, "loss": 0.6511, "step": 22786 }, { "epoch": 1.646251377174129, "grad_norm": 5.920256622525961, "learning_rate": 3.989244034022882e-07, "loss": 0.5187, "step": 22787 }, { "epoch": 1.6463236223743385, "grad_norm": 6.355812068170238, "learning_rate": 3.9876590496283255e-07, "loss": 0.6048, "step": 22788 }, { "epoch": 1.646395867574548, "grad_norm": 7.3322586081225145, "learning_rate": 3.986074352876404e-07, "loss": 0.6627, "step": 22789 }, { "epoch": 1.6464681127747576, "grad_norm": 6.964742423372582, "learning_rate": 3.984489943788808e-07, "loss": 0.5988, "step": 22790 }, { "epoch": 1.646540357974967, "grad_norm": 6.137765776867789, "learning_rate": 3.9829058223872257e-07, "loss": 0.5985, "step": 22791 }, { "epoch": 1.6466126031751767, "grad_norm": 6.5234421781420355, "learning_rate": 3.981321988693343e-07, "loss": 0.5981, "step": 22792 }, { "epoch": 1.646684848375386, "grad_norm": 8.556193681619346, "learning_rate": 3.979738442728845e-07, "loss": 0.6401, "step": 22793 }, { "epoch": 1.6467570935755955, "grad_norm": 7.315944805756732, "learning_rate": 3.9781551845154113e-07, "loss": 0.6082, "step": 22794 }, { "epoch": 1.646829338775805, "grad_norm": 6.101000106623945, "learning_rate": 3.976572214074706e-07, "loss": 0.6195, "step": 22795 }, { "epoch": 1.6469015839760146, "grad_norm": 8.196322048906724, "learning_rate": 3.9749895314284e-07, "loss": 0.5922, "step": 22796 }, { "epoch": 1.6469738291762241, "grad_norm": 7.214697026205026, "learning_rate": 3.9734071365981704e-07, "loss": 0.5781, "step": 22797 }, { "epoch": 1.6470460743764335, "grad_norm": 6.03062032466347, "learning_rate": 3.971825029605661e-07, "loss": 0.6087, "step": 22798 }, { "epoch": 1.6471183195766432, "grad_norm": 6.115821656665015, "learning_rate": 3.970243210472544e-07, "loss": 0.5669, "step": 22799 }, { "epoch": 1.6471905647768526, "grad_norm": 7.957715581382004, "learning_rate": 3.9686616792204677e-07, "loss": 0.6934, "step": 22800 }, { "epoch": 1.6472628099770623, "grad_norm": 8.610876045983758, "learning_rate": 3.9670804358710903e-07, "loss": 0.671, "step": 22801 }, { "epoch": 1.6473350551772716, "grad_norm": 8.10576524307121, "learning_rate": 3.9654994804460445e-07, "loss": 0.5437, "step": 22802 }, { "epoch": 1.6474073003774812, "grad_norm": 6.130355984431052, "learning_rate": 3.9639188129669797e-07, "loss": 0.6444, "step": 22803 }, { "epoch": 1.6474795455776907, "grad_norm": 6.756519701124217, "learning_rate": 3.962338433455537e-07, "loss": 0.6363, "step": 22804 }, { "epoch": 1.6475517907779, "grad_norm": 7.454994463032662, "learning_rate": 3.9607583419333433e-07, "loss": 0.6254, "step": 22805 }, { "epoch": 1.6476240359781098, "grad_norm": 6.467215093317403, "learning_rate": 3.9591785384220286e-07, "loss": 0.5869, "step": 22806 }, { "epoch": 1.6476962811783191, "grad_norm": 6.30773594932417, "learning_rate": 3.957599022943226e-07, "loss": 0.5976, "step": 22807 }, { "epoch": 1.6477685263785289, "grad_norm": 7.53396772678593, "learning_rate": 3.956019795518551e-07, "loss": 0.6244, "step": 22808 }, { "epoch": 1.6478407715787382, "grad_norm": 7.854992594578857, "learning_rate": 3.954440856169628e-07, "loss": 0.635, "step": 22809 }, { "epoch": 1.6479130167789477, "grad_norm": 7.848774915213057, "learning_rate": 3.9528622049180675e-07, "loss": 0.5685, "step": 22810 }, { "epoch": 1.6479852619791573, "grad_norm": 8.372384645129253, "learning_rate": 3.951283841785486e-07, "loss": 0.6159, "step": 22811 }, { "epoch": 1.6480575071793666, "grad_norm": 6.564733216093883, "learning_rate": 3.9497057667934824e-07, "loss": 0.6005, "step": 22812 }, { "epoch": 1.6481297523795764, "grad_norm": 6.701881417823969, "learning_rate": 3.948127979963662e-07, "loss": 0.5883, "step": 22813 }, { "epoch": 1.6482019975797857, "grad_norm": 6.070863283912996, "learning_rate": 3.9465504813176206e-07, "loss": 0.5446, "step": 22814 }, { "epoch": 1.6482742427799955, "grad_norm": 7.138822833133702, "learning_rate": 3.9449732708769643e-07, "loss": 0.5625, "step": 22815 }, { "epoch": 1.6483464879802048, "grad_norm": 7.062191846758337, "learning_rate": 3.9433963486632726e-07, "loss": 0.6005, "step": 22816 }, { "epoch": 1.6484187331804143, "grad_norm": 7.614810576659685, "learning_rate": 3.941819714698131e-07, "loss": 0.6112, "step": 22817 }, { "epoch": 1.6484909783806239, "grad_norm": 7.8956345773876, "learning_rate": 3.94024336900313e-07, "loss": 0.5852, "step": 22818 }, { "epoch": 1.6485632235808334, "grad_norm": 6.877042709672819, "learning_rate": 3.938667311599842e-07, "loss": 0.6935, "step": 22819 }, { "epoch": 1.648635468781043, "grad_norm": 7.151541814426196, "learning_rate": 3.937091542509849e-07, "loss": 0.5838, "step": 22820 }, { "epoch": 1.6487077139812523, "grad_norm": 6.797120819908212, "learning_rate": 3.9355160617547153e-07, "loss": 0.6272, "step": 22821 }, { "epoch": 1.648779959181462, "grad_norm": 7.926038742582942, "learning_rate": 3.9339408693560205e-07, "loss": 0.5536, "step": 22822 }, { "epoch": 1.6488522043816713, "grad_norm": 6.137868325549768, "learning_rate": 3.932365965335311e-07, "loss": 0.5905, "step": 22823 }, { "epoch": 1.648924449581881, "grad_norm": 7.336990167169012, "learning_rate": 3.9307913497141524e-07, "loss": 0.6059, "step": 22824 }, { "epoch": 1.6489966947820904, "grad_norm": 7.612476758498619, "learning_rate": 3.9292170225141037e-07, "loss": 0.578, "step": 22825 }, { "epoch": 1.6490689399823, "grad_norm": 8.336213440694078, "learning_rate": 3.9276429837567103e-07, "loss": 0.6519, "step": 22826 }, { "epoch": 1.6491411851825095, "grad_norm": 6.337068092330706, "learning_rate": 3.9260692334635254e-07, "loss": 0.5926, "step": 22827 }, { "epoch": 1.6492134303827188, "grad_norm": 8.7894626645014, "learning_rate": 3.924495771656089e-07, "loss": 0.6266, "step": 22828 }, { "epoch": 1.6492856755829286, "grad_norm": 7.522708085016529, "learning_rate": 3.922922598355947e-07, "loss": 0.6252, "step": 22829 }, { "epoch": 1.649357920783138, "grad_norm": 6.986120360081615, "learning_rate": 3.921349713584624e-07, "loss": 0.5318, "step": 22830 }, { "epoch": 1.6494301659833475, "grad_norm": 6.432572339893193, "learning_rate": 3.9197771173636545e-07, "loss": 0.6084, "step": 22831 }, { "epoch": 1.649502411183557, "grad_norm": 6.72234131398423, "learning_rate": 3.918204809714571e-07, "loss": 0.5457, "step": 22832 }, { "epoch": 1.6495746563837665, "grad_norm": 8.357952931027167, "learning_rate": 3.9166327906589004e-07, "loss": 0.6066, "step": 22833 }, { "epoch": 1.649646901583976, "grad_norm": 6.773452705973416, "learning_rate": 3.9150610602181507e-07, "loss": 0.5461, "step": 22834 }, { "epoch": 1.6497191467841854, "grad_norm": 8.337276390542938, "learning_rate": 3.913489618413843e-07, "loss": 0.6778, "step": 22835 }, { "epoch": 1.6497913919843952, "grad_norm": 6.060052751368195, "learning_rate": 3.9119184652674917e-07, "loss": 0.5542, "step": 22836 }, { "epoch": 1.6498636371846045, "grad_norm": 7.664785928442974, "learning_rate": 3.9103476008006016e-07, "loss": 0.6173, "step": 22837 }, { "epoch": 1.649935882384814, "grad_norm": 7.170856611297908, "learning_rate": 3.908777025034677e-07, "loss": 0.5845, "step": 22838 }, { "epoch": 1.6500081275850236, "grad_norm": 8.96862410081495, "learning_rate": 3.9072067379912206e-07, "loss": 0.6769, "step": 22839 }, { "epoch": 1.6500803727852331, "grad_norm": 7.9026838258664585, "learning_rate": 3.905636739691729e-07, "loss": 0.5403, "step": 22840 }, { "epoch": 1.6501526179854427, "grad_norm": 7.466563140060607, "learning_rate": 3.9040670301576875e-07, "loss": 0.5981, "step": 22841 }, { "epoch": 1.650224863185652, "grad_norm": 7.630838628955034, "learning_rate": 3.902497609410591e-07, "loss": 0.544, "step": 22842 }, { "epoch": 1.6502971083858617, "grad_norm": 6.308918758559156, "learning_rate": 3.900928477471924e-07, "loss": 0.6765, "step": 22843 }, { "epoch": 1.650369353586071, "grad_norm": 9.272050710465372, "learning_rate": 3.899359634363159e-07, "loss": 0.6314, "step": 22844 }, { "epoch": 1.6504415987862806, "grad_norm": 7.618119684546586, "learning_rate": 3.8977910801057757e-07, "loss": 0.5798, "step": 22845 }, { "epoch": 1.6505138439864901, "grad_norm": 8.187963108622563, "learning_rate": 3.896222814721243e-07, "loss": 0.6663, "step": 22846 }, { "epoch": 1.6505860891866997, "grad_norm": 7.940144496342792, "learning_rate": 3.894654838231046e-07, "loss": 0.6136, "step": 22847 }, { "epoch": 1.6506583343869092, "grad_norm": 8.380395773780336, "learning_rate": 3.89308715065663e-07, "loss": 0.6416, "step": 22848 }, { "epoch": 1.6507305795871186, "grad_norm": 6.432718816208913, "learning_rate": 3.8915197520194624e-07, "loss": 0.5975, "step": 22849 }, { "epoch": 1.6508028247873283, "grad_norm": 7.379294486709555, "learning_rate": 3.889952642340999e-07, "loss": 0.683, "step": 22850 }, { "epoch": 1.6508750699875376, "grad_norm": 7.697601380729036, "learning_rate": 3.888385821642701e-07, "loss": 0.5873, "step": 22851 }, { "epoch": 1.6509473151877472, "grad_norm": 7.748386338117541, "learning_rate": 3.8868192899460034e-07, "loss": 0.4857, "step": 22852 }, { "epoch": 1.6510195603879567, "grad_norm": 6.404090182315707, "learning_rate": 3.8852530472723555e-07, "loss": 0.6136, "step": 22853 }, { "epoch": 1.6510918055881663, "grad_norm": 8.921483767828583, "learning_rate": 3.883687093643199e-07, "loss": 0.605, "step": 22854 }, { "epoch": 1.6511640507883758, "grad_norm": 6.337456348276381, "learning_rate": 3.8821214290799687e-07, "loss": 0.5896, "step": 22855 }, { "epoch": 1.6512362959885851, "grad_norm": 7.924305920938015, "learning_rate": 3.8805560536041007e-07, "loss": 0.5742, "step": 22856 }, { "epoch": 1.6513085411887949, "grad_norm": 7.154081990698644, "learning_rate": 3.878990967237023e-07, "loss": 0.6962, "step": 22857 }, { "epoch": 1.6513807863890042, "grad_norm": 6.027868083662184, "learning_rate": 3.877426170000165e-07, "loss": 0.5747, "step": 22858 }, { "epoch": 1.6514530315892137, "grad_norm": 7.105544447954391, "learning_rate": 3.875861661914934e-07, "loss": 0.5677, "step": 22859 }, { "epoch": 1.6515252767894233, "grad_norm": 8.220773426062554, "learning_rate": 3.8742974430027587e-07, "loss": 0.5454, "step": 22860 }, { "epoch": 1.6515975219896328, "grad_norm": 7.3320010737924575, "learning_rate": 3.8727335132850514e-07, "loss": 0.6166, "step": 22861 }, { "epoch": 1.6516697671898424, "grad_norm": 6.035467224997865, "learning_rate": 3.8711698727832117e-07, "loss": 0.5882, "step": 22862 }, { "epoch": 1.6517420123900517, "grad_norm": 6.421658477474363, "learning_rate": 3.869606521518654e-07, "loss": 0.6232, "step": 22863 }, { "epoch": 1.6518142575902615, "grad_norm": 7.145545916620591, "learning_rate": 3.8680434595127743e-07, "loss": 0.6045, "step": 22864 }, { "epoch": 1.6518865027904708, "grad_norm": 6.791183579315095, "learning_rate": 3.866480686786972e-07, "loss": 0.5726, "step": 22865 }, { "epoch": 1.6519587479906803, "grad_norm": 7.052369270181337, "learning_rate": 3.864918203362639e-07, "loss": 0.567, "step": 22866 }, { "epoch": 1.6520309931908899, "grad_norm": 6.90422145689007, "learning_rate": 3.8633560092611653e-07, "loss": 0.563, "step": 22867 }, { "epoch": 1.6521032383910994, "grad_norm": 8.193470416038377, "learning_rate": 3.861794104503944e-07, "loss": 0.6572, "step": 22868 }, { "epoch": 1.652175483591309, "grad_norm": 7.689875290036787, "learning_rate": 3.860232489112342e-07, "loss": 0.6462, "step": 22869 }, { "epoch": 1.6522477287915183, "grad_norm": 7.185224886184082, "learning_rate": 3.858671163107744e-07, "loss": 0.6058, "step": 22870 }, { "epoch": 1.652319973991728, "grad_norm": 8.223435607364879, "learning_rate": 3.857110126511521e-07, "loss": 0.5874, "step": 22871 }, { "epoch": 1.6523922191919374, "grad_norm": 6.953998365093189, "learning_rate": 3.8555493793450527e-07, "loss": 0.5026, "step": 22872 }, { "epoch": 1.652464464392147, "grad_norm": 6.581963440859909, "learning_rate": 3.8539889216296866e-07, "loss": 0.626, "step": 22873 }, { "epoch": 1.6525367095923564, "grad_norm": 6.854328219317686, "learning_rate": 3.8524287533867943e-07, "loss": 0.6392, "step": 22874 }, { "epoch": 1.652608954792566, "grad_norm": 7.613532025516439, "learning_rate": 3.850868874637728e-07, "loss": 0.7058, "step": 22875 }, { "epoch": 1.6526811999927755, "grad_norm": 8.44167919503199, "learning_rate": 3.8493092854038545e-07, "loss": 0.6231, "step": 22876 }, { "epoch": 1.6527534451929848, "grad_norm": 7.089335004560355, "learning_rate": 3.847749985706512e-07, "loss": 0.5448, "step": 22877 }, { "epoch": 1.6528256903931946, "grad_norm": 6.644877142764314, "learning_rate": 3.8461909755670464e-07, "loss": 0.6087, "step": 22878 }, { "epoch": 1.652897935593404, "grad_norm": 6.50420478387677, "learning_rate": 3.8446322550068085e-07, "loss": 0.6325, "step": 22879 }, { "epoch": 1.6529701807936137, "grad_norm": 6.750261089785597, "learning_rate": 3.843073824047122e-07, "loss": 0.623, "step": 22880 }, { "epoch": 1.653042425993823, "grad_norm": 7.0316994756509, "learning_rate": 3.841515682709326e-07, "loss": 0.6302, "step": 22881 }, { "epoch": 1.6531146711940325, "grad_norm": 6.6389172321532115, "learning_rate": 3.839957831014754e-07, "loss": 0.6582, "step": 22882 }, { "epoch": 1.653186916394242, "grad_norm": 7.362784403827542, "learning_rate": 3.8384002689847296e-07, "loss": 0.5997, "step": 22883 }, { "epoch": 1.6532591615944514, "grad_norm": 6.371132668680946, "learning_rate": 3.8368429966405745e-07, "loss": 0.6305, "step": 22884 }, { "epoch": 1.6533314067946612, "grad_norm": 7.435893165695084, "learning_rate": 3.8352860140036055e-07, "loss": 0.5372, "step": 22885 }, { "epoch": 1.6534036519948705, "grad_norm": 9.371563090711367, "learning_rate": 3.833729321095145e-07, "loss": 0.5747, "step": 22886 }, { "epoch": 1.6534758971950803, "grad_norm": 8.209701140118232, "learning_rate": 3.8321729179364883e-07, "loss": 0.6191, "step": 22887 }, { "epoch": 1.6535481423952896, "grad_norm": 6.645913565060699, "learning_rate": 3.8306168045489507e-07, "loss": 0.6273, "step": 22888 }, { "epoch": 1.6536203875954991, "grad_norm": 7.564981605119144, "learning_rate": 3.82906098095383e-07, "loss": 0.6148, "step": 22889 }, { "epoch": 1.6536926327957087, "grad_norm": 7.093562590018211, "learning_rate": 3.8275054471724333e-07, "loss": 0.6597, "step": 22890 }, { "epoch": 1.653764877995918, "grad_norm": 7.738682019688848, "learning_rate": 3.825950203226042e-07, "loss": 0.6559, "step": 22891 }, { "epoch": 1.6538371231961277, "grad_norm": 6.621965109053171, "learning_rate": 3.8243952491359507e-07, "loss": 0.6142, "step": 22892 }, { "epoch": 1.653909368396337, "grad_norm": 6.744968092880396, "learning_rate": 3.8228405849234484e-07, "loss": 0.6423, "step": 22893 }, { "epoch": 1.6539816135965468, "grad_norm": 6.133467135144721, "learning_rate": 3.821286210609812e-07, "loss": 0.5773, "step": 22894 }, { "epoch": 1.6540538587967561, "grad_norm": 6.327158008679479, "learning_rate": 3.8197321262163275e-07, "loss": 0.5883, "step": 22895 }, { "epoch": 1.6541261039969657, "grad_norm": 7.130384585587155, "learning_rate": 3.8181783317642605e-07, "loss": 0.6268, "step": 22896 }, { "epoch": 1.6541983491971752, "grad_norm": 6.533454486476161, "learning_rate": 3.816624827274895e-07, "loss": 0.5692, "step": 22897 }, { "epoch": 1.6542705943973848, "grad_norm": 7.044090109790868, "learning_rate": 3.81507161276948e-07, "loss": 0.5956, "step": 22898 }, { "epoch": 1.6543428395975943, "grad_norm": 7.323763527519936, "learning_rate": 3.813518688269285e-07, "loss": 0.6125, "step": 22899 }, { "epoch": 1.6544150847978036, "grad_norm": 7.344227617031724, "learning_rate": 3.811966053795571e-07, "loss": 0.5866, "step": 22900 }, { "epoch": 1.6544873299980134, "grad_norm": 6.643799505470984, "learning_rate": 3.8104137093695955e-07, "loss": 0.5904, "step": 22901 }, { "epoch": 1.6545595751982227, "grad_norm": 7.775841238748052, "learning_rate": 3.808861655012597e-07, "loss": 0.6016, "step": 22902 }, { "epoch": 1.6546318203984323, "grad_norm": 9.76880846549103, "learning_rate": 3.8073098907458224e-07, "loss": 0.6261, "step": 22903 }, { "epoch": 1.6547040655986418, "grad_norm": 8.160209542370259, "learning_rate": 3.8057584165905324e-07, "loss": 0.6172, "step": 22904 }, { "epoch": 1.6547763107988513, "grad_norm": 7.060965312203192, "learning_rate": 3.8042072325679497e-07, "loss": 0.5982, "step": 22905 }, { "epoch": 1.6548485559990609, "grad_norm": 6.796922144233052, "learning_rate": 3.8026563386993094e-07, "loss": 0.6069, "step": 22906 }, { "epoch": 1.6549208011992702, "grad_norm": 6.952184309172274, "learning_rate": 3.801105735005847e-07, "loss": 0.6145, "step": 22907 }, { "epoch": 1.65499304639948, "grad_norm": 7.404535666569512, "learning_rate": 3.799555421508794e-07, "loss": 0.537, "step": 22908 }, { "epoch": 1.6550652915996893, "grad_norm": 6.7325343473162444, "learning_rate": 3.7980053982293597e-07, "loss": 0.5655, "step": 22909 }, { "epoch": 1.6551375367998988, "grad_norm": 7.002220619018806, "learning_rate": 3.796455665188767e-07, "loss": 0.6373, "step": 22910 }, { "epoch": 1.6552097820001084, "grad_norm": 6.519015740366373, "learning_rate": 3.7949062224082343e-07, "loss": 0.5907, "step": 22911 }, { "epoch": 1.655282027200318, "grad_norm": 7.17177048151986, "learning_rate": 3.7933570699089704e-07, "loss": 0.6274, "step": 22912 }, { "epoch": 1.6553542724005275, "grad_norm": 7.986818660039153, "learning_rate": 3.791808207712183e-07, "loss": 0.6947, "step": 22913 }, { "epoch": 1.6554265176007368, "grad_norm": 6.024881588100696, "learning_rate": 3.7902596358390745e-07, "loss": 0.5853, "step": 22914 }, { "epoch": 1.6554987628009465, "grad_norm": 5.36005417326695, "learning_rate": 3.7887113543108476e-07, "loss": 0.5341, "step": 22915 }, { "epoch": 1.6555710080011559, "grad_norm": 9.038316753620085, "learning_rate": 3.787163363148688e-07, "loss": 0.6153, "step": 22916 }, { "epoch": 1.6556432532013654, "grad_norm": 7.233341942472945, "learning_rate": 3.78561566237379e-07, "loss": 0.5688, "step": 22917 }, { "epoch": 1.655715498401575, "grad_norm": 8.058431382422688, "learning_rate": 3.784068252007347e-07, "loss": 0.6487, "step": 22918 }, { "epoch": 1.6557877436017845, "grad_norm": 6.024051145176893, "learning_rate": 3.7825211320705353e-07, "loss": 0.564, "step": 22919 }, { "epoch": 1.655859988801994, "grad_norm": 8.923792003442435, "learning_rate": 3.7809743025845307e-07, "loss": 0.5995, "step": 22920 }, { "epoch": 1.6559322340022034, "grad_norm": 9.006470791230116, "learning_rate": 3.779427763570512e-07, "loss": 0.6849, "step": 22921 }, { "epoch": 1.6560044792024131, "grad_norm": 6.659426222362103, "learning_rate": 3.7778815150496527e-07, "loss": 0.725, "step": 22922 }, { "epoch": 1.6560767244026224, "grad_norm": 7.405864206559293, "learning_rate": 3.7763355570431177e-07, "loss": 0.7407, "step": 22923 }, { "epoch": 1.656148969602832, "grad_norm": 6.317820506870328, "learning_rate": 3.77478988957207e-07, "loss": 0.5686, "step": 22924 }, { "epoch": 1.6562212148030415, "grad_norm": 6.680453857719338, "learning_rate": 3.7732445126576674e-07, "loss": 0.6373, "step": 22925 }, { "epoch": 1.656293460003251, "grad_norm": 7.792141932172006, "learning_rate": 3.771699426321071e-07, "loss": 0.6007, "step": 22926 }, { "epoch": 1.6563657052034606, "grad_norm": 6.130385541879878, "learning_rate": 3.7701546305834197e-07, "loss": 0.6193, "step": 22927 }, { "epoch": 1.65643795040367, "grad_norm": 5.280549132661872, "learning_rate": 3.7686101254658715e-07, "loss": 0.5812, "step": 22928 }, { "epoch": 1.6565101956038797, "grad_norm": 6.867290821535015, "learning_rate": 3.7670659109895675e-07, "loss": 0.5489, "step": 22929 }, { "epoch": 1.656582440804089, "grad_norm": 8.897849313857218, "learning_rate": 3.765521987175641e-07, "loss": 0.6238, "step": 22930 }, { "epoch": 1.6566546860042985, "grad_norm": 7.070412583459454, "learning_rate": 3.763978354045225e-07, "loss": 0.5887, "step": 22931 }, { "epoch": 1.656726931204508, "grad_norm": 8.031906397969426, "learning_rate": 3.7624350116194643e-07, "loss": 0.5899, "step": 22932 }, { "epoch": 1.6567991764047176, "grad_norm": 7.224904000987579, "learning_rate": 3.760891959919483e-07, "loss": 0.6074, "step": 22933 }, { "epoch": 1.6568714216049272, "grad_norm": 6.964113343729167, "learning_rate": 3.759349198966392e-07, "loss": 0.5159, "step": 22934 }, { "epoch": 1.6569436668051365, "grad_norm": 7.267074998121343, "learning_rate": 3.7578067287813186e-07, "loss": 0.6244, "step": 22935 }, { "epoch": 1.6570159120053463, "grad_norm": 7.6990624290479, "learning_rate": 3.7562645493853854e-07, "loss": 0.4872, "step": 22936 }, { "epoch": 1.6570881572055556, "grad_norm": 6.345463563595182, "learning_rate": 3.7547226607996894e-07, "loss": 0.559, "step": 22937 }, { "epoch": 1.6571604024057651, "grad_norm": 7.505449731047648, "learning_rate": 3.753181063045344e-07, "loss": 0.6346, "step": 22938 }, { "epoch": 1.6572326476059747, "grad_norm": 7.931858490913277, "learning_rate": 3.751639756143452e-07, "loss": 0.5751, "step": 22939 }, { "epoch": 1.6573048928061842, "grad_norm": 6.758422400675121, "learning_rate": 3.7500987401151127e-07, "loss": 0.594, "step": 22940 }, { "epoch": 1.6573771380063937, "grad_norm": 6.443924744065353, "learning_rate": 3.748558014981424e-07, "loss": 0.6131, "step": 22941 }, { "epoch": 1.657449383206603, "grad_norm": 6.733881036118306, "learning_rate": 3.7470175807634764e-07, "loss": 0.5506, "step": 22942 }, { "epoch": 1.6575216284068128, "grad_norm": 8.63595195318358, "learning_rate": 3.7454774374823587e-07, "loss": 0.5739, "step": 22943 }, { "epoch": 1.6575938736070222, "grad_norm": 5.446747206361324, "learning_rate": 3.7439375851591494e-07, "loss": 0.5938, "step": 22944 }, { "epoch": 1.6576661188072317, "grad_norm": 6.07017393211777, "learning_rate": 3.7423980238149275e-07, "loss": 0.5925, "step": 22945 }, { "epoch": 1.6577383640074412, "grad_norm": 6.653114690812967, "learning_rate": 3.740858753470772e-07, "loss": 0.6234, "step": 22946 }, { "epoch": 1.6578106092076508, "grad_norm": 8.622468438625777, "learning_rate": 3.739319774147759e-07, "loss": 0.6561, "step": 22947 }, { "epoch": 1.6578828544078603, "grad_norm": 6.692758649545377, "learning_rate": 3.7377810858669453e-07, "loss": 0.6436, "step": 22948 }, { "epoch": 1.6579550996080696, "grad_norm": 6.618312123108634, "learning_rate": 3.7362426886493997e-07, "loss": 0.5784, "step": 22949 }, { "epoch": 1.6580273448082794, "grad_norm": 7.256236846675336, "learning_rate": 3.7347045825161827e-07, "loss": 0.5763, "step": 22950 }, { "epoch": 1.6580995900084887, "grad_norm": 7.056519263158581, "learning_rate": 3.733166767488347e-07, "loss": 0.6317, "step": 22951 }, { "epoch": 1.6581718352086985, "grad_norm": 6.885366496385707, "learning_rate": 3.7316292435869453e-07, "loss": 0.6531, "step": 22952 }, { "epoch": 1.6582440804089078, "grad_norm": 6.685175126149288, "learning_rate": 3.730092010833025e-07, "loss": 0.5763, "step": 22953 }, { "epoch": 1.6583163256091173, "grad_norm": 6.9878227036535, "learning_rate": 3.7285550692476386e-07, "loss": 0.6314, "step": 22954 }, { "epoch": 1.658388570809327, "grad_norm": 6.065453193507222, "learning_rate": 3.7270184188518077e-07, "loss": 0.5717, "step": 22955 }, { "epoch": 1.6584608160095362, "grad_norm": 7.064123726596116, "learning_rate": 3.7254820596665797e-07, "loss": 0.6417, "step": 22956 }, { "epoch": 1.658533061209746, "grad_norm": 6.227708660781605, "learning_rate": 3.7239459917129825e-07, "loss": 0.6027, "step": 22957 }, { "epoch": 1.6586053064099553, "grad_norm": 6.491238778360071, "learning_rate": 3.722410215012051e-07, "loss": 0.5894, "step": 22958 }, { "epoch": 1.658677551610165, "grad_norm": 6.593165656497904, "learning_rate": 3.7208747295847917e-07, "loss": 0.6143, "step": 22959 }, { "epoch": 1.6587497968103744, "grad_norm": 7.703177492531593, "learning_rate": 3.7193395354522406e-07, "loss": 0.6501, "step": 22960 }, { "epoch": 1.658822042010584, "grad_norm": 6.9829572565725435, "learning_rate": 3.7178046326354133e-07, "loss": 0.561, "step": 22961 }, { "epoch": 1.6588942872107935, "grad_norm": 6.596474902562483, "learning_rate": 3.7162700211553104e-07, "loss": 0.5198, "step": 22962 }, { "epoch": 1.6589665324110028, "grad_norm": 6.229452307370583, "learning_rate": 3.7147357010329457e-07, "loss": 0.5848, "step": 22963 }, { "epoch": 1.6590387776112125, "grad_norm": 7.158953693170325, "learning_rate": 3.713201672289321e-07, "loss": 0.572, "step": 22964 }, { "epoch": 1.6591110228114219, "grad_norm": 7.2233552321152095, "learning_rate": 3.7116679349454455e-07, "loss": 0.5577, "step": 22965 }, { "epoch": 1.6591832680116316, "grad_norm": 7.966186589842073, "learning_rate": 3.710134489022299e-07, "loss": 0.6993, "step": 22966 }, { "epoch": 1.659255513211841, "grad_norm": 6.870897768488447, "learning_rate": 3.7086013345408815e-07, "loss": 0.596, "step": 22967 }, { "epoch": 1.6593277584120505, "grad_norm": 8.242770639916268, "learning_rate": 3.707068471522179e-07, "loss": 0.6154, "step": 22968 }, { "epoch": 1.65940000361226, "grad_norm": 5.921325497056368, "learning_rate": 3.705535899987178e-07, "loss": 0.5544, "step": 22969 }, { "epoch": 1.6594722488124696, "grad_norm": 7.3885461573199676, "learning_rate": 3.7040036199568556e-07, "loss": 0.5636, "step": 22970 }, { "epoch": 1.6595444940126791, "grad_norm": 5.456769627038807, "learning_rate": 3.7024716314521867e-07, "loss": 0.5137, "step": 22971 }, { "epoch": 1.6596167392128884, "grad_norm": 8.029674806007673, "learning_rate": 3.7009399344941523e-07, "loss": 0.6099, "step": 22972 }, { "epoch": 1.6596889844130982, "grad_norm": 8.902646697757788, "learning_rate": 3.699408529103707e-07, "loss": 0.5768, "step": 22973 }, { "epoch": 1.6597612296133075, "grad_norm": 6.821109476094287, "learning_rate": 3.697877415301818e-07, "loss": 0.6804, "step": 22974 }, { "epoch": 1.659833474813517, "grad_norm": 6.627479808867271, "learning_rate": 3.6963465931094566e-07, "loss": 0.574, "step": 22975 }, { "epoch": 1.6599057200137266, "grad_norm": 8.621784440076912, "learning_rate": 3.69481606254756e-07, "loss": 0.6333, "step": 22976 }, { "epoch": 1.6599779652139361, "grad_norm": 7.204718357699316, "learning_rate": 3.693285823637091e-07, "loss": 0.6025, "step": 22977 }, { "epoch": 1.6600502104141457, "grad_norm": 6.761977694890113, "learning_rate": 3.6917558763989946e-07, "loss": 0.6209, "step": 22978 }, { "epoch": 1.660122455614355, "grad_norm": 8.00965013213797, "learning_rate": 3.690226220854215e-07, "loss": 0.627, "step": 22979 }, { "epoch": 1.6601947008145648, "grad_norm": 7.253536808628661, "learning_rate": 3.6886968570236937e-07, "loss": 0.5493, "step": 22980 }, { "epoch": 1.660266946014774, "grad_norm": 6.455850464385453, "learning_rate": 3.687167784928364e-07, "loss": 0.5801, "step": 22981 }, { "epoch": 1.6603391912149836, "grad_norm": 6.518712032862229, "learning_rate": 3.6856390045891586e-07, "loss": 0.6267, "step": 22982 }, { "epoch": 1.6604114364151932, "grad_norm": 7.58156232449971, "learning_rate": 3.6841105160270116e-07, "loss": 0.6763, "step": 22983 }, { "epoch": 1.6604836816154027, "grad_norm": 7.3062393906306635, "learning_rate": 3.6825823192628366e-07, "loss": 0.6886, "step": 22984 }, { "epoch": 1.6605559268156123, "grad_norm": 8.207233439174107, "learning_rate": 3.6810544143175584e-07, "loss": 0.6345, "step": 22985 }, { "epoch": 1.6606281720158216, "grad_norm": 6.73155319678617, "learning_rate": 3.6795268012120966e-07, "loss": 0.5942, "step": 22986 }, { "epoch": 1.6607004172160313, "grad_norm": 6.152665852183349, "learning_rate": 3.6779994799673534e-07, "loss": 0.6568, "step": 22987 }, { "epoch": 1.6607726624162407, "grad_norm": 7.469835019006888, "learning_rate": 3.6764724506042346e-07, "loss": 0.6005, "step": 22988 }, { "epoch": 1.6608449076164502, "grad_norm": 7.798412347262707, "learning_rate": 3.674945713143657e-07, "loss": 0.5958, "step": 22989 }, { "epoch": 1.6609171528166597, "grad_norm": 5.985629356138125, "learning_rate": 3.673419267606523e-07, "loss": 0.6164, "step": 22990 }, { "epoch": 1.6609893980168693, "grad_norm": 9.089907119326814, "learning_rate": 3.6718931140137136e-07, "loss": 0.5758, "step": 22991 }, { "epoch": 1.6610616432170788, "grad_norm": 7.500163775881101, "learning_rate": 3.6703672523861253e-07, "loss": 0.6331, "step": 22992 }, { "epoch": 1.6611338884172882, "grad_norm": 8.580037888663337, "learning_rate": 3.668841682744656e-07, "loss": 0.6412, "step": 22993 }, { "epoch": 1.661206133617498, "grad_norm": 6.161277615714955, "learning_rate": 3.6673164051101743e-07, "loss": 0.5769, "step": 22994 }, { "epoch": 1.6612783788177072, "grad_norm": 6.511459812263203, "learning_rate": 3.66579141950357e-07, "loss": 0.6542, "step": 22995 }, { "epoch": 1.6613506240179168, "grad_norm": 7.146428860982798, "learning_rate": 3.6642667259457116e-07, "loss": 0.6353, "step": 22996 }, { "epoch": 1.6614228692181263, "grad_norm": 6.455365916428769, "learning_rate": 3.6627423244574803e-07, "loss": 0.5835, "step": 22997 }, { "epoch": 1.6614951144183359, "grad_norm": 6.528889352654034, "learning_rate": 3.6612182150597364e-07, "loss": 0.6395, "step": 22998 }, { "epoch": 1.6615673596185454, "grad_norm": 6.780622462053199, "learning_rate": 3.6596943977733475e-07, "loss": 0.6218, "step": 22999 }, { "epoch": 1.6616396048187547, "grad_norm": 7.534392528284126, "learning_rate": 3.6581708726191767e-07, "loss": 0.6163, "step": 23000 }, { "epoch": 1.6617118500189645, "grad_norm": 6.932746427099504, "learning_rate": 3.6566476396180716e-07, "loss": 0.5324, "step": 23001 }, { "epoch": 1.6617840952191738, "grad_norm": 7.186071635415027, "learning_rate": 3.655124698790888e-07, "loss": 0.6299, "step": 23002 }, { "epoch": 1.6618563404193833, "grad_norm": 7.664078676476939, "learning_rate": 3.653602050158475e-07, "loss": 0.6279, "step": 23003 }, { "epoch": 1.661928585619593, "grad_norm": 7.999996900557871, "learning_rate": 3.652079693741681e-07, "loss": 0.5723, "step": 23004 }, { "epoch": 1.6620008308198024, "grad_norm": 7.678334797099411, "learning_rate": 3.6505576295613327e-07, "loss": 0.6426, "step": 23005 }, { "epoch": 1.662073076020012, "grad_norm": 6.271969748229425, "learning_rate": 3.649035857638275e-07, "loss": 0.5684, "step": 23006 }, { "epoch": 1.6621453212202213, "grad_norm": 6.900925737282944, "learning_rate": 3.647514377993339e-07, "loss": 0.6216, "step": 23007 }, { "epoch": 1.662217566420431, "grad_norm": 6.680820444856025, "learning_rate": 3.645993190647351e-07, "loss": 0.6141, "step": 23008 }, { "epoch": 1.6622898116206404, "grad_norm": 7.522986472802587, "learning_rate": 3.6444722956211374e-07, "loss": 0.5976, "step": 23009 }, { "epoch": 1.66236205682085, "grad_norm": 6.5689935437613665, "learning_rate": 3.6429516929355144e-07, "loss": 0.5923, "step": 23010 }, { "epoch": 1.6624343020210595, "grad_norm": 7.0738005575074405, "learning_rate": 3.6414313826113076e-07, "loss": 0.5796, "step": 23011 }, { "epoch": 1.662506547221269, "grad_norm": 7.357340213493586, "learning_rate": 3.639911364669316e-07, "loss": 0.6654, "step": 23012 }, { "epoch": 1.6625787924214785, "grad_norm": 7.321243666596789, "learning_rate": 3.6383916391303507e-07, "loss": 0.6168, "step": 23013 }, { "epoch": 1.6626510376216879, "grad_norm": 6.886849197130258, "learning_rate": 3.6368722060152185e-07, "loss": 0.655, "step": 23014 }, { "epoch": 1.6627232828218976, "grad_norm": 11.129552017066363, "learning_rate": 3.635353065344724e-07, "loss": 0.6152, "step": 23015 }, { "epoch": 1.662795528022107, "grad_norm": 7.398863122574427, "learning_rate": 3.633834217139648e-07, "loss": 0.6134, "step": 23016 }, { "epoch": 1.6628677732223165, "grad_norm": 7.069862781363897, "learning_rate": 3.632315661420796e-07, "loss": 0.6881, "step": 23017 }, { "epoch": 1.662940018422526, "grad_norm": 7.662244295268904, "learning_rate": 3.6307973982089573e-07, "loss": 0.5637, "step": 23018 }, { "epoch": 1.6630122636227356, "grad_norm": 6.9451815031761805, "learning_rate": 3.629279427524904e-07, "loss": 0.6139, "step": 23019 }, { "epoch": 1.6630845088229451, "grad_norm": 8.032829159101817, "learning_rate": 3.627761749389422e-07, "loss": 0.5795, "step": 23020 }, { "epoch": 1.6631567540231544, "grad_norm": 8.01530137626426, "learning_rate": 3.6262443638232865e-07, "loss": 0.55, "step": 23021 }, { "epoch": 1.6632289992233642, "grad_norm": 5.724734317661606, "learning_rate": 3.624727270847278e-07, "loss": 0.536, "step": 23022 }, { "epoch": 1.6633012444235735, "grad_norm": 5.8717090236071945, "learning_rate": 3.62321047048215e-07, "loss": 0.5504, "step": 23023 }, { "epoch": 1.6633734896237833, "grad_norm": 7.0623765782737475, "learning_rate": 3.6216939627486706e-07, "loss": 0.5806, "step": 23024 }, { "epoch": 1.6634457348239926, "grad_norm": 6.990222369191981, "learning_rate": 3.6201777476676025e-07, "loss": 0.6154, "step": 23025 }, { "epoch": 1.6635179800242021, "grad_norm": 7.306220594466061, "learning_rate": 3.618661825259703e-07, "loss": 0.504, "step": 23026 }, { "epoch": 1.6635902252244117, "grad_norm": 6.990253202248326, "learning_rate": 3.6171461955457206e-07, "loss": 0.5805, "step": 23027 }, { "epoch": 1.663662470424621, "grad_norm": 6.630272548591311, "learning_rate": 3.6156308585464013e-07, "loss": 0.5791, "step": 23028 }, { "epoch": 1.6637347156248308, "grad_norm": 7.377528387601316, "learning_rate": 3.6141158142825014e-07, "loss": 0.6284, "step": 23029 }, { "epoch": 1.66380696082504, "grad_norm": 8.254311562013696, "learning_rate": 3.612601062774743e-07, "loss": 0.6295, "step": 23030 }, { "epoch": 1.6638792060252499, "grad_norm": 7.508088899698861, "learning_rate": 3.61108660404387e-07, "loss": 0.6309, "step": 23031 }, { "epoch": 1.6639514512254592, "grad_norm": 6.785310575479422, "learning_rate": 3.6095724381106145e-07, "loss": 0.5904, "step": 23032 }, { "epoch": 1.6640236964256687, "grad_norm": 8.374066201148098, "learning_rate": 3.608058564995709e-07, "loss": 0.6403, "step": 23033 }, { "epoch": 1.6640959416258783, "grad_norm": 7.156392058582494, "learning_rate": 3.6065449847198645e-07, "loss": 0.5691, "step": 23034 }, { "epoch": 1.6641681868260876, "grad_norm": 7.528168993829655, "learning_rate": 3.60503169730381e-07, "loss": 0.5643, "step": 23035 }, { "epoch": 1.6642404320262973, "grad_norm": 6.8412872481835025, "learning_rate": 3.6035187027682607e-07, "loss": 0.5878, "step": 23036 }, { "epoch": 1.6643126772265067, "grad_norm": 6.515059705037635, "learning_rate": 3.602006001133926e-07, "loss": 0.6396, "step": 23037 }, { "epoch": 1.6643849224267164, "grad_norm": 7.886502539291171, "learning_rate": 3.600493592421514e-07, "loss": 0.6461, "step": 23038 }, { "epoch": 1.6644571676269257, "grad_norm": 7.030726434500019, "learning_rate": 3.598981476651728e-07, "loss": 0.6754, "step": 23039 }, { "epoch": 1.6645294128271353, "grad_norm": 6.851477314714176, "learning_rate": 3.5974696538452784e-07, "loss": 0.6411, "step": 23040 }, { "epoch": 1.6646016580273448, "grad_norm": 7.7713810486502535, "learning_rate": 3.5959581240228413e-07, "loss": 0.6124, "step": 23041 }, { "epoch": 1.6646739032275544, "grad_norm": 7.637056668115142, "learning_rate": 3.5944468872051217e-07, "loss": 0.5645, "step": 23042 }, { "epoch": 1.664746148427764, "grad_norm": 7.86687910579757, "learning_rate": 3.5929359434128064e-07, "loss": 0.5706, "step": 23043 }, { "epoch": 1.6648183936279732, "grad_norm": 7.180692683307744, "learning_rate": 3.591425292666567e-07, "loss": 0.5718, "step": 23044 }, { "epoch": 1.664890638828183, "grad_norm": 7.872828350663672, "learning_rate": 3.589914934987099e-07, "loss": 0.6219, "step": 23045 }, { "epoch": 1.6649628840283923, "grad_norm": 6.680073545131025, "learning_rate": 3.5884048703950707e-07, "loss": 0.5664, "step": 23046 }, { "epoch": 1.6650351292286019, "grad_norm": 7.950251871593587, "learning_rate": 3.586895098911161e-07, "loss": 0.544, "step": 23047 }, { "epoch": 1.6651073744288114, "grad_norm": 7.343388974669718, "learning_rate": 3.585385620556026e-07, "loss": 0.6178, "step": 23048 }, { "epoch": 1.665179619629021, "grad_norm": 6.11160463979742, "learning_rate": 3.5838764353503307e-07, "loss": 0.5962, "step": 23049 }, { "epoch": 1.6652518648292305, "grad_norm": 8.872531601330117, "learning_rate": 3.582367543314749e-07, "loss": 0.5842, "step": 23050 }, { "epoch": 1.6653241100294398, "grad_norm": 7.436959126784737, "learning_rate": 3.580858944469917e-07, "loss": 0.616, "step": 23051 }, { "epoch": 1.6653963552296496, "grad_norm": 7.447550162165324, "learning_rate": 3.5793506388364957e-07, "loss": 0.6146, "step": 23052 }, { "epoch": 1.665468600429859, "grad_norm": 6.3980600993322945, "learning_rate": 3.5778426264351324e-07, "loss": 0.5833, "step": 23053 }, { "epoch": 1.6655408456300684, "grad_norm": 6.265303225352723, "learning_rate": 3.576334907286472e-07, "loss": 0.5975, "step": 23054 }, { "epoch": 1.665613090830278, "grad_norm": 7.781605662601544, "learning_rate": 3.57482748141115e-07, "loss": 0.6499, "step": 23055 }, { "epoch": 1.6656853360304875, "grad_norm": 7.353002814539445, "learning_rate": 3.573320348829806e-07, "loss": 0.583, "step": 23056 }, { "epoch": 1.665757581230697, "grad_norm": 6.317632118597719, "learning_rate": 3.5718135095630687e-07, "loss": 0.6266, "step": 23057 }, { "epoch": 1.6658298264309064, "grad_norm": 7.611387014222403, "learning_rate": 3.570306963631573e-07, "loss": 0.6449, "step": 23058 }, { "epoch": 1.6659020716311161, "grad_norm": 7.336648567358787, "learning_rate": 3.568800711055928e-07, "loss": 0.5826, "step": 23059 }, { "epoch": 1.6659743168313255, "grad_norm": 6.146607879280316, "learning_rate": 3.567294751856762e-07, "loss": 0.5996, "step": 23060 }, { "epoch": 1.666046562031535, "grad_norm": 7.719885862228151, "learning_rate": 3.565789086054697e-07, "loss": 0.6633, "step": 23061 }, { "epoch": 1.6661188072317445, "grad_norm": 6.957148041727423, "learning_rate": 3.56428371367033e-07, "loss": 0.5851, "step": 23062 }, { "epoch": 1.666191052431954, "grad_norm": 6.506888627399043, "learning_rate": 3.562778634724276e-07, "loss": 0.6114, "step": 23063 }, { "epoch": 1.6662632976321636, "grad_norm": 6.73454323401378, "learning_rate": 3.5612738492371345e-07, "loss": 0.604, "step": 23064 }, { "epoch": 1.666335542832373, "grad_norm": 8.833040148589927, "learning_rate": 3.559769357229512e-07, "loss": 0.6225, "step": 23065 }, { "epoch": 1.6664077880325827, "grad_norm": 6.1157823607677, "learning_rate": 3.558265158721996e-07, "loss": 0.576, "step": 23066 }, { "epoch": 1.666480033232792, "grad_norm": 6.2234810320274825, "learning_rate": 3.5567612537351833e-07, "loss": 0.577, "step": 23067 }, { "epoch": 1.6665522784330016, "grad_norm": 10.09107316842117, "learning_rate": 3.5552576422896666e-07, "loss": 0.5968, "step": 23068 }, { "epoch": 1.6666245236332111, "grad_norm": 7.626227608338194, "learning_rate": 3.553754324406014e-07, "loss": 0.5953, "step": 23069 }, { "epoch": 1.6666967688334207, "grad_norm": 7.2528733446083065, "learning_rate": 3.552251300104814e-07, "loss": 0.575, "step": 23070 }, { "epoch": 1.6667690140336302, "grad_norm": 6.878008929620367, "learning_rate": 3.5507485694066397e-07, "loss": 0.6076, "step": 23071 }, { "epoch": 1.6668412592338395, "grad_norm": 7.616050345807402, "learning_rate": 3.549246132332068e-07, "loss": 0.6881, "step": 23072 }, { "epoch": 1.6669135044340493, "grad_norm": 6.612602005227671, "learning_rate": 3.547743988901653e-07, "loss": 0.6234, "step": 23073 }, { "epoch": 1.6669857496342586, "grad_norm": 7.708903716008233, "learning_rate": 3.546242139135969e-07, "loss": 0.6824, "step": 23074 }, { "epoch": 1.6670579948344681, "grad_norm": 7.91416637356252, "learning_rate": 3.544740583055581e-07, "loss": 0.5783, "step": 23075 }, { "epoch": 1.6671302400346777, "grad_norm": 7.538106875557946, "learning_rate": 3.543239320681027e-07, "loss": 0.618, "step": 23076 }, { "epoch": 1.6672024852348872, "grad_norm": 6.862784613594389, "learning_rate": 3.541738352032867e-07, "loss": 0.5888, "step": 23077 }, { "epoch": 1.6672747304350968, "grad_norm": 7.347837179208464, "learning_rate": 3.54023767713165e-07, "loss": 0.716, "step": 23078 }, { "epoch": 1.667346975635306, "grad_norm": 6.475250074351617, "learning_rate": 3.538737295997921e-07, "loss": 0.5965, "step": 23079 }, { "epoch": 1.6674192208355159, "grad_norm": 7.7211929964910535, "learning_rate": 3.537237208652208e-07, "loss": 0.6291, "step": 23080 }, { "epoch": 1.6674914660357252, "grad_norm": 7.617589632814624, "learning_rate": 3.535737415115054e-07, "loss": 0.5555, "step": 23081 }, { "epoch": 1.6675637112359347, "grad_norm": 8.178559291970998, "learning_rate": 3.5342379154069876e-07, "loss": 0.6334, "step": 23082 }, { "epoch": 1.6676359564361443, "grad_norm": 5.715349697784795, "learning_rate": 3.532738709548539e-07, "loss": 0.5632, "step": 23083 }, { "epoch": 1.6677082016363538, "grad_norm": 7.846028882177959, "learning_rate": 3.531239797560229e-07, "loss": 0.5898, "step": 23084 }, { "epoch": 1.6677804468365633, "grad_norm": 6.943763723282597, "learning_rate": 3.529741179462576e-07, "loss": 0.6325, "step": 23085 }, { "epoch": 1.6678526920367727, "grad_norm": 6.553968715244997, "learning_rate": 3.528242855276101e-07, "loss": 0.5604, "step": 23086 }, { "epoch": 1.6679249372369824, "grad_norm": 6.942787973147955, "learning_rate": 3.526744825021303e-07, "loss": 0.6223, "step": 23087 }, { "epoch": 1.6679971824371917, "grad_norm": 6.830314093275138, "learning_rate": 3.525247088718697e-07, "loss": 0.5942, "step": 23088 }, { "epoch": 1.6680694276374013, "grad_norm": 7.451805397789413, "learning_rate": 3.5237496463887855e-07, "loss": 0.5927, "step": 23089 }, { "epoch": 1.6681416728376108, "grad_norm": 6.49885901193673, "learning_rate": 3.52225249805207e-07, "loss": 0.6768, "step": 23090 }, { "epoch": 1.6682139180378204, "grad_norm": 7.089170616289008, "learning_rate": 3.5207556437290346e-07, "loss": 0.5882, "step": 23091 }, { "epoch": 1.66828616323803, "grad_norm": 7.415981675422687, "learning_rate": 3.5192590834401797e-07, "loss": 0.5219, "step": 23092 }, { "epoch": 1.6683584084382392, "grad_norm": 7.025211436935018, "learning_rate": 3.517762817205989e-07, "loss": 0.5396, "step": 23093 }, { "epoch": 1.668430653638449, "grad_norm": 6.304967030280943, "learning_rate": 3.5162668450469423e-07, "loss": 0.5819, "step": 23094 }, { "epoch": 1.6685028988386583, "grad_norm": 6.4220701313305915, "learning_rate": 3.5147711669835245e-07, "loss": 0.5906, "step": 23095 }, { "epoch": 1.6685751440388679, "grad_norm": 6.059151894926469, "learning_rate": 3.5132757830362045e-07, "loss": 0.6317, "step": 23096 }, { "epoch": 1.6686473892390774, "grad_norm": 6.560885276141433, "learning_rate": 3.5117806932254637e-07, "loss": 0.6248, "step": 23097 }, { "epoch": 1.668719634439287, "grad_norm": 8.377404166788807, "learning_rate": 3.510285897571755e-07, "loss": 0.6296, "step": 23098 }, { "epoch": 1.6687918796394965, "grad_norm": 7.6904499627708915, "learning_rate": 3.5087913960955453e-07, "loss": 0.5555, "step": 23099 }, { "epoch": 1.6688641248397058, "grad_norm": 7.417866670489711, "learning_rate": 3.5072971888173017e-07, "loss": 0.67, "step": 23100 }, { "epoch": 1.6689363700399156, "grad_norm": 7.019497556610845, "learning_rate": 3.505803275757458e-07, "loss": 0.6754, "step": 23101 }, { "epoch": 1.669008615240125, "grad_norm": 6.409431374691128, "learning_rate": 3.5043096569364857e-07, "loss": 0.5688, "step": 23102 }, { "epoch": 1.6690808604403347, "grad_norm": 6.8992868248396695, "learning_rate": 3.5028163323748255e-07, "loss": 0.6092, "step": 23103 }, { "epoch": 1.669153105640544, "grad_norm": 8.47341891593942, "learning_rate": 3.501323302092921e-07, "loss": 0.626, "step": 23104 }, { "epoch": 1.6692253508407535, "grad_norm": 5.981572144701226, "learning_rate": 3.499830566111204e-07, "loss": 0.6439, "step": 23105 }, { "epoch": 1.669297596040963, "grad_norm": 7.748221408656221, "learning_rate": 3.4983381244501095e-07, "loss": 0.6517, "step": 23106 }, { "epoch": 1.6693698412411724, "grad_norm": 8.772895286701294, "learning_rate": 3.4968459771300813e-07, "loss": 0.6378, "step": 23107 }, { "epoch": 1.6694420864413821, "grad_norm": 6.829242536283564, "learning_rate": 3.495354124171527e-07, "loss": 0.5259, "step": 23108 }, { "epoch": 1.6695143316415915, "grad_norm": 9.005456330102263, "learning_rate": 3.493862565594877e-07, "loss": 0.6104, "step": 23109 }, { "epoch": 1.6695865768418012, "grad_norm": 6.959066325734837, "learning_rate": 3.4923713014205525e-07, "loss": 0.5491, "step": 23110 }, { "epoch": 1.6696588220420105, "grad_norm": 7.35098131337111, "learning_rate": 3.490880331668964e-07, "loss": 0.6013, "step": 23111 }, { "epoch": 1.66973106724222, "grad_norm": 8.478989157978283, "learning_rate": 3.489389656360523e-07, "loss": 0.6069, "step": 23112 }, { "epoch": 1.6698033124424296, "grad_norm": 7.174592211894863, "learning_rate": 3.4878992755156354e-07, "loss": 0.5619, "step": 23113 }, { "epoch": 1.669875557642639, "grad_norm": 7.691282504797462, "learning_rate": 3.486409189154705e-07, "loss": 0.6225, "step": 23114 }, { "epoch": 1.6699478028428487, "grad_norm": 7.551334682495575, "learning_rate": 3.484919397298134e-07, "loss": 0.5486, "step": 23115 }, { "epoch": 1.670020048043058, "grad_norm": 6.297951428814534, "learning_rate": 3.4834298999663035e-07, "loss": 0.5734, "step": 23116 }, { "epoch": 1.6700922932432678, "grad_norm": 6.980927779395287, "learning_rate": 3.4819406971796115e-07, "loss": 0.5931, "step": 23117 }, { "epoch": 1.6701645384434771, "grad_norm": 7.2002344517153025, "learning_rate": 3.48045178895845e-07, "loss": 0.6149, "step": 23118 }, { "epoch": 1.6702367836436867, "grad_norm": 7.73483170210968, "learning_rate": 3.478963175323191e-07, "loss": 0.5564, "step": 23119 }, { "epoch": 1.6703090288438962, "grad_norm": 8.219903785161033, "learning_rate": 3.4774748562942135e-07, "loss": 0.6231, "step": 23120 }, { "epoch": 1.6703812740441057, "grad_norm": 7.643692867286994, "learning_rate": 3.4759868318918894e-07, "loss": 0.5963, "step": 23121 }, { "epoch": 1.6704535192443153, "grad_norm": 7.899459878347943, "learning_rate": 3.474499102136605e-07, "loss": 0.6814, "step": 23122 }, { "epoch": 1.6705257644445246, "grad_norm": 7.229583405750509, "learning_rate": 3.4730116670487063e-07, "loss": 0.5472, "step": 23123 }, { "epoch": 1.6705980096447344, "grad_norm": 6.082461812894699, "learning_rate": 3.4715245266485647e-07, "loss": 0.6402, "step": 23124 }, { "epoch": 1.6706702548449437, "grad_norm": 8.20199071398909, "learning_rate": 3.470037680956545e-07, "loss": 0.6595, "step": 23125 }, { "epoch": 1.6707425000451532, "grad_norm": 8.32142148932665, "learning_rate": 3.4685511299929837e-07, "loss": 0.578, "step": 23126 }, { "epoch": 1.6708147452453628, "grad_norm": 8.349303950919726, "learning_rate": 3.4670648737782394e-07, "loss": 0.6888, "step": 23127 }, { "epoch": 1.6708869904455723, "grad_norm": 7.220982854417331, "learning_rate": 3.4655789123326597e-07, "loss": 0.5938, "step": 23128 }, { "epoch": 1.6709592356457819, "grad_norm": 6.682805490300567, "learning_rate": 3.4640932456765805e-07, "loss": 0.5768, "step": 23129 }, { "epoch": 1.6710314808459912, "grad_norm": 7.5208158906715, "learning_rate": 3.4626078738303484e-07, "loss": 0.5892, "step": 23130 }, { "epoch": 1.671103726046201, "grad_norm": 6.7177532077622795, "learning_rate": 3.4611227968142866e-07, "loss": 0.571, "step": 23131 }, { "epoch": 1.6711759712464103, "grad_norm": 7.3638676822177445, "learning_rate": 3.459638014648739e-07, "loss": 0.5778, "step": 23132 }, { "epoch": 1.6712482164466198, "grad_norm": 8.20800728965272, "learning_rate": 3.458153527354016e-07, "loss": 0.6546, "step": 23133 }, { "epoch": 1.6713204616468293, "grad_norm": 6.700314517254168, "learning_rate": 3.4566693349504437e-07, "loss": 0.5788, "step": 23134 }, { "epoch": 1.671392706847039, "grad_norm": 7.503024699324391, "learning_rate": 3.4551854374583416e-07, "loss": 0.6244, "step": 23135 }, { "epoch": 1.6714649520472484, "grad_norm": 8.606561384475572, "learning_rate": 3.453701834898027e-07, "loss": 0.5718, "step": 23136 }, { "epoch": 1.6715371972474578, "grad_norm": 5.986874210410579, "learning_rate": 3.4522185272897997e-07, "loss": 0.602, "step": 23137 }, { "epoch": 1.6716094424476675, "grad_norm": 7.515874973509847, "learning_rate": 3.450735514653972e-07, "loss": 0.6068, "step": 23138 }, { "epoch": 1.6716816876478768, "grad_norm": 7.381804349982854, "learning_rate": 3.4492527970108407e-07, "loss": 0.6565, "step": 23139 }, { "epoch": 1.6717539328480864, "grad_norm": 7.762759134632751, "learning_rate": 3.447770374380707e-07, "loss": 0.6655, "step": 23140 }, { "epoch": 1.671826178048296, "grad_norm": 6.403539465638381, "learning_rate": 3.4462882467838605e-07, "loss": 0.6111, "step": 23141 }, { "epoch": 1.6718984232485055, "grad_norm": 6.364053041267063, "learning_rate": 3.444806414240595e-07, "loss": 0.6138, "step": 23142 }, { "epoch": 1.671970668448715, "grad_norm": 6.765208312983895, "learning_rate": 3.443324876771198e-07, "loss": 0.61, "step": 23143 }, { "epoch": 1.6720429136489243, "grad_norm": 6.433813131884981, "learning_rate": 3.441843634395939e-07, "loss": 0.5776, "step": 23144 }, { "epoch": 1.672115158849134, "grad_norm": 6.613020520554389, "learning_rate": 3.4403626871351045e-07, "loss": 0.5806, "step": 23145 }, { "epoch": 1.6721874040493434, "grad_norm": 6.039086187595038, "learning_rate": 3.4388820350089615e-07, "loss": 0.6599, "step": 23146 }, { "epoch": 1.672259649249553, "grad_norm": 6.069567464032297, "learning_rate": 3.437401678037791e-07, "loss": 0.6612, "step": 23147 }, { "epoch": 1.6723318944497625, "grad_norm": 7.899461085613071, "learning_rate": 3.435921616241841e-07, "loss": 0.5481, "step": 23148 }, { "epoch": 1.672404139649972, "grad_norm": 7.530425093811495, "learning_rate": 3.4344418496413763e-07, "loss": 0.6142, "step": 23149 }, { "epoch": 1.6724763848501816, "grad_norm": 6.1584927109401635, "learning_rate": 3.432962378256668e-07, "loss": 0.5826, "step": 23150 }, { "epoch": 1.672548630050391, "grad_norm": 6.466307542929434, "learning_rate": 3.4314832021079547e-07, "loss": 0.5773, "step": 23151 }, { "epoch": 1.6726208752506007, "grad_norm": 6.453221946154349, "learning_rate": 3.430004321215491e-07, "loss": 0.6419, "step": 23152 }, { "epoch": 1.67269312045081, "grad_norm": 8.418053037571061, "learning_rate": 3.4285257355995166e-07, "loss": 0.6616, "step": 23153 }, { "epoch": 1.6727653656510195, "grad_norm": 7.893669885514254, "learning_rate": 3.427047445280285e-07, "loss": 0.6795, "step": 23154 }, { "epoch": 1.672837610851229, "grad_norm": 7.246792313867704, "learning_rate": 3.425569450278016e-07, "loss": 0.6499, "step": 23155 }, { "epoch": 1.6729098560514386, "grad_norm": 7.974344122532606, "learning_rate": 3.4240917506129483e-07, "loss": 0.6173, "step": 23156 }, { "epoch": 1.6729821012516481, "grad_norm": 7.257815653591887, "learning_rate": 3.422614346305314e-07, "loss": 0.5672, "step": 23157 }, { "epoch": 1.6730543464518575, "grad_norm": 7.861617327754935, "learning_rate": 3.4211372373753355e-07, "loss": 0.6181, "step": 23158 }, { "epoch": 1.6731265916520672, "grad_norm": 8.686973061807116, "learning_rate": 3.4196604238432323e-07, "loss": 0.6527, "step": 23159 }, { "epoch": 1.6731988368522765, "grad_norm": 7.057314969595598, "learning_rate": 3.418183905729222e-07, "loss": 0.6295, "step": 23160 }, { "epoch": 1.673271082052486, "grad_norm": 7.275369389650217, "learning_rate": 3.4167076830535246e-07, "loss": 0.6078, "step": 23161 }, { "epoch": 1.6733433272526956, "grad_norm": 6.938748633938289, "learning_rate": 3.4152317558363316e-07, "loss": 0.6267, "step": 23162 }, { "epoch": 1.6734155724529052, "grad_norm": 6.589943605219297, "learning_rate": 3.413756124097858e-07, "loss": 0.5781, "step": 23163 }, { "epoch": 1.6734878176531147, "grad_norm": 6.7386098764973, "learning_rate": 3.412280787858305e-07, "loss": 0.5358, "step": 23164 }, { "epoch": 1.673560062853324, "grad_norm": 7.261771806725217, "learning_rate": 3.410805747137869e-07, "loss": 0.5812, "step": 23165 }, { "epoch": 1.6736323080535338, "grad_norm": 7.514600148581853, "learning_rate": 3.409331001956734e-07, "loss": 0.6335, "step": 23166 }, { "epoch": 1.6737045532537431, "grad_norm": 7.470515724773746, "learning_rate": 3.407856552335093e-07, "loss": 0.6112, "step": 23167 }, { "epoch": 1.6737767984539527, "grad_norm": 6.798049018577637, "learning_rate": 3.4063823982931315e-07, "loss": 0.5733, "step": 23168 }, { "epoch": 1.6738490436541622, "grad_norm": 7.958408002519745, "learning_rate": 3.404908539851029e-07, "loss": 0.6277, "step": 23169 }, { "epoch": 1.6739212888543717, "grad_norm": 6.849652257849796, "learning_rate": 3.40343497702896e-07, "loss": 0.6272, "step": 23170 }, { "epoch": 1.6739935340545813, "grad_norm": 7.589269250162682, "learning_rate": 3.4019617098470986e-07, "loss": 0.6486, "step": 23171 }, { "epoch": 1.6740657792547906, "grad_norm": 6.99149214201276, "learning_rate": 3.400488738325616e-07, "loss": 0.5357, "step": 23172 }, { "epoch": 1.6741380244550004, "grad_norm": 6.376694996211363, "learning_rate": 3.399016062484667e-07, "loss": 0.5771, "step": 23173 }, { "epoch": 1.6742102696552097, "grad_norm": 7.163786937942552, "learning_rate": 3.397543682344415e-07, "loss": 0.6664, "step": 23174 }, { "epoch": 1.6742825148554195, "grad_norm": 7.319286360009712, "learning_rate": 3.396071597925024e-07, "loss": 0.6084, "step": 23175 }, { "epoch": 1.6743547600556288, "grad_norm": 7.232614916914694, "learning_rate": 3.394599809246632e-07, "loss": 0.6027, "step": 23176 }, { "epoch": 1.6744270052558383, "grad_norm": 8.527451113043778, "learning_rate": 3.3931283163293916e-07, "loss": 0.6672, "step": 23177 }, { "epoch": 1.6744992504560479, "grad_norm": 7.386651097819496, "learning_rate": 3.391657119193445e-07, "loss": 0.609, "step": 23178 }, { "epoch": 1.6745714956562572, "grad_norm": 5.816435435307066, "learning_rate": 3.3901862178589427e-07, "loss": 0.5988, "step": 23179 }, { "epoch": 1.674643740856467, "grad_norm": 8.39044132422332, "learning_rate": 3.388715612346011e-07, "loss": 0.6311, "step": 23180 }, { "epoch": 1.6747159860566763, "grad_norm": 7.658350317780178, "learning_rate": 3.38724530267478e-07, "loss": 0.6095, "step": 23181 }, { "epoch": 1.674788231256886, "grad_norm": 7.500131733055333, "learning_rate": 3.385775288865384e-07, "loss": 0.6215, "step": 23182 }, { "epoch": 1.6748604764570953, "grad_norm": 8.533079070038397, "learning_rate": 3.3843055709379376e-07, "loss": 0.6041, "step": 23183 }, { "epoch": 1.674932721657305, "grad_norm": 6.359859663656601, "learning_rate": 3.3828361489125634e-07, "loss": 0.6153, "step": 23184 }, { "epoch": 1.6750049668575144, "grad_norm": 6.499785493098893, "learning_rate": 3.381367022809379e-07, "loss": 0.5367, "step": 23185 }, { "epoch": 1.6750772120577238, "grad_norm": 5.776952847661262, "learning_rate": 3.3798981926484926e-07, "loss": 0.5916, "step": 23186 }, { "epoch": 1.6751494572579335, "grad_norm": 7.586049889341882, "learning_rate": 3.3784296584500136e-07, "loss": 0.5471, "step": 23187 }, { "epoch": 1.6752217024581428, "grad_norm": 7.509300822742125, "learning_rate": 3.3769614202340457e-07, "loss": 0.6175, "step": 23188 }, { "epoch": 1.6752939476583526, "grad_norm": 8.306322139659223, "learning_rate": 3.3754934780206917e-07, "loss": 0.5598, "step": 23189 }, { "epoch": 1.675366192858562, "grad_norm": 7.049277562129982, "learning_rate": 3.374025831830036e-07, "loss": 0.6131, "step": 23190 }, { "epoch": 1.6754384380587715, "grad_norm": 7.975705690163317, "learning_rate": 3.3725584816821753e-07, "loss": 0.5625, "step": 23191 }, { "epoch": 1.675510683258981, "grad_norm": 7.6634549863544486, "learning_rate": 3.371091427597198e-07, "loss": 0.5965, "step": 23192 }, { "epoch": 1.6755829284591905, "grad_norm": 7.214776601101977, "learning_rate": 3.3696246695951894e-07, "loss": 0.5939, "step": 23193 }, { "epoch": 1.6756551736594, "grad_norm": 7.111950083090783, "learning_rate": 3.3681582076962176e-07, "loss": 0.5985, "step": 23194 }, { "epoch": 1.6757274188596094, "grad_norm": 9.541410040146017, "learning_rate": 3.366692041920364e-07, "loss": 0.6569, "step": 23195 }, { "epoch": 1.6757996640598192, "grad_norm": 7.583324921432133, "learning_rate": 3.3652261722877e-07, "loss": 0.5921, "step": 23196 }, { "epoch": 1.6758719092600285, "grad_norm": 6.797712322705752, "learning_rate": 3.3637605988182916e-07, "loss": 0.5718, "step": 23197 }, { "epoch": 1.675944154460238, "grad_norm": 7.570721909482139, "learning_rate": 3.3622953215322e-07, "loss": 0.6017, "step": 23198 }, { "epoch": 1.6760163996604476, "grad_norm": 7.429644114210206, "learning_rate": 3.360830340449486e-07, "loss": 0.6414, "step": 23199 }, { "epoch": 1.6760886448606571, "grad_norm": 5.964474409918469, "learning_rate": 3.359365655590208e-07, "loss": 0.5843, "step": 23200 }, { "epoch": 1.6761608900608667, "grad_norm": 8.264459134755224, "learning_rate": 3.357901266974406e-07, "loss": 0.5896, "step": 23201 }, { "epoch": 1.676233135261076, "grad_norm": 7.6485267766431555, "learning_rate": 3.356437174622132e-07, "loss": 0.5961, "step": 23202 }, { "epoch": 1.6763053804612857, "grad_norm": 7.582966750230903, "learning_rate": 3.354973378553428e-07, "loss": 0.6352, "step": 23203 }, { "epoch": 1.676377625661495, "grad_norm": 6.547921108527947, "learning_rate": 3.3535098787883356e-07, "loss": 0.5908, "step": 23204 }, { "epoch": 1.6764498708617046, "grad_norm": 7.740425840979747, "learning_rate": 3.3520466753468814e-07, "loss": 0.6144, "step": 23205 }, { "epoch": 1.6765221160619141, "grad_norm": 6.471236630964405, "learning_rate": 3.3505837682490963e-07, "loss": 0.6542, "step": 23206 }, { "epoch": 1.6765943612621237, "grad_norm": 6.595693871217968, "learning_rate": 3.3491211575150173e-07, "loss": 0.5763, "step": 23207 }, { "epoch": 1.6766666064623332, "grad_norm": 7.6854739116612825, "learning_rate": 3.3476588431646556e-07, "loss": 0.6179, "step": 23208 }, { "epoch": 1.6767388516625426, "grad_norm": 7.766601009937268, "learning_rate": 3.3461968252180313e-07, "loss": 0.5578, "step": 23209 }, { "epoch": 1.6768110968627523, "grad_norm": 7.520378909203198, "learning_rate": 3.34473510369516e-07, "loss": 0.5811, "step": 23210 }, { "epoch": 1.6768833420629616, "grad_norm": 7.73713280418147, "learning_rate": 3.3432736786160544e-07, "loss": 0.5592, "step": 23211 }, { "epoch": 1.6769555872631712, "grad_norm": 6.815978955536354, "learning_rate": 3.341812550000714e-07, "loss": 0.5631, "step": 23212 }, { "epoch": 1.6770278324633807, "grad_norm": 6.252419575597627, "learning_rate": 3.340351717869142e-07, "loss": 0.4967, "step": 23213 }, { "epoch": 1.6771000776635903, "grad_norm": 9.356395113322307, "learning_rate": 3.338891182241338e-07, "loss": 0.612, "step": 23214 }, { "epoch": 1.6771723228637998, "grad_norm": 5.744411240331499, "learning_rate": 3.337430943137296e-07, "loss": 0.5842, "step": 23215 }, { "epoch": 1.6772445680640091, "grad_norm": 6.881340570919195, "learning_rate": 3.3359710005770013e-07, "loss": 0.5971, "step": 23216 }, { "epoch": 1.6773168132642189, "grad_norm": 8.817830522970825, "learning_rate": 3.334511354580444e-07, "loss": 0.6515, "step": 23217 }, { "epoch": 1.6773890584644282, "grad_norm": 8.131669344340287, "learning_rate": 3.333052005167611e-07, "loss": 0.6203, "step": 23218 }, { "epoch": 1.6774613036646377, "grad_norm": 8.013853475961467, "learning_rate": 3.3315929523584646e-07, "loss": 0.6855, "step": 23219 }, { "epoch": 1.6775335488648473, "grad_norm": 7.478248334378497, "learning_rate": 3.3301341961729877e-07, "loss": 0.6259, "step": 23220 }, { "epoch": 1.6776057940650568, "grad_norm": 6.261193284935257, "learning_rate": 3.3286757366311486e-07, "loss": 0.656, "step": 23221 }, { "epoch": 1.6776780392652664, "grad_norm": 7.644552208389768, "learning_rate": 3.3272175737529166e-07, "loss": 0.5948, "step": 23222 }, { "epoch": 1.6777502844654757, "grad_norm": 7.2793817701985395, "learning_rate": 3.325759707558243e-07, "loss": 0.6163, "step": 23223 }, { "epoch": 1.6778225296656855, "grad_norm": 7.799444902670633, "learning_rate": 3.3243021380670925e-07, "loss": 0.5211, "step": 23224 }, { "epoch": 1.6778947748658948, "grad_norm": 7.823711925437716, "learning_rate": 3.3228448652994127e-07, "loss": 0.63, "step": 23225 }, { "epoch": 1.6779670200661043, "grad_norm": 6.640164563541193, "learning_rate": 3.3213878892751543e-07, "loss": 0.5869, "step": 23226 }, { "epoch": 1.6780392652663139, "grad_norm": 6.671210012417924, "learning_rate": 3.3199312100142655e-07, "loss": 0.6338, "step": 23227 }, { "epoch": 1.6781115104665234, "grad_norm": 7.781205142707677, "learning_rate": 3.3184748275366855e-07, "loss": 0.6156, "step": 23228 }, { "epoch": 1.678183755666733, "grad_norm": 7.5801314696332165, "learning_rate": 3.3170187418623545e-07, "loss": 0.6303, "step": 23229 }, { "epoch": 1.6782560008669423, "grad_norm": 6.286530206412183, "learning_rate": 3.315562953011198e-07, "loss": 0.6137, "step": 23230 }, { "epoch": 1.678328246067152, "grad_norm": 7.227351888895941, "learning_rate": 3.3141074610031436e-07, "loss": 0.5704, "step": 23231 }, { "epoch": 1.6784004912673613, "grad_norm": 7.336401586508565, "learning_rate": 3.3126522658581293e-07, "loss": 0.5976, "step": 23232 }, { "epoch": 1.678472736467571, "grad_norm": 7.22572867866931, "learning_rate": 3.3111973675960587e-07, "loss": 0.6571, "step": 23233 }, { "epoch": 1.6785449816677804, "grad_norm": 7.701963387393629, "learning_rate": 3.309742766236851e-07, "loss": 0.6117, "step": 23234 }, { "epoch": 1.67861722686799, "grad_norm": 6.717412771177524, "learning_rate": 3.3082884618004294e-07, "loss": 0.5911, "step": 23235 }, { "epoch": 1.6786894720681995, "grad_norm": 8.163961572342652, "learning_rate": 3.306834454306701e-07, "loss": 0.6058, "step": 23236 }, { "epoch": 1.6787617172684088, "grad_norm": 7.663918652287922, "learning_rate": 3.3053807437755604e-07, "loss": 0.6504, "step": 23237 }, { "epoch": 1.6788339624686186, "grad_norm": 8.921252441063839, "learning_rate": 3.303927330226914e-07, "loss": 0.6249, "step": 23238 }, { "epoch": 1.678906207668828, "grad_norm": 7.36553062666591, "learning_rate": 3.3024742136806594e-07, "loss": 0.6387, "step": 23239 }, { "epoch": 1.6789784528690375, "grad_norm": 6.94956457297191, "learning_rate": 3.301021394156681e-07, "loss": 0.6211, "step": 23240 }, { "epoch": 1.679050698069247, "grad_norm": 7.318555362501137, "learning_rate": 3.2995688716748743e-07, "loss": 0.5691, "step": 23241 }, { "epoch": 1.6791229432694565, "grad_norm": 7.658569980931952, "learning_rate": 3.298116646255117e-07, "loss": 0.6073, "step": 23242 }, { "epoch": 1.679195188469666, "grad_norm": 7.631676924227155, "learning_rate": 3.2966647179172945e-07, "loss": 0.5802, "step": 23243 }, { "epoch": 1.6792674336698754, "grad_norm": 7.241134748103058, "learning_rate": 3.2952130866812787e-07, "loss": 0.5908, "step": 23244 }, { "epoch": 1.6793396788700852, "grad_norm": 6.699995114908288, "learning_rate": 3.293761752566943e-07, "loss": 0.6638, "step": 23245 }, { "epoch": 1.6794119240702945, "grad_norm": 7.150925166956245, "learning_rate": 3.2923107155941546e-07, "loss": 0.6557, "step": 23246 }, { "epoch": 1.6794841692705043, "grad_norm": 7.687150900752974, "learning_rate": 3.290859975782787e-07, "loss": 0.5949, "step": 23247 }, { "epoch": 1.6795564144707136, "grad_norm": 7.259623750127013, "learning_rate": 3.289409533152682e-07, "loss": 0.602, "step": 23248 }, { "epoch": 1.6796286596709231, "grad_norm": 8.8197389995684, "learning_rate": 3.287959387723702e-07, "loss": 0.6083, "step": 23249 }, { "epoch": 1.6797009048711327, "grad_norm": 8.304185882663521, "learning_rate": 3.286509539515706e-07, "loss": 0.6322, "step": 23250 }, { "epoch": 1.679773150071342, "grad_norm": 7.389901317304053, "learning_rate": 3.285059988548531e-07, "loss": 0.6251, "step": 23251 }, { "epoch": 1.6798453952715517, "grad_norm": 6.001364552779753, "learning_rate": 3.2836107348420245e-07, "loss": 0.5261, "step": 23252 }, { "epoch": 1.679917640471761, "grad_norm": 8.48995243198472, "learning_rate": 3.2821617784160237e-07, "loss": 0.5818, "step": 23253 }, { "epoch": 1.6799898856719708, "grad_norm": 6.4949887838993625, "learning_rate": 3.280713119290366e-07, "loss": 0.5286, "step": 23254 }, { "epoch": 1.6800621308721801, "grad_norm": 8.017525549842844, "learning_rate": 3.279264757484882e-07, "loss": 0.5754, "step": 23255 }, { "epoch": 1.6801343760723897, "grad_norm": 6.123821845893636, "learning_rate": 3.277816693019398e-07, "loss": 0.5544, "step": 23256 }, { "epoch": 1.6802066212725992, "grad_norm": 6.546670478047947, "learning_rate": 3.276368925913742e-07, "loss": 0.6478, "step": 23257 }, { "epoch": 1.6802788664728086, "grad_norm": 7.202147311970251, "learning_rate": 3.2749214561877215e-07, "loss": 0.62, "step": 23258 }, { "epoch": 1.6803511116730183, "grad_norm": 9.624535487472077, "learning_rate": 3.273474283861161e-07, "loss": 0.6471, "step": 23259 }, { "epoch": 1.6804233568732276, "grad_norm": 6.61406222315733, "learning_rate": 3.272027408953865e-07, "loss": 0.6548, "step": 23260 }, { "epoch": 1.6804956020734374, "grad_norm": 8.145385053418718, "learning_rate": 3.270580831485651e-07, "loss": 0.5282, "step": 23261 }, { "epoch": 1.6805678472736467, "grad_norm": 6.862570329038245, "learning_rate": 3.269134551476308e-07, "loss": 0.5875, "step": 23262 }, { "epoch": 1.6806400924738563, "grad_norm": 7.1139411096502805, "learning_rate": 3.2676885689456345e-07, "loss": 0.6045, "step": 23263 }, { "epoch": 1.6807123376740658, "grad_norm": 7.221449574279212, "learning_rate": 3.26624288391344e-07, "loss": 0.5926, "step": 23264 }, { "epoch": 1.6807845828742753, "grad_norm": 7.401122044219223, "learning_rate": 3.2647974963994995e-07, "loss": 0.595, "step": 23265 }, { "epoch": 1.6808568280744849, "grad_norm": 7.344538999337737, "learning_rate": 3.263352406423606e-07, "loss": 0.558, "step": 23266 }, { "epoch": 1.6809290732746942, "grad_norm": 6.221423930472529, "learning_rate": 3.261907614005541e-07, "loss": 0.6218, "step": 23267 }, { "epoch": 1.681001318474904, "grad_norm": 8.19395227480829, "learning_rate": 3.2604631191650886e-07, "loss": 0.5558, "step": 23268 }, { "epoch": 1.6810735636751133, "grad_norm": 7.533593537329536, "learning_rate": 3.259018921922011e-07, "loss": 0.6199, "step": 23269 }, { "epoch": 1.6811458088753228, "grad_norm": 7.442183358373296, "learning_rate": 3.2575750222960804e-07, "loss": 0.6028, "step": 23270 }, { "epoch": 1.6812180540755324, "grad_norm": 7.7215242535457795, "learning_rate": 3.2561314203070683e-07, "loss": 0.612, "step": 23271 }, { "epoch": 1.681290299275742, "grad_norm": 7.93478419182894, "learning_rate": 3.254688115974733e-07, "loss": 0.6113, "step": 23272 }, { "epoch": 1.6813625444759515, "grad_norm": 7.570051978070137, "learning_rate": 3.2532451093188337e-07, "loss": 0.564, "step": 23273 }, { "epoch": 1.6814347896761608, "grad_norm": 6.361412078237849, "learning_rate": 3.251802400359125e-07, "loss": 0.5166, "step": 23274 }, { "epoch": 1.6815070348763705, "grad_norm": 7.022064583990133, "learning_rate": 3.2503599891153593e-07, "loss": 0.6596, "step": 23275 }, { "epoch": 1.6815792800765799, "grad_norm": 6.72827155081626, "learning_rate": 3.2489178756072715e-07, "loss": 0.6248, "step": 23276 }, { "epoch": 1.6816515252767894, "grad_norm": 8.603999126240975, "learning_rate": 3.2474760598546094e-07, "loss": 0.6538, "step": 23277 }, { "epoch": 1.681723770476999, "grad_norm": 6.408427840562403, "learning_rate": 3.2460345418771104e-07, "loss": 0.5554, "step": 23278 }, { "epoch": 1.6817960156772085, "grad_norm": 8.861432087292398, "learning_rate": 3.244593321694514e-07, "loss": 0.7173, "step": 23279 }, { "epoch": 1.681868260877418, "grad_norm": 6.736902032761307, "learning_rate": 3.243152399326538e-07, "loss": 0.5663, "step": 23280 }, { "epoch": 1.6819405060776274, "grad_norm": 7.556795853681556, "learning_rate": 3.241711774792913e-07, "loss": 0.5746, "step": 23281 }, { "epoch": 1.6820127512778371, "grad_norm": 6.4913780543263275, "learning_rate": 3.240271448113358e-07, "loss": 0.6379, "step": 23282 }, { "epoch": 1.6820849964780464, "grad_norm": 7.85826779523956, "learning_rate": 3.238831419307592e-07, "loss": 0.6986, "step": 23283 }, { "epoch": 1.682157241678256, "grad_norm": 8.024809039449407, "learning_rate": 3.2373916883953275e-07, "loss": 0.6651, "step": 23284 }, { "epoch": 1.6822294868784655, "grad_norm": 6.9637871420559145, "learning_rate": 3.2359522553962743e-07, "loss": 0.6026, "step": 23285 }, { "epoch": 1.682301732078675, "grad_norm": 6.4752356409033185, "learning_rate": 3.234513120330138e-07, "loss": 0.5119, "step": 23286 }, { "epoch": 1.6823739772788846, "grad_norm": 8.652635170741293, "learning_rate": 3.233074283216614e-07, "loss": 0.6436, "step": 23287 }, { "epoch": 1.682446222479094, "grad_norm": 6.644000750427387, "learning_rate": 3.2316357440754036e-07, "loss": 0.5793, "step": 23288 }, { "epoch": 1.6825184676793037, "grad_norm": 7.30660877766382, "learning_rate": 3.230197502926202e-07, "loss": 0.5388, "step": 23289 }, { "epoch": 1.682590712879513, "grad_norm": 6.769440524312702, "learning_rate": 3.2287595597886884e-07, "loss": 0.6955, "step": 23290 }, { "epoch": 1.6826629580797225, "grad_norm": 7.695309525697395, "learning_rate": 3.227321914682546e-07, "loss": 0.6292, "step": 23291 }, { "epoch": 1.682735203279932, "grad_norm": 8.714441472253965, "learning_rate": 3.225884567627466e-07, "loss": 0.6561, "step": 23292 }, { "epoch": 1.6828074484801416, "grad_norm": 6.624842228000485, "learning_rate": 3.224447518643126e-07, "loss": 0.5871, "step": 23293 }, { "epoch": 1.6828796936803512, "grad_norm": 7.386417150770414, "learning_rate": 3.223010767749188e-07, "loss": 0.5778, "step": 23294 }, { "epoch": 1.6829519388805605, "grad_norm": 7.226268205578525, "learning_rate": 3.22157431496532e-07, "loss": 0.6672, "step": 23295 }, { "epoch": 1.6830241840807703, "grad_norm": 8.02943013868673, "learning_rate": 3.2201381603111923e-07, "loss": 0.5917, "step": 23296 }, { "epoch": 1.6830964292809796, "grad_norm": 6.792045754304402, "learning_rate": 3.2187023038064676e-07, "loss": 0.6174, "step": 23297 }, { "epoch": 1.6831686744811891, "grad_norm": 8.393278302695789, "learning_rate": 3.2172667454707927e-07, "loss": 0.6316, "step": 23298 }, { "epoch": 1.6832409196813987, "grad_norm": 7.699300253643311, "learning_rate": 3.21583148532382e-07, "loss": 0.6118, "step": 23299 }, { "epoch": 1.6833131648816082, "grad_norm": 8.041915757928404, "learning_rate": 3.214396523385199e-07, "loss": 0.6365, "step": 23300 }, { "epoch": 1.6833854100818177, "grad_norm": 7.262460720986858, "learning_rate": 3.2129618596745766e-07, "loss": 0.6016, "step": 23301 }, { "epoch": 1.683457655282027, "grad_norm": 7.030318948773546, "learning_rate": 3.2115274942115887e-07, "loss": 0.6366, "step": 23302 }, { "epoch": 1.6835299004822368, "grad_norm": 6.373383560195565, "learning_rate": 3.210093427015873e-07, "loss": 0.5941, "step": 23303 }, { "epoch": 1.6836021456824461, "grad_norm": 5.563022374688437, "learning_rate": 3.208659658107063e-07, "loss": 0.5539, "step": 23304 }, { "epoch": 1.6836743908826557, "grad_norm": 7.822191742394529, "learning_rate": 3.2072261875047807e-07, "loss": 0.6374, "step": 23305 }, { "epoch": 1.6837466360828652, "grad_norm": 6.34976737730207, "learning_rate": 3.2057930152286474e-07, "loss": 0.6128, "step": 23306 }, { "epoch": 1.6838188812830748, "grad_norm": 7.772897918223237, "learning_rate": 3.204360141298293e-07, "loss": 0.5757, "step": 23307 }, { "epoch": 1.6838911264832843, "grad_norm": 7.391384738962991, "learning_rate": 3.202927565733319e-07, "loss": 0.6037, "step": 23308 }, { "epoch": 1.6839633716834936, "grad_norm": 6.900719270672929, "learning_rate": 3.2014952885533413e-07, "loss": 0.6612, "step": 23309 }, { "epoch": 1.6840356168837034, "grad_norm": 7.298023729757764, "learning_rate": 3.200063309777968e-07, "loss": 0.5875, "step": 23310 }, { "epoch": 1.6841078620839127, "grad_norm": 6.867995144130832, "learning_rate": 3.1986316294268016e-07, "loss": 0.6132, "step": 23311 }, { "epoch": 1.6841801072841223, "grad_norm": 6.03569033336643, "learning_rate": 3.197200247519441e-07, "loss": 0.5789, "step": 23312 }, { "epoch": 1.6842523524843318, "grad_norm": 6.993317275172811, "learning_rate": 3.1957691640754793e-07, "loss": 0.5509, "step": 23313 }, { "epoch": 1.6843245976845413, "grad_norm": 6.155799190842608, "learning_rate": 3.1943383791145144e-07, "loss": 0.6311, "step": 23314 }, { "epoch": 1.6843968428847509, "grad_norm": 7.405186315197813, "learning_rate": 3.192907892656119e-07, "loss": 0.5778, "step": 23315 }, { "epoch": 1.6844690880849602, "grad_norm": 6.652653971957768, "learning_rate": 3.1914777047198836e-07, "loss": 0.614, "step": 23316 }, { "epoch": 1.68454133328517, "grad_norm": 7.741116508368969, "learning_rate": 3.1900478153253837e-07, "loss": 0.6276, "step": 23317 }, { "epoch": 1.6846135784853793, "grad_norm": 6.064778951593327, "learning_rate": 3.188618224492204e-07, "loss": 0.588, "step": 23318 }, { "epoch": 1.6846858236855888, "grad_norm": 7.8059679724123985, "learning_rate": 3.18718893223989e-07, "loss": 0.6431, "step": 23319 }, { "epoch": 1.6847580688857984, "grad_norm": 6.20451650698499, "learning_rate": 3.1857599385880317e-07, "loss": 0.6099, "step": 23320 }, { "epoch": 1.684830314086008, "grad_norm": 7.324196093714958, "learning_rate": 3.184331243556185e-07, "loss": 0.6884, "step": 23321 }, { "epoch": 1.6849025592862175, "grad_norm": 7.006640690332831, "learning_rate": 3.182902847163902e-07, "loss": 0.6288, "step": 23322 }, { "epoch": 1.6849748044864268, "grad_norm": 8.090681635900408, "learning_rate": 3.181474749430738e-07, "loss": 0.6351, "step": 23323 }, { "epoch": 1.6850470496866365, "grad_norm": 6.575157819239048, "learning_rate": 3.180046950376245e-07, "loss": 0.6234, "step": 23324 }, { "epoch": 1.6851192948868459, "grad_norm": 6.958529380122872, "learning_rate": 3.1786194500199707e-07, "loss": 0.6555, "step": 23325 }, { "epoch": 1.6851915400870556, "grad_norm": 6.888820558981607, "learning_rate": 3.177192248381447e-07, "loss": 0.5601, "step": 23326 }, { "epoch": 1.685263785287265, "grad_norm": 6.2127171683779245, "learning_rate": 3.1757653454802194e-07, "loss": 0.6012, "step": 23327 }, { "epoch": 1.6853360304874745, "grad_norm": 6.528577339858932, "learning_rate": 3.174338741335817e-07, "loss": 0.6119, "step": 23328 }, { "epoch": 1.685408275687684, "grad_norm": 7.348205513428304, "learning_rate": 3.17291243596777e-07, "loss": 0.6461, "step": 23329 }, { "epoch": 1.6854805208878934, "grad_norm": 6.827260955682185, "learning_rate": 3.1714864293956066e-07, "loss": 0.5554, "step": 23330 }, { "epoch": 1.6855527660881031, "grad_norm": 6.000611274098814, "learning_rate": 3.170060721638843e-07, "loss": 0.6064, "step": 23331 }, { "epoch": 1.6856250112883124, "grad_norm": 6.756503610147452, "learning_rate": 3.168635312717006e-07, "loss": 0.5542, "step": 23332 }, { "epoch": 1.6856972564885222, "grad_norm": 6.697329134012818, "learning_rate": 3.1672102026495925e-07, "loss": 0.6537, "step": 23333 }, { "epoch": 1.6857695016887315, "grad_norm": 6.306051756632213, "learning_rate": 3.165785391456122e-07, "loss": 0.5766, "step": 23334 }, { "epoch": 1.685841746888941, "grad_norm": 6.970172275072508, "learning_rate": 3.1643608791560946e-07, "loss": 0.634, "step": 23335 }, { "epoch": 1.6859139920891506, "grad_norm": 7.489604102801377, "learning_rate": 3.1629366657690175e-07, "loss": 0.608, "step": 23336 }, { "epoch": 1.68598623728936, "grad_norm": 6.473118583084496, "learning_rate": 3.161512751314377e-07, "loss": 0.6067, "step": 23337 }, { "epoch": 1.6860584824895697, "grad_norm": 6.379461186057492, "learning_rate": 3.1600891358116726e-07, "loss": 0.6413, "step": 23338 }, { "epoch": 1.686130727689779, "grad_norm": 7.80109665815161, "learning_rate": 3.1586658192803875e-07, "loss": 0.6393, "step": 23339 }, { "epoch": 1.6862029728899888, "grad_norm": 7.7056684956810875, "learning_rate": 3.15724280174001e-07, "loss": 0.6044, "step": 23340 }, { "epoch": 1.686275218090198, "grad_norm": 6.957519514116123, "learning_rate": 3.1558200832100177e-07, "loss": 0.6669, "step": 23341 }, { "epoch": 1.6863474632904076, "grad_norm": 6.8911273799112225, "learning_rate": 3.1543976637098907e-07, "loss": 0.572, "step": 23342 }, { "epoch": 1.6864197084906172, "grad_norm": 7.494863149705437, "learning_rate": 3.1529755432590984e-07, "loss": 0.6371, "step": 23343 }, { "epoch": 1.6864919536908267, "grad_norm": 7.186321924937533, "learning_rate": 3.1515537218771067e-07, "loss": 0.5873, "step": 23344 }, { "epoch": 1.6865641988910363, "grad_norm": 6.932327679302433, "learning_rate": 3.150132199583378e-07, "loss": 0.5836, "step": 23345 }, { "epoch": 1.6866364440912456, "grad_norm": 7.1612832305771095, "learning_rate": 3.148710976397379e-07, "loss": 0.6079, "step": 23346 }, { "epoch": 1.6867086892914553, "grad_norm": 7.911354317063985, "learning_rate": 3.14729005233855e-07, "loss": 0.5691, "step": 23347 }, { "epoch": 1.6867809344916647, "grad_norm": 7.804607830080103, "learning_rate": 3.1458694274263596e-07, "loss": 0.5886, "step": 23348 }, { "epoch": 1.6868531796918742, "grad_norm": 6.663044644937455, "learning_rate": 3.144449101680247e-07, "loss": 0.6339, "step": 23349 }, { "epoch": 1.6869254248920837, "grad_norm": 6.206041395171642, "learning_rate": 3.1430290751196605e-07, "loss": 0.6176, "step": 23350 }, { "epoch": 1.6869976700922933, "grad_norm": 6.022421903377029, "learning_rate": 3.1416093477640327e-07, "loss": 0.675, "step": 23351 }, { "epoch": 1.6870699152925028, "grad_norm": 5.031423814507807, "learning_rate": 3.1401899196327996e-07, "loss": 0.5533, "step": 23352 }, { "epoch": 1.6871421604927122, "grad_norm": 6.987586595138592, "learning_rate": 3.1387707907453905e-07, "loss": 0.5611, "step": 23353 }, { "epoch": 1.687214405692922, "grad_norm": 6.994036450285337, "learning_rate": 3.137351961121246e-07, "loss": 0.6041, "step": 23354 }, { "epoch": 1.6872866508931312, "grad_norm": 7.273135136958407, "learning_rate": 3.1359334307797695e-07, "loss": 0.628, "step": 23355 }, { "epoch": 1.6873588960933408, "grad_norm": 6.549675317535726, "learning_rate": 3.134515199740387e-07, "loss": 0.5174, "step": 23356 }, { "epoch": 1.6874311412935503, "grad_norm": 7.774069050192954, "learning_rate": 3.1330972680225143e-07, "loss": 0.5414, "step": 23357 }, { "epoch": 1.6875033864937599, "grad_norm": 7.890529035230182, "learning_rate": 3.1316796356455626e-07, "loss": 0.4858, "step": 23358 }, { "epoch": 1.6875756316939694, "grad_norm": 6.713694955177207, "learning_rate": 3.1302623026289364e-07, "loss": 0.5335, "step": 23359 }, { "epoch": 1.6876478768941787, "grad_norm": 7.804187961053047, "learning_rate": 3.1288452689920397e-07, "loss": 0.6136, "step": 23360 }, { "epoch": 1.6877201220943885, "grad_norm": 7.529742458037658, "learning_rate": 3.127428534754273e-07, "loss": 0.5927, "step": 23361 }, { "epoch": 1.6877923672945978, "grad_norm": 6.613028307976582, "learning_rate": 3.126012099935022e-07, "loss": 0.6113, "step": 23362 }, { "epoch": 1.6878646124948073, "grad_norm": 7.4181168527869366, "learning_rate": 3.124595964553684e-07, "loss": 0.5261, "step": 23363 }, { "epoch": 1.687936857695017, "grad_norm": 6.829062670260517, "learning_rate": 3.123180128629644e-07, "loss": 0.6038, "step": 23364 }, { "epoch": 1.6880091028952264, "grad_norm": 6.413579850617293, "learning_rate": 3.1217645921822777e-07, "loss": 0.5571, "step": 23365 }, { "epoch": 1.688081348095436, "grad_norm": 6.741713346957067, "learning_rate": 3.120349355230967e-07, "loss": 0.5849, "step": 23366 }, { "epoch": 1.6881535932956453, "grad_norm": 7.543269652691905, "learning_rate": 3.118934417795083e-07, "loss": 0.527, "step": 23367 }, { "epoch": 1.688225838495855, "grad_norm": 7.9230936832264955, "learning_rate": 3.1175197798939986e-07, "loss": 0.7093, "step": 23368 }, { "epoch": 1.6882980836960644, "grad_norm": 7.0599809053070945, "learning_rate": 3.116105441547079e-07, "loss": 0.5942, "step": 23369 }, { "epoch": 1.688370328896274, "grad_norm": 7.12276838724179, "learning_rate": 3.1146914027736843e-07, "loss": 0.5323, "step": 23370 }, { "epoch": 1.6884425740964835, "grad_norm": 6.572115293600155, "learning_rate": 3.1132776635931754e-07, "loss": 0.5759, "step": 23371 }, { "epoch": 1.688514819296693, "grad_norm": 7.5572032191549185, "learning_rate": 3.1118642240248964e-07, "loss": 0.6123, "step": 23372 }, { "epoch": 1.6885870644969025, "grad_norm": 8.046541729293992, "learning_rate": 3.110451084088201e-07, "loss": 0.6585, "step": 23373 }, { "epoch": 1.6886593096971119, "grad_norm": 8.663849397176357, "learning_rate": 3.109038243802434e-07, "loss": 0.5752, "step": 23374 }, { "epoch": 1.6887315548973216, "grad_norm": 7.938093028105149, "learning_rate": 3.107625703186942e-07, "loss": 0.5996, "step": 23375 }, { "epoch": 1.688803800097531, "grad_norm": 7.036890099015687, "learning_rate": 3.1062134622610456e-07, "loss": 0.5666, "step": 23376 }, { "epoch": 1.6888760452977405, "grad_norm": 7.153703661336466, "learning_rate": 3.10480152104409e-07, "loss": 0.6144, "step": 23377 }, { "epoch": 1.68894829049795, "grad_norm": 8.08549732724021, "learning_rate": 3.1033898795554045e-07, "loss": 0.6733, "step": 23378 }, { "epoch": 1.6890205356981596, "grad_norm": 7.298422802416425, "learning_rate": 3.1019785378143126e-07, "loss": 0.6305, "step": 23379 }, { "epoch": 1.6890927808983691, "grad_norm": 6.9484985046934105, "learning_rate": 3.100567495840129e-07, "loss": 0.6044, "step": 23380 }, { "epoch": 1.6891650260985784, "grad_norm": 7.675583201563961, "learning_rate": 3.0991567536521715e-07, "loss": 0.5538, "step": 23381 }, { "epoch": 1.6892372712987882, "grad_norm": 6.526654100311827, "learning_rate": 3.0977463112697587e-07, "loss": 0.6111, "step": 23382 }, { "epoch": 1.6893095164989975, "grad_norm": 7.447444390393127, "learning_rate": 3.096336168712186e-07, "loss": 0.6186, "step": 23383 }, { "epoch": 1.689381761699207, "grad_norm": 6.826576179461003, "learning_rate": 3.094926325998765e-07, "loss": 0.5739, "step": 23384 }, { "epoch": 1.6894540068994166, "grad_norm": 7.201975699729359, "learning_rate": 3.093516783148795e-07, "loss": 0.5641, "step": 23385 }, { "epoch": 1.6895262520996261, "grad_norm": 6.6250538373954875, "learning_rate": 3.0921075401815714e-07, "loss": 0.5637, "step": 23386 }, { "epoch": 1.6895984972998357, "grad_norm": 7.59287913159956, "learning_rate": 3.0906985971163846e-07, "loss": 0.6944, "step": 23387 }, { "epoch": 1.689670742500045, "grad_norm": 7.481726191637905, "learning_rate": 3.089289953972521e-07, "loss": 0.5937, "step": 23388 }, { "epoch": 1.6897429877002548, "grad_norm": 7.36800039424336, "learning_rate": 3.087881610769272e-07, "loss": 0.615, "step": 23389 }, { "epoch": 1.689815232900464, "grad_norm": 7.5588478538521695, "learning_rate": 3.086473567525905e-07, "loss": 0.6124, "step": 23390 }, { "epoch": 1.6898874781006736, "grad_norm": 6.592372370711556, "learning_rate": 3.0850658242617017e-07, "loss": 0.5579, "step": 23391 }, { "epoch": 1.6899597233008832, "grad_norm": 6.504969531141112, "learning_rate": 3.083658380995927e-07, "loss": 0.556, "step": 23392 }, { "epoch": 1.6900319685010927, "grad_norm": 7.10918531898404, "learning_rate": 3.08225123774786e-07, "loss": 0.5831, "step": 23393 }, { "epoch": 1.6901042137013023, "grad_norm": 6.973764574029179, "learning_rate": 3.080844394536753e-07, "loss": 0.6673, "step": 23394 }, { "epoch": 1.6901764589015116, "grad_norm": 7.05335555065288, "learning_rate": 3.079437851381864e-07, "loss": 0.6079, "step": 23395 }, { "epoch": 1.6902487041017213, "grad_norm": 7.982285914932788, "learning_rate": 3.078031608302451e-07, "loss": 0.6477, "step": 23396 }, { "epoch": 1.6903209493019307, "grad_norm": 7.72287433106027, "learning_rate": 3.0766256653177616e-07, "loss": 0.651, "step": 23397 }, { "epoch": 1.6903931945021404, "grad_norm": 6.9320781244012775, "learning_rate": 3.075220022447048e-07, "loss": 0.6053, "step": 23398 }, { "epoch": 1.6904654397023497, "grad_norm": 8.587893512266401, "learning_rate": 3.073814679709547e-07, "loss": 0.6605, "step": 23399 }, { "epoch": 1.6905376849025593, "grad_norm": 8.365658688967233, "learning_rate": 3.072409637124504e-07, "loss": 0.5467, "step": 23400 }, { "epoch": 1.6906099301027688, "grad_norm": 9.057794862222995, "learning_rate": 3.071004894711144e-07, "loss": 0.6428, "step": 23401 }, { "epoch": 1.6906821753029782, "grad_norm": 7.593480737252927, "learning_rate": 3.069600452488697e-07, "loss": 0.5923, "step": 23402 }, { "epoch": 1.690754420503188, "grad_norm": 7.070352965181165, "learning_rate": 3.0681963104763954e-07, "loss": 0.643, "step": 23403 }, { "epoch": 1.6908266657033972, "grad_norm": 5.5336495060194, "learning_rate": 3.0667924686934555e-07, "loss": 0.6381, "step": 23404 }, { "epoch": 1.690898910903607, "grad_norm": 8.592126089467303, "learning_rate": 3.0653889271590965e-07, "loss": 0.6201, "step": 23405 }, { "epoch": 1.6909711561038163, "grad_norm": 7.687531013736348, "learning_rate": 3.0639856858925317e-07, "loss": 0.6279, "step": 23406 }, { "epoch": 1.6910434013040259, "grad_norm": 7.280219664141618, "learning_rate": 3.0625827449129756e-07, "loss": 0.6042, "step": 23407 }, { "epoch": 1.6911156465042354, "grad_norm": 6.925604166605955, "learning_rate": 3.061180104239625e-07, "loss": 0.617, "step": 23408 }, { "epoch": 1.6911878917044447, "grad_norm": 7.396987466799195, "learning_rate": 3.059777763891683e-07, "loss": 0.5859, "step": 23409 }, { "epoch": 1.6912601369046545, "grad_norm": 6.664065934568282, "learning_rate": 3.058375723888349e-07, "loss": 0.6519, "step": 23410 }, { "epoch": 1.6913323821048638, "grad_norm": 9.038664100986868, "learning_rate": 3.0569739842488176e-07, "loss": 0.6456, "step": 23411 }, { "epoch": 1.6914046273050736, "grad_norm": 8.219552932796228, "learning_rate": 3.0555725449922717e-07, "loss": 0.5616, "step": 23412 }, { "epoch": 1.691476872505283, "grad_norm": 8.404532218047548, "learning_rate": 3.0541714061378987e-07, "loss": 0.6097, "step": 23413 }, { "epoch": 1.6915491177054924, "grad_norm": 6.818399377299586, "learning_rate": 3.0527705677048796e-07, "loss": 0.576, "step": 23414 }, { "epoch": 1.691621362905702, "grad_norm": 6.383563703691069, "learning_rate": 3.0513700297123886e-07, "loss": 0.5352, "step": 23415 }, { "epoch": 1.6916936081059115, "grad_norm": 8.329704664474201, "learning_rate": 3.0499697921796026e-07, "loss": 0.5761, "step": 23416 }, { "epoch": 1.691765853306121, "grad_norm": 6.528550169499644, "learning_rate": 3.0485698551256855e-07, "loss": 0.6211, "step": 23417 }, { "epoch": 1.6918380985063304, "grad_norm": 6.68273470790735, "learning_rate": 3.0471702185698086e-07, "loss": 0.6039, "step": 23418 }, { "epoch": 1.6919103437065401, "grad_norm": 7.694978627354299, "learning_rate": 3.045770882531121e-07, "loss": 0.559, "step": 23419 }, { "epoch": 1.6919825889067495, "grad_norm": 6.659831484933748, "learning_rate": 3.044371847028782e-07, "loss": 0.5621, "step": 23420 }, { "epoch": 1.692054834106959, "grad_norm": 7.083271370411045, "learning_rate": 3.042973112081951e-07, "loss": 0.6291, "step": 23421 }, { "epoch": 1.6921270793071685, "grad_norm": 7.254964509925821, "learning_rate": 3.041574677709766e-07, "loss": 0.6075, "step": 23422 }, { "epoch": 1.692199324507378, "grad_norm": 7.040447272484246, "learning_rate": 3.040176543931372e-07, "loss": 0.5049, "step": 23423 }, { "epoch": 1.6922715697075876, "grad_norm": 7.236266974269288, "learning_rate": 3.038778710765905e-07, "loss": 0.5841, "step": 23424 }, { "epoch": 1.692343814907797, "grad_norm": 6.188430870240225, "learning_rate": 3.037381178232518e-07, "loss": 0.63, "step": 23425 }, { "epoch": 1.6924160601080067, "grad_norm": 6.577459523661122, "learning_rate": 3.035983946350324e-07, "loss": 0.5947, "step": 23426 }, { "epoch": 1.692488305308216, "grad_norm": 6.657646784838463, "learning_rate": 3.0345870151384544e-07, "loss": 0.5949, "step": 23427 }, { "epoch": 1.6925605505084256, "grad_norm": 6.6278219870476445, "learning_rate": 3.033190384616036e-07, "loss": 0.6101, "step": 23428 }, { "epoch": 1.6926327957086351, "grad_norm": 7.898880611256804, "learning_rate": 3.03179405480219e-07, "loss": 0.6148, "step": 23429 }, { "epoch": 1.6927050409088447, "grad_norm": 7.042759135295837, "learning_rate": 3.0303980257160194e-07, "loss": 0.5937, "step": 23430 }, { "epoch": 1.6927772861090542, "grad_norm": 8.173020000000621, "learning_rate": 3.0290022973766403e-07, "loss": 0.5625, "step": 23431 }, { "epoch": 1.6928495313092635, "grad_norm": 6.321256399696434, "learning_rate": 3.027606869803162e-07, "loss": 0.5771, "step": 23432 }, { "epoch": 1.6929217765094733, "grad_norm": 6.530426411514867, "learning_rate": 3.026211743014687e-07, "loss": 0.6135, "step": 23433 }, { "epoch": 1.6929940217096826, "grad_norm": 6.199764542569402, "learning_rate": 3.02481691703031e-07, "loss": 0.5599, "step": 23434 }, { "epoch": 1.6930662669098921, "grad_norm": 8.197333800273444, "learning_rate": 3.023422391869127e-07, "loss": 0.6372, "step": 23435 }, { "epoch": 1.6931385121101017, "grad_norm": 5.893229022821842, "learning_rate": 3.022028167550231e-07, "loss": 0.6052, "step": 23436 }, { "epoch": 1.6932107573103112, "grad_norm": 9.863376778621548, "learning_rate": 3.0206342440927015e-07, "loss": 0.6185, "step": 23437 }, { "epoch": 1.6932830025105208, "grad_norm": 7.104756022270316, "learning_rate": 3.019240621515623e-07, "loss": 0.6686, "step": 23438 }, { "epoch": 1.69335524771073, "grad_norm": 8.80914848932254, "learning_rate": 3.017847299838078e-07, "loss": 0.6643, "step": 23439 }, { "epoch": 1.6934274929109399, "grad_norm": 8.965824939496818, "learning_rate": 3.016454279079131e-07, "loss": 0.6287, "step": 23440 }, { "epoch": 1.6934997381111492, "grad_norm": 6.562407284036268, "learning_rate": 3.0150615592578566e-07, "loss": 0.5537, "step": 23441 }, { "epoch": 1.6935719833113587, "grad_norm": 6.5941807032207205, "learning_rate": 3.013669140393319e-07, "loss": 0.6044, "step": 23442 }, { "epoch": 1.6936442285115683, "grad_norm": 7.774058990937751, "learning_rate": 3.0122770225045783e-07, "loss": 0.6086, "step": 23443 }, { "epoch": 1.6937164737117778, "grad_norm": 7.748334890348897, "learning_rate": 3.010885205610692e-07, "loss": 0.6272, "step": 23444 }, { "epoch": 1.6937887189119873, "grad_norm": 8.017696358369998, "learning_rate": 3.0094936897307165e-07, "loss": 0.6082, "step": 23445 }, { "epoch": 1.6938609641121967, "grad_norm": 5.852227085403544, "learning_rate": 3.008102474883701e-07, "loss": 0.5285, "step": 23446 }, { "epoch": 1.6939332093124064, "grad_norm": 6.850457514499231, "learning_rate": 3.006711561088682e-07, "loss": 0.6454, "step": 23447 }, { "epoch": 1.6940054545126157, "grad_norm": 6.921605503183436, "learning_rate": 3.005320948364707e-07, "loss": 0.5307, "step": 23448 }, { "epoch": 1.6940776997128253, "grad_norm": 7.649087849010168, "learning_rate": 3.003930636730809e-07, "loss": 0.611, "step": 23449 }, { "epoch": 1.6941499449130348, "grad_norm": 7.426743366295121, "learning_rate": 3.002540626206027e-07, "loss": 0.6266, "step": 23450 }, { "epoch": 1.6942221901132444, "grad_norm": 7.939134084149449, "learning_rate": 3.0011509168093783e-07, "loss": 0.5761, "step": 23451 }, { "epoch": 1.694294435313454, "grad_norm": 7.614487201659629, "learning_rate": 2.9997615085598924e-07, "loss": 0.6244, "step": 23452 }, { "epoch": 1.6943666805136632, "grad_norm": 8.531938126866814, "learning_rate": 2.9983724014765903e-07, "loss": 0.6179, "step": 23453 }, { "epoch": 1.694438925713873, "grad_norm": 8.994668865120275, "learning_rate": 2.996983595578487e-07, "loss": 0.5856, "step": 23454 }, { "epoch": 1.6945111709140823, "grad_norm": 7.800452869668775, "learning_rate": 2.995595090884593e-07, "loss": 0.6572, "step": 23455 }, { "epoch": 1.6945834161142919, "grad_norm": 6.674306243390874, "learning_rate": 2.994206887413917e-07, "loss": 0.5869, "step": 23456 }, { "epoch": 1.6946556613145014, "grad_norm": 7.180579527522428, "learning_rate": 2.9928189851854666e-07, "loss": 0.537, "step": 23457 }, { "epoch": 1.694727906514711, "grad_norm": 6.6963485218894725, "learning_rate": 2.991431384218232e-07, "loss": 0.5944, "step": 23458 }, { "epoch": 1.6948001517149205, "grad_norm": 7.382244366780955, "learning_rate": 2.9900440845312157e-07, "loss": 0.6478, "step": 23459 }, { "epoch": 1.6948723969151298, "grad_norm": 7.034901937900692, "learning_rate": 2.9886570861434037e-07, "loss": 0.549, "step": 23460 }, { "epoch": 1.6949446421153396, "grad_norm": 7.435774659139029, "learning_rate": 2.9872703890737873e-07, "loss": 0.5682, "step": 23461 }, { "epoch": 1.695016887315549, "grad_norm": 7.132396138501679, "learning_rate": 2.985883993341346e-07, "loss": 0.6148, "step": 23462 }, { "epoch": 1.6950891325157584, "grad_norm": 6.8875246577159, "learning_rate": 2.984497898965061e-07, "loss": 0.6115, "step": 23463 }, { "epoch": 1.695161377715968, "grad_norm": 7.373623477039788, "learning_rate": 2.9831121059639116e-07, "loss": 0.5662, "step": 23464 }, { "epoch": 1.6952336229161775, "grad_norm": 7.037721902116707, "learning_rate": 2.981726614356856e-07, "loss": 0.6169, "step": 23465 }, { "epoch": 1.695305868116387, "grad_norm": 7.597587212491609, "learning_rate": 2.9803414241628664e-07, "loss": 0.5812, "step": 23466 }, { "epoch": 1.6953781133165964, "grad_norm": 6.8096164585280325, "learning_rate": 2.9789565354009065e-07, "loss": 0.6696, "step": 23467 }, { "epoch": 1.6954503585168061, "grad_norm": 15.739820308992572, "learning_rate": 2.9775719480899384e-07, "loss": 0.5895, "step": 23468 }, { "epoch": 1.6955226037170155, "grad_norm": 8.23464090978386, "learning_rate": 2.9761876622489054e-07, "loss": 0.6363, "step": 23469 }, { "epoch": 1.6955948489172252, "grad_norm": 7.205207573298385, "learning_rate": 2.974803677896765e-07, "loss": 0.5924, "step": 23470 }, { "epoch": 1.6956670941174345, "grad_norm": 7.546076825189375, "learning_rate": 2.9734199950524593e-07, "loss": 0.641, "step": 23471 }, { "epoch": 1.695739339317644, "grad_norm": 7.208620569169521, "learning_rate": 2.97203661373493e-07, "loss": 0.7198, "step": 23472 }, { "epoch": 1.6958115845178536, "grad_norm": 8.912879163212489, "learning_rate": 2.9706535339631166e-07, "loss": 0.5949, "step": 23473 }, { "epoch": 1.695883829718063, "grad_norm": 7.7909311637594065, "learning_rate": 2.969270755755954e-07, "loss": 0.6318, "step": 23474 }, { "epoch": 1.6959560749182727, "grad_norm": 8.025044816031647, "learning_rate": 2.9678882791323723e-07, "loss": 0.5939, "step": 23475 }, { "epoch": 1.696028320118482, "grad_norm": 8.343998708840084, "learning_rate": 2.966506104111286e-07, "loss": 0.6448, "step": 23476 }, { "epoch": 1.6961005653186918, "grad_norm": 10.252470137527757, "learning_rate": 2.9651242307116273e-07, "loss": 0.6797, "step": 23477 }, { "epoch": 1.6961728105189011, "grad_norm": 7.393648267160558, "learning_rate": 2.963742658952312e-07, "loss": 0.6724, "step": 23478 }, { "epoch": 1.6962450557191107, "grad_norm": 7.657523660402176, "learning_rate": 2.962361388852245e-07, "loss": 0.6173, "step": 23479 }, { "epoch": 1.6963173009193202, "grad_norm": 7.212562981236336, "learning_rate": 2.960980420430337e-07, "loss": 0.6272, "step": 23480 }, { "epoch": 1.6963895461195295, "grad_norm": 9.728233494139824, "learning_rate": 2.9595997537054915e-07, "loss": 0.6232, "step": 23481 }, { "epoch": 1.6964617913197393, "grad_norm": 6.4274827368321565, "learning_rate": 2.9582193886966233e-07, "loss": 0.5729, "step": 23482 }, { "epoch": 1.6965340365199486, "grad_norm": 6.68336858319938, "learning_rate": 2.9568393254226116e-07, "loss": 0.5813, "step": 23483 }, { "epoch": 1.6966062817201584, "grad_norm": 6.918386988433685, "learning_rate": 2.9554595639023545e-07, "loss": 0.6464, "step": 23484 }, { "epoch": 1.6966785269203677, "grad_norm": 7.304495840567183, "learning_rate": 2.9540801041547385e-07, "loss": 0.5888, "step": 23485 }, { "epoch": 1.6967507721205772, "grad_norm": 6.813207467117835, "learning_rate": 2.9527009461986527e-07, "loss": 0.5973, "step": 23486 }, { "epoch": 1.6968230173207868, "grad_norm": 8.006130730412847, "learning_rate": 2.9513220900529686e-07, "loss": 0.6164, "step": 23487 }, { "epoch": 1.696895262520996, "grad_norm": 6.717769391557329, "learning_rate": 2.949943535736566e-07, "loss": 0.5741, "step": 23488 }, { "epoch": 1.6969675077212059, "grad_norm": 6.909125786560951, "learning_rate": 2.9485652832683146e-07, "loss": 0.5918, "step": 23489 }, { "epoch": 1.6970397529214152, "grad_norm": 7.065031155074121, "learning_rate": 2.9471873326670843e-07, "loss": 0.6195, "step": 23490 }, { "epoch": 1.697111998121625, "grad_norm": 6.372380335489771, "learning_rate": 2.945809683951734e-07, "loss": 0.6108, "step": 23491 }, { "epoch": 1.6971842433218343, "grad_norm": 6.601576368328629, "learning_rate": 2.944432337141126e-07, "loss": 0.5926, "step": 23492 }, { "epoch": 1.6972564885220438, "grad_norm": 8.619825497164227, "learning_rate": 2.943055292254121e-07, "loss": 0.6787, "step": 23493 }, { "epoch": 1.6973287337222533, "grad_norm": 7.441635136325703, "learning_rate": 2.941678549309557e-07, "loss": 0.5994, "step": 23494 }, { "epoch": 1.6974009789224629, "grad_norm": 8.381199150869937, "learning_rate": 2.940302108326284e-07, "loss": 0.5494, "step": 23495 }, { "epoch": 1.6974732241226724, "grad_norm": 6.731350035707745, "learning_rate": 2.938925969323156e-07, "loss": 0.6233, "step": 23496 }, { "epoch": 1.6975454693228818, "grad_norm": 6.975267313548563, "learning_rate": 2.937550132318995e-07, "loss": 0.6515, "step": 23497 }, { "epoch": 1.6976177145230915, "grad_norm": 7.219171420920733, "learning_rate": 2.9361745973326414e-07, "loss": 0.5905, "step": 23498 }, { "epoch": 1.6976899597233008, "grad_norm": 7.296572823976057, "learning_rate": 2.934799364382926e-07, "loss": 0.5609, "step": 23499 }, { "epoch": 1.6977622049235104, "grad_norm": 6.9044698086546115, "learning_rate": 2.933424433488677e-07, "loss": 0.6848, "step": 23500 }, { "epoch": 1.69783445012372, "grad_norm": 6.31991323145926, "learning_rate": 2.932049804668713e-07, "loss": 0.6285, "step": 23501 }, { "epoch": 1.6979066953239295, "grad_norm": 6.030780991418531, "learning_rate": 2.9306754779418525e-07, "loss": 0.6784, "step": 23502 }, { "epoch": 1.697978940524139, "grad_norm": 6.983571802163045, "learning_rate": 2.929301453326913e-07, "loss": 0.615, "step": 23503 }, { "epoch": 1.6980511857243483, "grad_norm": 7.170734256619244, "learning_rate": 2.9279277308426947e-07, "loss": 0.6149, "step": 23504 }, { "epoch": 1.698123430924558, "grad_norm": 7.862993078735985, "learning_rate": 2.9265543105080093e-07, "loss": 0.684, "step": 23505 }, { "epoch": 1.6981956761247674, "grad_norm": 8.304653049228872, "learning_rate": 2.925181192341656e-07, "loss": 0.6009, "step": 23506 }, { "epoch": 1.698267921324977, "grad_norm": 7.257161253915475, "learning_rate": 2.9238083763624387e-07, "loss": 0.5832, "step": 23507 }, { "epoch": 1.6983401665251865, "grad_norm": 7.447473330518818, "learning_rate": 2.922435862589135e-07, "loss": 0.6238, "step": 23508 }, { "epoch": 1.698412411725396, "grad_norm": 7.137854240966483, "learning_rate": 2.92106365104054e-07, "loss": 0.6852, "step": 23509 }, { "epoch": 1.6984846569256056, "grad_norm": 8.39576765339216, "learning_rate": 2.9196917417354446e-07, "loss": 0.6112, "step": 23510 }, { "epoch": 1.698556902125815, "grad_norm": 7.910705517195363, "learning_rate": 2.9183201346926317e-07, "loss": 0.714, "step": 23511 }, { "epoch": 1.6986291473260247, "grad_norm": 8.08782813041842, "learning_rate": 2.9169488299308627e-07, "loss": 0.6651, "step": 23512 }, { "epoch": 1.698701392526234, "grad_norm": 8.158553576459552, "learning_rate": 2.9155778274689184e-07, "loss": 0.6026, "step": 23513 }, { "epoch": 1.6987736377264435, "grad_norm": 6.937688601567832, "learning_rate": 2.9142071273255744e-07, "loss": 0.6077, "step": 23514 }, { "epoch": 1.698845882926653, "grad_norm": 7.044495174387082, "learning_rate": 2.9128367295195775e-07, "loss": 0.5718, "step": 23515 }, { "epoch": 1.6989181281268626, "grad_norm": 7.435764655252396, "learning_rate": 2.911466634069698e-07, "loss": 0.6339, "step": 23516 }, { "epoch": 1.6989903733270721, "grad_norm": 6.609064478151744, "learning_rate": 2.910096840994689e-07, "loss": 0.5804, "step": 23517 }, { "epoch": 1.6990626185272815, "grad_norm": 7.111698517115292, "learning_rate": 2.9087273503133035e-07, "loss": 0.6195, "step": 23518 }, { "epoch": 1.6991348637274912, "grad_norm": 7.925082127479495, "learning_rate": 2.907358162044288e-07, "loss": 0.5663, "step": 23519 }, { "epoch": 1.6992071089277005, "grad_norm": 6.523621406932374, "learning_rate": 2.9059892762063835e-07, "loss": 0.6357, "step": 23520 }, { "epoch": 1.69927935412791, "grad_norm": 6.93692619081439, "learning_rate": 2.9046206928183386e-07, "loss": 0.6371, "step": 23521 }, { "epoch": 1.6993515993281196, "grad_norm": 7.76234191635277, "learning_rate": 2.903252411898874e-07, "loss": 0.6235, "step": 23522 }, { "epoch": 1.6994238445283292, "grad_norm": 7.0412678201193595, "learning_rate": 2.901884433466726e-07, "loss": 0.6404, "step": 23523 }, { "epoch": 1.6994960897285387, "grad_norm": 8.417865881767062, "learning_rate": 2.900516757540625e-07, "loss": 0.6019, "step": 23524 }, { "epoch": 1.699568334928748, "grad_norm": 6.312893694696364, "learning_rate": 2.899149384139291e-07, "loss": 0.6061, "step": 23525 }, { "epoch": 1.6996405801289578, "grad_norm": 7.045271934312966, "learning_rate": 2.8977823132814414e-07, "loss": 0.5635, "step": 23526 }, { "epoch": 1.6997128253291671, "grad_norm": 6.837290044765543, "learning_rate": 2.8964155449857864e-07, "loss": 0.6409, "step": 23527 }, { "epoch": 1.6997850705293767, "grad_norm": 8.301356597707672, "learning_rate": 2.8950490792710425e-07, "loss": 0.5947, "step": 23528 }, { "epoch": 1.6998573157295862, "grad_norm": 8.172268042949907, "learning_rate": 2.8936829161559125e-07, "loss": 0.6396, "step": 23529 }, { "epoch": 1.6999295609297957, "grad_norm": 7.296570994152327, "learning_rate": 2.8923170556590996e-07, "loss": 0.5889, "step": 23530 }, { "epoch": 1.7000018061300053, "grad_norm": 6.669490057398123, "learning_rate": 2.890951497799302e-07, "loss": 0.5299, "step": 23531 }, { "epoch": 1.7000740513302146, "grad_norm": 7.53720401922956, "learning_rate": 2.889586242595216e-07, "loss": 0.589, "step": 23532 }, { "epoch": 1.7001462965304244, "grad_norm": 7.368113519231051, "learning_rate": 2.8882212900655207e-07, "loss": 0.6598, "step": 23533 }, { "epoch": 1.7002185417306337, "grad_norm": 6.721800781294266, "learning_rate": 2.8868566402289077e-07, "loss": 0.5573, "step": 23534 }, { "epoch": 1.7002907869308432, "grad_norm": 7.077464496034364, "learning_rate": 2.885492293104056e-07, "loss": 0.6411, "step": 23535 }, { "epoch": 1.7003630321310528, "grad_norm": 7.344037249202633, "learning_rate": 2.884128248709653e-07, "loss": 0.6214, "step": 23536 }, { "epoch": 1.7004352773312623, "grad_norm": 7.83625976873444, "learning_rate": 2.8827645070643477e-07, "loss": 0.5274, "step": 23537 }, { "epoch": 1.7005075225314719, "grad_norm": 7.929136901682001, "learning_rate": 2.881401068186832e-07, "loss": 0.6847, "step": 23538 }, { "epoch": 1.7005797677316812, "grad_norm": 7.667236417525324, "learning_rate": 2.8800379320957637e-07, "loss": 0.6114, "step": 23539 }, { "epoch": 1.700652012931891, "grad_norm": 7.9400318343055245, "learning_rate": 2.878675098809799e-07, "loss": 0.6194, "step": 23540 }, { "epoch": 1.7007242581321003, "grad_norm": 8.048838314798816, "learning_rate": 2.8773125683475955e-07, "loss": 0.6162, "step": 23541 }, { "epoch": 1.7007965033323098, "grad_norm": 7.930782569476145, "learning_rate": 2.875950340727804e-07, "loss": 0.5077, "step": 23542 }, { "epoch": 1.7008687485325193, "grad_norm": 6.829980942795893, "learning_rate": 2.874588415969082e-07, "loss": 0.6227, "step": 23543 }, { "epoch": 1.700940993732729, "grad_norm": 8.008711363424, "learning_rate": 2.873226794090056e-07, "loss": 0.5911, "step": 23544 }, { "epoch": 1.7010132389329384, "grad_norm": 7.426600828900539, "learning_rate": 2.8718654751093783e-07, "loss": 0.5649, "step": 23545 }, { "epoch": 1.7010854841331478, "grad_norm": 6.445303622152976, "learning_rate": 2.870504459045681e-07, "loss": 0.5894, "step": 23546 }, { "epoch": 1.7011577293333575, "grad_norm": 7.119699748705612, "learning_rate": 2.8691437459175935e-07, "loss": 0.6458, "step": 23547 }, { "epoch": 1.7012299745335668, "grad_norm": 7.669985536564167, "learning_rate": 2.8677833357437454e-07, "loss": 0.5866, "step": 23548 }, { "epoch": 1.7013022197337766, "grad_norm": 5.983668671599755, "learning_rate": 2.8664232285427586e-07, "loss": 0.5981, "step": 23549 }, { "epoch": 1.701374464933986, "grad_norm": 7.087052599259986, "learning_rate": 2.865063424333256e-07, "loss": 0.6107, "step": 23550 }, { "epoch": 1.7014467101341955, "grad_norm": 7.159226777079652, "learning_rate": 2.8637039231338454e-07, "loss": 0.6572, "step": 23551 }, { "epoch": 1.701518955334405, "grad_norm": 9.123664235627738, "learning_rate": 2.86234472496314e-07, "loss": 0.6681, "step": 23552 }, { "epoch": 1.7015912005346143, "grad_norm": 7.9780711989899125, "learning_rate": 2.860985829839752e-07, "loss": 0.6721, "step": 23553 }, { "epoch": 1.701663445734824, "grad_norm": 6.850014802123178, "learning_rate": 2.859627237782273e-07, "loss": 0.5868, "step": 23554 }, { "epoch": 1.7017356909350334, "grad_norm": 7.673008603876074, "learning_rate": 2.8582689488093033e-07, "loss": 0.589, "step": 23555 }, { "epoch": 1.7018079361352432, "grad_norm": 6.905398493268153, "learning_rate": 2.856910962939444e-07, "loss": 0.5728, "step": 23556 }, { "epoch": 1.7018801813354525, "grad_norm": 7.410585060240425, "learning_rate": 2.855553280191278e-07, "loss": 0.5707, "step": 23557 }, { "epoch": 1.701952426535662, "grad_norm": 7.322682651418051, "learning_rate": 2.854195900583395e-07, "loss": 0.612, "step": 23558 }, { "epoch": 1.7020246717358716, "grad_norm": 7.644475111172892, "learning_rate": 2.852838824134374e-07, "loss": 0.6058, "step": 23559 }, { "epoch": 1.702096916936081, "grad_norm": 7.4082584796409385, "learning_rate": 2.851482050862797e-07, "loss": 0.6455, "step": 23560 }, { "epoch": 1.7021691621362907, "grad_norm": 6.357567382647418, "learning_rate": 2.8501255807872303e-07, "loss": 0.5903, "step": 23561 }, { "epoch": 1.7022414073365, "grad_norm": 7.048034978496167, "learning_rate": 2.848769413926247e-07, "loss": 0.6173, "step": 23562 }, { "epoch": 1.7023136525367097, "grad_norm": 8.561881606889377, "learning_rate": 2.847413550298411e-07, "loss": 0.6083, "step": 23563 }, { "epoch": 1.702385897736919, "grad_norm": 7.3148844165235865, "learning_rate": 2.846057989922285e-07, "loss": 0.6368, "step": 23564 }, { "epoch": 1.7024581429371286, "grad_norm": 8.480139474261593, "learning_rate": 2.8447027328164236e-07, "loss": 0.647, "step": 23565 }, { "epoch": 1.7025303881373381, "grad_norm": 7.050130629174734, "learning_rate": 2.8433477789993695e-07, "loss": 0.6014, "step": 23566 }, { "epoch": 1.7026026333375477, "grad_norm": 8.826183328369975, "learning_rate": 2.8419931284896896e-07, "loss": 0.6156, "step": 23567 }, { "epoch": 1.7026748785377572, "grad_norm": 6.61074908607216, "learning_rate": 2.840638781305921e-07, "loss": 0.6307, "step": 23568 }, { "epoch": 1.7027471237379666, "grad_norm": 6.373694791133895, "learning_rate": 2.8392847374665996e-07, "loss": 0.559, "step": 23569 }, { "epoch": 1.7028193689381763, "grad_norm": 7.231679723264371, "learning_rate": 2.837930996990262e-07, "loss": 0.5281, "step": 23570 }, { "epoch": 1.7028916141383856, "grad_norm": 6.7225524078630166, "learning_rate": 2.8365775598954454e-07, "loss": 0.6405, "step": 23571 }, { "epoch": 1.7029638593385952, "grad_norm": 8.349257348207777, "learning_rate": 2.8352244262006693e-07, "loss": 0.6607, "step": 23572 }, { "epoch": 1.7030361045388047, "grad_norm": 6.667483883678409, "learning_rate": 2.8338715959244615e-07, "loss": 0.6064, "step": 23573 }, { "epoch": 1.7031083497390143, "grad_norm": 6.155053656659495, "learning_rate": 2.8325190690853403e-07, "loss": 0.5382, "step": 23574 }, { "epoch": 1.7031805949392238, "grad_norm": 6.724264747105081, "learning_rate": 2.8311668457018213e-07, "loss": 0.5987, "step": 23575 }, { "epoch": 1.7032528401394331, "grad_norm": 7.185264438761571, "learning_rate": 2.829814925792415e-07, "loss": 0.6669, "step": 23576 }, { "epoch": 1.7033250853396429, "grad_norm": 6.151548807769878, "learning_rate": 2.828463309375626e-07, "loss": 0.564, "step": 23577 }, { "epoch": 1.7033973305398522, "grad_norm": 7.347579152556042, "learning_rate": 2.8271119964699667e-07, "loss": 0.5805, "step": 23578 }, { "epoch": 1.7034695757400617, "grad_norm": 7.151257235025454, "learning_rate": 2.825760987093923e-07, "loss": 0.5512, "step": 23579 }, { "epoch": 1.7035418209402713, "grad_norm": 7.3706321066355756, "learning_rate": 2.8244102812659935e-07, "loss": 0.6205, "step": 23580 }, { "epoch": 1.7036140661404808, "grad_norm": 6.8242875059143655, "learning_rate": 2.823059879004669e-07, "loss": 0.6019, "step": 23581 }, { "epoch": 1.7036863113406904, "grad_norm": 6.566488452694121, "learning_rate": 2.821709780328444e-07, "loss": 0.5533, "step": 23582 }, { "epoch": 1.7037585565408997, "grad_norm": 7.232604104593936, "learning_rate": 2.8203599852557827e-07, "loss": 0.7156, "step": 23583 }, { "epoch": 1.7038308017411095, "grad_norm": 7.077567981755947, "learning_rate": 2.8190104938051716e-07, "loss": 0.6513, "step": 23584 }, { "epoch": 1.7039030469413188, "grad_norm": 6.0224326714362215, "learning_rate": 2.817661305995087e-07, "loss": 0.5713, "step": 23585 }, { "epoch": 1.7039752921415283, "grad_norm": 7.023372866255716, "learning_rate": 2.8163124218439946e-07, "loss": 0.6126, "step": 23586 }, { "epoch": 1.7040475373417379, "grad_norm": 6.3905617260773475, "learning_rate": 2.814963841370358e-07, "loss": 0.5499, "step": 23587 }, { "epoch": 1.7041197825419474, "grad_norm": 7.747373812775078, "learning_rate": 2.813615564592645e-07, "loss": 0.6309, "step": 23588 }, { "epoch": 1.704192027742157, "grad_norm": 7.070634866183423, "learning_rate": 2.812267591529311e-07, "loss": 0.5525, "step": 23589 }, { "epoch": 1.7042642729423663, "grad_norm": 6.694995423045456, "learning_rate": 2.8109199221988013e-07, "loss": 0.6037, "step": 23590 }, { "epoch": 1.704336518142576, "grad_norm": 7.369463119102935, "learning_rate": 2.8095725566195693e-07, "loss": 0.5465, "step": 23591 }, { "epoch": 1.7044087633427853, "grad_norm": 7.791744156994417, "learning_rate": 2.80822549481006e-07, "loss": 0.5929, "step": 23592 }, { "epoch": 1.704481008542995, "grad_norm": 7.895670329670178, "learning_rate": 2.806878736788715e-07, "loss": 0.6688, "step": 23593 }, { "epoch": 1.7045532537432044, "grad_norm": 8.527485111059177, "learning_rate": 2.805532282573961e-07, "loss": 0.6799, "step": 23594 }, { "epoch": 1.704625498943414, "grad_norm": 7.4735447005986195, "learning_rate": 2.804186132184242e-07, "loss": 0.593, "step": 23595 }, { "epoch": 1.7046977441436235, "grad_norm": 7.333158086358922, "learning_rate": 2.8028402856379865e-07, "loss": 0.6782, "step": 23596 }, { "epoch": 1.7047699893438328, "grad_norm": 8.348368191739254, "learning_rate": 2.801494742953606e-07, "loss": 0.5644, "step": 23597 }, { "epoch": 1.7048422345440426, "grad_norm": 7.186869187246948, "learning_rate": 2.800149504149527e-07, "loss": 0.5822, "step": 23598 }, { "epoch": 1.704914479744252, "grad_norm": 9.045313039816879, "learning_rate": 2.7988045692441624e-07, "loss": 0.5438, "step": 23599 }, { "epoch": 1.7049867249444615, "grad_norm": 7.660268546394497, "learning_rate": 2.7974599382559335e-07, "loss": 0.6459, "step": 23600 }, { "epoch": 1.705058970144671, "grad_norm": 7.248200818140848, "learning_rate": 2.7961156112032315e-07, "loss": 0.5689, "step": 23601 }, { "epoch": 1.7051312153448805, "grad_norm": 6.634972030937158, "learning_rate": 2.7947715881044666e-07, "loss": 0.6144, "step": 23602 }, { "epoch": 1.70520346054509, "grad_norm": 6.708187156964341, "learning_rate": 2.793427868978038e-07, "loss": 0.5752, "step": 23603 }, { "epoch": 1.7052757057452994, "grad_norm": 6.8749151051221515, "learning_rate": 2.792084453842342e-07, "loss": 0.6754, "step": 23604 }, { "epoch": 1.7053479509455092, "grad_norm": 8.035753939091771, "learning_rate": 2.790741342715764e-07, "loss": 0.6876, "step": 23605 }, { "epoch": 1.7054201961457185, "grad_norm": 9.71449112875322, "learning_rate": 2.789398535616691e-07, "loss": 0.6164, "step": 23606 }, { "epoch": 1.705492441345928, "grad_norm": 8.372134956974413, "learning_rate": 2.788056032563516e-07, "loss": 0.5638, "step": 23607 }, { "epoch": 1.7055646865461376, "grad_norm": 7.833801039131842, "learning_rate": 2.7867138335745974e-07, "loss": 0.5661, "step": 23608 }, { "epoch": 1.7056369317463471, "grad_norm": 7.7348647453846615, "learning_rate": 2.7853719386683217e-07, "loss": 0.603, "step": 23609 }, { "epoch": 1.7057091769465567, "grad_norm": 8.170855427552782, "learning_rate": 2.784030347863062e-07, "loss": 0.5758, "step": 23610 }, { "epoch": 1.705781422146766, "grad_norm": 6.534329942997605, "learning_rate": 2.782689061177171e-07, "loss": 0.5475, "step": 23611 }, { "epoch": 1.7058536673469757, "grad_norm": 6.273953427279008, "learning_rate": 2.7813480786290146e-07, "loss": 0.5949, "step": 23612 }, { "epoch": 1.705925912547185, "grad_norm": 7.112284775412364, "learning_rate": 2.7800074002369534e-07, "loss": 0.6553, "step": 23613 }, { "epoch": 1.7059981577473946, "grad_norm": 7.557537878448224, "learning_rate": 2.778667026019335e-07, "loss": 0.602, "step": 23614 }, { "epoch": 1.7060704029476041, "grad_norm": 7.507026432737293, "learning_rate": 2.777326955994514e-07, "loss": 0.5897, "step": 23615 }, { "epoch": 1.7061426481478137, "grad_norm": 7.191893188633904, "learning_rate": 2.7759871901808316e-07, "loss": 0.6171, "step": 23616 }, { "epoch": 1.7062148933480232, "grad_norm": 7.77221351073459, "learning_rate": 2.77464772859663e-07, "loss": 0.568, "step": 23617 }, { "epoch": 1.7062871385482326, "grad_norm": 6.946003968496641, "learning_rate": 2.773308571260247e-07, "loss": 0.6194, "step": 23618 }, { "epoch": 1.7063593837484423, "grad_norm": 7.691796071571361, "learning_rate": 2.7719697181900097e-07, "loss": 0.5326, "step": 23619 }, { "epoch": 1.7064316289486516, "grad_norm": 8.420884790755641, "learning_rate": 2.7706311694042447e-07, "loss": 0.6449, "step": 23620 }, { "epoch": 1.7065038741488614, "grad_norm": 6.819257831631503, "learning_rate": 2.7692929249212853e-07, "loss": 0.6195, "step": 23621 }, { "epoch": 1.7065761193490707, "grad_norm": 7.139927268632866, "learning_rate": 2.7679549847594354e-07, "loss": 0.6254, "step": 23622 }, { "epoch": 1.7066483645492803, "grad_norm": 7.403132430363119, "learning_rate": 2.7666173489370227e-07, "loss": 0.522, "step": 23623 }, { "epoch": 1.7067206097494898, "grad_norm": 7.350500761866181, "learning_rate": 2.7652800174723574e-07, "loss": 0.6316, "step": 23624 }, { "epoch": 1.7067928549496991, "grad_norm": 7.013152164589858, "learning_rate": 2.76394299038375e-07, "loss": 0.5689, "step": 23625 }, { "epoch": 1.7068651001499089, "grad_norm": 7.003673679385999, "learning_rate": 2.762606267689491e-07, "loss": 0.5777, "step": 23626 }, { "epoch": 1.7069373453501182, "grad_norm": 8.693992768222564, "learning_rate": 2.761269849407885e-07, "loss": 0.6415, "step": 23627 }, { "epoch": 1.707009590550328, "grad_norm": 7.081068896194026, "learning_rate": 2.759933735557235e-07, "loss": 0.6397, "step": 23628 }, { "epoch": 1.7070818357505373, "grad_norm": 7.335024580859061, "learning_rate": 2.7585979261558175e-07, "loss": 0.6432, "step": 23629 }, { "epoch": 1.7071540809507468, "grad_norm": 5.687390672240777, "learning_rate": 2.757262421221926e-07, "loss": 0.5991, "step": 23630 }, { "epoch": 1.7072263261509564, "grad_norm": 7.4382003326229675, "learning_rate": 2.7559272207738405e-07, "loss": 0.6422, "step": 23631 }, { "epoch": 1.7072985713511657, "grad_norm": 9.872815626928334, "learning_rate": 2.7545923248298414e-07, "loss": 0.664, "step": 23632 }, { "epoch": 1.7073708165513755, "grad_norm": 7.116273320902114, "learning_rate": 2.7532577334082015e-07, "loss": 0.5912, "step": 23633 }, { "epoch": 1.7074430617515848, "grad_norm": 7.351977864610891, "learning_rate": 2.751923446527188e-07, "loss": 0.6446, "step": 23634 }, { "epoch": 1.7075153069517945, "grad_norm": 7.267967847233363, "learning_rate": 2.7505894642050725e-07, "loss": 0.6273, "step": 23635 }, { "epoch": 1.7075875521520039, "grad_norm": 6.34915517158167, "learning_rate": 2.749255786460109e-07, "loss": 0.6256, "step": 23636 }, { "epoch": 1.7076597973522134, "grad_norm": 8.486223613064736, "learning_rate": 2.7479224133105554e-07, "loss": 0.6535, "step": 23637 }, { "epoch": 1.707732042552423, "grad_norm": 6.640184670603431, "learning_rate": 2.7465893447746665e-07, "loss": 0.5508, "step": 23638 }, { "epoch": 1.7078042877526325, "grad_norm": 8.19539907753363, "learning_rate": 2.745256580870698e-07, "loss": 0.5873, "step": 23639 }, { "epoch": 1.707876532952842, "grad_norm": 7.0169877595744445, "learning_rate": 2.743924121616878e-07, "loss": 0.6009, "step": 23640 }, { "epoch": 1.7079487781530514, "grad_norm": 8.838171395747283, "learning_rate": 2.742591967031458e-07, "loss": 0.6634, "step": 23641 }, { "epoch": 1.7080210233532611, "grad_norm": 5.883759481901783, "learning_rate": 2.741260117132674e-07, "loss": 0.585, "step": 23642 }, { "epoch": 1.7080932685534704, "grad_norm": 7.476566581764821, "learning_rate": 2.739928571938755e-07, "loss": 0.6029, "step": 23643 }, { "epoch": 1.70816551375368, "grad_norm": 7.66679285470802, "learning_rate": 2.738597331467929e-07, "loss": 0.6621, "step": 23644 }, { "epoch": 1.7082377589538895, "grad_norm": 7.411063002557788, "learning_rate": 2.737266395738422e-07, "loss": 0.6521, "step": 23645 }, { "epoch": 1.708310004154099, "grad_norm": 6.870841415829259, "learning_rate": 2.7359357647684546e-07, "loss": 0.5844, "step": 23646 }, { "epoch": 1.7083822493543086, "grad_norm": 6.949486078156304, "learning_rate": 2.734605438576238e-07, "loss": 0.5783, "step": 23647 }, { "epoch": 1.708454494554518, "grad_norm": 6.555049190613097, "learning_rate": 2.7332754171799844e-07, "loss": 0.5829, "step": 23648 }, { "epoch": 1.7085267397547277, "grad_norm": 7.263969642995849, "learning_rate": 2.7319457005978987e-07, "loss": 0.5754, "step": 23649 }, { "epoch": 1.708598984954937, "grad_norm": 8.693754071934704, "learning_rate": 2.730616288848195e-07, "loss": 0.6762, "step": 23650 }, { "epoch": 1.7086712301551465, "grad_norm": 7.724207381290941, "learning_rate": 2.7292871819490504e-07, "loss": 0.5489, "step": 23651 }, { "epoch": 1.708743475355356, "grad_norm": 6.6095749845077085, "learning_rate": 2.727958379918677e-07, "loss": 0.6151, "step": 23652 }, { "epoch": 1.7088157205555656, "grad_norm": 6.220329045856071, "learning_rate": 2.726629882775267e-07, "loss": 0.5553, "step": 23653 }, { "epoch": 1.7088879657557752, "grad_norm": 7.983753874446179, "learning_rate": 2.725301690536994e-07, "loss": 0.5851, "step": 23654 }, { "epoch": 1.7089602109559845, "grad_norm": 7.342761423654389, "learning_rate": 2.7239738032220467e-07, "loss": 0.6202, "step": 23655 }, { "epoch": 1.7090324561561943, "grad_norm": 7.266469992446336, "learning_rate": 2.7226462208486017e-07, "loss": 0.582, "step": 23656 }, { "epoch": 1.7091047013564036, "grad_norm": 8.609840671248065, "learning_rate": 2.7213189434348364e-07, "loss": 0.6112, "step": 23657 }, { "epoch": 1.7091769465566131, "grad_norm": 7.299779015945663, "learning_rate": 2.719991970998911e-07, "loss": 0.5099, "step": 23658 }, { "epoch": 1.7092491917568227, "grad_norm": 6.629288634835195, "learning_rate": 2.718665303558998e-07, "loss": 0.6419, "step": 23659 }, { "epoch": 1.7093214369570322, "grad_norm": 6.93035878539395, "learning_rate": 2.7173389411332576e-07, "loss": 0.5471, "step": 23660 }, { "epoch": 1.7093936821572417, "grad_norm": 7.043118780466628, "learning_rate": 2.7160128837398435e-07, "loss": 0.5778, "step": 23661 }, { "epoch": 1.709465927357451, "grad_norm": 7.569431879653379, "learning_rate": 2.714687131396912e-07, "loss": 0.6243, "step": 23662 }, { "epoch": 1.7095381725576608, "grad_norm": 8.063949572935321, "learning_rate": 2.713361684122609e-07, "loss": 0.5881, "step": 23663 }, { "epoch": 1.7096104177578701, "grad_norm": 6.490392774267041, "learning_rate": 2.712036541935084e-07, "loss": 0.5627, "step": 23664 }, { "epoch": 1.7096826629580797, "grad_norm": 6.927231621170871, "learning_rate": 2.71071170485247e-07, "loss": 0.5985, "step": 23665 }, { "epoch": 1.7097549081582892, "grad_norm": 8.005371674505355, "learning_rate": 2.709387172892908e-07, "loss": 0.6886, "step": 23666 }, { "epoch": 1.7098271533584988, "grad_norm": 6.692914820552105, "learning_rate": 2.7080629460745246e-07, "loss": 0.66, "step": 23667 }, { "epoch": 1.7098993985587083, "grad_norm": 6.647151549217282, "learning_rate": 2.7067390244154583e-07, "loss": 0.6075, "step": 23668 }, { "epoch": 1.7099716437589176, "grad_norm": 8.877508104557515, "learning_rate": 2.7054154079338174e-07, "loss": 0.6432, "step": 23669 }, { "epoch": 1.7100438889591274, "grad_norm": 7.830846378187948, "learning_rate": 2.7040920966477315e-07, "loss": 0.5759, "step": 23670 }, { "epoch": 1.7101161341593367, "grad_norm": 7.454720956043271, "learning_rate": 2.7027690905753134e-07, "loss": 0.606, "step": 23671 }, { "epoch": 1.7101883793595463, "grad_norm": 7.870950535685323, "learning_rate": 2.7014463897346717e-07, "loss": 0.608, "step": 23672 }, { "epoch": 1.7102606245597558, "grad_norm": 7.036516582286506, "learning_rate": 2.700123994143916e-07, "loss": 0.6389, "step": 23673 }, { "epoch": 1.7103328697599653, "grad_norm": 6.9954437686617, "learning_rate": 2.698801903821149e-07, "loss": 0.5661, "step": 23674 }, { "epoch": 1.7104051149601749, "grad_norm": 6.205490007379711, "learning_rate": 2.6974801187844736e-07, "loss": 0.5599, "step": 23675 }, { "epoch": 1.7104773601603842, "grad_norm": 7.353759956100237, "learning_rate": 2.6961586390519714e-07, "loss": 0.5685, "step": 23676 }, { "epoch": 1.710549605360594, "grad_norm": 6.352631163460901, "learning_rate": 2.694837464641742e-07, "loss": 0.6343, "step": 23677 }, { "epoch": 1.7106218505608033, "grad_norm": 6.955991552715207, "learning_rate": 2.693516595571871e-07, "loss": 0.6584, "step": 23678 }, { "epoch": 1.7106940957610128, "grad_norm": 6.859264329438929, "learning_rate": 2.6921960318604307e-07, "loss": 0.5758, "step": 23679 }, { "epoch": 1.7107663409612224, "grad_norm": 7.361571937707528, "learning_rate": 2.6908757735255087e-07, "loss": 0.5484, "step": 23680 }, { "epoch": 1.710838586161432, "grad_norm": 6.862545870712758, "learning_rate": 2.689555820585177e-07, "loss": 0.5745, "step": 23681 }, { "epoch": 1.7109108313616415, "grad_norm": 8.010448308583745, "learning_rate": 2.6882361730575066e-07, "loss": 0.5865, "step": 23682 }, { "epoch": 1.7109830765618508, "grad_norm": 6.393763734386978, "learning_rate": 2.686916830960556e-07, "loss": 0.5548, "step": 23683 }, { "epoch": 1.7110553217620605, "grad_norm": 8.782715155932925, "learning_rate": 2.685597794312389e-07, "loss": 0.5482, "step": 23684 }, { "epoch": 1.7111275669622699, "grad_norm": 7.871897010424146, "learning_rate": 2.684279063131065e-07, "loss": 0.5987, "step": 23685 }, { "epoch": 1.7111998121624794, "grad_norm": 7.8119499318070735, "learning_rate": 2.682960637434631e-07, "loss": 0.5938, "step": 23686 }, { "epoch": 1.711272057362689, "grad_norm": 6.853665071664269, "learning_rate": 2.681642517241137e-07, "loss": 0.5865, "step": 23687 }, { "epoch": 1.7113443025628985, "grad_norm": 7.089639018652143, "learning_rate": 2.6803247025686265e-07, "loss": 0.6783, "step": 23688 }, { "epoch": 1.711416547763108, "grad_norm": 6.600488482807156, "learning_rate": 2.679007193435143e-07, "loss": 0.5571, "step": 23689 }, { "epoch": 1.7114887929633174, "grad_norm": 7.438204691865231, "learning_rate": 2.6776899898587175e-07, "loss": 0.6164, "step": 23690 }, { "epoch": 1.7115610381635271, "grad_norm": 7.326118243272101, "learning_rate": 2.6763730918573846e-07, "loss": 0.6427, "step": 23691 }, { "epoch": 1.7116332833637364, "grad_norm": 6.9094987365602245, "learning_rate": 2.6750564994491775e-07, "loss": 0.636, "step": 23692 }, { "epoch": 1.7117055285639462, "grad_norm": 6.9221607433437615, "learning_rate": 2.673740212652107e-07, "loss": 0.6794, "step": 23693 }, { "epoch": 1.7117777737641555, "grad_norm": 7.424322170567682, "learning_rate": 2.672424231484197e-07, "loss": 0.6727, "step": 23694 }, { "epoch": 1.711850018964365, "grad_norm": 7.692223067184936, "learning_rate": 2.6711085559634613e-07, "loss": 0.5888, "step": 23695 }, { "epoch": 1.7119222641645746, "grad_norm": 6.53042144630478, "learning_rate": 2.669793186107919e-07, "loss": 0.6661, "step": 23696 }, { "epoch": 1.711994509364784, "grad_norm": 6.567945564295343, "learning_rate": 2.6684781219355665e-07, "loss": 0.5707, "step": 23697 }, { "epoch": 1.7120667545649937, "grad_norm": 6.899469006880478, "learning_rate": 2.667163363464406e-07, "loss": 0.5477, "step": 23698 }, { "epoch": 1.712138999765203, "grad_norm": 7.054163781345932, "learning_rate": 2.6658489107124373e-07, "loss": 0.5737, "step": 23699 }, { "epoch": 1.7122112449654128, "grad_norm": 7.272536929847527, "learning_rate": 2.6645347636976627e-07, "loss": 0.6047, "step": 23700 }, { "epoch": 1.712283490165622, "grad_norm": 6.494831965081057, "learning_rate": 2.663220922438062e-07, "loss": 0.5599, "step": 23701 }, { "epoch": 1.7123557353658316, "grad_norm": 7.830006995654727, "learning_rate": 2.6619073869516206e-07, "loss": 0.6623, "step": 23702 }, { "epoch": 1.7124279805660412, "grad_norm": 5.752340752537247, "learning_rate": 2.66059415725633e-07, "loss": 0.6371, "step": 23703 }, { "epoch": 1.7125002257662505, "grad_norm": 7.170763249547059, "learning_rate": 2.659281233370153e-07, "loss": 0.6395, "step": 23704 }, { "epoch": 1.7125724709664603, "grad_norm": 6.838485295299917, "learning_rate": 2.6579686153110713e-07, "loss": 0.5245, "step": 23705 }, { "epoch": 1.7126447161666696, "grad_norm": 6.686597246219806, "learning_rate": 2.656656303097052e-07, "loss": 0.5818, "step": 23706 }, { "epoch": 1.7127169613668793, "grad_norm": 7.148073014102571, "learning_rate": 2.6553442967460594e-07, "loss": 0.651, "step": 23707 }, { "epoch": 1.7127892065670887, "grad_norm": 6.778415351160739, "learning_rate": 2.6540325962760514e-07, "loss": 0.607, "step": 23708 }, { "epoch": 1.7128614517672982, "grad_norm": 8.02197299803795, "learning_rate": 2.6527212017049884e-07, "loss": 0.5863, "step": 23709 }, { "epoch": 1.7129336969675077, "grad_norm": 8.35711901874426, "learning_rate": 2.651410113050826e-07, "loss": 0.6503, "step": 23710 }, { "epoch": 1.713005942167717, "grad_norm": 7.627161688889701, "learning_rate": 2.6500993303315015e-07, "loss": 0.6224, "step": 23711 }, { "epoch": 1.7130781873679268, "grad_norm": 6.082026545939896, "learning_rate": 2.6487888535649627e-07, "loss": 0.6056, "step": 23712 }, { "epoch": 1.7131504325681362, "grad_norm": 8.241770020528907, "learning_rate": 2.64747868276915e-07, "loss": 0.5937, "step": 23713 }, { "epoch": 1.713222677768346, "grad_norm": 7.824981728431893, "learning_rate": 2.6461688179620055e-07, "loss": 0.6439, "step": 23714 }, { "epoch": 1.7132949229685552, "grad_norm": 7.183122977284122, "learning_rate": 2.6448592591614474e-07, "loss": 0.6031, "step": 23715 }, { "epoch": 1.7133671681687648, "grad_norm": 8.309656560344651, "learning_rate": 2.6435500063854064e-07, "loss": 0.6077, "step": 23716 }, { "epoch": 1.7134394133689743, "grad_norm": 6.4708779193679185, "learning_rate": 2.6422410596518095e-07, "loss": 0.5597, "step": 23717 }, { "epoch": 1.7135116585691839, "grad_norm": 7.894265722974597, "learning_rate": 2.64093241897857e-07, "loss": 0.6673, "step": 23718 }, { "epoch": 1.7135839037693934, "grad_norm": 7.548614112021294, "learning_rate": 2.639624084383607e-07, "loss": 0.6506, "step": 23719 }, { "epoch": 1.7136561489696027, "grad_norm": 7.480929734490414, "learning_rate": 2.638316055884829e-07, "loss": 0.5446, "step": 23720 }, { "epoch": 1.7137283941698125, "grad_norm": 7.972976580945333, "learning_rate": 2.637008333500146e-07, "loss": 0.5676, "step": 23721 }, { "epoch": 1.7138006393700218, "grad_norm": 7.51689356132278, "learning_rate": 2.63570091724745e-07, "loss": 0.5818, "step": 23722 }, { "epoch": 1.7138728845702313, "grad_norm": 9.290709533204183, "learning_rate": 2.6343938071446447e-07, "loss": 0.6239, "step": 23723 }, { "epoch": 1.713945129770441, "grad_norm": 7.36700704602365, "learning_rate": 2.633087003209622e-07, "loss": 0.5468, "step": 23724 }, { "epoch": 1.7140173749706504, "grad_norm": 8.27447163257811, "learning_rate": 2.6317805054602743e-07, "loss": 0.5864, "step": 23725 }, { "epoch": 1.71408962017086, "grad_norm": 7.492911485809751, "learning_rate": 2.63047431391448e-07, "loss": 0.6629, "step": 23726 }, { "epoch": 1.7141618653710693, "grad_norm": 8.285363438887465, "learning_rate": 2.62916842859012e-07, "loss": 0.6426, "step": 23727 }, { "epoch": 1.714234110571279, "grad_norm": 7.17753401080889, "learning_rate": 2.6278628495050835e-07, "loss": 0.5389, "step": 23728 }, { "epoch": 1.7143063557714884, "grad_norm": 7.3710702027478945, "learning_rate": 2.626557576677227e-07, "loss": 0.5675, "step": 23729 }, { "epoch": 1.714378600971698, "grad_norm": 6.444350958105884, "learning_rate": 2.625252610124426e-07, "loss": 0.5481, "step": 23730 }, { "epoch": 1.7144508461719075, "grad_norm": 6.64756502014245, "learning_rate": 2.623947949864547e-07, "loss": 0.5722, "step": 23731 }, { "epoch": 1.714523091372117, "grad_norm": 7.28255703045678, "learning_rate": 2.62264359591545e-07, "loss": 0.5571, "step": 23732 }, { "epoch": 1.7145953365723265, "grad_norm": 7.818119560945476, "learning_rate": 2.6213395482949796e-07, "loss": 0.6594, "step": 23733 }, { "epoch": 1.7146675817725359, "grad_norm": 8.196147050721502, "learning_rate": 2.6200358070209985e-07, "loss": 0.5632, "step": 23734 }, { "epoch": 1.7147398269727456, "grad_norm": 9.539920252820554, "learning_rate": 2.6187323721113486e-07, "loss": 0.6742, "step": 23735 }, { "epoch": 1.714812072172955, "grad_norm": 7.2314766336467216, "learning_rate": 2.6174292435838745e-07, "loss": 0.5792, "step": 23736 }, { "epoch": 1.7148843173731645, "grad_norm": 6.861536332860737, "learning_rate": 2.616126421456416e-07, "loss": 0.6052, "step": 23737 }, { "epoch": 1.714956562573374, "grad_norm": 6.358302970337105, "learning_rate": 2.614823905746805e-07, "loss": 0.6101, "step": 23738 }, { "epoch": 1.7150288077735836, "grad_norm": 7.51098972061951, "learning_rate": 2.6135216964728815e-07, "loss": 0.6094, "step": 23739 }, { "epoch": 1.7151010529737931, "grad_norm": 7.538128382868095, "learning_rate": 2.612219793652457e-07, "loss": 0.5231, "step": 23740 }, { "epoch": 1.7151732981740024, "grad_norm": 8.162860632227643, "learning_rate": 2.610918197303361e-07, "loss": 0.5899, "step": 23741 }, { "epoch": 1.7152455433742122, "grad_norm": 8.770081500693628, "learning_rate": 2.609616907443416e-07, "loss": 0.5904, "step": 23742 }, { "epoch": 1.7153177885744215, "grad_norm": 7.310360326483733, "learning_rate": 2.6083159240904237e-07, "loss": 0.6471, "step": 23743 }, { "epoch": 1.715390033774631, "grad_norm": 7.353209551233101, "learning_rate": 2.607015247262204e-07, "loss": 0.573, "step": 23744 }, { "epoch": 1.7154622789748406, "grad_norm": 7.453463052884404, "learning_rate": 2.605714876976556e-07, "loss": 0.6015, "step": 23745 }, { "epoch": 1.7155345241750501, "grad_norm": 8.165060495221391, "learning_rate": 2.6044148132512824e-07, "loss": 0.5949, "step": 23746 }, { "epoch": 1.7156067693752597, "grad_norm": 6.66337612324738, "learning_rate": 2.603115056104183e-07, "loss": 0.6183, "step": 23747 }, { "epoch": 1.715679014575469, "grad_norm": 8.708878154838333, "learning_rate": 2.6018156055530483e-07, "loss": 0.6893, "step": 23748 }, { "epoch": 1.7157512597756788, "grad_norm": 6.702302895663894, "learning_rate": 2.6005164616156655e-07, "loss": 0.6045, "step": 23749 }, { "epoch": 1.715823504975888, "grad_norm": 7.731809865035644, "learning_rate": 2.599217624309827e-07, "loss": 0.6054, "step": 23750 }, { "epoch": 1.7158957501760976, "grad_norm": 8.027757649761282, "learning_rate": 2.5979190936533004e-07, "loss": 0.6963, "step": 23751 }, { "epoch": 1.7159679953763072, "grad_norm": 8.46037841527715, "learning_rate": 2.5966208696638686e-07, "loss": 0.6431, "step": 23752 }, { "epoch": 1.7160402405765167, "grad_norm": 7.298588226924518, "learning_rate": 2.5953229523593055e-07, "loss": 0.5812, "step": 23753 }, { "epoch": 1.7161124857767263, "grad_norm": 7.024033568890762, "learning_rate": 2.59402534175737e-07, "loss": 0.5862, "step": 23754 }, { "epoch": 1.7161847309769356, "grad_norm": 7.353455967553491, "learning_rate": 2.59272803787583e-07, "loss": 0.5289, "step": 23755 }, { "epoch": 1.7162569761771453, "grad_norm": 6.728684572443521, "learning_rate": 2.591431040732442e-07, "loss": 0.5524, "step": 23756 }, { "epoch": 1.7163292213773547, "grad_norm": 6.857300325129667, "learning_rate": 2.590134350344972e-07, "loss": 0.6682, "step": 23757 }, { "epoch": 1.7164014665775642, "grad_norm": 6.392653611970284, "learning_rate": 2.5888379667311586e-07, "loss": 0.5921, "step": 23758 }, { "epoch": 1.7164737117777737, "grad_norm": 6.667298223462479, "learning_rate": 2.5875418899087514e-07, "loss": 0.5587, "step": 23759 }, { "epoch": 1.7165459569779833, "grad_norm": 9.036372543645962, "learning_rate": 2.5862461198954997e-07, "loss": 0.6537, "step": 23760 }, { "epoch": 1.7166182021781928, "grad_norm": 7.374215553073653, "learning_rate": 2.5849506567091286e-07, "loss": 0.585, "step": 23761 }, { "epoch": 1.7166904473784022, "grad_norm": 7.620480307913079, "learning_rate": 2.5836555003673794e-07, "loss": 0.6742, "step": 23762 }, { "epoch": 1.716762692578612, "grad_norm": 6.682437584924831, "learning_rate": 2.5823606508879826e-07, "loss": 0.5677, "step": 23763 }, { "epoch": 1.7168349377788212, "grad_norm": 7.415322456988184, "learning_rate": 2.5810661082886627e-07, "loss": 0.617, "step": 23764 }, { "epoch": 1.7169071829790308, "grad_norm": 7.886330582247376, "learning_rate": 2.579771872587139e-07, "loss": 0.5852, "step": 23765 }, { "epoch": 1.7169794281792403, "grad_norm": 7.822906155500872, "learning_rate": 2.57847794380113e-07, "loss": 0.6346, "step": 23766 }, { "epoch": 1.7170516733794499, "grad_norm": 7.503913112877523, "learning_rate": 2.577184321948356e-07, "loss": 0.5855, "step": 23767 }, { "epoch": 1.7171239185796594, "grad_norm": 6.9832462363020085, "learning_rate": 2.575891007046513e-07, "loss": 0.5242, "step": 23768 }, { "epoch": 1.7171961637798687, "grad_norm": 8.910558254165611, "learning_rate": 2.5745979991133094e-07, "loss": 0.6498, "step": 23769 }, { "epoch": 1.7172684089800785, "grad_norm": 7.11895093625889, "learning_rate": 2.573305298166448e-07, "loss": 0.6013, "step": 23770 }, { "epoch": 1.7173406541802878, "grad_norm": 7.856040049852889, "learning_rate": 2.572012904223628e-07, "loss": 0.5664, "step": 23771 }, { "epoch": 1.7174128993804976, "grad_norm": 6.994879348292433, "learning_rate": 2.570720817302533e-07, "loss": 0.5805, "step": 23772 }, { "epoch": 1.717485144580707, "grad_norm": 7.738205333108172, "learning_rate": 2.5694290374208537e-07, "loss": 0.6417, "step": 23773 }, { "epoch": 1.7175573897809164, "grad_norm": 6.311965768699508, "learning_rate": 2.5681375645962734e-07, "loss": 0.5628, "step": 23774 }, { "epoch": 1.717629634981126, "grad_norm": 7.78169341145316, "learning_rate": 2.566846398846476e-07, "loss": 0.592, "step": 23775 }, { "epoch": 1.7177018801813353, "grad_norm": 7.49039072522607, "learning_rate": 2.565555540189132e-07, "loss": 0.6562, "step": 23776 }, { "epoch": 1.717774125381545, "grad_norm": 7.141285003944904, "learning_rate": 2.564264988641912e-07, "loss": 0.547, "step": 23777 }, { "epoch": 1.7178463705817544, "grad_norm": 8.036640658796017, "learning_rate": 2.562974744222491e-07, "loss": 0.6505, "step": 23778 }, { "epoch": 1.7179186157819641, "grad_norm": 7.188540972086968, "learning_rate": 2.561684806948519e-07, "loss": 0.5809, "step": 23779 }, { "epoch": 1.7179908609821735, "grad_norm": 7.295411577857142, "learning_rate": 2.56039517683766e-07, "loss": 0.6329, "step": 23780 }, { "epoch": 1.718063106182383, "grad_norm": 8.354112226985835, "learning_rate": 2.5591058539075676e-07, "loss": 0.6253, "step": 23781 }, { "epoch": 1.7181353513825925, "grad_norm": 6.699868716192055, "learning_rate": 2.5578168381758944e-07, "loss": 0.6536, "step": 23782 }, { "epoch": 1.7182075965828019, "grad_norm": 7.982308853876294, "learning_rate": 2.5565281296602824e-07, "loss": 0.6186, "step": 23783 }, { "epoch": 1.7182798417830116, "grad_norm": 6.72508104693844, "learning_rate": 2.555239728378367e-07, "loss": 0.5963, "step": 23784 }, { "epoch": 1.718352086983221, "grad_norm": 6.647337198261615, "learning_rate": 2.553951634347801e-07, "loss": 0.5427, "step": 23785 }, { "epoch": 1.7184243321834307, "grad_norm": 6.8953545011325605, "learning_rate": 2.5526638475862065e-07, "loss": 0.5719, "step": 23786 }, { "epoch": 1.71849657738364, "grad_norm": 7.144784596468367, "learning_rate": 2.5513763681112134e-07, "loss": 0.5496, "step": 23787 }, { "epoch": 1.7185688225838496, "grad_norm": 7.702880607891039, "learning_rate": 2.55008919594045e-07, "loss": 0.622, "step": 23788 }, { "epoch": 1.7186410677840591, "grad_norm": 8.98867084797754, "learning_rate": 2.548802331091535e-07, "loss": 0.5957, "step": 23789 }, { "epoch": 1.7187133129842687, "grad_norm": 7.432301218997526, "learning_rate": 2.547515773582082e-07, "loss": 0.5171, "step": 23790 }, { "epoch": 1.7187855581844782, "grad_norm": 8.219780338814376, "learning_rate": 2.546229523429705e-07, "loss": 0.6384, "step": 23791 }, { "epoch": 1.7188578033846875, "grad_norm": 7.339524632166029, "learning_rate": 2.54494358065201e-07, "loss": 0.5404, "step": 23792 }, { "epoch": 1.7189300485848973, "grad_norm": 7.811868382670936, "learning_rate": 2.543657945266603e-07, "loss": 0.5802, "step": 23793 }, { "epoch": 1.7190022937851066, "grad_norm": 8.017767725757539, "learning_rate": 2.5423726172910806e-07, "loss": 0.6785, "step": 23794 }, { "epoch": 1.7190745389853161, "grad_norm": 6.958580637056804, "learning_rate": 2.541087596743041e-07, "loss": 0.5001, "step": 23795 }, { "epoch": 1.7191467841855257, "grad_norm": 7.240294438481643, "learning_rate": 2.53980288364008e-07, "loss": 0.5967, "step": 23796 }, { "epoch": 1.7192190293857352, "grad_norm": 8.17191887984156, "learning_rate": 2.538518477999774e-07, "loss": 0.6383, "step": 23797 }, { "epoch": 1.7192912745859448, "grad_norm": 7.119896382638705, "learning_rate": 2.537234379839709e-07, "loss": 0.5684, "step": 23798 }, { "epoch": 1.719363519786154, "grad_norm": 7.534720606493642, "learning_rate": 2.5359505891774634e-07, "loss": 0.6806, "step": 23799 }, { "epoch": 1.7194357649863639, "grad_norm": 7.205218956166345, "learning_rate": 2.5346671060306166e-07, "loss": 0.5433, "step": 23800 }, { "epoch": 1.7195080101865732, "grad_norm": 7.3251663449508895, "learning_rate": 2.53338393041673e-07, "loss": 0.5385, "step": 23801 }, { "epoch": 1.7195802553867827, "grad_norm": 7.668301311321869, "learning_rate": 2.532101062353373e-07, "loss": 0.5633, "step": 23802 }, { "epoch": 1.7196525005869923, "grad_norm": 8.491109349683423, "learning_rate": 2.530818501858107e-07, "loss": 0.6716, "step": 23803 }, { "epoch": 1.7197247457872018, "grad_norm": 7.937368076459675, "learning_rate": 2.5295362489484914e-07, "loss": 0.5747, "step": 23804 }, { "epoch": 1.7197969909874113, "grad_norm": 7.385655867805935, "learning_rate": 2.528254303642075e-07, "loss": 0.5421, "step": 23805 }, { "epoch": 1.7198692361876207, "grad_norm": 7.954716791066431, "learning_rate": 2.5269726659564094e-07, "loss": 0.6098, "step": 23806 }, { "epoch": 1.7199414813878304, "grad_norm": 5.878602242451106, "learning_rate": 2.5256913359090445e-07, "loss": 0.5966, "step": 23807 }, { "epoch": 1.7200137265880397, "grad_norm": 8.029285235107714, "learning_rate": 2.5244103135175085e-07, "loss": 0.7017, "step": 23808 }, { "epoch": 1.7200859717882493, "grad_norm": 8.449246290552665, "learning_rate": 2.523129598799345e-07, "loss": 0.5757, "step": 23809 }, { "epoch": 1.7201582169884588, "grad_norm": 6.958447971275298, "learning_rate": 2.521849191772091e-07, "loss": 0.6101, "step": 23810 }, { "epoch": 1.7202304621886684, "grad_norm": 7.832402627183222, "learning_rate": 2.520569092453262e-07, "loss": 0.6215, "step": 23811 }, { "epoch": 1.720302707388878, "grad_norm": 8.06301665314094, "learning_rate": 2.5192893008603837e-07, "loss": 0.5898, "step": 23812 }, { "epoch": 1.7203749525890872, "grad_norm": 6.680778762285537, "learning_rate": 2.5180098170109857e-07, "loss": 0.6478, "step": 23813 }, { "epoch": 1.720447197789297, "grad_norm": 9.026683669344255, "learning_rate": 2.5167306409225796e-07, "loss": 0.6061, "step": 23814 }, { "epoch": 1.7205194429895063, "grad_norm": 6.92264099704899, "learning_rate": 2.51545177261267e-07, "loss": 0.5302, "step": 23815 }, { "epoch": 1.7205916881897159, "grad_norm": 7.788746359635569, "learning_rate": 2.5141732120987696e-07, "loss": 0.5587, "step": 23816 }, { "epoch": 1.7206639333899254, "grad_norm": 7.3752718083353495, "learning_rate": 2.512894959398382e-07, "loss": 0.5771, "step": 23817 }, { "epoch": 1.720736178590135, "grad_norm": 7.068203943614346, "learning_rate": 2.511617014528997e-07, "loss": 0.608, "step": 23818 }, { "epoch": 1.7208084237903445, "grad_norm": 8.557766907942959, "learning_rate": 2.510339377508114e-07, "loss": 0.5563, "step": 23819 }, { "epoch": 1.7208806689905538, "grad_norm": 6.538714801837256, "learning_rate": 2.509062048353225e-07, "loss": 0.6801, "step": 23820 }, { "epoch": 1.7209529141907636, "grad_norm": 6.607525505856703, "learning_rate": 2.5077850270818105e-07, "loss": 0.5584, "step": 23821 }, { "epoch": 1.721025159390973, "grad_norm": 6.031818580509828, "learning_rate": 2.506508313711356e-07, "loss": 0.5582, "step": 23822 }, { "epoch": 1.7210974045911824, "grad_norm": 6.431763991566613, "learning_rate": 2.5052319082593397e-07, "loss": 0.5942, "step": 23823 }, { "epoch": 1.721169649791392, "grad_norm": 8.480985129373146, "learning_rate": 2.503955810743236e-07, "loss": 0.6433, "step": 23824 }, { "epoch": 1.7212418949916015, "grad_norm": 6.658289350699376, "learning_rate": 2.502680021180504e-07, "loss": 0.5605, "step": 23825 }, { "epoch": 1.721314140191811, "grad_norm": 8.011986812159806, "learning_rate": 2.5014045395886146e-07, "loss": 0.5739, "step": 23826 }, { "epoch": 1.7213863853920204, "grad_norm": 6.850347813595769, "learning_rate": 2.5001293659850296e-07, "loss": 0.578, "step": 23827 }, { "epoch": 1.7214586305922301, "grad_norm": 7.712826575686834, "learning_rate": 2.498854500387207e-07, "loss": 0.5858, "step": 23828 }, { "epoch": 1.7215308757924395, "grad_norm": 7.059550251670384, "learning_rate": 2.497579942812592e-07, "loss": 0.6088, "step": 23829 }, { "epoch": 1.721603120992649, "grad_norm": 8.103976707553057, "learning_rate": 2.496305693278633e-07, "loss": 0.6077, "step": 23830 }, { "epoch": 1.7216753661928585, "grad_norm": 7.6483370001856095, "learning_rate": 2.4950317518027787e-07, "loss": 0.5955, "step": 23831 }, { "epoch": 1.721747611393068, "grad_norm": 6.977288596033831, "learning_rate": 2.4937581184024644e-07, "loss": 0.5831, "step": 23832 }, { "epoch": 1.7218198565932776, "grad_norm": 6.905512291358514, "learning_rate": 2.4924847930951264e-07, "loss": 0.6138, "step": 23833 }, { "epoch": 1.721892101793487, "grad_norm": 7.843727628516404, "learning_rate": 2.491211775898197e-07, "loss": 0.5898, "step": 23834 }, { "epoch": 1.7219643469936967, "grad_norm": 9.31595542344746, "learning_rate": 2.4899390668291056e-07, "loss": 0.5503, "step": 23835 }, { "epoch": 1.722036592193906, "grad_norm": 8.010591171870622, "learning_rate": 2.488666665905265e-07, "loss": 0.6202, "step": 23836 }, { "epoch": 1.7221088373941156, "grad_norm": 6.913383838514451, "learning_rate": 2.487394573144097e-07, "loss": 0.58, "step": 23837 }, { "epoch": 1.7221810825943251, "grad_norm": 6.816047794568477, "learning_rate": 2.4861227885630225e-07, "loss": 0.6255, "step": 23838 }, { "epoch": 1.7222533277945347, "grad_norm": 8.43261622620507, "learning_rate": 2.4848513121794453e-07, "loss": 0.6235, "step": 23839 }, { "epoch": 1.7223255729947442, "grad_norm": 6.631043179061506, "learning_rate": 2.483580144010772e-07, "loss": 0.6254, "step": 23840 }, { "epoch": 1.7223978181949535, "grad_norm": 8.076721423639553, "learning_rate": 2.482309284074394e-07, "loss": 0.738, "step": 23841 }, { "epoch": 1.7224700633951633, "grad_norm": 7.196832055292112, "learning_rate": 2.4810387323877305e-07, "loss": 0.5792, "step": 23842 }, { "epoch": 1.7225423085953726, "grad_norm": 8.391026890398495, "learning_rate": 2.4797684889681564e-07, "loss": 0.6412, "step": 23843 }, { "epoch": 1.7226145537955824, "grad_norm": 6.912986542339737, "learning_rate": 2.478498553833067e-07, "loss": 0.6567, "step": 23844 }, { "epoch": 1.7226867989957917, "grad_norm": 7.22338929487214, "learning_rate": 2.4772289269998443e-07, "loss": 0.6197, "step": 23845 }, { "epoch": 1.7227590441960012, "grad_norm": 8.026992084754472, "learning_rate": 2.4759596084858755e-07, "loss": 0.6813, "step": 23846 }, { "epoch": 1.7228312893962108, "grad_norm": 8.280961632656163, "learning_rate": 2.4746905983085267e-07, "loss": 0.6045, "step": 23847 }, { "epoch": 1.72290353459642, "grad_norm": 7.354159375872434, "learning_rate": 2.473421896485173e-07, "loss": 0.6, "step": 23848 }, { "epoch": 1.7229757797966299, "grad_norm": 7.027215910889031, "learning_rate": 2.4721535030331814e-07, "loss": 0.6328, "step": 23849 }, { "epoch": 1.7230480249968392, "grad_norm": 7.360766107243653, "learning_rate": 2.4708854179699175e-07, "loss": 0.6687, "step": 23850 }, { "epoch": 1.723120270197049, "grad_norm": 8.75368051778347, "learning_rate": 2.469617641312741e-07, "loss": 0.617, "step": 23851 }, { "epoch": 1.7231925153972583, "grad_norm": 7.439310318349095, "learning_rate": 2.468350173079004e-07, "loss": 0.5487, "step": 23852 }, { "epoch": 1.7232647605974678, "grad_norm": 8.522363303071, "learning_rate": 2.4670830132860616e-07, "loss": 0.6347, "step": 23853 }, { "epoch": 1.7233370057976773, "grad_norm": 7.7799536915947884, "learning_rate": 2.465816161951254e-07, "loss": 0.5677, "step": 23854 }, { "epoch": 1.7234092509978867, "grad_norm": 7.209865105963592, "learning_rate": 2.464549619091927e-07, "loss": 0.5818, "step": 23855 }, { "epoch": 1.7234814961980964, "grad_norm": 6.842778106378634, "learning_rate": 2.463283384725418e-07, "loss": 0.5939, "step": 23856 }, { "epoch": 1.7235537413983057, "grad_norm": 8.324975769190994, "learning_rate": 2.462017458869065e-07, "loss": 0.6409, "step": 23857 }, { "epoch": 1.7236259865985155, "grad_norm": 6.63276735252229, "learning_rate": 2.4607518415401874e-07, "loss": 0.5835, "step": 23858 }, { "epoch": 1.7236982317987248, "grad_norm": 6.825681759944358, "learning_rate": 2.4594865327561196e-07, "loss": 0.6172, "step": 23859 }, { "epoch": 1.7237704769989344, "grad_norm": 6.6393924062867535, "learning_rate": 2.4582215325341774e-07, "loss": 0.6259, "step": 23860 }, { "epoch": 1.723842722199144, "grad_norm": 6.377876380810956, "learning_rate": 2.4569568408916806e-07, "loss": 0.5855, "step": 23861 }, { "epoch": 1.7239149673993535, "grad_norm": 6.810941465393718, "learning_rate": 2.4556924578459403e-07, "loss": 0.6246, "step": 23862 }, { "epoch": 1.723987212599563, "grad_norm": 7.734744161407773, "learning_rate": 2.4544283834142647e-07, "loss": 0.6718, "step": 23863 }, { "epoch": 1.7240594577997723, "grad_norm": 7.277586182975422, "learning_rate": 2.453164617613965e-07, "loss": 0.6188, "step": 23864 }, { "epoch": 1.724131702999982, "grad_norm": 6.822664569730414, "learning_rate": 2.4519011604623305e-07, "loss": 0.6467, "step": 23865 }, { "epoch": 1.7242039482001914, "grad_norm": 9.356342926277422, "learning_rate": 2.450638011976664e-07, "loss": 0.6069, "step": 23866 }, { "epoch": 1.724276193400401, "grad_norm": 7.318157127075932, "learning_rate": 2.4493751721742564e-07, "loss": 0.5584, "step": 23867 }, { "epoch": 1.7243484386006105, "grad_norm": 7.7374959178414615, "learning_rate": 2.4481126410723894e-07, "loss": 0.6139, "step": 23868 }, { "epoch": 1.72442068380082, "grad_norm": 6.503353134325868, "learning_rate": 2.446850418688346e-07, "loss": 0.5873, "step": 23869 }, { "epoch": 1.7244929290010296, "grad_norm": 8.561509569343308, "learning_rate": 2.4455885050394116e-07, "loss": 0.6568, "step": 23870 }, { "epoch": 1.724565174201239, "grad_norm": 8.652054083046234, "learning_rate": 2.4443269001428626e-07, "loss": 0.6191, "step": 23871 }, { "epoch": 1.7246374194014487, "grad_norm": 6.599144053586816, "learning_rate": 2.443065604015962e-07, "loss": 0.6575, "step": 23872 }, { "epoch": 1.724709664601658, "grad_norm": 7.136089328514674, "learning_rate": 2.44180461667598e-07, "loss": 0.6475, "step": 23873 }, { "epoch": 1.7247819098018675, "grad_norm": 7.755350050561114, "learning_rate": 2.44054393814018e-07, "loss": 0.6086, "step": 23874 }, { "epoch": 1.724854155002077, "grad_norm": 7.616058610257856, "learning_rate": 2.4392835684258153e-07, "loss": 0.6133, "step": 23875 }, { "epoch": 1.7249264002022866, "grad_norm": 8.10531485170162, "learning_rate": 2.4380235075501383e-07, "loss": 0.6036, "step": 23876 }, { "epoch": 1.7249986454024961, "grad_norm": 7.077994844364166, "learning_rate": 2.4367637555304025e-07, "loss": 0.6134, "step": 23877 }, { "epoch": 1.7250708906027055, "grad_norm": 7.8482198555547, "learning_rate": 2.435504312383852e-07, "loss": 0.6724, "step": 23878 }, { "epoch": 1.7251431358029152, "grad_norm": 7.602068579392784, "learning_rate": 2.4342451781277255e-07, "loss": 0.6132, "step": 23879 }, { "epoch": 1.7252153810031245, "grad_norm": 6.84994240618252, "learning_rate": 2.4329863527792627e-07, "loss": 0.5938, "step": 23880 }, { "epoch": 1.725287626203334, "grad_norm": 7.357280068566387, "learning_rate": 2.4317278363556965e-07, "loss": 0.6697, "step": 23881 }, { "epoch": 1.7253598714035436, "grad_norm": 6.377174754475613, "learning_rate": 2.4304696288742545e-07, "loss": 0.5941, "step": 23882 }, { "epoch": 1.7254321166037532, "grad_norm": 6.716698053758359, "learning_rate": 2.4292117303521574e-07, "loss": 0.5394, "step": 23883 }, { "epoch": 1.7255043618039627, "grad_norm": 6.040529060687302, "learning_rate": 2.427954140806624e-07, "loss": 0.5434, "step": 23884 }, { "epoch": 1.725576607004172, "grad_norm": 6.812973146019211, "learning_rate": 2.426696860254879e-07, "loss": 0.6461, "step": 23885 }, { "epoch": 1.7256488522043818, "grad_norm": 7.489827187025339, "learning_rate": 2.4254398887141205e-07, "loss": 0.6211, "step": 23886 }, { "epoch": 1.7257210974045911, "grad_norm": 6.770640991608982, "learning_rate": 2.4241832262015625e-07, "loss": 0.5114, "step": 23887 }, { "epoch": 1.7257933426048007, "grad_norm": 7.70211494431747, "learning_rate": 2.4229268727344075e-07, "loss": 0.6, "step": 23888 }, { "epoch": 1.7258655878050102, "grad_norm": 7.799820276781873, "learning_rate": 2.421670828329853e-07, "loss": 0.6073, "step": 23889 }, { "epoch": 1.7259378330052197, "grad_norm": 7.662638587382913, "learning_rate": 2.4204150930050964e-07, "loss": 0.6023, "step": 23890 }, { "epoch": 1.7260100782054293, "grad_norm": 7.731372720643328, "learning_rate": 2.419159666777321e-07, "loss": 0.634, "step": 23891 }, { "epoch": 1.7260823234056386, "grad_norm": 7.309544675243116, "learning_rate": 2.4179045496637243e-07, "loss": 0.5417, "step": 23892 }, { "epoch": 1.7261545686058484, "grad_norm": 7.656189213725533, "learning_rate": 2.416649741681476e-07, "loss": 0.557, "step": 23893 }, { "epoch": 1.7262268138060577, "grad_norm": 7.481306812959796, "learning_rate": 2.4153952428477565e-07, "loss": 0.5632, "step": 23894 }, { "epoch": 1.7262990590062672, "grad_norm": 6.726445390875893, "learning_rate": 2.4141410531797416e-07, "loss": 0.583, "step": 23895 }, { "epoch": 1.7263713042064768, "grad_norm": 8.899615041572506, "learning_rate": 2.412887172694603e-07, "loss": 0.6637, "step": 23896 }, { "epoch": 1.7264435494066863, "grad_norm": 6.193608207118934, "learning_rate": 2.411633601409491e-07, "loss": 0.5968, "step": 23897 }, { "epoch": 1.7265157946068959, "grad_norm": 7.38973303285545, "learning_rate": 2.410380339341584e-07, "loss": 0.6014, "step": 23898 }, { "epoch": 1.7265880398071052, "grad_norm": 7.545195398549657, "learning_rate": 2.4091273865080314e-07, "loss": 0.543, "step": 23899 }, { "epoch": 1.726660285007315, "grad_norm": 7.848165416766873, "learning_rate": 2.407874742925981e-07, "loss": 0.5848, "step": 23900 }, { "epoch": 1.7267325302075243, "grad_norm": 7.452902864548714, "learning_rate": 2.4066224086125856e-07, "loss": 0.6379, "step": 23901 }, { "epoch": 1.7268047754077338, "grad_norm": 7.392790463017204, "learning_rate": 2.4053703835849845e-07, "loss": 0.6166, "step": 23902 }, { "epoch": 1.7268770206079433, "grad_norm": 7.682323418675555, "learning_rate": 2.4041186678603224e-07, "loss": 0.6284, "step": 23903 }, { "epoch": 1.726949265808153, "grad_norm": 7.274508650399444, "learning_rate": 2.4028672614557296e-07, "loss": 0.6316, "step": 23904 }, { "epoch": 1.7270215110083624, "grad_norm": 7.068685877717427, "learning_rate": 2.4016161643883373e-07, "loss": 0.6124, "step": 23905 }, { "epoch": 1.7270937562085718, "grad_norm": 7.444590034136681, "learning_rate": 2.4003653766752735e-07, "loss": 0.5533, "step": 23906 }, { "epoch": 1.7271660014087815, "grad_norm": 7.663560016736801, "learning_rate": 2.39911489833366e-07, "loss": 0.6912, "step": 23907 }, { "epoch": 1.7272382466089908, "grad_norm": 6.631131483739308, "learning_rate": 2.3978647293806137e-07, "loss": 0.5918, "step": 23908 }, { "epoch": 1.7273104918092004, "grad_norm": 7.676249140876209, "learning_rate": 2.3966148698332527e-07, "loss": 0.5943, "step": 23909 }, { "epoch": 1.72738273700941, "grad_norm": 7.500409941596005, "learning_rate": 2.395365319708687e-07, "loss": 0.6703, "step": 23910 }, { "epoch": 1.7274549822096195, "grad_norm": 6.841925112093961, "learning_rate": 2.3941160790240143e-07, "loss": 0.5813, "step": 23911 }, { "epoch": 1.727527227409829, "grad_norm": 7.055731306448597, "learning_rate": 2.3928671477963407e-07, "loss": 0.6322, "step": 23912 }, { "epoch": 1.7275994726100383, "grad_norm": 6.672632536216789, "learning_rate": 2.3916185260427603e-07, "loss": 0.5981, "step": 23913 }, { "epoch": 1.727671717810248, "grad_norm": 8.233165789712876, "learning_rate": 2.390370213780374e-07, "loss": 0.6771, "step": 23914 }, { "epoch": 1.7277439630104574, "grad_norm": 6.682641376750938, "learning_rate": 2.3891222110262595e-07, "loss": 0.5595, "step": 23915 }, { "epoch": 1.727816208210667, "grad_norm": 7.99567940388049, "learning_rate": 2.387874517797506e-07, "loss": 0.6104, "step": 23916 }, { "epoch": 1.7278884534108765, "grad_norm": 6.556691823787293, "learning_rate": 2.386627134111194e-07, "loss": 0.5755, "step": 23917 }, { "epoch": 1.727960698611086, "grad_norm": 8.590547165946507, "learning_rate": 2.3853800599843964e-07, "loss": 0.5527, "step": 23918 }, { "epoch": 1.7280329438112956, "grad_norm": 7.402494742028644, "learning_rate": 2.3841332954341879e-07, "loss": 0.6586, "step": 23919 }, { "epoch": 1.728105189011505, "grad_norm": 7.352922919755598, "learning_rate": 2.3828868404776328e-07, "loss": 0.6105, "step": 23920 }, { "epoch": 1.7281774342117147, "grad_norm": 7.9879710839081985, "learning_rate": 2.381640695131804e-07, "loss": 0.6254, "step": 23921 }, { "epoch": 1.728249679411924, "grad_norm": 7.734800631438762, "learning_rate": 2.3803948594137428e-07, "loss": 0.5787, "step": 23922 }, { "epoch": 1.7283219246121337, "grad_norm": 7.24771424139042, "learning_rate": 2.3791493333405163e-07, "loss": 0.6041, "step": 23923 }, { "epoch": 1.728394169812343, "grad_norm": 7.252381558429132, "learning_rate": 2.377904116929175e-07, "loss": 0.57, "step": 23924 }, { "epoch": 1.7284664150125526, "grad_norm": 7.0842231939513844, "learning_rate": 2.3766592101967524e-07, "loss": 0.5735, "step": 23925 }, { "epoch": 1.7285386602127621, "grad_norm": 7.7908444982303084, "learning_rate": 2.375414613160304e-07, "loss": 0.6431, "step": 23926 }, { "epoch": 1.7286109054129715, "grad_norm": 7.85233278054589, "learning_rate": 2.3741703258368638e-07, "loss": 0.5417, "step": 23927 }, { "epoch": 1.7286831506131812, "grad_norm": 7.2992535457702425, "learning_rate": 2.3729263482434679e-07, "loss": 0.5801, "step": 23928 }, { "epoch": 1.7287553958133905, "grad_norm": 7.632868474717991, "learning_rate": 2.371682680397136e-07, "loss": 0.6244, "step": 23929 }, { "epoch": 1.7288276410136003, "grad_norm": 7.618456174396567, "learning_rate": 2.370439322314899e-07, "loss": 0.6389, "step": 23930 }, { "epoch": 1.7288998862138096, "grad_norm": 7.45737168347784, "learning_rate": 2.3691962740137824e-07, "loss": 0.6718, "step": 23931 }, { "epoch": 1.7289721314140192, "grad_norm": 7.63161919127998, "learning_rate": 2.367953535510789e-07, "loss": 0.5956, "step": 23932 }, { "epoch": 1.7290443766142287, "grad_norm": 6.148623627265343, "learning_rate": 2.3667111068229443e-07, "loss": 0.5003, "step": 23933 }, { "epoch": 1.729116621814438, "grad_norm": 10.499967665849582, "learning_rate": 2.365468987967248e-07, "loss": 0.6556, "step": 23934 }, { "epoch": 1.7291888670146478, "grad_norm": 6.770371109272769, "learning_rate": 2.3642271789607068e-07, "loss": 0.5858, "step": 23935 }, { "epoch": 1.7292611122148571, "grad_norm": 7.361067721001082, "learning_rate": 2.3629856798203203e-07, "loss": 0.6059, "step": 23936 }, { "epoch": 1.7293333574150669, "grad_norm": 7.450872884728659, "learning_rate": 2.361744490563081e-07, "loss": 0.6312, "step": 23937 }, { "epoch": 1.7294056026152762, "grad_norm": 7.005572417108928, "learning_rate": 2.360503611205986e-07, "loss": 0.6098, "step": 23938 }, { "epoch": 1.7294778478154857, "grad_norm": 8.05827516563064, "learning_rate": 2.3592630417660217e-07, "loss": 0.6494, "step": 23939 }, { "epoch": 1.7295500930156953, "grad_norm": 8.71771999824343, "learning_rate": 2.358022782260161e-07, "loss": 0.61, "step": 23940 }, { "epoch": 1.7296223382159048, "grad_norm": 8.350035853508823, "learning_rate": 2.3567828327053898e-07, "loss": 0.5527, "step": 23941 }, { "epoch": 1.7296945834161144, "grad_norm": 7.256124606104584, "learning_rate": 2.3555431931186838e-07, "loss": 0.5944, "step": 23942 }, { "epoch": 1.7297668286163237, "grad_norm": 11.298624032566837, "learning_rate": 2.3543038635170045e-07, "loss": 0.666, "step": 23943 }, { "epoch": 1.7298390738165335, "grad_norm": 7.33061153301674, "learning_rate": 2.3530648439173216e-07, "loss": 0.6032, "step": 23944 }, { "epoch": 1.7299113190167428, "grad_norm": 6.680674554721516, "learning_rate": 2.351826134336596e-07, "loss": 0.6305, "step": 23945 }, { "epoch": 1.7299835642169523, "grad_norm": 7.418649329873914, "learning_rate": 2.3505877347917873e-07, "loss": 0.6756, "step": 23946 }, { "epoch": 1.7300558094171619, "grad_norm": 7.016333461409828, "learning_rate": 2.3493496452998476e-07, "loss": 0.6266, "step": 23947 }, { "epoch": 1.7301280546173714, "grad_norm": 9.024964782138913, "learning_rate": 2.3481118658777224e-07, "loss": 0.591, "step": 23948 }, { "epoch": 1.730200299817581, "grad_norm": 8.094200092305437, "learning_rate": 2.3468743965423617e-07, "loss": 0.6331, "step": 23949 }, { "epoch": 1.7302725450177903, "grad_norm": 7.789905315014717, "learning_rate": 2.3456372373106967e-07, "loss": 0.6583, "step": 23950 }, { "epoch": 1.730344790218, "grad_norm": 6.53513611165083, "learning_rate": 2.3444003881996692e-07, "loss": 0.5476, "step": 23951 }, { "epoch": 1.7304170354182093, "grad_norm": 6.219494952338027, "learning_rate": 2.3431638492262098e-07, "loss": 0.5417, "step": 23952 }, { "epoch": 1.730489280618419, "grad_norm": 8.353188880022529, "learning_rate": 2.34192762040725e-07, "loss": 0.577, "step": 23953 }, { "epoch": 1.7305615258186284, "grad_norm": 8.285479462401376, "learning_rate": 2.340691701759698e-07, "loss": 0.6063, "step": 23954 }, { "epoch": 1.730633771018838, "grad_norm": 8.122491067709433, "learning_rate": 2.3394560933004878e-07, "loss": 0.6209, "step": 23955 }, { "epoch": 1.7307060162190475, "grad_norm": 6.720199925309723, "learning_rate": 2.3382207950465335e-07, "loss": 0.5294, "step": 23956 }, { "epoch": 1.7307782614192568, "grad_norm": 7.251420506716899, "learning_rate": 2.3369858070147356e-07, "loss": 0.602, "step": 23957 }, { "epoch": 1.7308505066194666, "grad_norm": 6.99302952986867, "learning_rate": 2.3357511292220054e-07, "loss": 0.5879, "step": 23958 }, { "epoch": 1.730922751819676, "grad_norm": 7.557091662763795, "learning_rate": 2.334516761685246e-07, "loss": 0.6112, "step": 23959 }, { "epoch": 1.7309949970198855, "grad_norm": 7.657120386590908, "learning_rate": 2.333282704421355e-07, "loss": 0.5936, "step": 23960 }, { "epoch": 1.731067242220095, "grad_norm": 6.113629432975963, "learning_rate": 2.332048957447222e-07, "loss": 0.609, "step": 23961 }, { "epoch": 1.7311394874203045, "grad_norm": 9.440799363769633, "learning_rate": 2.330815520779739e-07, "loss": 0.546, "step": 23962 }, { "epoch": 1.731211732620514, "grad_norm": 7.626893855803639, "learning_rate": 2.3295823944357893e-07, "loss": 0.6789, "step": 23963 }, { "epoch": 1.7312839778207234, "grad_norm": 7.284300856701847, "learning_rate": 2.328349578432254e-07, "loss": 0.5257, "step": 23964 }, { "epoch": 1.7313562230209332, "grad_norm": 8.074491823817189, "learning_rate": 2.3271170727860088e-07, "loss": 0.6025, "step": 23965 }, { "epoch": 1.7314284682211425, "grad_norm": 6.478383432948136, "learning_rate": 2.3258848775139285e-07, "loss": 0.5806, "step": 23966 }, { "epoch": 1.731500713421352, "grad_norm": 8.546371082465306, "learning_rate": 2.3246529926328808e-07, "loss": 0.615, "step": 23967 }, { "epoch": 1.7315729586215616, "grad_norm": 6.785318446265903, "learning_rate": 2.3234214181597242e-07, "loss": 0.5408, "step": 23968 }, { "epoch": 1.7316452038217711, "grad_norm": 7.793667487080131, "learning_rate": 2.322190154111323e-07, "loss": 0.5696, "step": 23969 }, { "epoch": 1.7317174490219807, "grad_norm": 6.217693603348205, "learning_rate": 2.3209592005045274e-07, "loss": 0.5994, "step": 23970 }, { "epoch": 1.73178969422219, "grad_norm": 6.764980787657349, "learning_rate": 2.3197285573561962e-07, "loss": 0.6238, "step": 23971 }, { "epoch": 1.7318619394223997, "grad_norm": 7.340921319832723, "learning_rate": 2.3184982246831688e-07, "loss": 0.6309, "step": 23972 }, { "epoch": 1.731934184622609, "grad_norm": 6.372695207026628, "learning_rate": 2.3172682025022876e-07, "loss": 0.6243, "step": 23973 }, { "epoch": 1.7320064298228186, "grad_norm": 6.619398231930215, "learning_rate": 2.3160384908303914e-07, "loss": 0.5679, "step": 23974 }, { "epoch": 1.7320786750230281, "grad_norm": 7.4835232626326, "learning_rate": 2.3148090896843169e-07, "loss": 0.6527, "step": 23975 }, { "epoch": 1.7321509202232377, "grad_norm": 7.126884077495069, "learning_rate": 2.3135799990808922e-07, "loss": 0.5846, "step": 23976 }, { "epoch": 1.7322231654234472, "grad_norm": 8.834962610620074, "learning_rate": 2.312351219036943e-07, "loss": 0.5999, "step": 23977 }, { "epoch": 1.7322954106236566, "grad_norm": 7.392288633697719, "learning_rate": 2.3111227495692944e-07, "loss": 0.6651, "step": 23978 }, { "epoch": 1.7323676558238663, "grad_norm": 7.05402831655237, "learning_rate": 2.3098945906947529e-07, "loss": 0.602, "step": 23979 }, { "epoch": 1.7324399010240756, "grad_norm": 5.99107428086302, "learning_rate": 2.3086667424301379e-07, "loss": 0.5533, "step": 23980 }, { "epoch": 1.7325121462242852, "grad_norm": 8.925803473714128, "learning_rate": 2.3074392047922582e-07, "loss": 0.6256, "step": 23981 }, { "epoch": 1.7325843914244947, "grad_norm": 8.131779116774133, "learning_rate": 2.306211977797912e-07, "loss": 0.6266, "step": 23982 }, { "epoch": 1.7326566366247043, "grad_norm": 6.290273381585882, "learning_rate": 2.3049850614639047e-07, "loss": 0.5913, "step": 23983 }, { "epoch": 1.7327288818249138, "grad_norm": 7.107001342240853, "learning_rate": 2.3037584558070286e-07, "loss": 0.6296, "step": 23984 }, { "epoch": 1.7328011270251231, "grad_norm": 7.716960745934689, "learning_rate": 2.3025321608440843e-07, "loss": 0.5788, "step": 23985 }, { "epoch": 1.7328733722253329, "grad_norm": 6.555271199986397, "learning_rate": 2.3013061765918444e-07, "loss": 0.5597, "step": 23986 }, { "epoch": 1.7329456174255422, "grad_norm": 8.802318995704963, "learning_rate": 2.3000805030671007e-07, "loss": 0.5875, "step": 23987 }, { "epoch": 1.7330178626257517, "grad_norm": 7.541622632426767, "learning_rate": 2.2988551402866294e-07, "loss": 0.6347, "step": 23988 }, { "epoch": 1.7330901078259613, "grad_norm": 7.417687192277031, "learning_rate": 2.297630088267208e-07, "loss": 0.5621, "step": 23989 }, { "epoch": 1.7331623530261708, "grad_norm": 7.586452415583073, "learning_rate": 2.2964053470256014e-07, "loss": 0.6022, "step": 23990 }, { "epoch": 1.7332345982263804, "grad_norm": 7.024651852054885, "learning_rate": 2.2951809165785737e-07, "loss": 0.6099, "step": 23991 }, { "epoch": 1.7333068434265897, "grad_norm": 7.431593914110757, "learning_rate": 2.2939567969428921e-07, "loss": 0.5389, "step": 23992 }, { "epoch": 1.7333790886267995, "grad_norm": 8.318839273181526, "learning_rate": 2.2927329881353127e-07, "loss": 0.6008, "step": 23993 }, { "epoch": 1.7334513338270088, "grad_norm": 7.02940052716021, "learning_rate": 2.291509490172586e-07, "loss": 0.6276, "step": 23994 }, { "epoch": 1.7335235790272185, "grad_norm": 6.666733201012796, "learning_rate": 2.290286303071465e-07, "loss": 0.6239, "step": 23995 }, { "epoch": 1.7335958242274279, "grad_norm": 8.233365020485055, "learning_rate": 2.2890634268486923e-07, "loss": 0.5775, "step": 23996 }, { "epoch": 1.7336680694276374, "grad_norm": 7.924454910353185, "learning_rate": 2.287840861521007e-07, "loss": 0.634, "step": 23997 }, { "epoch": 1.733740314627847, "grad_norm": 6.38599121429575, "learning_rate": 2.2866186071051427e-07, "loss": 0.5752, "step": 23998 }, { "epoch": 1.7338125598280563, "grad_norm": 6.762593706930761, "learning_rate": 2.2853966636178393e-07, "loss": 0.5595, "step": 23999 }, { "epoch": 1.733884805028266, "grad_norm": 6.975738715974454, "learning_rate": 2.2841750310758136e-07, "loss": 0.6589, "step": 24000 }, { "epoch": 1.7339570502284753, "grad_norm": 7.775556449971247, "learning_rate": 2.2829537094957943e-07, "loss": 0.6432, "step": 24001 }, { "epoch": 1.7340292954286851, "grad_norm": 7.066102046122792, "learning_rate": 2.2817326988944954e-07, "loss": 0.5708, "step": 24002 }, { "epoch": 1.7341015406288944, "grad_norm": 7.073401215875018, "learning_rate": 2.2805119992886455e-07, "loss": 0.6227, "step": 24003 }, { "epoch": 1.734173785829104, "grad_norm": 6.367892023905949, "learning_rate": 2.279291610694942e-07, "loss": 0.5435, "step": 24004 }, { "epoch": 1.7342460310293135, "grad_norm": 7.498463791238551, "learning_rate": 2.278071533130094e-07, "loss": 0.6075, "step": 24005 }, { "epoch": 1.7343182762295228, "grad_norm": 8.449911751370959, "learning_rate": 2.2768517666108075e-07, "loss": 0.5412, "step": 24006 }, { "epoch": 1.7343905214297326, "grad_norm": 7.1956753882601525, "learning_rate": 2.2756323111537748e-07, "loss": 0.6179, "step": 24007 }, { "epoch": 1.734462766629942, "grad_norm": 6.24099143246156, "learning_rate": 2.274413166775691e-07, "loss": 0.6345, "step": 24008 }, { "epoch": 1.7345350118301517, "grad_norm": 7.745680066487565, "learning_rate": 2.273194333493245e-07, "loss": 0.6256, "step": 24009 }, { "epoch": 1.734607257030361, "grad_norm": 7.845434729819216, "learning_rate": 2.2719758113231211e-07, "loss": 0.617, "step": 24010 }, { "epoch": 1.7346795022305705, "grad_norm": 7.153870032738399, "learning_rate": 2.270757600282003e-07, "loss": 0.5938, "step": 24011 }, { "epoch": 1.73475174743078, "grad_norm": 7.456883920100377, "learning_rate": 2.2695397003865666e-07, "loss": 0.6281, "step": 24012 }, { "epoch": 1.7348239926309896, "grad_norm": 7.8828091125178545, "learning_rate": 2.2683221116534787e-07, "loss": 0.6444, "step": 24013 }, { "epoch": 1.7348962378311992, "grad_norm": 7.272138797186621, "learning_rate": 2.267104834099418e-07, "loss": 0.6142, "step": 24014 }, { "epoch": 1.7349684830314085, "grad_norm": 8.336864587914725, "learning_rate": 2.2658878677410374e-07, "loss": 0.5971, "step": 24015 }, { "epoch": 1.7350407282316183, "grad_norm": 6.569682232794261, "learning_rate": 2.2646712125949987e-07, "loss": 0.5838, "step": 24016 }, { "epoch": 1.7351129734318276, "grad_norm": 7.397263108431478, "learning_rate": 2.263454868677964e-07, "loss": 0.5855, "step": 24017 }, { "epoch": 1.7351852186320371, "grad_norm": 7.033396753444059, "learning_rate": 2.2622388360065722e-07, "loss": 0.6487, "step": 24018 }, { "epoch": 1.7352574638322467, "grad_norm": 7.599245445539493, "learning_rate": 2.261023114597477e-07, "loss": 0.6076, "step": 24019 }, { "epoch": 1.7353297090324562, "grad_norm": 6.692824481198712, "learning_rate": 2.2598077044673206e-07, "loss": 0.5836, "step": 24020 }, { "epoch": 1.7354019542326657, "grad_norm": 6.761232990025573, "learning_rate": 2.2585926056327368e-07, "loss": 0.5584, "step": 24021 }, { "epoch": 1.735474199432875, "grad_norm": 9.038567452816906, "learning_rate": 2.2573778181103651e-07, "loss": 0.5663, "step": 24022 }, { "epoch": 1.7355464446330848, "grad_norm": 7.717572450585047, "learning_rate": 2.256163341916834e-07, "loss": 0.6217, "step": 24023 }, { "epoch": 1.7356186898332941, "grad_norm": 7.5030976574203745, "learning_rate": 2.2549491770687688e-07, "loss": 0.676, "step": 24024 }, { "epoch": 1.7356909350335037, "grad_norm": 7.752532053133562, "learning_rate": 2.253735323582787e-07, "loss": 0.6032, "step": 24025 }, { "epoch": 1.7357631802337132, "grad_norm": 7.224521460079445, "learning_rate": 2.2525217814755057e-07, "loss": 0.5615, "step": 24026 }, { "epoch": 1.7358354254339228, "grad_norm": 8.324818597140299, "learning_rate": 2.2513085507635397e-07, "loss": 0.6157, "step": 24027 }, { "epoch": 1.7359076706341323, "grad_norm": 6.89379865087944, "learning_rate": 2.2500956314635004e-07, "loss": 0.595, "step": 24028 }, { "epoch": 1.7359799158343416, "grad_norm": 7.236702925516408, "learning_rate": 2.2488830235919828e-07, "loss": 0.6554, "step": 24029 }, { "epoch": 1.7360521610345514, "grad_norm": 6.694280306684423, "learning_rate": 2.2476707271655908e-07, "loss": 0.5359, "step": 24030 }, { "epoch": 1.7361244062347607, "grad_norm": 7.282547078005014, "learning_rate": 2.2464587422009215e-07, "loss": 0.6147, "step": 24031 }, { "epoch": 1.7361966514349703, "grad_norm": 9.47621147845356, "learning_rate": 2.245247068714565e-07, "loss": 0.6238, "step": 24032 }, { "epoch": 1.7362688966351798, "grad_norm": 8.010259248917183, "learning_rate": 2.2440357067231104e-07, "loss": 0.6502, "step": 24033 }, { "epoch": 1.7363411418353893, "grad_norm": 7.682523031252977, "learning_rate": 2.2428246562431367e-07, "loss": 0.6032, "step": 24034 }, { "epoch": 1.7364133870355989, "grad_norm": 8.96531053046769, "learning_rate": 2.241613917291227e-07, "loss": 0.606, "step": 24035 }, { "epoch": 1.7364856322358082, "grad_norm": 5.449686359864982, "learning_rate": 2.2404034898839465e-07, "loss": 0.6137, "step": 24036 }, { "epoch": 1.736557877436018, "grad_norm": 6.203068115588516, "learning_rate": 2.2391933740378734e-07, "loss": 0.6031, "step": 24037 }, { "epoch": 1.7366301226362273, "grad_norm": 7.013384964804854, "learning_rate": 2.2379835697695695e-07, "loss": 0.6713, "step": 24038 }, { "epoch": 1.7367023678364368, "grad_norm": 8.147163093954372, "learning_rate": 2.2367740770955965e-07, "loss": 0.6364, "step": 24039 }, { "epoch": 1.7367746130366464, "grad_norm": 7.205878072968639, "learning_rate": 2.2355648960325104e-07, "loss": 0.5591, "step": 24040 }, { "epoch": 1.736846858236856, "grad_norm": 6.888935184665017, "learning_rate": 2.2343560265968678e-07, "loss": 0.6333, "step": 24041 }, { "epoch": 1.7369191034370655, "grad_norm": 6.839424333942628, "learning_rate": 2.233147468805219e-07, "loss": 0.627, "step": 24042 }, { "epoch": 1.7369913486372748, "grad_norm": 6.989727384520236, "learning_rate": 2.2319392226740983e-07, "loss": 0.6846, "step": 24043 }, { "epoch": 1.7370635938374845, "grad_norm": 7.025321122293122, "learning_rate": 2.2307312882200534e-07, "loss": 0.5714, "step": 24044 }, { "epoch": 1.7371358390376939, "grad_norm": 7.490768855197188, "learning_rate": 2.2295236654596152e-07, "loss": 0.6072, "step": 24045 }, { "epoch": 1.7372080842379034, "grad_norm": 6.918982458990792, "learning_rate": 2.2283163544093268e-07, "loss": 0.6291, "step": 24046 }, { "epoch": 1.737280329438113, "grad_norm": 7.542016655263463, "learning_rate": 2.2271093550856964e-07, "loss": 0.6684, "step": 24047 }, { "epoch": 1.7373525746383225, "grad_norm": 7.228471031762613, "learning_rate": 2.2259026675052614e-07, "loss": 0.6562, "step": 24048 }, { "epoch": 1.737424819838532, "grad_norm": 7.203422622986351, "learning_rate": 2.2246962916845332e-07, "loss": 0.5521, "step": 24049 }, { "epoch": 1.7374970650387414, "grad_norm": 8.238642417403241, "learning_rate": 2.2234902276400294e-07, "loss": 0.6509, "step": 24050 }, { "epoch": 1.7375693102389511, "grad_norm": 7.591539854071573, "learning_rate": 2.2222844753882617e-07, "loss": 0.6772, "step": 24051 }, { "epoch": 1.7376415554391604, "grad_norm": 8.115702312396476, "learning_rate": 2.2210790349457307e-07, "loss": 0.5699, "step": 24052 }, { "epoch": 1.73771380063937, "grad_norm": 7.048860595831279, "learning_rate": 2.219873906328948e-07, "loss": 0.6036, "step": 24053 }, { "epoch": 1.7377860458395795, "grad_norm": 7.0546737113133515, "learning_rate": 2.2186690895543982e-07, "loss": 0.6088, "step": 24054 }, { "epoch": 1.737858291039789, "grad_norm": 8.138014947914538, "learning_rate": 2.217464584638579e-07, "loss": 0.551, "step": 24055 }, { "epoch": 1.7379305362399986, "grad_norm": 7.058975285413466, "learning_rate": 2.2162603915979852e-07, "loss": 0.5947, "step": 24056 }, { "epoch": 1.738002781440208, "grad_norm": 6.869170335537824, "learning_rate": 2.215056510449093e-07, "loss": 0.5743, "step": 24057 }, { "epoch": 1.7380750266404177, "grad_norm": 8.120778557044797, "learning_rate": 2.213852941208386e-07, "loss": 0.6139, "step": 24058 }, { "epoch": 1.738147271840627, "grad_norm": 8.474011352848374, "learning_rate": 2.2126496838923346e-07, "loss": 0.6349, "step": 24059 }, { "epoch": 1.7382195170408365, "grad_norm": 8.040371120105306, "learning_rate": 2.2114467385174255e-07, "loss": 0.6636, "step": 24060 }, { "epoch": 1.738291762241046, "grad_norm": 7.364630181479647, "learning_rate": 2.2102441051001094e-07, "loss": 0.651, "step": 24061 }, { "epoch": 1.7383640074412556, "grad_norm": 6.773382870850925, "learning_rate": 2.2090417836568595e-07, "loss": 0.5823, "step": 24062 }, { "epoch": 1.7384362526414652, "grad_norm": 8.162687720671133, "learning_rate": 2.2078397742041347e-07, "loss": 0.5575, "step": 24063 }, { "epoch": 1.7385084978416745, "grad_norm": 7.942516513863729, "learning_rate": 2.20663807675838e-07, "loss": 0.5934, "step": 24064 }, { "epoch": 1.7385807430418843, "grad_norm": 8.803464328995405, "learning_rate": 2.2054366913360548e-07, "loss": 0.5687, "step": 24065 }, { "epoch": 1.7386529882420936, "grad_norm": 6.397347866334954, "learning_rate": 2.204235617953601e-07, "loss": 0.628, "step": 24066 }, { "epoch": 1.7387252334423033, "grad_norm": 8.2057338671407, "learning_rate": 2.2030348566274618e-07, "loss": 0.6891, "step": 24067 }, { "epoch": 1.7387974786425127, "grad_norm": 6.263195908307503, "learning_rate": 2.201834407374076e-07, "loss": 0.563, "step": 24068 }, { "epoch": 1.7388697238427222, "grad_norm": 6.0009494666037995, "learning_rate": 2.2006342702098753e-07, "loss": 0.521, "step": 24069 }, { "epoch": 1.7389419690429317, "grad_norm": 7.8698723463008164, "learning_rate": 2.1994344451512883e-07, "loss": 0.6292, "step": 24070 }, { "epoch": 1.739014214243141, "grad_norm": 8.060076682054921, "learning_rate": 2.1982349322147462e-07, "loss": 0.6051, "step": 24071 }, { "epoch": 1.7390864594433508, "grad_norm": 7.0970849083815635, "learning_rate": 2.1970357314166553e-07, "loss": 0.6226, "step": 24072 }, { "epoch": 1.7391587046435601, "grad_norm": 7.138710349588521, "learning_rate": 2.1958368427734438e-07, "loss": 0.647, "step": 24073 }, { "epoch": 1.73923094984377, "grad_norm": 6.976387252400699, "learning_rate": 2.1946382663015214e-07, "loss": 0.5747, "step": 24074 }, { "epoch": 1.7393031950439792, "grad_norm": 7.130160420500391, "learning_rate": 2.1934400020172914e-07, "loss": 0.5777, "step": 24075 }, { "epoch": 1.7393754402441888, "grad_norm": 7.404391413683588, "learning_rate": 2.19224204993716e-07, "loss": 0.6754, "step": 24076 }, { "epoch": 1.7394476854443983, "grad_norm": 7.882869845012551, "learning_rate": 2.1910444100775224e-07, "loss": 0.5663, "step": 24077 }, { "epoch": 1.7395199306446076, "grad_norm": 7.511098406603501, "learning_rate": 2.1898470824547795e-07, "loss": 0.6404, "step": 24078 }, { "epoch": 1.7395921758448174, "grad_norm": 8.39663452667268, "learning_rate": 2.1886500670853183e-07, "loss": 0.6786, "step": 24079 }, { "epoch": 1.7396644210450267, "grad_norm": 10.107340541641358, "learning_rate": 2.1874533639855284e-07, "loss": 0.5822, "step": 24080 }, { "epoch": 1.7397366662452365, "grad_norm": 6.72094662265954, "learning_rate": 2.1862569731717908e-07, "loss": 0.5758, "step": 24081 }, { "epoch": 1.7398089114454458, "grad_norm": 6.839144337098895, "learning_rate": 2.1850608946604763e-07, "loss": 0.6499, "step": 24082 }, { "epoch": 1.7398811566456553, "grad_norm": 7.222154747411373, "learning_rate": 2.1838651284679658e-07, "loss": 0.5866, "step": 24083 }, { "epoch": 1.7399534018458649, "grad_norm": 7.030892460527604, "learning_rate": 2.1826696746106242e-07, "loss": 0.5602, "step": 24084 }, { "epoch": 1.7400256470460744, "grad_norm": 6.505729497634121, "learning_rate": 2.1814745331048242e-07, "loss": 0.6019, "step": 24085 }, { "epoch": 1.740097892246284, "grad_norm": 6.986127458590759, "learning_rate": 2.180279703966917e-07, "loss": 0.6161, "step": 24086 }, { "epoch": 1.7401701374464933, "grad_norm": 7.9608919780653835, "learning_rate": 2.1790851872132563e-07, "loss": 0.5676, "step": 24087 }, { "epoch": 1.740242382646703, "grad_norm": 6.699934477983896, "learning_rate": 2.177890982860209e-07, "loss": 0.6165, "step": 24088 }, { "epoch": 1.7403146278469124, "grad_norm": 7.707690512857414, "learning_rate": 2.176697090924107e-07, "loss": 0.542, "step": 24089 }, { "epoch": 1.740386873047122, "grad_norm": 7.4853734124966484, "learning_rate": 2.1755035114213034e-07, "loss": 0.654, "step": 24090 }, { "epoch": 1.7404591182473315, "grad_norm": 6.780588425389883, "learning_rate": 2.1743102443681358e-07, "loss": 0.6145, "step": 24091 }, { "epoch": 1.740531363447541, "grad_norm": 6.961906322871452, "learning_rate": 2.1731172897809404e-07, "loss": 0.6113, "step": 24092 }, { "epoch": 1.7406036086477505, "grad_norm": 6.741207471501536, "learning_rate": 2.171924647676041e-07, "loss": 0.5443, "step": 24093 }, { "epoch": 1.7406758538479599, "grad_norm": 6.739308402238208, "learning_rate": 2.170732318069768e-07, "loss": 0.619, "step": 24094 }, { "epoch": 1.7407480990481696, "grad_norm": 10.966041860976079, "learning_rate": 2.1695403009784454e-07, "loss": 0.562, "step": 24095 }, { "epoch": 1.740820344248379, "grad_norm": 7.932837611292612, "learning_rate": 2.1683485964183875e-07, "loss": 0.579, "step": 24096 }, { "epoch": 1.7408925894485885, "grad_norm": 7.168815941079134, "learning_rate": 2.1671572044059119e-07, "loss": 0.5375, "step": 24097 }, { "epoch": 1.740964834648798, "grad_norm": 6.518339254984079, "learning_rate": 2.165966124957325e-07, "loss": 0.5439, "step": 24098 }, { "epoch": 1.7410370798490076, "grad_norm": 6.991631546508219, "learning_rate": 2.164775358088936e-07, "loss": 0.5931, "step": 24099 }, { "epoch": 1.7411093250492171, "grad_norm": 7.163886780492203, "learning_rate": 2.163584903817037e-07, "loss": 0.5768, "step": 24100 }, { "epoch": 1.7411815702494264, "grad_norm": 7.658428022472407, "learning_rate": 2.1623947621579293e-07, "loss": 0.6712, "step": 24101 }, { "epoch": 1.7412538154496362, "grad_norm": 6.290276717025875, "learning_rate": 2.1612049331279084e-07, "loss": 0.5527, "step": 24102 }, { "epoch": 1.7413260606498455, "grad_norm": 7.046855512029595, "learning_rate": 2.1600154167432609e-07, "loss": 0.5702, "step": 24103 }, { "epoch": 1.741398305850055, "grad_norm": 8.618173302491506, "learning_rate": 2.1588262130202625e-07, "loss": 0.6135, "step": 24104 }, { "epoch": 1.7414705510502646, "grad_norm": 6.674351681431811, "learning_rate": 2.1576373219751978e-07, "loss": 0.6353, "step": 24105 }, { "epoch": 1.7415427962504741, "grad_norm": 7.931184914805103, "learning_rate": 2.1564487436243453e-07, "loss": 0.567, "step": 24106 }, { "epoch": 1.7416150414506837, "grad_norm": 8.315367240021681, "learning_rate": 2.1552604779839698e-07, "loss": 0.6508, "step": 24107 }, { "epoch": 1.741687286650893, "grad_norm": 8.324872668326384, "learning_rate": 2.1540725250703414e-07, "loss": 0.6164, "step": 24108 }, { "epoch": 1.7417595318511028, "grad_norm": 7.77432911422391, "learning_rate": 2.1528848848997226e-07, "loss": 0.6089, "step": 24109 }, { "epoch": 1.741831777051312, "grad_norm": 7.892131518080628, "learning_rate": 2.1516975574883747e-07, "loss": 0.6213, "step": 24110 }, { "epoch": 1.7419040222515216, "grad_norm": 7.304175439522134, "learning_rate": 2.150510542852541e-07, "loss": 0.5873, "step": 24111 }, { "epoch": 1.7419762674517312, "grad_norm": 8.823983144126695, "learning_rate": 2.1493238410084771e-07, "loss": 0.6945, "step": 24112 }, { "epoch": 1.7420485126519407, "grad_norm": 7.298038626771686, "learning_rate": 2.1481374519724317e-07, "loss": 0.6064, "step": 24113 }, { "epoch": 1.7421207578521503, "grad_norm": 8.427150227800265, "learning_rate": 2.1469513757606364e-07, "loss": 0.6179, "step": 24114 }, { "epoch": 1.7421930030523596, "grad_norm": 6.854573648384197, "learning_rate": 2.1457656123893277e-07, "loss": 0.625, "step": 24115 }, { "epoch": 1.7422652482525693, "grad_norm": 7.180485495301984, "learning_rate": 2.1445801618747487e-07, "loss": 0.5947, "step": 24116 }, { "epoch": 1.7423374934527787, "grad_norm": 5.559014246162541, "learning_rate": 2.1433950242331246e-07, "loss": 0.5434, "step": 24117 }, { "epoch": 1.7424097386529882, "grad_norm": 6.1947112014117724, "learning_rate": 2.142210199480671e-07, "loss": 0.5767, "step": 24118 }, { "epoch": 1.7424819838531977, "grad_norm": 8.290222604521517, "learning_rate": 2.1410256876336106e-07, "loss": 0.6144, "step": 24119 }, { "epoch": 1.7425542290534073, "grad_norm": 7.740820586005502, "learning_rate": 2.139841488708161e-07, "loss": 0.636, "step": 24120 }, { "epoch": 1.7426264742536168, "grad_norm": 8.072109912226084, "learning_rate": 2.138657602720537e-07, "loss": 0.5902, "step": 24121 }, { "epoch": 1.7426987194538262, "grad_norm": 7.077481474264323, "learning_rate": 2.137474029686934e-07, "loss": 0.565, "step": 24122 }, { "epoch": 1.742770964654036, "grad_norm": 7.014192906537926, "learning_rate": 2.1362907696235614e-07, "loss": 0.6327, "step": 24123 }, { "epoch": 1.7428432098542452, "grad_norm": 8.416578789683443, "learning_rate": 2.1351078225466142e-07, "loss": 0.6082, "step": 24124 }, { "epoch": 1.7429154550544548, "grad_norm": 7.822851784373983, "learning_rate": 2.1339251884722883e-07, "loss": 0.6395, "step": 24125 }, { "epoch": 1.7429877002546643, "grad_norm": 6.9615011104776565, "learning_rate": 2.1327428674167704e-07, "loss": 0.5172, "step": 24126 }, { "epoch": 1.7430599454548739, "grad_norm": 7.516416511968751, "learning_rate": 2.1315608593962505e-07, "loss": 0.578, "step": 24127 }, { "epoch": 1.7431321906550834, "grad_norm": 6.842414621162648, "learning_rate": 2.1303791644269072e-07, "loss": 0.624, "step": 24128 }, { "epoch": 1.7432044358552927, "grad_norm": 6.397259912431808, "learning_rate": 2.1291977825249138e-07, "loss": 0.5998, "step": 24129 }, { "epoch": 1.7432766810555025, "grad_norm": 7.7649600935952625, "learning_rate": 2.1280167137064433e-07, "loss": 0.6783, "step": 24130 }, { "epoch": 1.7433489262557118, "grad_norm": 7.392427188436023, "learning_rate": 2.126835957987672e-07, "loss": 0.6504, "step": 24131 }, { "epoch": 1.7434211714559213, "grad_norm": 6.173705165257675, "learning_rate": 2.1256555153847503e-07, "loss": 0.6032, "step": 24132 }, { "epoch": 1.743493416656131, "grad_norm": 7.816546559444701, "learning_rate": 2.1244753859138434e-07, "loss": 0.6012, "step": 24133 }, { "epoch": 1.7435656618563404, "grad_norm": 7.67277816770373, "learning_rate": 2.1232955695911079e-07, "loss": 0.6533, "step": 24134 }, { "epoch": 1.74363790705655, "grad_norm": 7.405630608906724, "learning_rate": 2.122116066432692e-07, "loss": 0.7342, "step": 24135 }, { "epoch": 1.7437101522567593, "grad_norm": 7.518692627748949, "learning_rate": 2.120936876454746e-07, "loss": 0.5494, "step": 24136 }, { "epoch": 1.743782397456969, "grad_norm": 8.613009314017916, "learning_rate": 2.119757999673408e-07, "loss": 0.6224, "step": 24137 }, { "epoch": 1.7438546426571784, "grad_norm": 6.021959492081259, "learning_rate": 2.118579436104823e-07, "loss": 0.5758, "step": 24138 }, { "epoch": 1.743926887857388, "grad_norm": 7.091830762794276, "learning_rate": 2.117401185765114e-07, "loss": 0.5804, "step": 24139 }, { "epoch": 1.7439991330575975, "grad_norm": 8.199169433212809, "learning_rate": 2.1162232486704153e-07, "loss": 0.6541, "step": 24140 }, { "epoch": 1.744071378257807, "grad_norm": 7.105735031431587, "learning_rate": 2.1150456248368534e-07, "loss": 0.6058, "step": 24141 }, { "epoch": 1.7441436234580165, "grad_norm": 6.868856841237843, "learning_rate": 2.113868314280551e-07, "loss": 0.6477, "step": 24142 }, { "epoch": 1.7442158686582259, "grad_norm": 9.566387508275843, "learning_rate": 2.1126913170176178e-07, "loss": 0.5912, "step": 24143 }, { "epoch": 1.7442881138584356, "grad_norm": 6.788473334170226, "learning_rate": 2.1115146330641657e-07, "loss": 0.6095, "step": 24144 }, { "epoch": 1.744360359058645, "grad_norm": 7.117539900630031, "learning_rate": 2.1103382624363093e-07, "loss": 0.579, "step": 24145 }, { "epoch": 1.7444326042588547, "grad_norm": 8.365810989860242, "learning_rate": 2.1091622051501558e-07, "loss": 0.5796, "step": 24146 }, { "epoch": 1.744504849459064, "grad_norm": 7.213491927616952, "learning_rate": 2.107986461221792e-07, "loss": 0.5624, "step": 24147 }, { "epoch": 1.7445770946592736, "grad_norm": 7.140131893702851, "learning_rate": 2.1068110306673188e-07, "loss": 0.5777, "step": 24148 }, { "epoch": 1.7446493398594831, "grad_norm": 8.567390771486243, "learning_rate": 2.105635913502832e-07, "loss": 0.6359, "step": 24149 }, { "epoch": 1.7447215850596924, "grad_norm": 6.584100252058013, "learning_rate": 2.10446110974441e-07, "loss": 0.6399, "step": 24150 }, { "epoch": 1.7447938302599022, "grad_norm": 8.017821964547217, "learning_rate": 2.1032866194081375e-07, "loss": 0.6374, "step": 24151 }, { "epoch": 1.7448660754601115, "grad_norm": 7.997798139828813, "learning_rate": 2.102112442510093e-07, "loss": 0.5681, "step": 24152 }, { "epoch": 1.7449383206603213, "grad_norm": 8.211929333976911, "learning_rate": 2.10093857906635e-07, "loss": 0.5781, "step": 24153 }, { "epoch": 1.7450105658605306, "grad_norm": 7.15985443095124, "learning_rate": 2.0997650290929788e-07, "loss": 0.553, "step": 24154 }, { "epoch": 1.7450828110607401, "grad_norm": 8.618932829114806, "learning_rate": 2.0985917926060418e-07, "loss": 0.6243, "step": 24155 }, { "epoch": 1.7451550562609497, "grad_norm": 6.817477866158892, "learning_rate": 2.0974188696216064e-07, "loss": 0.6375, "step": 24156 }, { "epoch": 1.745227301461159, "grad_norm": 8.04595669441602, "learning_rate": 2.0962462601557208e-07, "loss": 0.6679, "step": 24157 }, { "epoch": 1.7452995466613688, "grad_norm": 6.889039841232015, "learning_rate": 2.0950739642244395e-07, "loss": 0.5909, "step": 24158 }, { "epoch": 1.745371791861578, "grad_norm": 7.048467688727331, "learning_rate": 2.09390198184381e-07, "loss": 0.6771, "step": 24159 }, { "epoch": 1.7454440370617879, "grad_norm": 6.628586572009674, "learning_rate": 2.0927303130298837e-07, "loss": 0.587, "step": 24160 }, { "epoch": 1.7455162822619972, "grad_norm": 8.083817090637492, "learning_rate": 2.0915589577986867e-07, "loss": 0.5614, "step": 24161 }, { "epoch": 1.7455885274622067, "grad_norm": 5.826736243826981, "learning_rate": 2.0903879161662615e-07, "loss": 0.5868, "step": 24162 }, { "epoch": 1.7456607726624163, "grad_norm": 7.046477921964726, "learning_rate": 2.0892171881486373e-07, "loss": 0.5947, "step": 24163 }, { "epoch": 1.7457330178626258, "grad_norm": 8.375533357404535, "learning_rate": 2.08804677376184e-07, "loss": 0.578, "step": 24164 }, { "epoch": 1.7458052630628353, "grad_norm": 8.093998849938917, "learning_rate": 2.0868766730218925e-07, "loss": 0.6194, "step": 24165 }, { "epoch": 1.7458775082630447, "grad_norm": 7.6843374306472825, "learning_rate": 2.0857068859448131e-07, "loss": 0.5782, "step": 24166 }, { "epoch": 1.7459497534632544, "grad_norm": 6.652968479822061, "learning_rate": 2.0845374125466166e-07, "loss": 0.508, "step": 24167 }, { "epoch": 1.7460219986634637, "grad_norm": 8.663447393488697, "learning_rate": 2.0833682528433096e-07, "loss": 0.6016, "step": 24168 }, { "epoch": 1.7460942438636733, "grad_norm": 7.532641523660515, "learning_rate": 2.082199406850896e-07, "loss": 0.6836, "step": 24169 }, { "epoch": 1.7461664890638828, "grad_norm": 7.194492024084754, "learning_rate": 2.0810308745853768e-07, "loss": 0.5921, "step": 24170 }, { "epoch": 1.7462387342640924, "grad_norm": 5.738945659608178, "learning_rate": 2.079862656062756e-07, "loss": 0.614, "step": 24171 }, { "epoch": 1.746310979464302, "grad_norm": 7.5965827085378494, "learning_rate": 2.0786947512990068e-07, "loss": 0.6616, "step": 24172 }, { "epoch": 1.7463832246645112, "grad_norm": 7.436984517161246, "learning_rate": 2.077527160310136e-07, "loss": 0.5962, "step": 24173 }, { "epoch": 1.746455469864721, "grad_norm": 7.4947376863410335, "learning_rate": 2.076359883112125e-07, "loss": 0.6665, "step": 24174 }, { "epoch": 1.7465277150649303, "grad_norm": 7.90687745510754, "learning_rate": 2.0751929197209447e-07, "loss": 0.6067, "step": 24175 }, { "epoch": 1.7465999602651399, "grad_norm": 6.22925818470693, "learning_rate": 2.074026270152571e-07, "loss": 0.553, "step": 24176 }, { "epoch": 1.7466722054653494, "grad_norm": 7.584462958518594, "learning_rate": 2.0728599344229773e-07, "loss": 0.5854, "step": 24177 }, { "epoch": 1.746744450665559, "grad_norm": 6.754706543280755, "learning_rate": 2.0716939125481367e-07, "loss": 0.5486, "step": 24178 }, { "epoch": 1.7468166958657685, "grad_norm": 6.869288065751315, "learning_rate": 2.070528204543995e-07, "loss": 0.6209, "step": 24179 }, { "epoch": 1.7468889410659778, "grad_norm": 7.001954214609399, "learning_rate": 2.06936281042652e-07, "loss": 0.5727, "step": 24180 }, { "epoch": 1.7469611862661876, "grad_norm": 7.259035728037694, "learning_rate": 2.0681977302116658e-07, "loss": 0.5746, "step": 24181 }, { "epoch": 1.747033431466397, "grad_norm": 6.780412050852492, "learning_rate": 2.0670329639153773e-07, "loss": 0.6886, "step": 24182 }, { "epoch": 1.7471056766666064, "grad_norm": 7.618285678048788, "learning_rate": 2.065868511553601e-07, "loss": 0.6579, "step": 24183 }, { "epoch": 1.747177921866816, "grad_norm": 8.53614884460012, "learning_rate": 2.0647043731422788e-07, "loss": 0.6284, "step": 24184 }, { "epoch": 1.7472501670670255, "grad_norm": 7.568094753438377, "learning_rate": 2.0635405486973487e-07, "loss": 0.6877, "step": 24185 }, { "epoch": 1.747322412267235, "grad_norm": 6.762045391265512, "learning_rate": 2.0623770382347363e-07, "loss": 0.5846, "step": 24186 }, { "epoch": 1.7473946574674444, "grad_norm": 7.085233844782676, "learning_rate": 2.061213841770371e-07, "loss": 0.6384, "step": 24187 }, { "epoch": 1.7474669026676541, "grad_norm": 7.972413421476552, "learning_rate": 2.060050959320184e-07, "loss": 0.6067, "step": 24188 }, { "epoch": 1.7475391478678635, "grad_norm": 6.512419353432405, "learning_rate": 2.0588883909000794e-07, "loss": 0.6544, "step": 24189 }, { "epoch": 1.747611393068073, "grad_norm": 7.134481178459371, "learning_rate": 2.0577261365259839e-07, "loss": 0.5602, "step": 24190 }, { "epoch": 1.7476836382682825, "grad_norm": 5.888991556932068, "learning_rate": 2.0565641962138034e-07, "loss": 0.7043, "step": 24191 }, { "epoch": 1.747755883468492, "grad_norm": 7.927651129767425, "learning_rate": 2.0554025699794423e-07, "loss": 0.6877, "step": 24192 }, { "epoch": 1.7478281286687016, "grad_norm": 7.685817627457671, "learning_rate": 2.0542412578388072e-07, "loss": 0.5615, "step": 24193 }, { "epoch": 1.747900373868911, "grad_norm": 7.201424288276948, "learning_rate": 2.053080259807791e-07, "loss": 0.5714, "step": 24194 }, { "epoch": 1.7479726190691207, "grad_norm": 7.252943559486701, "learning_rate": 2.0519195759022948e-07, "loss": 0.5879, "step": 24195 }, { "epoch": 1.74804486426933, "grad_norm": 10.321801631896568, "learning_rate": 2.0507592061381977e-07, "loss": 0.7136, "step": 24196 }, { "epoch": 1.7481171094695396, "grad_norm": 7.600655386924934, "learning_rate": 2.0495991505313868e-07, "loss": 0.6169, "step": 24197 }, { "epoch": 1.7481893546697491, "grad_norm": 7.763212938479391, "learning_rate": 2.048439409097744e-07, "loss": 0.6125, "step": 24198 }, { "epoch": 1.7482615998699587, "grad_norm": 7.008773210520077, "learning_rate": 2.0472799818531508e-07, "loss": 0.5901, "step": 24199 }, { "epoch": 1.7483338450701682, "grad_norm": 8.188485821495213, "learning_rate": 2.0461208688134615e-07, "loss": 0.6475, "step": 24200 }, { "epoch": 1.7484060902703775, "grad_norm": 7.399693415708814, "learning_rate": 2.0449620699945604e-07, "loss": 0.6199, "step": 24201 }, { "epoch": 1.7484783354705873, "grad_norm": 6.60044224731248, "learning_rate": 2.0438035854123072e-07, "loss": 0.5958, "step": 24202 }, { "epoch": 1.7485505806707966, "grad_norm": 8.007201291460872, "learning_rate": 2.0426454150825615e-07, "loss": 0.604, "step": 24203 }, { "epoch": 1.7486228258710061, "grad_norm": 8.140495665359351, "learning_rate": 2.0414875590211684e-07, "loss": 0.5755, "step": 24204 }, { "epoch": 1.7486950710712157, "grad_norm": 6.238307850111573, "learning_rate": 2.0403300172439882e-07, "loss": 0.5689, "step": 24205 }, { "epoch": 1.7487673162714252, "grad_norm": 6.54740492103178, "learning_rate": 2.0391727897668634e-07, "loss": 0.5395, "step": 24206 }, { "epoch": 1.7488395614716348, "grad_norm": 6.080303047056369, "learning_rate": 2.038015876605634e-07, "loss": 0.6005, "step": 24207 }, { "epoch": 1.748911806671844, "grad_norm": 7.745192236290821, "learning_rate": 2.0368592777761377e-07, "loss": 0.5737, "step": 24208 }, { "epoch": 1.7489840518720539, "grad_norm": 6.138906456800882, "learning_rate": 2.0357029932942084e-07, "loss": 0.5625, "step": 24209 }, { "epoch": 1.7490562970722632, "grad_norm": 7.151716770460962, "learning_rate": 2.034547023175673e-07, "loss": 0.5605, "step": 24210 }, { "epoch": 1.7491285422724727, "grad_norm": 6.0451960635514, "learning_rate": 2.0333913674363575e-07, "loss": 0.6035, "step": 24211 }, { "epoch": 1.7492007874726823, "grad_norm": 6.745183285767241, "learning_rate": 2.0322360260920826e-07, "loss": 0.5861, "step": 24212 }, { "epoch": 1.7492730326728918, "grad_norm": 7.447645688407097, "learning_rate": 2.0310809991586688e-07, "loss": 0.6147, "step": 24213 }, { "epoch": 1.7493452778731013, "grad_norm": 7.766130212203526, "learning_rate": 2.029926286651915e-07, "loss": 0.5978, "step": 24214 }, { "epoch": 1.7494175230733107, "grad_norm": 6.304256079376493, "learning_rate": 2.0287718885876355e-07, "loss": 0.5351, "step": 24215 }, { "epoch": 1.7494897682735204, "grad_norm": 7.687299524189842, "learning_rate": 2.0276178049816352e-07, "loss": 0.5822, "step": 24216 }, { "epoch": 1.7495620134737297, "grad_norm": 7.245504629859027, "learning_rate": 2.0264640358497123e-07, "loss": 0.5805, "step": 24217 }, { "epoch": 1.7496342586739395, "grad_norm": 6.00828711094671, "learning_rate": 2.0253105812076567e-07, "loss": 0.5754, "step": 24218 }, { "epoch": 1.7497065038741488, "grad_norm": 7.101956766764508, "learning_rate": 2.024157441071259e-07, "loss": 0.5633, "step": 24219 }, { "epoch": 1.7497787490743584, "grad_norm": 6.090493295418932, "learning_rate": 2.023004615456306e-07, "loss": 0.5863, "step": 24220 }, { "epoch": 1.749850994274568, "grad_norm": 8.23861834006894, "learning_rate": 2.0218521043785799e-07, "loss": 0.5865, "step": 24221 }, { "epoch": 1.7499232394747772, "grad_norm": 7.602730922936328, "learning_rate": 2.020699907853857e-07, "loss": 0.6596, "step": 24222 }, { "epoch": 1.749995484674987, "grad_norm": 7.575076404349878, "learning_rate": 2.0195480258979106e-07, "loss": 0.6248, "step": 24223 }, { "epoch": 1.7500677298751963, "grad_norm": 7.067072105772897, "learning_rate": 2.0183964585265115e-07, "loss": 0.6663, "step": 24224 }, { "epoch": 1.750139975075406, "grad_norm": 7.565978960331163, "learning_rate": 2.0172452057554191e-07, "loss": 0.6453, "step": 24225 }, { "epoch": 1.7502122202756154, "grad_norm": 8.451490318200264, "learning_rate": 2.0160942676003935e-07, "loss": 0.5912, "step": 24226 }, { "epoch": 1.750284465475825, "grad_norm": 8.760317577719428, "learning_rate": 2.0149436440771914e-07, "loss": 0.5726, "step": 24227 }, { "epoch": 1.7503567106760345, "grad_norm": 6.6515666980474775, "learning_rate": 2.0137933352015664e-07, "loss": 0.6274, "step": 24228 }, { "epoch": 1.7504289558762438, "grad_norm": 7.990377838405909, "learning_rate": 2.0126433409892565e-07, "loss": 0.5615, "step": 24229 }, { "epoch": 1.7505012010764536, "grad_norm": 5.468673618328198, "learning_rate": 2.0114936614560155e-07, "loss": 0.5739, "step": 24230 }, { "epoch": 1.750573446276663, "grad_norm": 5.952701102956756, "learning_rate": 2.0103442966175806e-07, "loss": 0.5771, "step": 24231 }, { "epoch": 1.7506456914768727, "grad_norm": 7.488048695120648, "learning_rate": 2.0091952464896787e-07, "loss": 0.614, "step": 24232 }, { "epoch": 1.750717936677082, "grad_norm": 8.57397449467013, "learning_rate": 2.008046511088041e-07, "loss": 0.6122, "step": 24233 }, { "epoch": 1.7507901818772915, "grad_norm": 7.66394254413642, "learning_rate": 2.006898090428394e-07, "loss": 0.5729, "step": 24234 }, { "epoch": 1.750862427077501, "grad_norm": 6.828126117348306, "learning_rate": 2.0057499845264644e-07, "loss": 0.6809, "step": 24235 }, { "epoch": 1.7509346722777106, "grad_norm": 6.3231660981184525, "learning_rate": 2.0046021933979614e-07, "loss": 0.5767, "step": 24236 }, { "epoch": 1.7510069174779201, "grad_norm": 7.4851693068361485, "learning_rate": 2.0034547170585977e-07, "loss": 0.5097, "step": 24237 }, { "epoch": 1.7510791626781295, "grad_norm": 6.72063501218754, "learning_rate": 2.0023075555240829e-07, "loss": 0.4878, "step": 24238 }, { "epoch": 1.7511514078783392, "grad_norm": 7.2527613149678745, "learning_rate": 2.001160708810121e-07, "loss": 0.6124, "step": 24239 }, { "epoch": 1.7512236530785485, "grad_norm": 6.4628386831170515, "learning_rate": 2.0000141769324106e-07, "loss": 0.6099, "step": 24240 }, { "epoch": 1.751295898278758, "grad_norm": 5.744845858399299, "learning_rate": 1.99886795990665e-07, "loss": 0.6332, "step": 24241 }, { "epoch": 1.7513681434789676, "grad_norm": 7.995388132184537, "learning_rate": 1.99772205774853e-07, "loss": 0.6384, "step": 24242 }, { "epoch": 1.7514403886791772, "grad_norm": 7.647422466003507, "learning_rate": 1.9965764704737322e-07, "loss": 0.6133, "step": 24243 }, { "epoch": 1.7515126338793867, "grad_norm": 8.028159173912766, "learning_rate": 1.9954311980979413e-07, "loss": 0.6186, "step": 24244 }, { "epoch": 1.751584879079596, "grad_norm": 7.346506453680866, "learning_rate": 1.994286240636839e-07, "loss": 0.6112, "step": 24245 }, { "epoch": 1.7516571242798058, "grad_norm": 7.363062362048288, "learning_rate": 1.9931415981060937e-07, "loss": 0.5846, "step": 24246 }, { "epoch": 1.7517293694800151, "grad_norm": 6.683272977764151, "learning_rate": 1.991997270521373e-07, "loss": 0.6126, "step": 24247 }, { "epoch": 1.7518016146802247, "grad_norm": 7.951931067522953, "learning_rate": 1.9908532578983453e-07, "loss": 0.6571, "step": 24248 }, { "epoch": 1.7518738598804342, "grad_norm": 6.412994853477013, "learning_rate": 1.9897095602526727e-07, "loss": 0.6205, "step": 24249 }, { "epoch": 1.7519461050806437, "grad_norm": 9.370969592627917, "learning_rate": 1.9885661776000098e-07, "loss": 0.6117, "step": 24250 }, { "epoch": 1.7520183502808533, "grad_norm": 7.82487593969896, "learning_rate": 1.9874231099560076e-07, "loss": 0.6285, "step": 24251 }, { "epoch": 1.7520905954810626, "grad_norm": 8.045236957822905, "learning_rate": 1.9862803573363148e-07, "loss": 0.6198, "step": 24252 }, { "epoch": 1.7521628406812724, "grad_norm": 6.935598576792648, "learning_rate": 1.98513791975658e-07, "loss": 0.5977, "step": 24253 }, { "epoch": 1.7522350858814817, "grad_norm": 7.2378920374816476, "learning_rate": 1.9839957972324325e-07, "loss": 0.6184, "step": 24254 }, { "epoch": 1.7523073310816912, "grad_norm": 6.9931978145786875, "learning_rate": 1.9828539897795123e-07, "loss": 0.627, "step": 24255 }, { "epoch": 1.7523795762819008, "grad_norm": 6.718454469790391, "learning_rate": 1.9817124974134516e-07, "loss": 0.5909, "step": 24256 }, { "epoch": 1.7524518214821103, "grad_norm": 6.164258366362931, "learning_rate": 1.9805713201498683e-07, "loss": 0.5352, "step": 24257 }, { "epoch": 1.7525240666823199, "grad_norm": 7.27730050435017, "learning_rate": 1.9794304580043943e-07, "loss": 0.5909, "step": 24258 }, { "epoch": 1.7525963118825292, "grad_norm": 8.283861432296668, "learning_rate": 1.9782899109926423e-07, "loss": 0.6143, "step": 24259 }, { "epoch": 1.752668557082739, "grad_norm": 6.729628729137384, "learning_rate": 1.9771496791302301e-07, "loss": 0.6267, "step": 24260 }, { "epoch": 1.7527408022829483, "grad_norm": 7.117404034164906, "learning_rate": 1.9760097624327595e-07, "loss": 0.5889, "step": 24261 }, { "epoch": 1.7528130474831578, "grad_norm": 8.000825362544392, "learning_rate": 1.9748701609158372e-07, "loss": 0.5686, "step": 24262 }, { "epoch": 1.7528852926833673, "grad_norm": 7.360361604100388, "learning_rate": 1.9737308745950672e-07, "loss": 0.5762, "step": 24263 }, { "epoch": 1.7529575378835769, "grad_norm": 6.449286180371292, "learning_rate": 1.9725919034860401e-07, "loss": 0.5843, "step": 24264 }, { "epoch": 1.7530297830837864, "grad_norm": 8.025589547531647, "learning_rate": 1.9714532476043518e-07, "loss": 0.5435, "step": 24265 }, { "epoch": 1.7531020282839958, "grad_norm": 7.531673182077862, "learning_rate": 1.970314906965587e-07, "loss": 0.5837, "step": 24266 }, { "epoch": 1.7531742734842055, "grad_norm": 7.819119266172682, "learning_rate": 1.96917688158533e-07, "loss": 0.5605, "step": 24267 }, { "epoch": 1.7532465186844148, "grad_norm": 7.59595623855078, "learning_rate": 1.968039171479158e-07, "loss": 0.5913, "step": 24268 }, { "epoch": 1.7533187638846244, "grad_norm": 7.712985090583824, "learning_rate": 1.9669017766626468e-07, "loss": 0.5834, "step": 24269 }, { "epoch": 1.753391009084834, "grad_norm": 7.071584614494896, "learning_rate": 1.9657646971513706e-07, "loss": 0.571, "step": 24270 }, { "epoch": 1.7534632542850435, "grad_norm": 6.831957267591855, "learning_rate": 1.9646279329608886e-07, "loss": 0.616, "step": 24271 }, { "epoch": 1.753535499485253, "grad_norm": 6.996539214077278, "learning_rate": 1.963491484106761e-07, "loss": 0.5444, "step": 24272 }, { "epoch": 1.7536077446854623, "grad_norm": 7.517080059088508, "learning_rate": 1.96235535060455e-07, "loss": 0.6566, "step": 24273 }, { "epoch": 1.753679989885672, "grad_norm": 8.135047217140025, "learning_rate": 1.9612195324698102e-07, "loss": 0.6021, "step": 24274 }, { "epoch": 1.7537522350858814, "grad_norm": 6.5913865613038585, "learning_rate": 1.9600840297180846e-07, "loss": 0.5568, "step": 24275 }, { "epoch": 1.753824480286091, "grad_norm": 7.365001819452396, "learning_rate": 1.9589488423649162e-07, "loss": 0.5943, "step": 24276 }, { "epoch": 1.7538967254863005, "grad_norm": 9.433437660458143, "learning_rate": 1.9578139704258454e-07, "loss": 0.644, "step": 24277 }, { "epoch": 1.75396897068651, "grad_norm": 6.777841581589743, "learning_rate": 1.956679413916418e-07, "loss": 0.5365, "step": 24278 }, { "epoch": 1.7540412158867196, "grad_norm": 8.936636076033798, "learning_rate": 1.9555451728521553e-07, "loss": 0.6076, "step": 24279 }, { "epoch": 1.754113461086929, "grad_norm": 7.316615584882355, "learning_rate": 1.9544112472485833e-07, "loss": 0.6527, "step": 24280 }, { "epoch": 1.7541857062871387, "grad_norm": 7.876789434667891, "learning_rate": 1.9532776371212342e-07, "loss": 0.6186, "step": 24281 }, { "epoch": 1.754257951487348, "grad_norm": 6.782464687706839, "learning_rate": 1.952144342485615e-07, "loss": 0.6457, "step": 24282 }, { "epoch": 1.7543301966875575, "grad_norm": 6.798139081896357, "learning_rate": 1.951011363357244e-07, "loss": 0.6207, "step": 24283 }, { "epoch": 1.754402441887767, "grad_norm": 8.232231657425794, "learning_rate": 1.9498786997516312e-07, "loss": 0.6712, "step": 24284 }, { "epoch": 1.7544746870879766, "grad_norm": 6.749136127717309, "learning_rate": 1.9487463516842803e-07, "loss": 0.5946, "step": 24285 }, { "epoch": 1.7545469322881861, "grad_norm": 8.302006343419155, "learning_rate": 1.9476143191706932e-07, "loss": 0.5501, "step": 24286 }, { "epoch": 1.7546191774883955, "grad_norm": 6.947827324776582, "learning_rate": 1.9464826022263684e-07, "loss": 0.6331, "step": 24287 }, { "epoch": 1.7546914226886052, "grad_norm": 6.38626509522233, "learning_rate": 1.945351200866802e-07, "loss": 0.6389, "step": 24288 }, { "epoch": 1.7547636678888145, "grad_norm": 6.931198970238776, "learning_rate": 1.944220115107473e-07, "loss": 0.5415, "step": 24289 }, { "epoch": 1.7548359130890243, "grad_norm": 7.038708609132597, "learning_rate": 1.9430893449638666e-07, "loss": 0.6084, "step": 24290 }, { "epoch": 1.7549081582892336, "grad_norm": 6.544494341732205, "learning_rate": 1.9419588904514675e-07, "loss": 0.5291, "step": 24291 }, { "epoch": 1.7549804034894432, "grad_norm": 6.340970337075693, "learning_rate": 1.9408287515857495e-07, "loss": 0.5537, "step": 24292 }, { "epoch": 1.7550526486896527, "grad_norm": 6.992788141974212, "learning_rate": 1.9396989283821776e-07, "loss": 0.6015, "step": 24293 }, { "epoch": 1.755124893889862, "grad_norm": 8.343846924179555, "learning_rate": 1.9385694208562234e-07, "loss": 0.6262, "step": 24294 }, { "epoch": 1.7551971390900718, "grad_norm": 6.846343084833969, "learning_rate": 1.9374402290233463e-07, "loss": 0.5897, "step": 24295 }, { "epoch": 1.7552693842902811, "grad_norm": 6.423406784886124, "learning_rate": 1.9363113528990063e-07, "loss": 0.5699, "step": 24296 }, { "epoch": 1.7553416294904909, "grad_norm": 6.526213679947807, "learning_rate": 1.935182792498655e-07, "loss": 0.6387, "step": 24297 }, { "epoch": 1.7554138746907002, "grad_norm": 6.240106610585641, "learning_rate": 1.9340545478377437e-07, "loss": 0.5534, "step": 24298 }, { "epoch": 1.7554861198909097, "grad_norm": 7.59326019591269, "learning_rate": 1.9329266189317215e-07, "loss": 0.5531, "step": 24299 }, { "epoch": 1.7555583650911193, "grad_norm": 7.449590776712403, "learning_rate": 1.9317990057960174e-07, "loss": 0.6579, "step": 24300 }, { "epoch": 1.7556306102913286, "grad_norm": 7.668370955845516, "learning_rate": 1.9306717084460747e-07, "loss": 0.7066, "step": 24301 }, { "epoch": 1.7557028554915384, "grad_norm": 8.815055747112835, "learning_rate": 1.9295447268973283e-07, "loss": 0.6472, "step": 24302 }, { "epoch": 1.7557751006917477, "grad_norm": 6.402304723130746, "learning_rate": 1.9284180611651964e-07, "loss": 0.546, "step": 24303 }, { "epoch": 1.7558473458919575, "grad_norm": 7.189746804056824, "learning_rate": 1.9272917112651085e-07, "loss": 0.6186, "step": 24304 }, { "epoch": 1.7559195910921668, "grad_norm": 7.349717592795498, "learning_rate": 1.9261656772124826e-07, "loss": 0.5557, "step": 24305 }, { "epoch": 1.7559918362923763, "grad_norm": 6.983487680894768, "learning_rate": 1.9250399590227315e-07, "loss": 0.6568, "step": 24306 }, { "epoch": 1.7560640814925859, "grad_norm": 8.13413933475971, "learning_rate": 1.9239145567112676e-07, "loss": 0.5796, "step": 24307 }, { "epoch": 1.7561363266927954, "grad_norm": 7.629646011480352, "learning_rate": 1.9227894702934958e-07, "loss": 0.6333, "step": 24308 }, { "epoch": 1.756208571893005, "grad_norm": 7.839453261248691, "learning_rate": 1.921664699784817e-07, "loss": 0.5738, "step": 24309 }, { "epoch": 1.7562808170932143, "grad_norm": 7.47067376423969, "learning_rate": 1.9205402452006334e-07, "loss": 0.6041, "step": 24310 }, { "epoch": 1.756353062293424, "grad_norm": 7.19245143010802, "learning_rate": 1.9194161065563323e-07, "loss": 0.529, "step": 24311 }, { "epoch": 1.7564253074936333, "grad_norm": 7.157701370119362, "learning_rate": 1.9182922838673012e-07, "loss": 0.5733, "step": 24312 }, { "epoch": 1.756497552693843, "grad_norm": 6.595816482776252, "learning_rate": 1.9171687771489284e-07, "loss": 0.6201, "step": 24313 }, { "epoch": 1.7565697978940524, "grad_norm": 6.3471934927474045, "learning_rate": 1.91604558641659e-07, "loss": 0.6128, "step": 24314 }, { "epoch": 1.756642043094262, "grad_norm": 7.914200596108204, "learning_rate": 1.9149227116856655e-07, "loss": 0.6011, "step": 24315 }, { "epoch": 1.7567142882944715, "grad_norm": 8.498360419620795, "learning_rate": 1.9138001529715262e-07, "loss": 0.6097, "step": 24316 }, { "epoch": 1.7567865334946808, "grad_norm": 7.6652179123173365, "learning_rate": 1.91267791028954e-07, "loss": 0.6874, "step": 24317 }, { "epoch": 1.7568587786948906, "grad_norm": 8.103898097068829, "learning_rate": 1.9115559836550612e-07, "loss": 0.5738, "step": 24318 }, { "epoch": 1.7569310238951, "grad_norm": 7.38649849081647, "learning_rate": 1.9104343730834585e-07, "loss": 0.5954, "step": 24319 }, { "epoch": 1.7570032690953095, "grad_norm": 6.503576615145123, "learning_rate": 1.9093130785900833e-07, "loss": 0.6431, "step": 24320 }, { "epoch": 1.757075514295519, "grad_norm": 7.532204975031459, "learning_rate": 1.908192100190276e-07, "loss": 0.5262, "step": 24321 }, { "epoch": 1.7571477594957285, "grad_norm": 6.609816516170581, "learning_rate": 1.9070714378993938e-07, "loss": 0.6693, "step": 24322 }, { "epoch": 1.757220004695938, "grad_norm": 9.237738240516071, "learning_rate": 1.9059510917327718e-07, "loss": 0.6914, "step": 24323 }, { "epoch": 1.7572922498961474, "grad_norm": 8.806344554168788, "learning_rate": 1.9048310617057474e-07, "loss": 0.6974, "step": 24324 }, { "epoch": 1.7573644950963572, "grad_norm": 7.69299740120147, "learning_rate": 1.9037113478336533e-07, "loss": 0.5648, "step": 24325 }, { "epoch": 1.7574367402965665, "grad_norm": 6.601248721278102, "learning_rate": 1.9025919501318185e-07, "loss": 0.561, "step": 24326 }, { "epoch": 1.757508985496776, "grad_norm": 7.7016508537327715, "learning_rate": 1.90147286861557e-07, "loss": 0.6315, "step": 24327 }, { "epoch": 1.7575812306969856, "grad_norm": 6.744976010742962, "learning_rate": 1.9003541033002172e-07, "loss": 0.5586, "step": 24328 }, { "epoch": 1.7576534758971951, "grad_norm": 6.634062991754333, "learning_rate": 1.8992356542010816e-07, "loss": 0.5808, "step": 24329 }, { "epoch": 1.7577257210974047, "grad_norm": 8.560331863672747, "learning_rate": 1.8981175213334758e-07, "loss": 0.6426, "step": 24330 }, { "epoch": 1.757797966297614, "grad_norm": 7.536234239165364, "learning_rate": 1.8969997047127043e-07, "loss": 0.5825, "step": 24331 }, { "epoch": 1.7578702114978237, "grad_norm": 6.727007666199978, "learning_rate": 1.895882204354066e-07, "loss": 0.5973, "step": 24332 }, { "epoch": 1.757942456698033, "grad_norm": 8.815171290057096, "learning_rate": 1.8947650202728623e-07, "loss": 0.5912, "step": 24333 }, { "epoch": 1.7580147018982426, "grad_norm": 7.224747185425283, "learning_rate": 1.8936481524843814e-07, "loss": 0.6401, "step": 24334 }, { "epoch": 1.7580869470984521, "grad_norm": 6.861638349626454, "learning_rate": 1.8925316010039218e-07, "loss": 0.5646, "step": 24335 }, { "epoch": 1.7581591922986617, "grad_norm": 7.466539127532163, "learning_rate": 1.8914153658467606e-07, "loss": 0.6089, "step": 24336 }, { "epoch": 1.7582314374988712, "grad_norm": 8.073094703597977, "learning_rate": 1.8902994470281794e-07, "loss": 0.6025, "step": 24337 }, { "epoch": 1.7583036826990806, "grad_norm": 7.8832734259600095, "learning_rate": 1.8891838445634608e-07, "loss": 0.5619, "step": 24338 }, { "epoch": 1.7583759278992903, "grad_norm": 8.587151230749672, "learning_rate": 1.8880685584678676e-07, "loss": 0.7014, "step": 24339 }, { "epoch": 1.7584481730994996, "grad_norm": 6.491732401223251, "learning_rate": 1.886953588756668e-07, "loss": 0.5781, "step": 24340 }, { "epoch": 1.7585204182997092, "grad_norm": 7.221406258029377, "learning_rate": 1.8858389354451306e-07, "loss": 0.5637, "step": 24341 }, { "epoch": 1.7585926634999187, "grad_norm": 8.307574426189614, "learning_rate": 1.8847245985485068e-07, "loss": 0.5855, "step": 24342 }, { "epoch": 1.7586649087001283, "grad_norm": 7.396057068212089, "learning_rate": 1.8836105780820596e-07, "loss": 0.5368, "step": 24343 }, { "epoch": 1.7587371539003378, "grad_norm": 7.05573509101359, "learning_rate": 1.882496874061032e-07, "loss": 0.5779, "step": 24344 }, { "epoch": 1.7588093991005471, "grad_norm": 7.944556996482644, "learning_rate": 1.881383486500679e-07, "loss": 0.5716, "step": 24345 }, { "epoch": 1.7588816443007569, "grad_norm": 6.795063908762737, "learning_rate": 1.88027041541623e-07, "loss": 0.6712, "step": 24346 }, { "epoch": 1.7589538895009662, "grad_norm": 6.530304324666187, "learning_rate": 1.879157660822928e-07, "loss": 0.5822, "step": 24347 }, { "epoch": 1.7590261347011757, "grad_norm": 6.734382930317256, "learning_rate": 1.878045222736008e-07, "loss": 0.5889, "step": 24348 }, { "epoch": 1.7590983799013853, "grad_norm": 6.747060912955712, "learning_rate": 1.876933101170697e-07, "loss": 0.5902, "step": 24349 }, { "epoch": 1.7591706251015948, "grad_norm": 7.639776701534961, "learning_rate": 1.8758212961422135e-07, "loss": 0.6595, "step": 24350 }, { "epoch": 1.7592428703018044, "grad_norm": 7.6108439631422495, "learning_rate": 1.8747098076657837e-07, "loss": 0.5534, "step": 24351 }, { "epoch": 1.7593151155020137, "grad_norm": 8.53016605407868, "learning_rate": 1.8735986357566184e-07, "loss": 0.6059, "step": 24352 }, { "epoch": 1.7593873607022235, "grad_norm": 8.852254168833433, "learning_rate": 1.8724877804299325e-07, "loss": 0.6279, "step": 24353 }, { "epoch": 1.7594596059024328, "grad_norm": 6.765170533595795, "learning_rate": 1.8713772417009334e-07, "loss": 0.6289, "step": 24354 }, { "epoch": 1.7595318511026423, "grad_norm": 7.293110237731037, "learning_rate": 1.8702670195848205e-07, "loss": 0.6175, "step": 24355 }, { "epoch": 1.7596040963028519, "grad_norm": 7.900277396052009, "learning_rate": 1.8691571140967952e-07, "loss": 0.7258, "step": 24356 }, { "epoch": 1.7596763415030614, "grad_norm": 7.404331393351351, "learning_rate": 1.868047525252048e-07, "loss": 0.6417, "step": 24357 }, { "epoch": 1.759748586703271, "grad_norm": 8.166039682830503, "learning_rate": 1.866938253065767e-07, "loss": 0.5984, "step": 24358 }, { "epoch": 1.7598208319034803, "grad_norm": 7.088394899738493, "learning_rate": 1.86582929755314e-07, "loss": 0.622, "step": 24359 }, { "epoch": 1.75989307710369, "grad_norm": 5.871122338586417, "learning_rate": 1.8647206587293522e-07, "loss": 0.6005, "step": 24360 }, { "epoch": 1.7599653223038993, "grad_norm": 8.052937359117061, "learning_rate": 1.8636123366095715e-07, "loss": 0.6072, "step": 24361 }, { "epoch": 1.760037567504109, "grad_norm": 6.3893617906483335, "learning_rate": 1.8625043312089696e-07, "loss": 0.5473, "step": 24362 }, { "epoch": 1.7601098127043184, "grad_norm": 6.383411617479624, "learning_rate": 1.861396642542726e-07, "loss": 0.6254, "step": 24363 }, { "epoch": 1.760182057904528, "grad_norm": 7.272621379428165, "learning_rate": 1.8602892706259923e-07, "loss": 0.553, "step": 24364 }, { "epoch": 1.7602543031047375, "grad_norm": 8.66328051024661, "learning_rate": 1.8591822154739313e-07, "loss": 0.6579, "step": 24365 }, { "epoch": 1.7603265483049468, "grad_norm": 8.488142381946206, "learning_rate": 1.8580754771016978e-07, "loss": 0.6685, "step": 24366 }, { "epoch": 1.7603987935051566, "grad_norm": 8.92583082590171, "learning_rate": 1.8569690555244492e-07, "loss": 0.6565, "step": 24367 }, { "epoch": 1.760471038705366, "grad_norm": 6.929753282787399, "learning_rate": 1.8558629507573172e-07, "loss": 0.5935, "step": 24368 }, { "epoch": 1.7605432839055757, "grad_norm": 7.7645427479325955, "learning_rate": 1.8547571628154514e-07, "loss": 0.5558, "step": 24369 }, { "epoch": 1.760615529105785, "grad_norm": 7.454245045311875, "learning_rate": 1.8536516917139923e-07, "loss": 0.6132, "step": 24370 }, { "epoch": 1.7606877743059945, "grad_norm": 6.926406926757846, "learning_rate": 1.8525465374680667e-07, "loss": 0.5872, "step": 24371 }, { "epoch": 1.760760019506204, "grad_norm": 7.778474488918496, "learning_rate": 1.851441700092807e-07, "loss": 0.6242, "step": 24372 }, { "epoch": 1.7608322647064134, "grad_norm": 7.325039797730117, "learning_rate": 1.850337179603337e-07, "loss": 0.5722, "step": 24373 }, { "epoch": 1.7609045099066232, "grad_norm": 7.238729991065071, "learning_rate": 1.8492329760147782e-07, "loss": 0.5823, "step": 24374 }, { "epoch": 1.7609767551068325, "grad_norm": 6.979203257298748, "learning_rate": 1.8481290893422433e-07, "loss": 0.6154, "step": 24375 }, { "epoch": 1.7610490003070423, "grad_norm": 7.768442176373346, "learning_rate": 1.8470255196008452e-07, "loss": 0.63, "step": 24376 }, { "epoch": 1.7611212455072516, "grad_norm": 7.3618465735944625, "learning_rate": 1.8459222668056915e-07, "loss": 0.5395, "step": 24377 }, { "epoch": 1.7611934907074611, "grad_norm": 7.202918192445375, "learning_rate": 1.8448193309718837e-07, "loss": 0.5659, "step": 24378 }, { "epoch": 1.7612657359076707, "grad_norm": 6.7087785403286215, "learning_rate": 1.8437167121145183e-07, "loss": 0.6065, "step": 24379 }, { "epoch": 1.76133798110788, "grad_norm": 7.646270850307342, "learning_rate": 1.8426144102486915e-07, "loss": 0.6215, "step": 24380 }, { "epoch": 1.7614102263080897, "grad_norm": 6.314855457140731, "learning_rate": 1.841512425389494e-07, "loss": 0.544, "step": 24381 }, { "epoch": 1.761482471508299, "grad_norm": 6.474050810182313, "learning_rate": 1.840410757552008e-07, "loss": 0.5489, "step": 24382 }, { "epoch": 1.7615547167085088, "grad_norm": 7.457879874937911, "learning_rate": 1.839309406751319e-07, "loss": 0.6008, "step": 24383 }, { "epoch": 1.7616269619087181, "grad_norm": 7.180850460933242, "learning_rate": 1.838208373002498e-07, "loss": 0.5526, "step": 24384 }, { "epoch": 1.7616992071089277, "grad_norm": 6.2944064543294855, "learning_rate": 1.8371076563206275e-07, "loss": 0.5175, "step": 24385 }, { "epoch": 1.7617714523091372, "grad_norm": 6.596127049968502, "learning_rate": 1.836007256720762e-07, "loss": 0.5847, "step": 24386 }, { "epoch": 1.7618436975093468, "grad_norm": 8.229852292113145, "learning_rate": 1.834907174217973e-07, "loss": 0.5694, "step": 24387 }, { "epoch": 1.7619159427095563, "grad_norm": 6.309713825199064, "learning_rate": 1.8338074088273205e-07, "loss": 0.583, "step": 24388 }, { "epoch": 1.7619881879097656, "grad_norm": 7.953595999989859, "learning_rate": 1.8327079605638563e-07, "loss": 0.6426, "step": 24389 }, { "epoch": 1.7620604331099754, "grad_norm": 7.470167209127532, "learning_rate": 1.8316088294426243e-07, "loss": 0.6097, "step": 24390 }, { "epoch": 1.7621326783101847, "grad_norm": 6.806604756899756, "learning_rate": 1.8305100154786842e-07, "loss": 0.638, "step": 24391 }, { "epoch": 1.7622049235103943, "grad_norm": 8.27240600495647, "learning_rate": 1.829411518687077e-07, "loss": 0.6068, "step": 24392 }, { "epoch": 1.7622771687106038, "grad_norm": 7.381187042045351, "learning_rate": 1.8283133390828295e-07, "loss": 0.6771, "step": 24393 }, { "epoch": 1.7623494139108133, "grad_norm": 8.03082488007269, "learning_rate": 1.8272154766809825e-07, "loss": 0.6262, "step": 24394 }, { "epoch": 1.7624216591110229, "grad_norm": 7.457588059112934, "learning_rate": 1.8261179314965655e-07, "loss": 0.5077, "step": 24395 }, { "epoch": 1.7624939043112322, "grad_norm": 6.611060682829253, "learning_rate": 1.8250207035445972e-07, "loss": 0.5721, "step": 24396 }, { "epoch": 1.762566149511442, "grad_norm": 7.570113456006879, "learning_rate": 1.8239237928401016e-07, "loss": 0.5844, "step": 24397 }, { "epoch": 1.7626383947116513, "grad_norm": 6.854071093726803, "learning_rate": 1.8228271993980916e-07, "loss": 0.6249, "step": 24398 }, { "epoch": 1.7627106399118608, "grad_norm": 7.199168411550816, "learning_rate": 1.8217309232335834e-07, "loss": 0.5602, "step": 24399 }, { "epoch": 1.7627828851120704, "grad_norm": 7.303313133157257, "learning_rate": 1.820634964361584e-07, "loss": 0.6207, "step": 24400 }, { "epoch": 1.76285513031228, "grad_norm": 6.282652740989843, "learning_rate": 1.8195393227970927e-07, "loss": 0.595, "step": 24401 }, { "epoch": 1.7629273755124895, "grad_norm": 6.166639826260828, "learning_rate": 1.818443998555114e-07, "loss": 0.5772, "step": 24402 }, { "epoch": 1.7629996207126988, "grad_norm": 6.557955957951803, "learning_rate": 1.817348991650633e-07, "loss": 0.5953, "step": 24403 }, { "epoch": 1.7630718659129085, "grad_norm": 7.081408549568259, "learning_rate": 1.816254302098644e-07, "loss": 0.6441, "step": 24404 }, { "epoch": 1.7631441111131179, "grad_norm": 7.3144027973068315, "learning_rate": 1.8151599299141315e-07, "loss": 0.5653, "step": 24405 }, { "epoch": 1.7632163563133274, "grad_norm": 6.743008984571771, "learning_rate": 1.8140658751120838e-07, "loss": 0.6074, "step": 24406 }, { "epoch": 1.763288601513537, "grad_norm": 7.476545662696559, "learning_rate": 1.8129721377074666e-07, "loss": 0.6403, "step": 24407 }, { "epoch": 1.7633608467137465, "grad_norm": 7.345815295108741, "learning_rate": 1.8118787177152568e-07, "loss": 0.5993, "step": 24408 }, { "epoch": 1.763433091913956, "grad_norm": 8.165618777354833, "learning_rate": 1.8107856151504262e-07, "loss": 0.5896, "step": 24409 }, { "epoch": 1.7635053371141653, "grad_norm": 7.17392623256833, "learning_rate": 1.8096928300279315e-07, "loss": 0.6423, "step": 24410 }, { "epoch": 1.7635775823143751, "grad_norm": 7.167113208905492, "learning_rate": 1.8086003623627364e-07, "loss": 0.5774, "step": 24411 }, { "epoch": 1.7636498275145844, "grad_norm": 6.80608464820453, "learning_rate": 1.8075082121697952e-07, "loss": 0.5503, "step": 24412 }, { "epoch": 1.763722072714794, "grad_norm": 8.818482444808433, "learning_rate": 1.8064163794640655e-07, "loss": 0.59, "step": 24413 }, { "epoch": 1.7637943179150035, "grad_norm": 9.113646250191676, "learning_rate": 1.8053248642604797e-07, "loss": 0.5409, "step": 24414 }, { "epoch": 1.763866563115213, "grad_norm": 7.192092357191003, "learning_rate": 1.80423366657399e-07, "loss": 0.6316, "step": 24415 }, { "epoch": 1.7639388083154226, "grad_norm": 7.69908918468163, "learning_rate": 1.803142786419529e-07, "loss": 0.6624, "step": 24416 }, { "epoch": 1.764011053515632, "grad_norm": 7.294619356595462, "learning_rate": 1.8020522238120369e-07, "loss": 0.6457, "step": 24417 }, { "epoch": 1.7640832987158417, "grad_norm": 7.544277968494929, "learning_rate": 1.800961978766433e-07, "loss": 0.6134, "step": 24418 }, { "epoch": 1.764155543916051, "grad_norm": 7.0007506376792765, "learning_rate": 1.7998720512976408e-07, "loss": 0.5812, "step": 24419 }, { "epoch": 1.7642277891162605, "grad_norm": 9.95317252490616, "learning_rate": 1.798782441420599e-07, "loss": 0.5439, "step": 24420 }, { "epoch": 1.76430003431647, "grad_norm": 7.216059195637284, "learning_rate": 1.7976931491502037e-07, "loss": 0.5984, "step": 24421 }, { "epoch": 1.7643722795166796, "grad_norm": 7.955608467314431, "learning_rate": 1.7966041745013762e-07, "loss": 0.6191, "step": 24422 }, { "epoch": 1.7644445247168892, "grad_norm": 7.508336837829833, "learning_rate": 1.7955155174890188e-07, "loss": 0.6001, "step": 24423 }, { "epoch": 1.7645167699170985, "grad_norm": 6.766743838366594, "learning_rate": 1.7944271781280414e-07, "loss": 0.5843, "step": 24424 }, { "epoch": 1.7645890151173083, "grad_norm": 7.192642097435189, "learning_rate": 1.7933391564333353e-07, "loss": 0.5862, "step": 24425 }, { "epoch": 1.7646612603175176, "grad_norm": 7.121980261432791, "learning_rate": 1.7922514524197965e-07, "loss": 0.6707, "step": 24426 }, { "epoch": 1.7647335055177271, "grad_norm": 7.173957605437139, "learning_rate": 1.7911640661023162e-07, "loss": 0.6352, "step": 24427 }, { "epoch": 1.7648057507179367, "grad_norm": 7.7609043318007265, "learning_rate": 1.7900769974957765e-07, "loss": 0.5678, "step": 24428 }, { "epoch": 1.7648779959181462, "grad_norm": 7.468812056407334, "learning_rate": 1.788990246615066e-07, "loss": 0.6093, "step": 24429 }, { "epoch": 1.7649502411183557, "grad_norm": 7.022159379498177, "learning_rate": 1.787903813475053e-07, "loss": 0.6176, "step": 24430 }, { "epoch": 1.765022486318565, "grad_norm": 6.4344679719674875, "learning_rate": 1.7868176980906204e-07, "loss": 0.5946, "step": 24431 }, { "epoch": 1.7650947315187748, "grad_norm": 7.670340390367403, "learning_rate": 1.7857319004766254e-07, "loss": 0.577, "step": 24432 }, { "epoch": 1.7651669767189841, "grad_norm": 6.414912326656648, "learning_rate": 1.7846464206479342e-07, "loss": 0.5675, "step": 24433 }, { "epoch": 1.7652392219191937, "grad_norm": 7.910211226660804, "learning_rate": 1.7835612586194122e-07, "loss": 0.6642, "step": 24434 }, { "epoch": 1.7653114671194032, "grad_norm": 7.436065278996419, "learning_rate": 1.7824764144059064e-07, "loss": 0.609, "step": 24435 }, { "epoch": 1.7653837123196128, "grad_norm": 7.101059966147467, "learning_rate": 1.7813918880222713e-07, "loss": 0.6975, "step": 24436 }, { "epoch": 1.7654559575198223, "grad_norm": 7.574476610489437, "learning_rate": 1.7803076794833535e-07, "loss": 0.6234, "step": 24437 }, { "epoch": 1.7655282027200316, "grad_norm": 7.350052876184963, "learning_rate": 1.7792237888039966e-07, "loss": 0.643, "step": 24438 }, { "epoch": 1.7656004479202414, "grad_norm": 7.221952710461931, "learning_rate": 1.778140215999033e-07, "loss": 0.6388, "step": 24439 }, { "epoch": 1.7656726931204507, "grad_norm": 7.434081758646341, "learning_rate": 1.777056961083301e-07, "loss": 0.6311, "step": 24440 }, { "epoch": 1.7657449383206605, "grad_norm": 8.311489416659459, "learning_rate": 1.7759740240716278e-07, "loss": 0.6258, "step": 24441 }, { "epoch": 1.7658171835208698, "grad_norm": 8.408416582293405, "learning_rate": 1.7748914049788402e-07, "loss": 0.6368, "step": 24442 }, { "epoch": 1.7658894287210793, "grad_norm": 6.144033325740714, "learning_rate": 1.7738091038197542e-07, "loss": 0.6049, "step": 24443 }, { "epoch": 1.7659616739212889, "grad_norm": 6.527256671135195, "learning_rate": 1.7727271206091857e-07, "loss": 0.5859, "step": 24444 }, { "epoch": 1.7660339191214982, "grad_norm": 6.807709575765461, "learning_rate": 1.7716454553619534e-07, "loss": 0.6447, "step": 24445 }, { "epoch": 1.766106164321708, "grad_norm": 6.9856354602285915, "learning_rate": 1.7705641080928564e-07, "loss": 0.6412, "step": 24446 }, { "epoch": 1.7661784095219173, "grad_norm": 7.024156306637002, "learning_rate": 1.7694830788166945e-07, "loss": 0.575, "step": 24447 }, { "epoch": 1.766250654722127, "grad_norm": 6.70651109796873, "learning_rate": 1.7684023675482748e-07, "loss": 0.6008, "step": 24448 }, { "epoch": 1.7663228999223364, "grad_norm": 6.809399660511363, "learning_rate": 1.767321974302394e-07, "loss": 0.6474, "step": 24449 }, { "epoch": 1.766395145122546, "grad_norm": 7.426063271373953, "learning_rate": 1.7662418990938347e-07, "loss": 0.6351, "step": 24450 }, { "epoch": 1.7664673903227555, "grad_norm": 7.4345109847765825, "learning_rate": 1.7651621419373793e-07, "loss": 0.5565, "step": 24451 }, { "epoch": 1.7665396355229648, "grad_norm": 6.940297155696937, "learning_rate": 1.764082702847822e-07, "loss": 0.5079, "step": 24452 }, { "epoch": 1.7666118807231745, "grad_norm": 7.610716652529372, "learning_rate": 1.7630035818399227e-07, "loss": 0.4867, "step": 24453 }, { "epoch": 1.7666841259233839, "grad_norm": 7.038590460899791, "learning_rate": 1.7619247789284645e-07, "loss": 0.5928, "step": 24454 }, { "epoch": 1.7667563711235936, "grad_norm": 8.392769253627131, "learning_rate": 1.760846294128213e-07, "loss": 0.6262, "step": 24455 }, { "epoch": 1.766828616323803, "grad_norm": 8.038385331203658, "learning_rate": 1.7597681274539314e-07, "loss": 0.5871, "step": 24456 }, { "epoch": 1.7669008615240125, "grad_norm": 8.269426535266394, "learning_rate": 1.7586902789203776e-07, "loss": 0.6244, "step": 24457 }, { "epoch": 1.766973106724222, "grad_norm": 6.62252012659444, "learning_rate": 1.7576127485423116e-07, "loss": 0.5934, "step": 24458 }, { "epoch": 1.7670453519244316, "grad_norm": 6.485288256942383, "learning_rate": 1.7565355363344804e-07, "loss": 0.5969, "step": 24459 }, { "epoch": 1.7671175971246411, "grad_norm": 9.075202826132825, "learning_rate": 1.7554586423116303e-07, "loss": 0.5614, "step": 24460 }, { "epoch": 1.7671898423248504, "grad_norm": 6.985682968796061, "learning_rate": 1.7543820664884997e-07, "loss": 0.6054, "step": 24461 }, { "epoch": 1.7672620875250602, "grad_norm": 7.565393067893334, "learning_rate": 1.7533058088798321e-07, "loss": 0.5641, "step": 24462 }, { "epoch": 1.7673343327252695, "grad_norm": 7.190737318664558, "learning_rate": 1.7522298695003603e-07, "loss": 0.5913, "step": 24463 }, { "epoch": 1.767406577925479, "grad_norm": 7.521179557573125, "learning_rate": 1.7511542483648087e-07, "loss": 0.6288, "step": 24464 }, { "epoch": 1.7674788231256886, "grad_norm": 7.509218272884616, "learning_rate": 1.7500789454879015e-07, "loss": 0.5834, "step": 24465 }, { "epoch": 1.7675510683258981, "grad_norm": 7.454863467485347, "learning_rate": 1.749003960884363e-07, "loss": 0.5614, "step": 24466 }, { "epoch": 1.7676233135261077, "grad_norm": 7.478489100403471, "learning_rate": 1.7479292945689063e-07, "loss": 0.615, "step": 24467 }, { "epoch": 1.767695558726317, "grad_norm": 8.056298998438805, "learning_rate": 1.7468549465562452e-07, "loss": 0.574, "step": 24468 }, { "epoch": 1.7677678039265268, "grad_norm": 5.817473222107808, "learning_rate": 1.7457809168610812e-07, "loss": 0.6202, "step": 24469 }, { "epoch": 1.767840049126736, "grad_norm": 6.986935549408054, "learning_rate": 1.7447072054981278e-07, "loss": 0.5378, "step": 24470 }, { "epoch": 1.7679122943269456, "grad_norm": 7.07208465791321, "learning_rate": 1.7436338124820706e-07, "loss": 0.5527, "step": 24471 }, { "epoch": 1.7679845395271552, "grad_norm": 5.824298001011782, "learning_rate": 1.7425607378276117e-07, "loss": 0.587, "step": 24472 }, { "epoch": 1.7680567847273647, "grad_norm": 6.153605091435255, "learning_rate": 1.7414879815494362e-07, "loss": 0.6203, "step": 24473 }, { "epoch": 1.7681290299275743, "grad_norm": 7.790193251831363, "learning_rate": 1.7404155436622355e-07, "loss": 0.5874, "step": 24474 }, { "epoch": 1.7682012751277836, "grad_norm": 6.846800520547101, "learning_rate": 1.7393434241806756e-07, "loss": 0.5426, "step": 24475 }, { "epoch": 1.7682735203279933, "grad_norm": 7.024425127393795, "learning_rate": 1.7382716231194502e-07, "loss": 0.5953, "step": 24476 }, { "epoch": 1.7683457655282027, "grad_norm": 6.287115745861171, "learning_rate": 1.7372001404932283e-07, "loss": 0.621, "step": 24477 }, { "epoch": 1.7684180107284122, "grad_norm": 6.2334439531326264, "learning_rate": 1.73612897631667e-07, "loss": 0.591, "step": 24478 }, { "epoch": 1.7684902559286217, "grad_norm": 6.34418835558287, "learning_rate": 1.7350581306044446e-07, "loss": 0.621, "step": 24479 }, { "epoch": 1.7685625011288313, "grad_norm": 6.852948699923928, "learning_rate": 1.7339876033712065e-07, "loss": 0.5429, "step": 24480 }, { "epoch": 1.7686347463290408, "grad_norm": 7.381596347005636, "learning_rate": 1.7329173946316169e-07, "loss": 0.6648, "step": 24481 }, { "epoch": 1.7687069915292501, "grad_norm": 9.623435153332235, "learning_rate": 1.7318475044003218e-07, "loss": 0.6577, "step": 24482 }, { "epoch": 1.76877923672946, "grad_norm": 7.097209607747864, "learning_rate": 1.7307779326919655e-07, "loss": 0.5455, "step": 24483 }, { "epoch": 1.7688514819296692, "grad_norm": 6.825101064974726, "learning_rate": 1.7297086795211915e-07, "loss": 0.5796, "step": 24484 }, { "epoch": 1.7689237271298788, "grad_norm": 8.122429074171187, "learning_rate": 1.7286397449026382e-07, "loss": 0.6818, "step": 24485 }, { "epoch": 1.7689959723300883, "grad_norm": 8.50551246960496, "learning_rate": 1.7275711288509384e-07, "loss": 0.592, "step": 24486 }, { "epoch": 1.7690682175302979, "grad_norm": 7.329791369624158, "learning_rate": 1.7265028313807192e-07, "loss": 0.5656, "step": 24487 }, { "epoch": 1.7691404627305074, "grad_norm": 8.645824116774707, "learning_rate": 1.7254348525066105e-07, "loss": 0.6462, "step": 24488 }, { "epoch": 1.7692127079307167, "grad_norm": 8.019718664091146, "learning_rate": 1.7243671922432205e-07, "loss": 0.6174, "step": 24489 }, { "epoch": 1.7692849531309265, "grad_norm": 8.38682654870753, "learning_rate": 1.7232998506051728e-07, "loss": 0.5948, "step": 24490 }, { "epoch": 1.7693571983311358, "grad_norm": 6.686999721175117, "learning_rate": 1.7222328276070789e-07, "loss": 0.5585, "step": 24491 }, { "epoch": 1.7694294435313453, "grad_norm": 7.788301145635511, "learning_rate": 1.7211661232635457e-07, "loss": 0.6406, "step": 24492 }, { "epoch": 1.769501688731555, "grad_norm": 8.328277529237598, "learning_rate": 1.7200997375891677e-07, "loss": 0.6607, "step": 24493 }, { "epoch": 1.7695739339317644, "grad_norm": 23.3413117346925, "learning_rate": 1.7190336705985522e-07, "loss": 0.6257, "step": 24494 }, { "epoch": 1.769646179131974, "grad_norm": 7.380263180058933, "learning_rate": 1.7179679223062878e-07, "loss": 0.5935, "step": 24495 }, { "epoch": 1.7697184243321833, "grad_norm": 6.555888305811157, "learning_rate": 1.7169024927269628e-07, "loss": 0.6472, "step": 24496 }, { "epoch": 1.769790669532393, "grad_norm": 7.548042791458555, "learning_rate": 1.7158373818751683e-07, "loss": 0.616, "step": 24497 }, { "epoch": 1.7698629147326024, "grad_norm": 8.68510788292168, "learning_rate": 1.714772589765479e-07, "loss": 0.5741, "step": 24498 }, { "epoch": 1.769935159932812, "grad_norm": 7.208586965813839, "learning_rate": 1.713708116412477e-07, "loss": 0.5749, "step": 24499 }, { "epoch": 1.7700074051330215, "grad_norm": 8.290535036498323, "learning_rate": 1.7126439618307288e-07, "loss": 0.6294, "step": 24500 }, { "epoch": 1.770079650333231, "grad_norm": 6.682467554700802, "learning_rate": 1.7115801260348008e-07, "loss": 0.5598, "step": 24501 }, { "epoch": 1.7701518955334405, "grad_norm": 6.656618493137758, "learning_rate": 1.7105166090392616e-07, "loss": 0.543, "step": 24502 }, { "epoch": 1.7702241407336499, "grad_norm": 8.525721061394117, "learning_rate": 1.7094534108586608e-07, "loss": 0.6269, "step": 24503 }, { "epoch": 1.7702963859338596, "grad_norm": 7.1125894175508435, "learning_rate": 1.7083905315075616e-07, "loss": 0.5688, "step": 24504 }, { "epoch": 1.770368631134069, "grad_norm": 6.700456563851638, "learning_rate": 1.7073279710005135e-07, "loss": 0.5411, "step": 24505 }, { "epoch": 1.7704408763342785, "grad_norm": 6.159660827414855, "learning_rate": 1.7062657293520634e-07, "loss": 0.6047, "step": 24506 }, { "epoch": 1.770513121534488, "grad_norm": 6.48629048562551, "learning_rate": 1.705203806576744e-07, "loss": 0.586, "step": 24507 }, { "epoch": 1.7705853667346976, "grad_norm": 8.520180475893506, "learning_rate": 1.7041422026890962e-07, "loss": 0.6443, "step": 24508 }, { "epoch": 1.7706576119349071, "grad_norm": 6.415675825762204, "learning_rate": 1.7030809177036588e-07, "loss": 0.6128, "step": 24509 }, { "epoch": 1.7707298571351164, "grad_norm": 7.707641515530779, "learning_rate": 1.7020199516349505e-07, "loss": 0.5516, "step": 24510 }, { "epoch": 1.7708021023353262, "grad_norm": 6.480044028521123, "learning_rate": 1.7009593044975014e-07, "loss": 0.5607, "step": 24511 }, { "epoch": 1.7708743475355355, "grad_norm": 7.421089339583777, "learning_rate": 1.699898976305825e-07, "loss": 0.641, "step": 24512 }, { "epoch": 1.7709465927357453, "grad_norm": 7.113506751385139, "learning_rate": 1.6988389670744425e-07, "loss": 0.5748, "step": 24513 }, { "epoch": 1.7710188379359546, "grad_norm": 8.144201975508166, "learning_rate": 1.6977792768178624e-07, "loss": 0.6848, "step": 24514 }, { "epoch": 1.7710910831361641, "grad_norm": 6.509716401141529, "learning_rate": 1.6967199055505924e-07, "loss": 0.648, "step": 24515 }, { "epoch": 1.7711633283363737, "grad_norm": 7.774932625530782, "learning_rate": 1.6956608532871315e-07, "loss": 0.7069, "step": 24516 }, { "epoch": 1.771235573536583, "grad_norm": 9.058428671954248, "learning_rate": 1.6946021200419828e-07, "loss": 0.6075, "step": 24517 }, { "epoch": 1.7713078187367928, "grad_norm": 6.178282374228546, "learning_rate": 1.693543705829631e-07, "loss": 0.5759, "step": 24518 }, { "epoch": 1.771380063937002, "grad_norm": 7.94821885027367, "learning_rate": 1.692485610664571e-07, "loss": 0.5919, "step": 24519 }, { "epoch": 1.7714523091372119, "grad_norm": 8.347708345356864, "learning_rate": 1.6914278345612878e-07, "loss": 0.6183, "step": 24520 }, { "epoch": 1.7715245543374212, "grad_norm": 6.179239944088682, "learning_rate": 1.6903703775342562e-07, "loss": 0.5641, "step": 24521 }, { "epoch": 1.7715967995376307, "grad_norm": 6.531245035416026, "learning_rate": 1.689313239597956e-07, "loss": 0.5678, "step": 24522 }, { "epoch": 1.7716690447378403, "grad_norm": 8.777659703787124, "learning_rate": 1.6882564207668595e-07, "loss": 0.6555, "step": 24523 }, { "epoch": 1.7717412899380496, "grad_norm": 7.2401047626313355, "learning_rate": 1.6871999210554297e-07, "loss": 0.5629, "step": 24524 }, { "epoch": 1.7718135351382593, "grad_norm": 8.149933572510154, "learning_rate": 1.6861437404781327e-07, "loss": 0.6324, "step": 24525 }, { "epoch": 1.7718857803384687, "grad_norm": 7.10147735913965, "learning_rate": 1.685087879049427e-07, "loss": 0.6429, "step": 24526 }, { "epoch": 1.7719580255386784, "grad_norm": 7.827596010772486, "learning_rate": 1.684032336783767e-07, "loss": 0.6461, "step": 24527 }, { "epoch": 1.7720302707388877, "grad_norm": 6.762971917289967, "learning_rate": 1.6829771136955997e-07, "loss": 0.6308, "step": 24528 }, { "epoch": 1.7721025159390973, "grad_norm": 6.72530907129867, "learning_rate": 1.6819222097993692e-07, "loss": 0.6051, "step": 24529 }, { "epoch": 1.7721747611393068, "grad_norm": 7.7860085517999, "learning_rate": 1.6808676251095169e-07, "loss": 0.5912, "step": 24530 }, { "epoch": 1.7722470063395164, "grad_norm": 7.186285297747038, "learning_rate": 1.6798133596404864e-07, "loss": 0.6975, "step": 24531 }, { "epoch": 1.772319251539726, "grad_norm": 6.8900514394226855, "learning_rate": 1.678759413406697e-07, "loss": 0.5976, "step": 24532 }, { "epoch": 1.7723914967399352, "grad_norm": 6.899776687325535, "learning_rate": 1.6777057864225871e-07, "loss": 0.5437, "step": 24533 }, { "epoch": 1.772463741940145, "grad_norm": 6.239812864774259, "learning_rate": 1.6766524787025783e-07, "loss": 0.616, "step": 24534 }, { "epoch": 1.7725359871403543, "grad_norm": 7.527975676478008, "learning_rate": 1.6755994902610873e-07, "loss": 0.5834, "step": 24535 }, { "epoch": 1.7726082323405639, "grad_norm": 6.882528725705128, "learning_rate": 1.6745468211125243e-07, "loss": 0.5713, "step": 24536 }, { "epoch": 1.7726804775407734, "grad_norm": 7.47788819106676, "learning_rate": 1.6734944712713086e-07, "loss": 0.6018, "step": 24537 }, { "epoch": 1.772752722740983, "grad_norm": 8.65183010255837, "learning_rate": 1.6724424407518426e-07, "loss": 0.5984, "step": 24538 }, { "epoch": 1.7728249679411925, "grad_norm": 6.6215846056989776, "learning_rate": 1.6713907295685256e-07, "loss": 0.567, "step": 24539 }, { "epoch": 1.7728972131414018, "grad_norm": 8.988529525172755, "learning_rate": 1.6703393377357545e-07, "loss": 0.6286, "step": 24540 }, { "epoch": 1.7729694583416116, "grad_norm": 7.183780138793192, "learning_rate": 1.6692882652679237e-07, "loss": 0.6677, "step": 24541 }, { "epoch": 1.773041703541821, "grad_norm": 7.221001344176419, "learning_rate": 1.6682375121794214e-07, "loss": 0.6098, "step": 24542 }, { "epoch": 1.7731139487420304, "grad_norm": 8.459781416285418, "learning_rate": 1.6671870784846305e-07, "loss": 0.6237, "step": 24543 }, { "epoch": 1.77318619394224, "grad_norm": 7.1313737168141165, "learning_rate": 1.6661369641979341e-07, "loss": 0.6363, "step": 24544 }, { "epoch": 1.7732584391424495, "grad_norm": 8.054167944415827, "learning_rate": 1.6650871693337067e-07, "loss": 0.6178, "step": 24545 }, { "epoch": 1.773330684342659, "grad_norm": 6.631283641158188, "learning_rate": 1.6640376939063147e-07, "loss": 0.537, "step": 24546 }, { "epoch": 1.7734029295428684, "grad_norm": 8.721767108595714, "learning_rate": 1.6629885379301297e-07, "loss": 0.6329, "step": 24547 }, { "epoch": 1.7734751747430781, "grad_norm": 7.962336094485272, "learning_rate": 1.6619397014195098e-07, "loss": 0.6251, "step": 24548 }, { "epoch": 1.7735474199432875, "grad_norm": 7.97469117334977, "learning_rate": 1.660891184388816e-07, "loss": 0.6076, "step": 24549 }, { "epoch": 1.773619665143497, "grad_norm": 8.036306963665451, "learning_rate": 1.6598429868524003e-07, "loss": 0.6211, "step": 24550 }, { "epoch": 1.7736919103437065, "grad_norm": 7.216073204582703, "learning_rate": 1.6587951088246097e-07, "loss": 0.5847, "step": 24551 }, { "epoch": 1.773764155543916, "grad_norm": 7.998178274637958, "learning_rate": 1.657747550319791e-07, "loss": 0.6732, "step": 24552 }, { "epoch": 1.7738364007441256, "grad_norm": 8.106395843064483, "learning_rate": 1.656700311352283e-07, "loss": 0.6282, "step": 24553 }, { "epoch": 1.773908645944335, "grad_norm": 7.2692121940999606, "learning_rate": 1.6556533919364237e-07, "loss": 0.574, "step": 24554 }, { "epoch": 1.7739808911445447, "grad_norm": 7.52446216169328, "learning_rate": 1.654606792086544e-07, "loss": 0.5897, "step": 24555 }, { "epoch": 1.774053136344754, "grad_norm": 6.728027467778185, "learning_rate": 1.653560511816976e-07, "loss": 0.6765, "step": 24556 }, { "epoch": 1.7741253815449636, "grad_norm": 8.14803253952368, "learning_rate": 1.6525145511420316e-07, "loss": 0.6173, "step": 24557 }, { "epoch": 1.7741976267451731, "grad_norm": 9.207726874310213, "learning_rate": 1.6514689100760346e-07, "loss": 0.5488, "step": 24558 }, { "epoch": 1.7742698719453827, "grad_norm": 7.700080316298098, "learning_rate": 1.6504235886333042e-07, "loss": 0.6099, "step": 24559 }, { "epoch": 1.7743421171455922, "grad_norm": 6.850268738758567, "learning_rate": 1.6493785868281375e-07, "loss": 0.6116, "step": 24560 }, { "epoch": 1.7744143623458015, "grad_norm": 6.416538517981044, "learning_rate": 1.648333904674851e-07, "loss": 0.5215, "step": 24561 }, { "epoch": 1.7744866075460113, "grad_norm": 6.098376899279353, "learning_rate": 1.6472895421877412e-07, "loss": 0.537, "step": 24562 }, { "epoch": 1.7745588527462206, "grad_norm": 7.606597205381778, "learning_rate": 1.6462454993811105e-07, "loss": 0.5572, "step": 24563 }, { "epoch": 1.7746310979464301, "grad_norm": 6.079144468347583, "learning_rate": 1.6452017762692397e-07, "loss": 0.5809, "step": 24564 }, { "epoch": 1.7747033431466397, "grad_norm": 7.567303353745469, "learning_rate": 1.644158372866425e-07, "loss": 0.6368, "step": 24565 }, { "epoch": 1.7747755883468492, "grad_norm": 8.584849794614566, "learning_rate": 1.64311528918695e-07, "loss": 0.5789, "step": 24566 }, { "epoch": 1.7748478335470588, "grad_norm": 8.101426891343355, "learning_rate": 1.6420725252450892e-07, "loss": 0.645, "step": 24567 }, { "epoch": 1.774920078747268, "grad_norm": 7.496483296206478, "learning_rate": 1.6410300810551172e-07, "loss": 0.6109, "step": 24568 }, { "epoch": 1.7749923239474779, "grad_norm": 8.12572816373511, "learning_rate": 1.6399879566313088e-07, "loss": 0.616, "step": 24569 }, { "epoch": 1.7750645691476872, "grad_norm": 7.715008898548821, "learning_rate": 1.6389461519879247e-07, "loss": 0.6265, "step": 24570 }, { "epoch": 1.7751368143478967, "grad_norm": 10.285475213436223, "learning_rate": 1.6379046671392313e-07, "loss": 0.5913, "step": 24571 }, { "epoch": 1.7752090595481063, "grad_norm": 6.484495010184631, "learning_rate": 1.636863502099481e-07, "loss": 0.5913, "step": 24572 }, { "epoch": 1.7752813047483158, "grad_norm": 9.055035483229155, "learning_rate": 1.6358226568829293e-07, "loss": 0.624, "step": 24573 }, { "epoch": 1.7753535499485253, "grad_norm": 6.8276213366336505, "learning_rate": 1.6347821315038314e-07, "loss": 0.5862, "step": 24574 }, { "epoch": 1.7754257951487347, "grad_norm": 8.463756257295584, "learning_rate": 1.6337419259764175e-07, "loss": 0.6458, "step": 24575 }, { "epoch": 1.7754980403489444, "grad_norm": 6.503720905767497, "learning_rate": 1.6327020403149346e-07, "loss": 0.519, "step": 24576 }, { "epoch": 1.7755702855491537, "grad_norm": 8.269065328454602, "learning_rate": 1.6316624745336212e-07, "loss": 0.6257, "step": 24577 }, { "epoch": 1.7756425307493633, "grad_norm": 6.43920816517323, "learning_rate": 1.6306232286466993e-07, "loss": 0.5757, "step": 24578 }, { "epoch": 1.7757147759495728, "grad_norm": 8.199014967393758, "learning_rate": 1.6295843026684023e-07, "loss": 0.6475, "step": 24579 }, { "epoch": 1.7757870211497824, "grad_norm": 8.78686344017084, "learning_rate": 1.628545696612946e-07, "loss": 0.5742, "step": 24580 }, { "epoch": 1.775859266349992, "grad_norm": 8.29389190207473, "learning_rate": 1.6275074104945583e-07, "loss": 0.6569, "step": 24581 }, { "epoch": 1.7759315115502012, "grad_norm": 5.760426894369123, "learning_rate": 1.6264694443274442e-07, "loss": 0.5875, "step": 24582 }, { "epoch": 1.776003756750411, "grad_norm": 6.539765423875417, "learning_rate": 1.6254317981258149e-07, "loss": 0.6085, "step": 24583 }, { "epoch": 1.7760760019506203, "grad_norm": 8.511522449262346, "learning_rate": 1.6243944719038786e-07, "loss": 0.5908, "step": 24584 }, { "epoch": 1.7761482471508299, "grad_norm": 7.041672234467181, "learning_rate": 1.623357465675829e-07, "loss": 0.508, "step": 24585 }, { "epoch": 1.7762204923510394, "grad_norm": 7.173278804179455, "learning_rate": 1.6223207794558632e-07, "loss": 0.6488, "step": 24586 }, { "epoch": 1.776292737551249, "grad_norm": 9.341937957916993, "learning_rate": 1.6212844132581757e-07, "loss": 0.6252, "step": 24587 }, { "epoch": 1.7763649827514585, "grad_norm": 7.272867903650204, "learning_rate": 1.6202483670969522e-07, "loss": 0.5483, "step": 24588 }, { "epoch": 1.7764372279516678, "grad_norm": 7.008100999147901, "learning_rate": 1.6192126409863756e-07, "loss": 0.5887, "step": 24589 }, { "epoch": 1.7765094731518776, "grad_norm": 6.996773657463097, "learning_rate": 1.6181772349406238e-07, "loss": 0.6489, "step": 24590 }, { "epoch": 1.776581718352087, "grad_norm": 7.791042309665382, "learning_rate": 1.617142148973874e-07, "loss": 0.5531, "step": 24591 }, { "epoch": 1.7766539635522967, "grad_norm": 7.480898629129819, "learning_rate": 1.6161073831002878e-07, "loss": 0.6388, "step": 24592 }, { "epoch": 1.776726208752506, "grad_norm": 6.224773614282873, "learning_rate": 1.6150729373340363e-07, "loss": 0.6096, "step": 24593 }, { "epoch": 1.7767984539527155, "grad_norm": 7.242576218330071, "learning_rate": 1.614038811689278e-07, "loss": 0.6035, "step": 24594 }, { "epoch": 1.776870699152925, "grad_norm": 7.6199429110894075, "learning_rate": 1.6130050061801767e-07, "loss": 0.5364, "step": 24595 }, { "epoch": 1.7769429443531344, "grad_norm": 8.6294659095894, "learning_rate": 1.6119715208208737e-07, "loss": 0.5953, "step": 24596 }, { "epoch": 1.7770151895533441, "grad_norm": 7.4262191747678346, "learning_rate": 1.6109383556255186e-07, "loss": 0.6194, "step": 24597 }, { "epoch": 1.7770874347535535, "grad_norm": 8.734639663644456, "learning_rate": 1.609905510608259e-07, "loss": 0.6416, "step": 24598 }, { "epoch": 1.7771596799537632, "grad_norm": 7.4901770797189355, "learning_rate": 1.6088729857832302e-07, "loss": 0.6692, "step": 24599 }, { "epoch": 1.7772319251539725, "grad_norm": 6.315251723200951, "learning_rate": 1.6078407811645685e-07, "loss": 0.5594, "step": 24600 }, { "epoch": 1.777304170354182, "grad_norm": 8.521906728374194, "learning_rate": 1.6068088967664041e-07, "loss": 0.6047, "step": 24601 }, { "epoch": 1.7773764155543916, "grad_norm": 6.269858891847698, "learning_rate": 1.6057773326028675e-07, "loss": 0.5365, "step": 24602 }, { "epoch": 1.777448660754601, "grad_norm": 7.002959307064156, "learning_rate": 1.6047460886880695e-07, "loss": 0.6386, "step": 24603 }, { "epoch": 1.7775209059548107, "grad_norm": 7.324725503302528, "learning_rate": 1.603715165036132e-07, "loss": 0.5806, "step": 24604 }, { "epoch": 1.77759315115502, "grad_norm": 7.593225531657108, "learning_rate": 1.6026845616611693e-07, "loss": 0.5828, "step": 24605 }, { "epoch": 1.7776653963552298, "grad_norm": 7.4447878225411, "learning_rate": 1.6016542785772887e-07, "loss": 0.6824, "step": 24606 }, { "epoch": 1.7777376415554391, "grad_norm": 6.584173253557452, "learning_rate": 1.6006243157985935e-07, "loss": 0.5585, "step": 24607 }, { "epoch": 1.7778098867556487, "grad_norm": 7.231774144806663, "learning_rate": 1.5995946733391803e-07, "loss": 0.6067, "step": 24608 }, { "epoch": 1.7778821319558582, "grad_norm": 7.657492525132554, "learning_rate": 1.5985653512131466e-07, "loss": 0.591, "step": 24609 }, { "epoch": 1.7779543771560677, "grad_norm": 8.447074374641357, "learning_rate": 1.5975363494345863e-07, "loss": 0.5866, "step": 24610 }, { "epoch": 1.7780266223562773, "grad_norm": 6.527382905889439, "learning_rate": 1.59650766801758e-07, "loss": 0.6123, "step": 24611 }, { "epoch": 1.7780988675564866, "grad_norm": 6.368511712854706, "learning_rate": 1.5954793069762137e-07, "loss": 0.5439, "step": 24612 }, { "epoch": 1.7781711127566964, "grad_norm": 7.755417929627235, "learning_rate": 1.594451266324565e-07, "loss": 0.5744, "step": 24613 }, { "epoch": 1.7782433579569057, "grad_norm": 7.162496580752211, "learning_rate": 1.593423546076703e-07, "loss": 0.5862, "step": 24614 }, { "epoch": 1.7783156031571152, "grad_norm": 7.806594691977703, "learning_rate": 1.592396146246697e-07, "loss": 0.5818, "step": 24615 }, { "epoch": 1.7783878483573248, "grad_norm": 8.087719648111284, "learning_rate": 1.5913690668486193e-07, "loss": 0.6442, "step": 24616 }, { "epoch": 1.7784600935575343, "grad_norm": 6.656947139714065, "learning_rate": 1.5903423078965142e-07, "loss": 0.5665, "step": 24617 }, { "epoch": 1.7785323387577439, "grad_norm": 8.89294339005213, "learning_rate": 1.5893158694044482e-07, "loss": 0.5914, "step": 24618 }, { "epoch": 1.7786045839579532, "grad_norm": 7.824186571638773, "learning_rate": 1.5882897513864737e-07, "loss": 0.6175, "step": 24619 }, { "epoch": 1.778676829158163, "grad_norm": 7.173345012064412, "learning_rate": 1.587263953856638e-07, "loss": 0.5844, "step": 24620 }, { "epoch": 1.7787490743583723, "grad_norm": 7.72514492256987, "learning_rate": 1.5862384768289745e-07, "loss": 0.5681, "step": 24621 }, { "epoch": 1.7788213195585818, "grad_norm": 7.0859256563434645, "learning_rate": 1.5852133203175273e-07, "loss": 0.6176, "step": 24622 }, { "epoch": 1.7788935647587913, "grad_norm": 8.096953829429804, "learning_rate": 1.5841884843363326e-07, "loss": 0.6197, "step": 24623 }, { "epoch": 1.7789658099590009, "grad_norm": 7.2330771944369285, "learning_rate": 1.583163968899415e-07, "loss": 0.5698, "step": 24624 }, { "epoch": 1.7790380551592104, "grad_norm": 6.428012708574229, "learning_rate": 1.5821397740207993e-07, "loss": 0.5397, "step": 24625 }, { "epoch": 1.7791103003594197, "grad_norm": 6.9544854708355315, "learning_rate": 1.581115899714508e-07, "loss": 0.5892, "step": 24626 }, { "epoch": 1.7791825455596295, "grad_norm": 8.217188001372632, "learning_rate": 1.5800923459945548e-07, "loss": 0.5655, "step": 24627 }, { "epoch": 1.7792547907598388, "grad_norm": 8.175093008089753, "learning_rate": 1.5790691128749564e-07, "loss": 0.6187, "step": 24628 }, { "epoch": 1.7793270359600484, "grad_norm": 8.471056409438821, "learning_rate": 1.5780462003697122e-07, "loss": 0.6277, "step": 24629 }, { "epoch": 1.779399281160258, "grad_norm": 7.316541027939732, "learning_rate": 1.577023608492831e-07, "loss": 0.58, "step": 24630 }, { "epoch": 1.7794715263604675, "grad_norm": 7.8690938448752235, "learning_rate": 1.576001337258315e-07, "loss": 0.5781, "step": 24631 }, { "epoch": 1.779543771560677, "grad_norm": 7.845683919490434, "learning_rate": 1.5749793866801477e-07, "loss": 0.6196, "step": 24632 }, { "epoch": 1.7796160167608863, "grad_norm": 6.860767961673517, "learning_rate": 1.5739577567723264e-07, "loss": 0.5619, "step": 24633 }, { "epoch": 1.779688261961096, "grad_norm": 8.201049309559846, "learning_rate": 1.572936447548837e-07, "loss": 0.5988, "step": 24634 }, { "epoch": 1.7797605071613054, "grad_norm": 7.575496886420842, "learning_rate": 1.5719154590236541e-07, "loss": 0.6237, "step": 24635 }, { "epoch": 1.779832752361515, "grad_norm": 7.889830173458464, "learning_rate": 1.5708947912107558e-07, "loss": 0.6111, "step": 24636 }, { "epoch": 1.7799049975617245, "grad_norm": 7.5499536778121605, "learning_rate": 1.5698744441241142e-07, "loss": 0.5856, "step": 24637 }, { "epoch": 1.779977242761934, "grad_norm": 8.666338498675945, "learning_rate": 1.568854417777707e-07, "loss": 0.6337, "step": 24638 }, { "epoch": 1.7800494879621436, "grad_norm": 5.949950966512771, "learning_rate": 1.5678347121854842e-07, "loss": 0.6108, "step": 24639 }, { "epoch": 1.780121733162353, "grad_norm": 7.937091967089897, "learning_rate": 1.5668153273614122e-07, "loss": 0.6351, "step": 24640 }, { "epoch": 1.7801939783625627, "grad_norm": 7.236224537467078, "learning_rate": 1.5657962633194468e-07, "loss": 0.5739, "step": 24641 }, { "epoch": 1.780266223562772, "grad_norm": 7.407205125731176, "learning_rate": 1.564777520073532e-07, "loss": 0.5782, "step": 24642 }, { "epoch": 1.7803384687629815, "grad_norm": 6.256767271381637, "learning_rate": 1.5637590976376155e-07, "loss": 0.6009, "step": 24643 }, { "epoch": 1.780410713963191, "grad_norm": 7.382143085144111, "learning_rate": 1.5627409960256413e-07, "loss": 0.5725, "step": 24644 }, { "epoch": 1.7804829591634006, "grad_norm": 7.917070425345037, "learning_rate": 1.5617232152515426e-07, "loss": 0.667, "step": 24645 }, { "epoch": 1.7805552043636101, "grad_norm": 7.526714051161966, "learning_rate": 1.560705755329256e-07, "loss": 0.5493, "step": 24646 }, { "epoch": 1.7806274495638195, "grad_norm": 9.549507439977802, "learning_rate": 1.5596886162727087e-07, "loss": 0.5666, "step": 24647 }, { "epoch": 1.7806996947640292, "grad_norm": 8.305842666454883, "learning_rate": 1.5586717980958233e-07, "loss": 0.5795, "step": 24648 }, { "epoch": 1.7807719399642385, "grad_norm": 5.764167580767402, "learning_rate": 1.5576553008125244e-07, "loss": 0.5788, "step": 24649 }, { "epoch": 1.780844185164448, "grad_norm": 7.297457359322685, "learning_rate": 1.556639124436718e-07, "loss": 0.5996, "step": 24650 }, { "epoch": 1.7809164303646576, "grad_norm": 8.061040701873774, "learning_rate": 1.5556232689823203e-07, "loss": 0.5274, "step": 24651 }, { "epoch": 1.7809886755648672, "grad_norm": 8.439995742724461, "learning_rate": 1.55460773446324e-07, "loss": 0.6273, "step": 24652 }, { "epoch": 1.7810609207650767, "grad_norm": 6.055535159917581, "learning_rate": 1.5535925208933712e-07, "loss": 0.5868, "step": 24653 }, { "epoch": 1.781133165965286, "grad_norm": 7.590023931935493, "learning_rate": 1.5525776282866169e-07, "loss": 0.6073, "step": 24654 }, { "epoch": 1.7812054111654958, "grad_norm": 6.265234423709396, "learning_rate": 1.5515630566568686e-07, "loss": 0.6005, "step": 24655 }, { "epoch": 1.7812776563657051, "grad_norm": 7.3289182839550495, "learning_rate": 1.5505488060180153e-07, "loss": 0.5931, "step": 24656 }, { "epoch": 1.7813499015659147, "grad_norm": 7.988037463807575, "learning_rate": 1.54953487638394e-07, "loss": 0.6015, "step": 24657 }, { "epoch": 1.7814221467661242, "grad_norm": 8.230662947845401, "learning_rate": 1.548521267768527e-07, "loss": 0.6451, "step": 24658 }, { "epoch": 1.7814943919663337, "grad_norm": 7.556524769320582, "learning_rate": 1.5475079801856502e-07, "loss": 0.5851, "step": 24659 }, { "epoch": 1.7815666371665433, "grad_norm": 7.764018024909093, "learning_rate": 1.546495013649177e-07, "loss": 0.6074, "step": 24660 }, { "epoch": 1.7816388823667526, "grad_norm": 7.841295565000424, "learning_rate": 1.5454823681729764e-07, "loss": 0.5267, "step": 24661 }, { "epoch": 1.7817111275669624, "grad_norm": 6.3967133785342565, "learning_rate": 1.5444700437709125e-07, "loss": 0.6336, "step": 24662 }, { "epoch": 1.7817833727671717, "grad_norm": 6.366157233077091, "learning_rate": 1.5434580404568438e-07, "loss": 0.582, "step": 24663 }, { "epoch": 1.7818556179673815, "grad_norm": 7.509083651170323, "learning_rate": 1.5424463582446176e-07, "loss": 0.6086, "step": 24664 }, { "epoch": 1.7819278631675908, "grad_norm": 8.61187585969247, "learning_rate": 1.5414349971480834e-07, "loss": 0.6153, "step": 24665 }, { "epoch": 1.7820001083678003, "grad_norm": 8.587115692023902, "learning_rate": 1.5404239571810974e-07, "loss": 0.6464, "step": 24666 }, { "epoch": 1.7820723535680099, "grad_norm": 7.725635006184236, "learning_rate": 1.53941323835749e-07, "loss": 0.6097, "step": 24667 }, { "epoch": 1.7821445987682192, "grad_norm": 6.8811412778437075, "learning_rate": 1.5384028406910971e-07, "loss": 0.571, "step": 24668 }, { "epoch": 1.782216843968429, "grad_norm": 7.498226719075922, "learning_rate": 1.5373927641957554e-07, "loss": 0.5881, "step": 24669 }, { "epoch": 1.7822890891686383, "grad_norm": 6.771655348739285, "learning_rate": 1.5363830088852922e-07, "loss": 0.545, "step": 24670 }, { "epoch": 1.782361334368848, "grad_norm": 7.0948598388419555, "learning_rate": 1.5353735747735215e-07, "loss": 0.5592, "step": 24671 }, { "epoch": 1.7824335795690573, "grad_norm": 7.003974875918674, "learning_rate": 1.5343644618742687e-07, "loss": 0.585, "step": 24672 }, { "epoch": 1.7825058247692669, "grad_norm": 8.231809502357637, "learning_rate": 1.5333556702013475e-07, "loss": 0.5392, "step": 24673 }, { "epoch": 1.7825780699694764, "grad_norm": 6.724588669460571, "learning_rate": 1.5323471997685636e-07, "loss": 0.5494, "step": 24674 }, { "epoch": 1.7826503151696858, "grad_norm": 7.987693380722222, "learning_rate": 1.531339050589728e-07, "loss": 0.5776, "step": 24675 }, { "epoch": 1.7827225603698955, "grad_norm": 7.337019283007815, "learning_rate": 1.5303312226786383e-07, "loss": 0.5682, "step": 24676 }, { "epoch": 1.7827948055701048, "grad_norm": 7.390747585449402, "learning_rate": 1.5293237160490914e-07, "loss": 0.641, "step": 24677 }, { "epoch": 1.7828670507703146, "grad_norm": 8.62642580493038, "learning_rate": 1.5283165307148795e-07, "loss": 0.5647, "step": 24678 }, { "epoch": 1.782939295970524, "grad_norm": 7.025767990977008, "learning_rate": 1.5273096666897884e-07, "loss": 0.6079, "step": 24679 }, { "epoch": 1.7830115411707335, "grad_norm": 6.636000802654565, "learning_rate": 1.526303123987602e-07, "loss": 0.5911, "step": 24680 }, { "epoch": 1.783083786370943, "grad_norm": 7.352702426400201, "learning_rate": 1.5252969026221032e-07, "loss": 0.6051, "step": 24681 }, { "epoch": 1.7831560315711525, "grad_norm": 6.470435176405858, "learning_rate": 1.524291002607059e-07, "loss": 0.6181, "step": 24682 }, { "epoch": 1.783228276771362, "grad_norm": 7.086050821258275, "learning_rate": 1.5232854239562444e-07, "loss": 0.541, "step": 24683 }, { "epoch": 1.7833005219715714, "grad_norm": 6.870675408757844, "learning_rate": 1.5222801666834236e-07, "loss": 0.5494, "step": 24684 }, { "epoch": 1.7833727671717812, "grad_norm": 7.894916357040647, "learning_rate": 1.5212752308023577e-07, "loss": 0.5144, "step": 24685 }, { "epoch": 1.7834450123719905, "grad_norm": 8.095642104338788, "learning_rate": 1.5202706163268049e-07, "loss": 0.5911, "step": 24686 }, { "epoch": 1.7835172575722, "grad_norm": 8.95537694600943, "learning_rate": 1.5192663232705157e-07, "loss": 0.6563, "step": 24687 }, { "epoch": 1.7835895027724096, "grad_norm": 7.175266369916036, "learning_rate": 1.5182623516472428e-07, "loss": 0.5867, "step": 24688 }, { "epoch": 1.7836617479726191, "grad_norm": 6.822012882700275, "learning_rate": 1.5172587014707223e-07, "loss": 0.5707, "step": 24689 }, { "epoch": 1.7837339931728287, "grad_norm": 6.666772968716293, "learning_rate": 1.5162553727546964e-07, "loss": 0.5572, "step": 24690 }, { "epoch": 1.783806238373038, "grad_norm": 7.85478230971786, "learning_rate": 1.5152523655129037e-07, "loss": 0.5807, "step": 24691 }, { "epoch": 1.7838784835732477, "grad_norm": 8.351741549764487, "learning_rate": 1.5142496797590695e-07, "loss": 0.6317, "step": 24692 }, { "epoch": 1.783950728773457, "grad_norm": 7.86144045921293, "learning_rate": 1.513247315506916e-07, "loss": 0.6299, "step": 24693 }, { "epoch": 1.7840229739736666, "grad_norm": 6.484902381132831, "learning_rate": 1.5122452727701742e-07, "loss": 0.5606, "step": 24694 }, { "epoch": 1.7840952191738761, "grad_norm": 7.997267494840547, "learning_rate": 1.5112435515625634e-07, "loss": 0.6997, "step": 24695 }, { "epoch": 1.7841674643740857, "grad_norm": 7.959070647538553, "learning_rate": 1.510242151897784e-07, "loss": 0.6139, "step": 24696 }, { "epoch": 1.7842397095742952, "grad_norm": 7.53199580761615, "learning_rate": 1.5092410737895523e-07, "loss": 0.5838, "step": 24697 }, { "epoch": 1.7843119547745045, "grad_norm": 8.284056220438083, "learning_rate": 1.5082403172515775e-07, "loss": 0.6175, "step": 24698 }, { "epoch": 1.7843841999747143, "grad_norm": 7.8009873670033905, "learning_rate": 1.5072398822975454e-07, "loss": 0.5748, "step": 24699 }, { "epoch": 1.7844564451749236, "grad_norm": 6.99642607870303, "learning_rate": 1.506239768941159e-07, "loss": 0.5945, "step": 24700 }, { "epoch": 1.7845286903751332, "grad_norm": 7.373481206164172, "learning_rate": 1.5052399771961073e-07, "loss": 0.6732, "step": 24701 }, { "epoch": 1.7846009355753427, "grad_norm": 7.387879843717349, "learning_rate": 1.5042405070760796e-07, "loss": 0.6287, "step": 24702 }, { "epoch": 1.7846731807755523, "grad_norm": 8.04300432164912, "learning_rate": 1.5032413585947563e-07, "loss": 0.6382, "step": 24703 }, { "epoch": 1.7847454259757618, "grad_norm": 6.610794383920484, "learning_rate": 1.502242531765813e-07, "loss": 0.6012, "step": 24704 }, { "epoch": 1.7848176711759711, "grad_norm": 7.44517441708588, "learning_rate": 1.501244026602927e-07, "loss": 0.6056, "step": 24705 }, { "epoch": 1.7848899163761809, "grad_norm": 7.989305261765495, "learning_rate": 1.5002458431197658e-07, "loss": 0.6261, "step": 24706 }, { "epoch": 1.7849621615763902, "grad_norm": 6.730343490382579, "learning_rate": 1.4992479813299904e-07, "loss": 0.6163, "step": 24707 }, { "epoch": 1.7850344067765997, "grad_norm": 8.101837948095916, "learning_rate": 1.4982504412472622e-07, "loss": 0.6272, "step": 24708 }, { "epoch": 1.7851066519768093, "grad_norm": 7.334445204396774, "learning_rate": 1.497253222885242e-07, "loss": 0.6678, "step": 24709 }, { "epoch": 1.7851788971770188, "grad_norm": 7.8416652872676895, "learning_rate": 1.4962563262575724e-07, "loss": 0.6251, "step": 24710 }, { "epoch": 1.7852511423772284, "grad_norm": 7.648489869106975, "learning_rate": 1.495259751377906e-07, "loss": 0.6421, "step": 24711 }, { "epoch": 1.7853233875774377, "grad_norm": 6.338851465591541, "learning_rate": 1.494263498259882e-07, "loss": 0.5683, "step": 24712 }, { "epoch": 1.7853956327776475, "grad_norm": 7.806840723683361, "learning_rate": 1.4932675669171393e-07, "loss": 0.5339, "step": 24713 }, { "epoch": 1.7854678779778568, "grad_norm": 6.2765205759856695, "learning_rate": 1.4922719573633115e-07, "loss": 0.6402, "step": 24714 }, { "epoch": 1.7855401231780663, "grad_norm": 7.870053872453623, "learning_rate": 1.4912766696120268e-07, "loss": 0.6518, "step": 24715 }, { "epoch": 1.7856123683782759, "grad_norm": 7.096314626227762, "learning_rate": 1.4902817036769185e-07, "loss": 0.5655, "step": 24716 }, { "epoch": 1.7856846135784854, "grad_norm": 7.506460077933628, "learning_rate": 1.4892870595715926e-07, "loss": 0.5894, "step": 24717 }, { "epoch": 1.785756858778695, "grad_norm": 6.996966385851538, "learning_rate": 1.4882927373096738e-07, "loss": 0.6044, "step": 24718 }, { "epoch": 1.7858291039789043, "grad_norm": 6.3087839198310744, "learning_rate": 1.4872987369047714e-07, "loss": 0.6565, "step": 24719 }, { "epoch": 1.785901349179114, "grad_norm": 7.980060045977245, "learning_rate": 1.4863050583704964e-07, "loss": 0.5847, "step": 24720 }, { "epoch": 1.7859735943793233, "grad_norm": 7.89702589813967, "learning_rate": 1.4853117017204433e-07, "loss": 0.6308, "step": 24721 }, { "epoch": 1.786045839579533, "grad_norm": 7.89141418740629, "learning_rate": 1.4843186669682124e-07, "loss": 0.6281, "step": 24722 }, { "epoch": 1.7861180847797424, "grad_norm": 8.309757554686886, "learning_rate": 1.483325954127407e-07, "loss": 0.6608, "step": 24723 }, { "epoch": 1.786190329979952, "grad_norm": 8.84461825256983, "learning_rate": 1.4823335632116077e-07, "loss": 0.6287, "step": 24724 }, { "epoch": 1.7862625751801615, "grad_norm": 7.309351316546577, "learning_rate": 1.481341494234398e-07, "loss": 0.5809, "step": 24725 }, { "epoch": 1.7863348203803708, "grad_norm": 7.244704844989589, "learning_rate": 1.480349747209367e-07, "loss": 0.5545, "step": 24726 }, { "epoch": 1.7864070655805806, "grad_norm": 7.192773890950409, "learning_rate": 1.4793583221500873e-07, "loss": 0.6018, "step": 24727 }, { "epoch": 1.78647931078079, "grad_norm": 7.75979192553119, "learning_rate": 1.478367219070126e-07, "loss": 0.6299, "step": 24728 }, { "epoch": 1.7865515559809995, "grad_norm": 8.807592012746065, "learning_rate": 1.4773764379830523e-07, "loss": 0.5342, "step": 24729 }, { "epoch": 1.786623801181209, "grad_norm": 6.664128615128508, "learning_rate": 1.4763859789024336e-07, "loss": 0.6318, "step": 24730 }, { "epoch": 1.7866960463814185, "grad_norm": 7.054987600033121, "learning_rate": 1.4753958418418256e-07, "loss": 0.6143, "step": 24731 }, { "epoch": 1.786768291581628, "grad_norm": 7.776479216059331, "learning_rate": 1.4744060268147815e-07, "loss": 0.5996, "step": 24732 }, { "epoch": 1.7868405367818374, "grad_norm": 7.4322229465514615, "learning_rate": 1.473416533834854e-07, "loss": 0.5765, "step": 24733 }, { "epoch": 1.7869127819820472, "grad_norm": 7.187397035607601, "learning_rate": 1.4724273629155905e-07, "loss": 0.6039, "step": 24734 }, { "epoch": 1.7869850271822565, "grad_norm": 6.590636761339671, "learning_rate": 1.4714385140705222e-07, "loss": 0.6399, "step": 24735 }, { "epoch": 1.7870572723824663, "grad_norm": 7.249444348319049, "learning_rate": 1.4704499873131938e-07, "loss": 0.585, "step": 24736 }, { "epoch": 1.7871295175826756, "grad_norm": 6.6654648015354345, "learning_rate": 1.469461782657136e-07, "loss": 0.619, "step": 24737 }, { "epoch": 1.7872017627828851, "grad_norm": 8.549228155979245, "learning_rate": 1.46847390011588e-07, "loss": 0.5942, "step": 24738 }, { "epoch": 1.7872740079830947, "grad_norm": 7.9808886656075, "learning_rate": 1.4674863397029422e-07, "loss": 0.6057, "step": 24739 }, { "epoch": 1.787346253183304, "grad_norm": 8.874652694568884, "learning_rate": 1.4664991014318425e-07, "loss": 0.6127, "step": 24740 }, { "epoch": 1.7874184983835137, "grad_norm": 8.212888999330993, "learning_rate": 1.465512185316098e-07, "loss": 0.6727, "step": 24741 }, { "epoch": 1.787490743583723, "grad_norm": 7.263425038660228, "learning_rate": 1.46452559136922e-07, "loss": 0.5504, "step": 24742 }, { "epoch": 1.7875629887839328, "grad_norm": 6.479561584147025, "learning_rate": 1.463539319604712e-07, "loss": 0.5502, "step": 24743 }, { "epoch": 1.7876352339841421, "grad_norm": 7.888853452122336, "learning_rate": 1.462553370036074e-07, "loss": 0.644, "step": 24744 }, { "epoch": 1.7877074791843517, "grad_norm": 7.0301057011565335, "learning_rate": 1.461567742676809e-07, "loss": 0.5616, "step": 24745 }, { "epoch": 1.7877797243845612, "grad_norm": 8.66148554158635, "learning_rate": 1.4605824375404033e-07, "loss": 0.6095, "step": 24746 }, { "epoch": 1.7878519695847706, "grad_norm": 7.891857934693006, "learning_rate": 1.4595974546403434e-07, "loss": 0.4933, "step": 24747 }, { "epoch": 1.7879242147849803, "grad_norm": 8.576487900904162, "learning_rate": 1.4586127939901246e-07, "loss": 0.6426, "step": 24748 }, { "epoch": 1.7879964599851896, "grad_norm": 7.575594072369269, "learning_rate": 1.4576284556032106e-07, "loss": 0.6582, "step": 24749 }, { "epoch": 1.7880687051853994, "grad_norm": 6.919018020194017, "learning_rate": 1.4566444394930822e-07, "loss": 0.6122, "step": 24750 }, { "epoch": 1.7881409503856087, "grad_norm": 6.190641588230236, "learning_rate": 1.4556607456732125e-07, "loss": 0.5111, "step": 24751 }, { "epoch": 1.7882131955858183, "grad_norm": 7.075538684839767, "learning_rate": 1.4546773741570735e-07, "loss": 0.6034, "step": 24752 }, { "epoch": 1.7882854407860278, "grad_norm": 7.76665037205287, "learning_rate": 1.453694324958113e-07, "loss": 0.6022, "step": 24753 }, { "epoch": 1.7883576859862373, "grad_norm": 6.84700192743416, "learning_rate": 1.4527115980897954e-07, "loss": 0.5624, "step": 24754 }, { "epoch": 1.7884299311864469, "grad_norm": 7.080012122537181, "learning_rate": 1.4517291935655709e-07, "loss": 0.5473, "step": 24755 }, { "epoch": 1.7885021763866562, "grad_norm": 6.649785444197235, "learning_rate": 1.4507471113988953e-07, "loss": 0.5845, "step": 24756 }, { "epoch": 1.788574421586866, "grad_norm": 6.730096081618915, "learning_rate": 1.4497653516032052e-07, "loss": 0.6585, "step": 24757 }, { "epoch": 1.7886466667870753, "grad_norm": 7.173834771896594, "learning_rate": 1.448783914191937e-07, "loss": 0.6484, "step": 24758 }, { "epoch": 1.7887189119872848, "grad_norm": 7.585773815899492, "learning_rate": 1.4478027991785354e-07, "loss": 0.6472, "step": 24759 }, { "epoch": 1.7887911571874944, "grad_norm": 8.22473437219489, "learning_rate": 1.4468220065764232e-07, "loss": 0.5725, "step": 24760 }, { "epoch": 1.788863402387704, "grad_norm": 7.472167442275393, "learning_rate": 1.4458415363990313e-07, "loss": 0.5999, "step": 24761 }, { "epoch": 1.7889356475879135, "grad_norm": 8.2548943075788, "learning_rate": 1.4448613886597796e-07, "loss": 0.5907, "step": 24762 }, { "epoch": 1.7890078927881228, "grad_norm": 9.12224255011159, "learning_rate": 1.4438815633720905e-07, "loss": 0.6641, "step": 24763 }, { "epoch": 1.7890801379883325, "grad_norm": 8.271077367864196, "learning_rate": 1.4429020605493671e-07, "loss": 0.657, "step": 24764 }, { "epoch": 1.7891523831885419, "grad_norm": 9.303322180214291, "learning_rate": 1.4419228802050268e-07, "loss": 0.7069, "step": 24765 }, { "epoch": 1.7892246283887514, "grad_norm": 8.246226979620078, "learning_rate": 1.4409440223524722e-07, "loss": 0.6148, "step": 24766 }, { "epoch": 1.789296873588961, "grad_norm": 6.402062512889523, "learning_rate": 1.439965487005099e-07, "loss": 0.5482, "step": 24767 }, { "epoch": 1.7893691187891705, "grad_norm": 7.218865992287127, "learning_rate": 1.438987274176304e-07, "loss": 0.534, "step": 24768 }, { "epoch": 1.78944136398938, "grad_norm": 8.067292910560838, "learning_rate": 1.4380093838794796e-07, "loss": 0.6057, "step": 24769 }, { "epoch": 1.7895136091895893, "grad_norm": 8.420569040818524, "learning_rate": 1.4370318161280094e-07, "loss": 0.5716, "step": 24770 }, { "epoch": 1.7895858543897991, "grad_norm": 6.548028885206153, "learning_rate": 1.436054570935283e-07, "loss": 0.552, "step": 24771 }, { "epoch": 1.7896580995900084, "grad_norm": 6.786481254657985, "learning_rate": 1.43507764831467e-07, "loss": 0.5829, "step": 24772 }, { "epoch": 1.789730344790218, "grad_norm": 7.360655201482894, "learning_rate": 1.4341010482795514e-07, "loss": 0.6097, "step": 24773 }, { "epoch": 1.7898025899904275, "grad_norm": 8.448455705547243, "learning_rate": 1.4331247708432887e-07, "loss": 0.6263, "step": 24774 }, { "epoch": 1.789874835190637, "grad_norm": 6.891043514076125, "learning_rate": 1.4321488160192493e-07, "loss": 0.6436, "step": 24775 }, { "epoch": 1.7899470803908466, "grad_norm": 8.698729358708356, "learning_rate": 1.4311731838207916e-07, "loss": 0.5867, "step": 24776 }, { "epoch": 1.790019325591056, "grad_norm": 8.746365037384816, "learning_rate": 1.43019787426128e-07, "loss": 0.6477, "step": 24777 }, { "epoch": 1.7900915707912657, "grad_norm": 6.163646301993974, "learning_rate": 1.429222887354048e-07, "loss": 0.5003, "step": 24778 }, { "epoch": 1.790163815991475, "grad_norm": 6.826369699386715, "learning_rate": 1.4282482231124578e-07, "loss": 0.5788, "step": 24779 }, { "epoch": 1.7902360611916845, "grad_norm": 7.844532961731712, "learning_rate": 1.4272738815498483e-07, "loss": 0.608, "step": 24780 }, { "epoch": 1.790308306391894, "grad_norm": 7.359335346479556, "learning_rate": 1.4262998626795588e-07, "loss": 0.5688, "step": 24781 }, { "epoch": 1.7903805515921036, "grad_norm": 7.295724520769612, "learning_rate": 1.4253261665149177e-07, "loss": 0.633, "step": 24782 }, { "epoch": 1.7904527967923132, "grad_norm": 8.314605675647156, "learning_rate": 1.4243527930692585e-07, "loss": 0.6409, "step": 24783 }, { "epoch": 1.7905250419925225, "grad_norm": 8.006167895143992, "learning_rate": 1.4233797423559043e-07, "loss": 0.6337, "step": 24784 }, { "epoch": 1.7905972871927323, "grad_norm": 7.847732809404429, "learning_rate": 1.4224070143881747e-07, "loss": 0.6177, "step": 24785 }, { "epoch": 1.7906695323929416, "grad_norm": 6.354070677318854, "learning_rate": 1.421434609179384e-07, "loss": 0.583, "step": 24786 }, { "epoch": 1.7907417775931511, "grad_norm": 7.065677974321813, "learning_rate": 1.4204625267428467e-07, "loss": 0.575, "step": 24787 }, { "epoch": 1.7908140227933607, "grad_norm": 6.735550567620631, "learning_rate": 1.419490767091869e-07, "loss": 0.6065, "step": 24788 }, { "epoch": 1.7908862679935702, "grad_norm": 8.097632224548295, "learning_rate": 1.418519330239754e-07, "loss": 0.6419, "step": 24789 }, { "epoch": 1.7909585131937797, "grad_norm": 7.375220602583192, "learning_rate": 1.417548216199799e-07, "loss": 0.5636, "step": 24790 }, { "epoch": 1.791030758393989, "grad_norm": 7.026439055074607, "learning_rate": 1.4165774249853022e-07, "loss": 0.5204, "step": 24791 }, { "epoch": 1.7911030035941988, "grad_norm": 7.071346717349719, "learning_rate": 1.4156069566095443e-07, "loss": 0.5902, "step": 24792 }, { "epoch": 1.7911752487944081, "grad_norm": 8.12868240246214, "learning_rate": 1.4146368110858177e-07, "loss": 0.6632, "step": 24793 }, { "epoch": 1.7912474939946177, "grad_norm": 6.939226365296648, "learning_rate": 1.413666988427398e-07, "loss": 0.5566, "step": 24794 }, { "epoch": 1.7913197391948272, "grad_norm": 5.412562571245388, "learning_rate": 1.4126974886475685e-07, "loss": 0.5554, "step": 24795 }, { "epoch": 1.7913919843950368, "grad_norm": 6.385897429032281, "learning_rate": 1.4117283117595914e-07, "loss": 0.5739, "step": 24796 }, { "epoch": 1.7914642295952463, "grad_norm": 7.594455262388599, "learning_rate": 1.4107594577767392e-07, "loss": 0.5703, "step": 24797 }, { "epoch": 1.7915364747954556, "grad_norm": 6.885706662194735, "learning_rate": 1.4097909267122734e-07, "loss": 0.5794, "step": 24798 }, { "epoch": 1.7916087199956654, "grad_norm": 6.729077727337296, "learning_rate": 1.4088227185794557e-07, "loss": 0.5733, "step": 24799 }, { "epoch": 1.7916809651958747, "grad_norm": 8.557508809884451, "learning_rate": 1.4078548333915364e-07, "loss": 0.5564, "step": 24800 }, { "epoch": 1.7917532103960843, "grad_norm": 8.2673481116864, "learning_rate": 1.4068872711617638e-07, "loss": 0.6012, "step": 24801 }, { "epoch": 1.7918254555962938, "grad_norm": 6.205717145743009, "learning_rate": 1.4059200319033934e-07, "loss": 0.5659, "step": 24802 }, { "epoch": 1.7918977007965033, "grad_norm": 7.46556706619618, "learning_rate": 1.4049531156296507e-07, "loss": 0.6342, "step": 24803 }, { "epoch": 1.7919699459967129, "grad_norm": 7.101671274692938, "learning_rate": 1.4039865223537814e-07, "loss": 0.6059, "step": 24804 }, { "epoch": 1.7920421911969222, "grad_norm": 7.549569922317621, "learning_rate": 1.4030202520890185e-07, "loss": 0.6402, "step": 24805 }, { "epoch": 1.792114436397132, "grad_norm": 8.336434918822418, "learning_rate": 1.4020543048485823e-07, "loss": 0.632, "step": 24806 }, { "epoch": 1.7921866815973413, "grad_norm": 6.534949610077342, "learning_rate": 1.4010886806456985e-07, "loss": 0.6055, "step": 24807 }, { "epoch": 1.7922589267975508, "grad_norm": 7.310822123207998, "learning_rate": 1.4001233794935898e-07, "loss": 0.5766, "step": 24808 }, { "epoch": 1.7923311719977604, "grad_norm": 9.11682306000942, "learning_rate": 1.3991584014054703e-07, "loss": 0.6496, "step": 24809 }, { "epoch": 1.79240341719797, "grad_norm": 7.190255076588469, "learning_rate": 1.3981937463945466e-07, "loss": 0.6388, "step": 24810 }, { "epoch": 1.7924756623981795, "grad_norm": 5.982739097959882, "learning_rate": 1.3972294144740218e-07, "loss": 0.5286, "step": 24811 }, { "epoch": 1.7925479075983888, "grad_norm": 7.325209047634937, "learning_rate": 1.3962654056571018e-07, "loss": 0.5398, "step": 24812 }, { "epoch": 1.7926201527985985, "grad_norm": 7.232911326437199, "learning_rate": 1.3953017199569847e-07, "loss": 0.5383, "step": 24813 }, { "epoch": 1.7926923979988079, "grad_norm": 9.103701046371153, "learning_rate": 1.394338357386857e-07, "loss": 0.6265, "step": 24814 }, { "epoch": 1.7927646431990176, "grad_norm": 8.18131261091994, "learning_rate": 1.3933753179599052e-07, "loss": 0.6793, "step": 24815 }, { "epoch": 1.792836888399227, "grad_norm": 7.502162367314298, "learning_rate": 1.3924126016893192e-07, "loss": 0.6429, "step": 24816 }, { "epoch": 1.7929091335994365, "grad_norm": 7.793106544442794, "learning_rate": 1.3914502085882715e-07, "loss": 0.5852, "step": 24817 }, { "epoch": 1.792981378799646, "grad_norm": 7.129064103560563, "learning_rate": 1.3904881386699405e-07, "loss": 0.5616, "step": 24818 }, { "epoch": 1.7930536239998554, "grad_norm": 6.36850422543127, "learning_rate": 1.389526391947496e-07, "loss": 0.6398, "step": 24819 }, { "epoch": 1.7931258692000651, "grad_norm": 6.450973780261632, "learning_rate": 1.3885649684341086e-07, "loss": 0.6152, "step": 24820 }, { "epoch": 1.7931981144002744, "grad_norm": 7.181771558076902, "learning_rate": 1.3876038681429256e-07, "loss": 0.5884, "step": 24821 }, { "epoch": 1.7932703596004842, "grad_norm": 7.994542644652536, "learning_rate": 1.3866430910871142e-07, "loss": 0.5775, "step": 24822 }, { "epoch": 1.7933426048006935, "grad_norm": 6.232586214438606, "learning_rate": 1.385682637279828e-07, "loss": 0.6015, "step": 24823 }, { "epoch": 1.793414850000903, "grad_norm": 6.974068156244906, "learning_rate": 1.3847225067342068e-07, "loss": 0.5688, "step": 24824 }, { "epoch": 1.7934870952011126, "grad_norm": 7.098322936235014, "learning_rate": 1.3837626994634006e-07, "loss": 0.6356, "step": 24825 }, { "epoch": 1.793559340401322, "grad_norm": 7.056104076353192, "learning_rate": 1.3828032154805437e-07, "loss": 0.594, "step": 24826 }, { "epoch": 1.7936315856015317, "grad_norm": 8.0829077015404, "learning_rate": 1.3818440547987754e-07, "loss": 0.611, "step": 24827 }, { "epoch": 1.793703830801741, "grad_norm": 7.064250357799883, "learning_rate": 1.3808852174312214e-07, "loss": 0.645, "step": 24828 }, { "epoch": 1.7937760760019508, "grad_norm": 6.840734644917732, "learning_rate": 1.3799267033910102e-07, "loss": 0.5506, "step": 24829 }, { "epoch": 1.79384832120216, "grad_norm": 8.277434078081939, "learning_rate": 1.3789685126912672e-07, "loss": 0.6337, "step": 24830 }, { "epoch": 1.7939205664023696, "grad_norm": 8.139728050094314, "learning_rate": 1.3780106453451013e-07, "loss": 0.6057, "step": 24831 }, { "epoch": 1.7939928116025792, "grad_norm": 6.9005653149767205, "learning_rate": 1.3770531013656274e-07, "loss": 0.5271, "step": 24832 }, { "epoch": 1.7940650568027887, "grad_norm": 7.239449025470296, "learning_rate": 1.376095880765954e-07, "loss": 0.5788, "step": 24833 }, { "epoch": 1.7941373020029983, "grad_norm": 8.147934690562982, "learning_rate": 1.3751389835591906e-07, "loss": 0.6606, "step": 24834 }, { "epoch": 1.7942095472032076, "grad_norm": 7.678636853519162, "learning_rate": 1.3741824097584206e-07, "loss": 0.5855, "step": 24835 }, { "epoch": 1.7942817924034173, "grad_norm": 6.334157522012843, "learning_rate": 1.373226159376756e-07, "loss": 0.5753, "step": 24836 }, { "epoch": 1.7943540376036267, "grad_norm": 7.501417916097805, "learning_rate": 1.372270232427278e-07, "loss": 0.669, "step": 24837 }, { "epoch": 1.7944262828038362, "grad_norm": 7.984458848247966, "learning_rate": 1.3713146289230818e-07, "loss": 0.6043, "step": 24838 }, { "epoch": 1.7944985280040457, "grad_norm": 8.11377962479908, "learning_rate": 1.3703593488772344e-07, "loss": 0.6308, "step": 24839 }, { "epoch": 1.7945707732042553, "grad_norm": 8.45262497076999, "learning_rate": 1.3694043923028228e-07, "loss": 0.6311, "step": 24840 }, { "epoch": 1.7946430184044648, "grad_norm": 7.586965787831303, "learning_rate": 1.3684497592129198e-07, "loss": 0.6772, "step": 24841 }, { "epoch": 1.7947152636046741, "grad_norm": 6.205166343899956, "learning_rate": 1.3674954496205872e-07, "loss": 0.5575, "step": 24842 }, { "epoch": 1.794787508804884, "grad_norm": 6.2283802705581275, "learning_rate": 1.3665414635388923e-07, "loss": 0.5444, "step": 24843 }, { "epoch": 1.7948597540050932, "grad_norm": 8.483366904601377, "learning_rate": 1.3655878009808943e-07, "loss": 0.5735, "step": 24844 }, { "epoch": 1.7949319992053028, "grad_norm": 7.886943902726572, "learning_rate": 1.364634461959649e-07, "loss": 0.5946, "step": 24845 }, { "epoch": 1.7950042444055123, "grad_norm": 7.683704710926676, "learning_rate": 1.3636814464882077e-07, "loss": 0.5713, "step": 24846 }, { "epoch": 1.7950764896057219, "grad_norm": 6.893303383049503, "learning_rate": 1.362728754579612e-07, "loss": 0.5518, "step": 24847 }, { "epoch": 1.7951487348059314, "grad_norm": 7.153570348125888, "learning_rate": 1.3617763862469103e-07, "loss": 0.5138, "step": 24848 }, { "epoch": 1.7952209800061407, "grad_norm": 7.878064240611345, "learning_rate": 1.3608243415031336e-07, "loss": 0.5736, "step": 24849 }, { "epoch": 1.7952932252063505, "grad_norm": 9.249743896238213, "learning_rate": 1.359872620361319e-07, "loss": 0.6575, "step": 24850 }, { "epoch": 1.7953654704065598, "grad_norm": 6.663221836171831, "learning_rate": 1.3589212228344888e-07, "loss": 0.5849, "step": 24851 }, { "epoch": 1.7954377156067693, "grad_norm": 6.191889275322319, "learning_rate": 1.3579701489356778e-07, "loss": 0.5612, "step": 24852 }, { "epoch": 1.7955099608069789, "grad_norm": 7.0340348217807, "learning_rate": 1.3570193986778946e-07, "loss": 0.6236, "step": 24853 }, { "epoch": 1.7955822060071884, "grad_norm": 7.524853534477068, "learning_rate": 1.3560689720741566e-07, "loss": 0.6023, "step": 24854 }, { "epoch": 1.795654451207398, "grad_norm": 7.211373131055051, "learning_rate": 1.3551188691374755e-07, "loss": 0.5668, "step": 24855 }, { "epoch": 1.7957266964076073, "grad_norm": 8.68145981075768, "learning_rate": 1.3541690898808607e-07, "loss": 0.5949, "step": 24856 }, { "epoch": 1.795798941607817, "grad_norm": 8.080858251483342, "learning_rate": 1.35321963431731e-07, "loss": 0.5586, "step": 24857 }, { "epoch": 1.7958711868080264, "grad_norm": 8.438687615348348, "learning_rate": 1.352270502459821e-07, "loss": 0.5855, "step": 24858 }, { "epoch": 1.795943432008236, "grad_norm": 83.2586331859946, "learning_rate": 1.351321694321392e-07, "loss": 0.7373, "step": 24859 }, { "epoch": 1.7960156772084455, "grad_norm": 6.841428876950278, "learning_rate": 1.3503732099150041e-07, "loss": 0.5413, "step": 24860 }, { "epoch": 1.796087922408655, "grad_norm": 8.596176415267566, "learning_rate": 1.3494250492536443e-07, "loss": 0.6455, "step": 24861 }, { "epoch": 1.7961601676088645, "grad_norm": 8.866509042739452, "learning_rate": 1.348477212350291e-07, "loss": 0.599, "step": 24862 }, { "epoch": 1.7962324128090739, "grad_norm": 6.568279809211816, "learning_rate": 1.3475296992179198e-07, "loss": 0.5446, "step": 24863 }, { "epoch": 1.7963046580092836, "grad_norm": 7.806985602679145, "learning_rate": 1.346582509869504e-07, "loss": 0.606, "step": 24864 }, { "epoch": 1.796376903209493, "grad_norm": 7.109259904202268, "learning_rate": 1.345635644318008e-07, "loss": 0.6177, "step": 24865 }, { "epoch": 1.7964491484097025, "grad_norm": 7.205480757166348, "learning_rate": 1.3446891025763937e-07, "loss": 0.6359, "step": 24866 }, { "epoch": 1.796521393609912, "grad_norm": 7.297339741122328, "learning_rate": 1.3437428846576173e-07, "loss": 0.559, "step": 24867 }, { "epoch": 1.7965936388101216, "grad_norm": 7.5268840877101475, "learning_rate": 1.342796990574635e-07, "loss": 0.5532, "step": 24868 }, { "epoch": 1.7966658840103311, "grad_norm": 6.797599806450958, "learning_rate": 1.34185142034039e-07, "loss": 0.5853, "step": 24869 }, { "epoch": 1.7967381292105404, "grad_norm": 5.744042004955004, "learning_rate": 1.3409061739678347e-07, "loss": 0.6124, "step": 24870 }, { "epoch": 1.7968103744107502, "grad_norm": 7.464696063263027, "learning_rate": 1.339961251469901e-07, "loss": 0.5007, "step": 24871 }, { "epoch": 1.7968826196109595, "grad_norm": 8.144887207449242, "learning_rate": 1.339016652859526e-07, "loss": 0.6695, "step": 24872 }, { "epoch": 1.796954864811169, "grad_norm": 8.147537196338698, "learning_rate": 1.3380723781496408e-07, "loss": 0.6448, "step": 24873 }, { "epoch": 1.7970271100113786, "grad_norm": 8.961143537844897, "learning_rate": 1.3371284273531738e-07, "loss": 0.6098, "step": 24874 }, { "epoch": 1.7970993552115881, "grad_norm": 6.374270229082631, "learning_rate": 1.3361848004830424e-07, "loss": 0.6349, "step": 24875 }, { "epoch": 1.7971716004117977, "grad_norm": 6.770145166100086, "learning_rate": 1.33524149755217e-07, "loss": 0.5948, "step": 24876 }, { "epoch": 1.797243845612007, "grad_norm": 8.05842144144433, "learning_rate": 1.3342985185734707e-07, "loss": 0.5951, "step": 24877 }, { "epoch": 1.7973160908122168, "grad_norm": 7.3208252555210604, "learning_rate": 1.3333558635598458e-07, "loss": 0.6024, "step": 24878 }, { "epoch": 1.797388336012426, "grad_norm": 8.923929649304851, "learning_rate": 1.3324135325242043e-07, "loss": 0.6224, "step": 24879 }, { "epoch": 1.7974605812126356, "grad_norm": 7.4256772230187185, "learning_rate": 1.331471525479447e-07, "loss": 0.6345, "step": 24880 }, { "epoch": 1.7975328264128452, "grad_norm": 8.347037021516929, "learning_rate": 1.3305298424384632e-07, "loss": 0.6362, "step": 24881 }, { "epoch": 1.7976050716130547, "grad_norm": 6.756627820202355, "learning_rate": 1.329588483414146e-07, "loss": 0.6172, "step": 24882 }, { "epoch": 1.7976773168132643, "grad_norm": 7.373110771660214, "learning_rate": 1.3286474484193872e-07, "loss": 0.57, "step": 24883 }, { "epoch": 1.7977495620134736, "grad_norm": 7.36058938278802, "learning_rate": 1.327706737467063e-07, "loss": 0.6219, "step": 24884 }, { "epoch": 1.7978218072136833, "grad_norm": 6.854333228161988, "learning_rate": 1.3267663505700523e-07, "loss": 0.5581, "step": 24885 }, { "epoch": 1.7978940524138927, "grad_norm": 7.663568976607995, "learning_rate": 1.325826287741233e-07, "loss": 0.6222, "step": 24886 }, { "epoch": 1.7979662976141024, "grad_norm": 7.133888455951153, "learning_rate": 1.3248865489934677e-07, "loss": 0.6298, "step": 24887 }, { "epoch": 1.7980385428143117, "grad_norm": 8.771301063757544, "learning_rate": 1.323947134339626e-07, "loss": 0.6498, "step": 24888 }, { "epoch": 1.7981107880145213, "grad_norm": 6.372886157661663, "learning_rate": 1.323008043792562e-07, "loss": 0.615, "step": 24889 }, { "epoch": 1.7981830332147308, "grad_norm": 7.9115710540302135, "learning_rate": 1.3220692773651316e-07, "loss": 0.6637, "step": 24890 }, { "epoch": 1.7982552784149402, "grad_norm": 8.087480510234196, "learning_rate": 1.3211308350701918e-07, "loss": 0.5994, "step": 24891 }, { "epoch": 1.79832752361515, "grad_norm": 7.944409104322719, "learning_rate": 1.320192716920582e-07, "loss": 0.5702, "step": 24892 }, { "epoch": 1.7983997688153592, "grad_norm": 6.81425131588891, "learning_rate": 1.31925492292915e-07, "loss": 0.5723, "step": 24893 }, { "epoch": 1.798472014015569, "grad_norm": 8.852682933280029, "learning_rate": 1.3183174531087307e-07, "loss": 0.5983, "step": 24894 }, { "epoch": 1.7985442592157783, "grad_norm": 8.547875159287806, "learning_rate": 1.317380307472163e-07, "loss": 0.5894, "step": 24895 }, { "epoch": 1.7986165044159879, "grad_norm": 8.739632268017676, "learning_rate": 1.316443486032265e-07, "loss": 0.5934, "step": 24896 }, { "epoch": 1.7986887496161974, "grad_norm": 7.677778097846758, "learning_rate": 1.315506988801865e-07, "loss": 0.6269, "step": 24897 }, { "epoch": 1.7987609948164067, "grad_norm": 7.237224240229754, "learning_rate": 1.314570815793792e-07, "loss": 0.579, "step": 24898 }, { "epoch": 1.7988332400166165, "grad_norm": 7.706778814606374, "learning_rate": 1.3136349670208465e-07, "loss": 0.6487, "step": 24899 }, { "epoch": 1.7989054852168258, "grad_norm": 7.355014424364998, "learning_rate": 1.312699442495849e-07, "loss": 0.5885, "step": 24900 }, { "epoch": 1.7989777304170356, "grad_norm": 7.394235128447406, "learning_rate": 1.3117642422316058e-07, "loss": 0.6567, "step": 24901 }, { "epoch": 1.799049975617245, "grad_norm": 7.493819423987088, "learning_rate": 1.3108293662409148e-07, "loss": 0.5762, "step": 24902 }, { "epoch": 1.7991222208174544, "grad_norm": 7.019089962478554, "learning_rate": 1.3098948145365775e-07, "loss": 0.5605, "step": 24903 }, { "epoch": 1.799194466017664, "grad_norm": 7.998101724476149, "learning_rate": 1.308960587131386e-07, "loss": 0.6109, "step": 24904 }, { "epoch": 1.7992667112178735, "grad_norm": 8.742002047156044, "learning_rate": 1.30802668403813e-07, "loss": 0.6814, "step": 24905 }, { "epoch": 1.799338956418083, "grad_norm": 7.526290336821378, "learning_rate": 1.307093105269594e-07, "loss": 0.6115, "step": 24906 }, { "epoch": 1.7994112016182924, "grad_norm": 7.18641189963468, "learning_rate": 1.3061598508385537e-07, "loss": 0.5648, "step": 24907 }, { "epoch": 1.7994834468185021, "grad_norm": 6.435154756168638, "learning_rate": 1.3052269207577907e-07, "loss": 0.5862, "step": 24908 }, { "epoch": 1.7995556920187115, "grad_norm": 6.846367601043146, "learning_rate": 1.3042943150400748e-07, "loss": 0.6055, "step": 24909 }, { "epoch": 1.799627937218921, "grad_norm": 9.894282576202981, "learning_rate": 1.303362033698169e-07, "loss": 0.6279, "step": 24910 }, { "epoch": 1.7997001824191305, "grad_norm": 8.43469689042237, "learning_rate": 1.3024300767448345e-07, "loss": 0.5734, "step": 24911 }, { "epoch": 1.79977242761934, "grad_norm": 6.951009432809724, "learning_rate": 1.3014984441928364e-07, "loss": 0.5705, "step": 24912 }, { "epoch": 1.7998446728195496, "grad_norm": 7.078145479757899, "learning_rate": 1.3005671360549227e-07, "loss": 0.6211, "step": 24913 }, { "epoch": 1.799916918019759, "grad_norm": 6.456725217717522, "learning_rate": 1.2996361523438417e-07, "loss": 0.5558, "step": 24914 }, { "epoch": 1.7999891632199687, "grad_norm": 7.674935594717173, "learning_rate": 1.2987054930723414e-07, "loss": 0.5523, "step": 24915 }, { "epoch": 1.800061408420178, "grad_norm": 6.607637506180703, "learning_rate": 1.2977751582531616e-07, "loss": 0.5378, "step": 24916 }, { "epoch": 1.8001336536203876, "grad_norm": 7.013954422479875, "learning_rate": 1.296845147899034e-07, "loss": 0.5944, "step": 24917 }, { "epoch": 1.8002058988205971, "grad_norm": 5.505603536701924, "learning_rate": 1.2959154620226928e-07, "loss": 0.5057, "step": 24918 }, { "epoch": 1.8002781440208067, "grad_norm": 8.159630787294295, "learning_rate": 1.2949861006368637e-07, "loss": 0.6721, "step": 24919 }, { "epoch": 1.8003503892210162, "grad_norm": 7.552541938975926, "learning_rate": 1.2940570637542698e-07, "loss": 0.624, "step": 24920 }, { "epoch": 1.8004226344212255, "grad_norm": 7.395641342321693, "learning_rate": 1.2931283513876263e-07, "loss": 0.6328, "step": 24921 }, { "epoch": 1.8004948796214353, "grad_norm": 6.902966024934952, "learning_rate": 1.2921999635496507e-07, "loss": 0.5797, "step": 24922 }, { "epoch": 1.8005671248216446, "grad_norm": 7.778278810015612, "learning_rate": 1.291271900253052e-07, "loss": 0.5455, "step": 24923 }, { "epoch": 1.8006393700218541, "grad_norm": 7.233697913104118, "learning_rate": 1.2903441615105288e-07, "loss": 0.642, "step": 24924 }, { "epoch": 1.8007116152220637, "grad_norm": 6.893717030976382, "learning_rate": 1.2894167473347847e-07, "loss": 0.5879, "step": 24925 }, { "epoch": 1.8007838604222732, "grad_norm": 6.584854272201687, "learning_rate": 1.2884896577385147e-07, "loss": 0.5743, "step": 24926 }, { "epoch": 1.8008561056224828, "grad_norm": 7.5311180514731415, "learning_rate": 1.2875628927344146e-07, "loss": 0.6688, "step": 24927 }, { "epoch": 1.800928350822692, "grad_norm": 9.843548097507789, "learning_rate": 1.2866364523351632e-07, "loss": 0.6704, "step": 24928 }, { "epoch": 1.8010005960229019, "grad_norm": 8.040449402808346, "learning_rate": 1.2857103365534445e-07, "loss": 0.5795, "step": 24929 }, { "epoch": 1.8010728412231112, "grad_norm": 6.400242812795788, "learning_rate": 1.2847845454019402e-07, "loss": 0.5891, "step": 24930 }, { "epoch": 1.8011450864233207, "grad_norm": 8.332082069553175, "learning_rate": 1.2838590788933237e-07, "loss": 0.5627, "step": 24931 }, { "epoch": 1.8012173316235303, "grad_norm": 7.371949162904558, "learning_rate": 1.2829339370402593e-07, "loss": 0.6196, "step": 24932 }, { "epoch": 1.8012895768237398, "grad_norm": 6.89668626686926, "learning_rate": 1.2820091198554151e-07, "loss": 0.5366, "step": 24933 }, { "epoch": 1.8013618220239493, "grad_norm": 8.360845190517804, "learning_rate": 1.281084627351456e-07, "loss": 0.6465, "step": 24934 }, { "epoch": 1.8014340672241587, "grad_norm": 8.172772622636005, "learning_rate": 1.2801604595410244e-07, "loss": 0.6137, "step": 24935 }, { "epoch": 1.8015063124243684, "grad_norm": 5.386062947298854, "learning_rate": 1.2792366164367825e-07, "loss": 0.5603, "step": 24936 }, { "epoch": 1.8015785576245777, "grad_norm": 6.9666476649461755, "learning_rate": 1.278313098051373e-07, "loss": 0.5861, "step": 24937 }, { "epoch": 1.8016508028247873, "grad_norm": 6.637407021311314, "learning_rate": 1.2773899043974387e-07, "loss": 0.6009, "step": 24938 }, { "epoch": 1.8017230480249968, "grad_norm": 8.307136354020974, "learning_rate": 1.2764670354876135e-07, "loss": 0.6068, "step": 24939 }, { "epoch": 1.8017952932252064, "grad_norm": 7.244547404914318, "learning_rate": 1.275544491334535e-07, "loss": 0.6455, "step": 24940 }, { "epoch": 1.801867538425416, "grad_norm": 7.163514560390981, "learning_rate": 1.2746222719508345e-07, "loss": 0.6151, "step": 24941 }, { "epoch": 1.8019397836256252, "grad_norm": 7.629864001180733, "learning_rate": 1.2737003773491324e-07, "loss": 0.641, "step": 24942 }, { "epoch": 1.802012028825835, "grad_norm": 7.337043979387663, "learning_rate": 1.2727788075420494e-07, "loss": 0.616, "step": 24943 }, { "epoch": 1.8020842740260443, "grad_norm": 7.538856811879988, "learning_rate": 1.2718575625422032e-07, "loss": 0.6344, "step": 24944 }, { "epoch": 1.8021565192262539, "grad_norm": 7.921853089913611, "learning_rate": 1.2709366423622027e-07, "loss": 0.5521, "step": 24945 }, { "epoch": 1.8022287644264634, "grad_norm": 7.025547546551615, "learning_rate": 1.270016047014655e-07, "loss": 0.6253, "step": 24946 }, { "epoch": 1.802301009626673, "grad_norm": 6.749155910131843, "learning_rate": 1.269095776512161e-07, "loss": 0.5938, "step": 24947 }, { "epoch": 1.8023732548268825, "grad_norm": 8.33236775208326, "learning_rate": 1.2681758308673214e-07, "loss": 0.5988, "step": 24948 }, { "epoch": 1.8024455000270918, "grad_norm": 7.105319230748862, "learning_rate": 1.2672562100927265e-07, "loss": 0.6156, "step": 24949 }, { "epoch": 1.8025177452273016, "grad_norm": 6.112121120950399, "learning_rate": 1.266336914200969e-07, "loss": 0.5377, "step": 24950 }, { "epoch": 1.802589990427511, "grad_norm": 7.12634344987499, "learning_rate": 1.2654179432046276e-07, "loss": 0.5743, "step": 24951 }, { "epoch": 1.8026622356277204, "grad_norm": 8.432491822407, "learning_rate": 1.2644992971162923e-07, "loss": 0.6466, "step": 24952 }, { "epoch": 1.80273448082793, "grad_norm": 6.4087793334742, "learning_rate": 1.263580975948528e-07, "loss": 0.5989, "step": 24953 }, { "epoch": 1.8028067260281395, "grad_norm": 9.453333223429878, "learning_rate": 1.2626629797139106e-07, "loss": 0.5836, "step": 24954 }, { "epoch": 1.802878971228349, "grad_norm": 6.63783661586852, "learning_rate": 1.261745308425008e-07, "loss": 0.5854, "step": 24955 }, { "epoch": 1.8029512164285584, "grad_norm": 8.492688344102744, "learning_rate": 1.260827962094377e-07, "loss": 0.6249, "step": 24956 }, { "epoch": 1.8030234616287681, "grad_norm": 7.612405349819526, "learning_rate": 1.2599109407345822e-07, "loss": 0.5361, "step": 24957 }, { "epoch": 1.8030957068289775, "grad_norm": 7.625261145950695, "learning_rate": 1.2589942443581694e-07, "loss": 0.5344, "step": 24958 }, { "epoch": 1.8031679520291872, "grad_norm": 7.0134295658331345, "learning_rate": 1.258077872977695e-07, "loss": 0.5801, "step": 24959 }, { "epoch": 1.8032401972293965, "grad_norm": 7.768104326296007, "learning_rate": 1.257161826605699e-07, "loss": 0.5664, "step": 24960 }, { "epoch": 1.803312442429606, "grad_norm": 6.969328851992622, "learning_rate": 1.2562461052547214e-07, "loss": 0.5992, "step": 24961 }, { "epoch": 1.8033846876298156, "grad_norm": 6.17442435233955, "learning_rate": 1.2553307089373022e-07, "loss": 0.5201, "step": 24962 }, { "epoch": 1.803456932830025, "grad_norm": 8.365549705488576, "learning_rate": 1.2544156376659676e-07, "loss": 0.5714, "step": 24963 }, { "epoch": 1.8035291780302347, "grad_norm": 7.456879571777866, "learning_rate": 1.2535008914532438e-07, "loss": 0.5993, "step": 24964 }, { "epoch": 1.803601423230444, "grad_norm": 8.912227297940523, "learning_rate": 1.2525864703116564e-07, "loss": 0.5891, "step": 24965 }, { "epoch": 1.8036736684306538, "grad_norm": 6.545735480801156, "learning_rate": 1.2516723742537233e-07, "loss": 0.6251, "step": 24966 }, { "epoch": 1.8037459136308631, "grad_norm": 6.746407011762585, "learning_rate": 1.2507586032919517e-07, "loss": 0.572, "step": 24967 }, { "epoch": 1.8038181588310727, "grad_norm": 8.222614496627958, "learning_rate": 1.2498451574388532e-07, "loss": 0.5667, "step": 24968 }, { "epoch": 1.8038904040312822, "grad_norm": 7.703646690546217, "learning_rate": 1.2489320367069374e-07, "loss": 0.5794, "step": 24969 }, { "epoch": 1.8039626492314915, "grad_norm": 8.279101744501077, "learning_rate": 1.2480192411087028e-07, "loss": 0.5804, "step": 24970 }, { "epoch": 1.8040348944317013, "grad_norm": 7.2062245483478335, "learning_rate": 1.247106770656642e-07, "loss": 0.6236, "step": 24971 }, { "epoch": 1.8041071396319106, "grad_norm": 7.965130631021583, "learning_rate": 1.2461946253632424e-07, "loss": 0.6729, "step": 24972 }, { "epoch": 1.8041793848321204, "grad_norm": 7.876425477848786, "learning_rate": 1.245282805241002e-07, "loss": 0.5935, "step": 24973 }, { "epoch": 1.8042516300323297, "grad_norm": 6.935928579154124, "learning_rate": 1.244371310302389e-07, "loss": 0.6069, "step": 24974 }, { "epoch": 1.8043238752325392, "grad_norm": 7.053602978047731, "learning_rate": 1.243460140559888e-07, "loss": 0.6131, "step": 24975 }, { "epoch": 1.8043961204327488, "grad_norm": 7.132958501552023, "learning_rate": 1.2425492960259745e-07, "loss": 0.5497, "step": 24976 }, { "epoch": 1.8044683656329583, "grad_norm": 6.464955566914419, "learning_rate": 1.2416387767131139e-07, "loss": 0.5361, "step": 24977 }, { "epoch": 1.8045406108331679, "grad_norm": 7.149221908807083, "learning_rate": 1.2407285826337684e-07, "loss": 0.6015, "step": 24978 }, { "epoch": 1.8046128560333772, "grad_norm": 7.312806473528963, "learning_rate": 1.239818713800403e-07, "loss": 0.5968, "step": 24979 }, { "epoch": 1.804685101233587, "grad_norm": 8.987065663672658, "learning_rate": 1.2389091702254746e-07, "loss": 0.6617, "step": 24980 }, { "epoch": 1.8047573464337963, "grad_norm": 7.605114631265429, "learning_rate": 1.237999951921426e-07, "loss": 0.5474, "step": 24981 }, { "epoch": 1.8048295916340058, "grad_norm": 8.055161559309784, "learning_rate": 1.237091058900708e-07, "loss": 0.5586, "step": 24982 }, { "epoch": 1.8049018368342153, "grad_norm": 6.415673150106241, "learning_rate": 1.2361824911757637e-07, "loss": 0.5773, "step": 24983 }, { "epoch": 1.8049740820344249, "grad_norm": 7.362006945850564, "learning_rate": 1.235274248759033e-07, "loss": 0.6121, "step": 24984 }, { "epoch": 1.8050463272346344, "grad_norm": 7.278290110148828, "learning_rate": 1.2343663316629423e-07, "loss": 0.6262, "step": 24985 }, { "epoch": 1.8051185724348437, "grad_norm": 7.349449510784618, "learning_rate": 1.2334587398999232e-07, "loss": 0.5718, "step": 24986 }, { "epoch": 1.8051908176350535, "grad_norm": 7.523549048368271, "learning_rate": 1.232551473482399e-07, "loss": 0.6473, "step": 24987 }, { "epoch": 1.8052630628352628, "grad_norm": 7.255754751937504, "learning_rate": 1.2316445324227933e-07, "loss": 0.6005, "step": 24988 }, { "epoch": 1.8053353080354724, "grad_norm": 7.239758064484986, "learning_rate": 1.2307379167335182e-07, "loss": 0.6394, "step": 24989 }, { "epoch": 1.805407553235682, "grad_norm": 6.237989252813192, "learning_rate": 1.229831626426986e-07, "loss": 0.5525, "step": 24990 }, { "epoch": 1.8054797984358915, "grad_norm": 8.34367136257419, "learning_rate": 1.2289256615156037e-07, "loss": 0.6095, "step": 24991 }, { "epoch": 1.805552043636101, "grad_norm": 10.41149440273788, "learning_rate": 1.2280200220117694e-07, "loss": 0.6243, "step": 24992 }, { "epoch": 1.8056242888363103, "grad_norm": 8.531105571264865, "learning_rate": 1.2271147079278846e-07, "loss": 0.5978, "step": 24993 }, { "epoch": 1.80569653403652, "grad_norm": 7.468274983259919, "learning_rate": 1.2262097192763416e-07, "loss": 0.5259, "step": 24994 }, { "epoch": 1.8057687792367294, "grad_norm": 7.868730016899641, "learning_rate": 1.2253050560695285e-07, "loss": 0.5197, "step": 24995 }, { "epoch": 1.805841024436939, "grad_norm": 8.067753462709343, "learning_rate": 1.2244007183198291e-07, "loss": 0.6529, "step": 24996 }, { "epoch": 1.8059132696371485, "grad_norm": 6.5272046570308815, "learning_rate": 1.2234967060396176e-07, "loss": 0.6194, "step": 24997 }, { "epoch": 1.805985514837358, "grad_norm": 6.6918611655783025, "learning_rate": 1.2225930192412831e-07, "loss": 0.6226, "step": 24998 }, { "epoch": 1.8060577600375676, "grad_norm": 7.489481862052389, "learning_rate": 1.2216896579371862e-07, "loss": 0.5915, "step": 24999 }, { "epoch": 1.806130005237777, "grad_norm": 6.567868607148858, "learning_rate": 1.2207866221396913e-07, "loss": 0.5053, "step": 25000 }, { "epoch": 1.8062022504379867, "grad_norm": 7.604163796062886, "learning_rate": 1.2198839118611665e-07, "loss": 0.5794, "step": 25001 }, { "epoch": 1.806274495638196, "grad_norm": 7.3374638051272685, "learning_rate": 1.2189815271139716e-07, "loss": 0.6531, "step": 25002 }, { "epoch": 1.8063467408384055, "grad_norm": 8.453872732207605, "learning_rate": 1.218079467910449e-07, "loss": 0.6725, "step": 25003 }, { "epoch": 1.806418986038615, "grad_norm": 7.957368748978934, "learning_rate": 1.217177734262956e-07, "loss": 0.5778, "step": 25004 }, { "epoch": 1.8064912312388246, "grad_norm": 6.648894494887883, "learning_rate": 1.2162763261838294e-07, "loss": 0.6191, "step": 25005 }, { "epoch": 1.8065634764390341, "grad_norm": 8.929426481448719, "learning_rate": 1.2153752436854155e-07, "loss": 0.6571, "step": 25006 }, { "epoch": 1.8066357216392435, "grad_norm": 8.13245835939448, "learning_rate": 1.2144744867800485e-07, "loss": 0.6255, "step": 25007 }, { "epoch": 1.8067079668394532, "grad_norm": 7.960313108027349, "learning_rate": 1.2135740554800547e-07, "loss": 0.6497, "step": 25008 }, { "epoch": 1.8067802120396625, "grad_norm": 7.3542646738353294, "learning_rate": 1.2126739497977685e-07, "loss": 0.5519, "step": 25009 }, { "epoch": 1.806852457239872, "grad_norm": 7.746724328610534, "learning_rate": 1.2117741697455026e-07, "loss": 0.6594, "step": 25010 }, { "epoch": 1.8069247024400816, "grad_norm": 8.153936416059942, "learning_rate": 1.2108747153355777e-07, "loss": 0.6095, "step": 25011 }, { "epoch": 1.8069969476402912, "grad_norm": 7.178399203904394, "learning_rate": 1.2099755865803088e-07, "loss": 0.604, "step": 25012 }, { "epoch": 1.8070691928405007, "grad_norm": 6.404879688618951, "learning_rate": 1.2090767834919998e-07, "loss": 0.6677, "step": 25013 }, { "epoch": 1.80714143804071, "grad_norm": 8.200094306217421, "learning_rate": 1.2081783060829577e-07, "loss": 0.6417, "step": 25014 }, { "epoch": 1.8072136832409198, "grad_norm": 8.51801276371849, "learning_rate": 1.207280154365481e-07, "loss": 0.6431, "step": 25015 }, { "epoch": 1.8072859284411291, "grad_norm": 7.408131034614965, "learning_rate": 1.2063823283518655e-07, "loss": 0.6262, "step": 25016 }, { "epoch": 1.8073581736413387, "grad_norm": 7.506764730803331, "learning_rate": 1.2054848280544014e-07, "loss": 0.5416, "step": 25017 }, { "epoch": 1.8074304188415482, "grad_norm": 6.50447134134819, "learning_rate": 1.2045876534853757e-07, "loss": 0.637, "step": 25018 }, { "epoch": 1.8075026640417577, "grad_norm": 8.529702294220318, "learning_rate": 1.2036908046570677e-07, "loss": 0.666, "step": 25019 }, { "epoch": 1.8075749092419673, "grad_norm": 6.706871426277179, "learning_rate": 1.2027942815817594e-07, "loss": 0.5423, "step": 25020 }, { "epoch": 1.8076471544421766, "grad_norm": 6.594439574427932, "learning_rate": 1.201898084271716e-07, "loss": 0.578, "step": 25021 }, { "epoch": 1.8077193996423864, "grad_norm": 6.886944469056872, "learning_rate": 1.2010022127392106e-07, "loss": 0.5895, "step": 25022 }, { "epoch": 1.8077916448425957, "grad_norm": 6.977270280534283, "learning_rate": 1.200106666996509e-07, "loss": 0.5291, "step": 25023 }, { "epoch": 1.8078638900428052, "grad_norm": 9.063054350636857, "learning_rate": 1.1992114470558646e-07, "loss": 0.5763, "step": 25024 }, { "epoch": 1.8079361352430148, "grad_norm": 7.105711410095029, "learning_rate": 1.198316552929532e-07, "loss": 0.6285, "step": 25025 }, { "epoch": 1.8080083804432243, "grad_norm": 7.1413496388502535, "learning_rate": 1.1974219846297708e-07, "loss": 0.6078, "step": 25026 }, { "epoch": 1.8080806256434339, "grad_norm": 6.485733514976924, "learning_rate": 1.196527742168821e-07, "loss": 0.5689, "step": 25027 }, { "epoch": 1.8081528708436432, "grad_norm": 7.975345769101096, "learning_rate": 1.1956338255589227e-07, "loss": 0.5621, "step": 25028 }, { "epoch": 1.808225116043853, "grad_norm": 7.209071685667427, "learning_rate": 1.1947402348123137e-07, "loss": 0.5176, "step": 25029 }, { "epoch": 1.8082973612440623, "grad_norm": 7.620262300076859, "learning_rate": 1.1938469699412315e-07, "loss": 0.6336, "step": 25030 }, { "epoch": 1.8083696064442718, "grad_norm": 7.625532788282782, "learning_rate": 1.1929540309578962e-07, "loss": 0.6649, "step": 25031 }, { "epoch": 1.8084418516444813, "grad_norm": 8.442722992802922, "learning_rate": 1.192061417874535e-07, "loss": 0.6356, "step": 25032 }, { "epoch": 1.8085140968446909, "grad_norm": 8.95002008361268, "learning_rate": 1.1911691307033679e-07, "loss": 0.6151, "step": 25033 }, { "epoch": 1.8085863420449004, "grad_norm": 6.521031613326373, "learning_rate": 1.1902771694566079e-07, "loss": 0.5581, "step": 25034 }, { "epoch": 1.8086585872451098, "grad_norm": 7.9395026136219, "learning_rate": 1.1893855341464671e-07, "loss": 0.6243, "step": 25035 }, { "epoch": 1.8087308324453195, "grad_norm": 7.627708329287995, "learning_rate": 1.18849422478515e-07, "loss": 0.5831, "step": 25036 }, { "epoch": 1.8088030776455288, "grad_norm": 6.88733385166049, "learning_rate": 1.1876032413848631e-07, "loss": 0.5733, "step": 25037 }, { "epoch": 1.8088753228457386, "grad_norm": 9.121983277690056, "learning_rate": 1.1867125839577941e-07, "loss": 0.6067, "step": 25038 }, { "epoch": 1.808947568045948, "grad_norm": 7.798357805361922, "learning_rate": 1.185822252516139e-07, "loss": 0.5763, "step": 25039 }, { "epoch": 1.8090198132461575, "grad_norm": 7.745533794530047, "learning_rate": 1.1849322470720903e-07, "loss": 0.6337, "step": 25040 }, { "epoch": 1.809092058446367, "grad_norm": 7.437030424793665, "learning_rate": 1.1840425676378276e-07, "loss": 0.5916, "step": 25041 }, { "epoch": 1.8091643036465763, "grad_norm": 7.624323830563011, "learning_rate": 1.1831532142255297e-07, "loss": 0.5334, "step": 25042 }, { "epoch": 1.809236548846786, "grad_norm": 6.761315927125891, "learning_rate": 1.1822641868473705e-07, "loss": 0.6041, "step": 25043 }, { "epoch": 1.8093087940469954, "grad_norm": 7.817106552055652, "learning_rate": 1.1813754855155208e-07, "loss": 0.5946, "step": 25044 }, { "epoch": 1.8093810392472052, "grad_norm": 8.901856526679254, "learning_rate": 1.1804871102421483e-07, "loss": 0.5736, "step": 25045 }, { "epoch": 1.8094532844474145, "grad_norm": 7.349799079484354, "learning_rate": 1.1795990610394104e-07, "loss": 0.5911, "step": 25046 }, { "epoch": 1.809525529647624, "grad_norm": 6.482021704468234, "learning_rate": 1.1787113379194665e-07, "loss": 0.5621, "step": 25047 }, { "epoch": 1.8095977748478336, "grad_norm": 7.214959805086489, "learning_rate": 1.1778239408944736e-07, "loss": 0.6893, "step": 25048 }, { "epoch": 1.809670020048043, "grad_norm": 6.889109611393403, "learning_rate": 1.1769368699765693e-07, "loss": 0.5745, "step": 25049 }, { "epoch": 1.8097422652482527, "grad_norm": 6.930384655690482, "learning_rate": 1.1760501251779021e-07, "loss": 0.5753, "step": 25050 }, { "epoch": 1.809814510448462, "grad_norm": 7.969676483748587, "learning_rate": 1.1751637065106126e-07, "loss": 0.6002, "step": 25051 }, { "epoch": 1.8098867556486717, "grad_norm": 7.708770354658347, "learning_rate": 1.1742776139868323e-07, "loss": 0.5851, "step": 25052 }, { "epoch": 1.809959000848881, "grad_norm": 10.438668374094293, "learning_rate": 1.1733918476186878e-07, "loss": 0.6705, "step": 25053 }, { "epoch": 1.8100312460490906, "grad_norm": 6.887846717484678, "learning_rate": 1.1725064074183112e-07, "loss": 0.611, "step": 25054 }, { "epoch": 1.8101034912493001, "grad_norm": 7.518880856612738, "learning_rate": 1.171621293397826e-07, "loss": 0.598, "step": 25055 }, { "epoch": 1.8101757364495097, "grad_norm": 7.726821941343829, "learning_rate": 1.1707365055693392e-07, "loss": 0.5617, "step": 25056 }, { "epoch": 1.8102479816497192, "grad_norm": 7.390325108708165, "learning_rate": 1.169852043944969e-07, "loss": 0.5983, "step": 25057 }, { "epoch": 1.8103202268499285, "grad_norm": 8.097336846993082, "learning_rate": 1.1689679085368194e-07, "loss": 0.6278, "step": 25058 }, { "epoch": 1.8103924720501383, "grad_norm": 8.564155362911052, "learning_rate": 1.1680840993570002e-07, "loss": 0.59, "step": 25059 }, { "epoch": 1.8104647172503476, "grad_norm": 6.496148949329518, "learning_rate": 1.167200616417602e-07, "loss": 0.5916, "step": 25060 }, { "epoch": 1.8105369624505572, "grad_norm": 7.603309924490868, "learning_rate": 1.1663174597307203e-07, "loss": 0.677, "step": 25061 }, { "epoch": 1.8106092076507667, "grad_norm": 6.946015776137679, "learning_rate": 1.1654346293084484e-07, "loss": 0.6025, "step": 25062 }, { "epoch": 1.8106814528509763, "grad_norm": 7.3416269683053255, "learning_rate": 1.164552125162871e-07, "loss": 0.6186, "step": 25063 }, { "epoch": 1.8107536980511858, "grad_norm": 7.458452987421465, "learning_rate": 1.1636699473060675e-07, "loss": 0.609, "step": 25064 }, { "epoch": 1.8108259432513951, "grad_norm": 7.255876461537755, "learning_rate": 1.1627880957501141e-07, "loss": 0.5679, "step": 25065 }, { "epoch": 1.8108981884516049, "grad_norm": 6.647751231472705, "learning_rate": 1.1619065705070847e-07, "loss": 0.5853, "step": 25066 }, { "epoch": 1.8109704336518142, "grad_norm": 5.783578764501726, "learning_rate": 1.1610253715890446e-07, "loss": 0.5881, "step": 25067 }, { "epoch": 1.8110426788520237, "grad_norm": 7.365298339436513, "learning_rate": 1.1601444990080563e-07, "loss": 0.6533, "step": 25068 }, { "epoch": 1.8111149240522333, "grad_norm": 6.786876682162197, "learning_rate": 1.1592639527761851e-07, "loss": 0.6517, "step": 25069 }, { "epoch": 1.8111871692524428, "grad_norm": 7.426064041909483, "learning_rate": 1.1583837329054743e-07, "loss": 0.6899, "step": 25070 }, { "epoch": 1.8112594144526524, "grad_norm": 6.668405242560166, "learning_rate": 1.157503839407978e-07, "loss": 0.5586, "step": 25071 }, { "epoch": 1.8113316596528617, "grad_norm": 7.8130217110959475, "learning_rate": 1.1566242722957422e-07, "loss": 0.654, "step": 25072 }, { "epoch": 1.8114039048530715, "grad_norm": 8.081763151063189, "learning_rate": 1.1557450315808045e-07, "loss": 0.5857, "step": 25073 }, { "epoch": 1.8114761500532808, "grad_norm": 6.941815657770296, "learning_rate": 1.1548661172752051e-07, "loss": 0.6141, "step": 25074 }, { "epoch": 1.8115483952534903, "grad_norm": 8.307584528210313, "learning_rate": 1.1539875293909763e-07, "loss": 0.5975, "step": 25075 }, { "epoch": 1.8116206404536999, "grad_norm": 5.719365321340741, "learning_rate": 1.153109267940139e-07, "loss": 0.5262, "step": 25076 }, { "epoch": 1.8116928856539094, "grad_norm": 7.944998495823194, "learning_rate": 1.1522313329347252e-07, "loss": 0.534, "step": 25077 }, { "epoch": 1.811765130854119, "grad_norm": 8.146417647248294, "learning_rate": 1.1513537243867445e-07, "loss": 0.6206, "step": 25078 }, { "epoch": 1.8118373760543283, "grad_norm": 7.1029772466622445, "learning_rate": 1.1504764423082154e-07, "loss": 0.6442, "step": 25079 }, { "epoch": 1.811909621254538, "grad_norm": 6.464547824137171, "learning_rate": 1.1495994867111477e-07, "loss": 0.569, "step": 25080 }, { "epoch": 1.8119818664547473, "grad_norm": 6.294921875, "learning_rate": 1.1487228576075371e-07, "loss": 0.6494, "step": 25081 }, { "epoch": 1.812054111654957, "grad_norm": 6.729468024939389, "learning_rate": 1.1478465550093965e-07, "loss": 0.5288, "step": 25082 }, { "epoch": 1.8121263568551664, "grad_norm": 7.4126080612874485, "learning_rate": 1.1469705789287161e-07, "loss": 0.6152, "step": 25083 }, { "epoch": 1.812198602055376, "grad_norm": 7.171884308187723, "learning_rate": 1.1460949293774892e-07, "loss": 0.6161, "step": 25084 }, { "epoch": 1.8122708472555855, "grad_norm": 6.3342617090649, "learning_rate": 1.1452196063677007e-07, "loss": 0.5204, "step": 25085 }, { "epoch": 1.8123430924557948, "grad_norm": 6.735513188238558, "learning_rate": 1.1443446099113325e-07, "loss": 0.581, "step": 25086 }, { "epoch": 1.8124153376560046, "grad_norm": 7.467722802173207, "learning_rate": 1.143469940020367e-07, "loss": 0.6091, "step": 25087 }, { "epoch": 1.812487582856214, "grad_norm": 7.883825533813354, "learning_rate": 1.1425955967067692e-07, "loss": 0.561, "step": 25088 }, { "epoch": 1.8125598280564235, "grad_norm": 6.959059747785054, "learning_rate": 1.141721579982516e-07, "loss": 0.6626, "step": 25089 }, { "epoch": 1.812632073256633, "grad_norm": 6.822725793240988, "learning_rate": 1.1408478898595698e-07, "loss": 0.5707, "step": 25090 }, { "epoch": 1.8127043184568425, "grad_norm": 7.211299337481813, "learning_rate": 1.1399745263498907e-07, "loss": 0.5875, "step": 25091 }, { "epoch": 1.812776563657052, "grad_norm": 8.405949129867833, "learning_rate": 1.1391014894654329e-07, "loss": 0.6017, "step": 25092 }, { "epoch": 1.8128488088572614, "grad_norm": 6.845155336499716, "learning_rate": 1.1382287792181507e-07, "loss": 0.5918, "step": 25093 }, { "epoch": 1.8129210540574712, "grad_norm": 7.921691772027352, "learning_rate": 1.1373563956199901e-07, "loss": 0.5666, "step": 25094 }, { "epoch": 1.8129932992576805, "grad_norm": 7.628036644786076, "learning_rate": 1.1364843386828917e-07, "loss": 0.645, "step": 25095 }, { "epoch": 1.81306554445789, "grad_norm": 6.413395167400975, "learning_rate": 1.1356126084187929e-07, "loss": 0.5118, "step": 25096 }, { "epoch": 1.8131377896580996, "grad_norm": 8.076436617108797, "learning_rate": 1.1347412048396261e-07, "loss": 0.5867, "step": 25097 }, { "epoch": 1.8132100348583091, "grad_norm": 6.849000632345221, "learning_rate": 1.1338701279573261e-07, "loss": 0.5946, "step": 25098 }, { "epoch": 1.8132822800585187, "grad_norm": 7.629107010207247, "learning_rate": 1.1329993777838111e-07, "loss": 0.6617, "step": 25099 }, { "epoch": 1.813354525258728, "grad_norm": 7.752156111524162, "learning_rate": 1.1321289543310021e-07, "loss": 0.5922, "step": 25100 }, { "epoch": 1.8134267704589377, "grad_norm": 7.739354605565851, "learning_rate": 1.1312588576108174e-07, "loss": 0.6142, "step": 25101 }, { "epoch": 1.813499015659147, "grad_norm": 8.23257548233028, "learning_rate": 1.1303890876351642e-07, "loss": 0.5914, "step": 25102 }, { "epoch": 1.8135712608593566, "grad_norm": 6.73998025455469, "learning_rate": 1.1295196444159496e-07, "loss": 0.5831, "step": 25103 }, { "epoch": 1.8136435060595661, "grad_norm": 8.062320677663017, "learning_rate": 1.1286505279650806e-07, "loss": 0.5789, "step": 25104 }, { "epoch": 1.8137157512597757, "grad_norm": 7.280139494596996, "learning_rate": 1.1277817382944506e-07, "loss": 0.5747, "step": 25105 }, { "epoch": 1.8137879964599852, "grad_norm": 6.786653255806634, "learning_rate": 1.1269132754159528e-07, "loss": 0.6094, "step": 25106 }, { "epoch": 1.8138602416601945, "grad_norm": 6.309485291588799, "learning_rate": 1.1260451393414723e-07, "loss": 0.5647, "step": 25107 }, { "epoch": 1.8139324868604043, "grad_norm": 7.254422384623154, "learning_rate": 1.1251773300828994e-07, "loss": 0.6137, "step": 25108 }, { "epoch": 1.8140047320606136, "grad_norm": 8.067556289084017, "learning_rate": 1.1243098476521136e-07, "loss": 0.6637, "step": 25109 }, { "epoch": 1.8140769772608234, "grad_norm": 8.094567217128855, "learning_rate": 1.1234426920609831e-07, "loss": 0.5431, "step": 25110 }, { "epoch": 1.8141492224610327, "grad_norm": 7.3014779776398, "learning_rate": 1.1225758633213846e-07, "loss": 0.5488, "step": 25111 }, { "epoch": 1.8142214676612423, "grad_norm": 5.562275742714332, "learning_rate": 1.1217093614451863e-07, "loss": 0.5339, "step": 25112 }, { "epoch": 1.8142937128614518, "grad_norm": 7.23924668075918, "learning_rate": 1.1208431864442453e-07, "loss": 0.5875, "step": 25113 }, { "epoch": 1.8143659580616611, "grad_norm": 6.745765770834393, "learning_rate": 1.1199773383304164e-07, "loss": 0.6326, "step": 25114 }, { "epoch": 1.8144382032618709, "grad_norm": 6.478267137041892, "learning_rate": 1.119111817115559e-07, "loss": 0.5087, "step": 25115 }, { "epoch": 1.8145104484620802, "grad_norm": 7.666779918110611, "learning_rate": 1.1182466228115225e-07, "loss": 0.5632, "step": 25116 }, { "epoch": 1.81458269366229, "grad_norm": 6.878527204651222, "learning_rate": 1.1173817554301414e-07, "loss": 0.666, "step": 25117 }, { "epoch": 1.8146549388624993, "grad_norm": 7.182335631909339, "learning_rate": 1.1165172149832593e-07, "loss": 0.6149, "step": 25118 }, { "epoch": 1.8147271840627088, "grad_norm": 5.0776002583449715, "learning_rate": 1.1156530014827138e-07, "loss": 0.5125, "step": 25119 }, { "epoch": 1.8147994292629184, "grad_norm": 7.959063697840913, "learning_rate": 1.1147891149403345e-07, "loss": 0.6167, "step": 25120 }, { "epoch": 1.8148716744631277, "grad_norm": 8.537819146241374, "learning_rate": 1.1139255553679451e-07, "loss": 0.6997, "step": 25121 }, { "epoch": 1.8149439196633375, "grad_norm": 7.899646036451298, "learning_rate": 1.1130623227773695e-07, "loss": 0.554, "step": 25122 }, { "epoch": 1.8150161648635468, "grad_norm": 8.565304484836995, "learning_rate": 1.1121994171804262e-07, "loss": 0.5573, "step": 25123 }, { "epoch": 1.8150884100637565, "grad_norm": 7.048516668011667, "learning_rate": 1.1113368385889223e-07, "loss": 0.5764, "step": 25124 }, { "epoch": 1.8151606552639659, "grad_norm": 7.411103666124825, "learning_rate": 1.1104745870146705e-07, "loss": 0.6462, "step": 25125 }, { "epoch": 1.8152329004641754, "grad_norm": 8.209992939693933, "learning_rate": 1.1096126624694697e-07, "loss": 0.559, "step": 25126 }, { "epoch": 1.815305145664385, "grad_norm": 7.97768962791336, "learning_rate": 1.1087510649651273e-07, "loss": 0.6396, "step": 25127 }, { "epoch": 1.8153773908645945, "grad_norm": 7.781556395354468, "learning_rate": 1.1078897945134282e-07, "loss": 0.6306, "step": 25128 }, { "epoch": 1.815449636064804, "grad_norm": 6.923618760057427, "learning_rate": 1.1070288511261657e-07, "loss": 0.573, "step": 25129 }, { "epoch": 1.8155218812650133, "grad_norm": 7.957460312284443, "learning_rate": 1.1061682348151276e-07, "loss": 0.58, "step": 25130 }, { "epoch": 1.8155941264652231, "grad_norm": 7.155568719276551, "learning_rate": 1.1053079455920962e-07, "loss": 0.6324, "step": 25131 }, { "epoch": 1.8156663716654324, "grad_norm": 7.336704461871137, "learning_rate": 1.1044479834688427e-07, "loss": 0.6049, "step": 25132 }, { "epoch": 1.815738616865642, "grad_norm": 7.29788573492016, "learning_rate": 1.1035883484571436e-07, "loss": 0.6423, "step": 25133 }, { "epoch": 1.8158108620658515, "grad_norm": 9.009160360319335, "learning_rate": 1.1027290405687701e-07, "loss": 0.5796, "step": 25134 }, { "epoch": 1.815883107266061, "grad_norm": 7.151653296052833, "learning_rate": 1.1018700598154769e-07, "loss": 0.6233, "step": 25135 }, { "epoch": 1.8159553524662706, "grad_norm": 6.899588708091252, "learning_rate": 1.1010114062090266e-07, "loss": 0.5725, "step": 25136 }, { "epoch": 1.81602759766648, "grad_norm": 9.315494747076642, "learning_rate": 1.1001530797611764e-07, "loss": 0.6318, "step": 25137 }, { "epoch": 1.8160998428666897, "grad_norm": 6.523337211523266, "learning_rate": 1.0992950804836671e-07, "loss": 0.5255, "step": 25138 }, { "epoch": 1.816172088066899, "grad_norm": 7.41034929489584, "learning_rate": 1.0984374083882559e-07, "loss": 0.5458, "step": 25139 }, { "epoch": 1.8162443332671085, "grad_norm": 7.40580136513688, "learning_rate": 1.0975800634866751e-07, "loss": 0.6515, "step": 25140 }, { "epoch": 1.816316578467318, "grad_norm": 6.391731297026547, "learning_rate": 1.0967230457906708e-07, "loss": 0.6003, "step": 25141 }, { "epoch": 1.8163888236675276, "grad_norm": 6.9691821592983505, "learning_rate": 1.0958663553119615e-07, "loss": 0.5461, "step": 25142 }, { "epoch": 1.8164610688677372, "grad_norm": 8.872245683548122, "learning_rate": 1.0950099920622848e-07, "loss": 0.6196, "step": 25143 }, { "epoch": 1.8165333140679465, "grad_norm": 8.553714003542234, "learning_rate": 1.094153956053362e-07, "loss": 0.7032, "step": 25144 }, { "epoch": 1.8166055592681563, "grad_norm": 7.613565595238413, "learning_rate": 1.0932982472969061e-07, "loss": 0.6885, "step": 25145 }, { "epoch": 1.8166778044683656, "grad_norm": 8.428932191457207, "learning_rate": 1.0924428658046354e-07, "loss": 0.5687, "step": 25146 }, { "epoch": 1.8167500496685751, "grad_norm": 7.324499994600207, "learning_rate": 1.0915878115882572e-07, "loss": 0.6724, "step": 25147 }, { "epoch": 1.8168222948687847, "grad_norm": 7.048197349426649, "learning_rate": 1.0907330846594816e-07, "loss": 0.5688, "step": 25148 }, { "epoch": 1.8168945400689942, "grad_norm": 7.490643577886849, "learning_rate": 1.089878685030002e-07, "loss": 0.5694, "step": 25149 }, { "epoch": 1.8169667852692037, "grad_norm": 6.887432716996395, "learning_rate": 1.0890246127115201e-07, "loss": 0.5664, "step": 25150 }, { "epoch": 1.817039030469413, "grad_norm": 6.500928372328712, "learning_rate": 1.0881708677157266e-07, "loss": 0.5411, "step": 25151 }, { "epoch": 1.8171112756696228, "grad_norm": 7.358755810519698, "learning_rate": 1.0873174500543094e-07, "loss": 0.5737, "step": 25152 }, { "epoch": 1.8171835208698321, "grad_norm": 6.930648582240179, "learning_rate": 1.0864643597389451e-07, "loss": 0.5423, "step": 25153 }, { "epoch": 1.8172557660700417, "grad_norm": 6.586817563808526, "learning_rate": 1.0856115967813191e-07, "loss": 0.6123, "step": 25154 }, { "epoch": 1.8173280112702512, "grad_norm": 7.0272441388333435, "learning_rate": 1.0847591611931024e-07, "loss": 0.6427, "step": 25155 }, { "epoch": 1.8174002564704608, "grad_norm": 7.4138919340397145, "learning_rate": 1.0839070529859608e-07, "loss": 0.6082, "step": 25156 }, { "epoch": 1.8174725016706703, "grad_norm": 6.065993099059051, "learning_rate": 1.0830552721715653e-07, "loss": 0.5418, "step": 25157 }, { "epoch": 1.8175447468708796, "grad_norm": 6.777916998901222, "learning_rate": 1.0822038187615653e-07, "loss": 0.6131, "step": 25158 }, { "epoch": 1.8176169920710894, "grad_norm": 6.89341682742605, "learning_rate": 1.0813526927676343e-07, "loss": 0.5534, "step": 25159 }, { "epoch": 1.8176892372712987, "grad_norm": 6.923946854147748, "learning_rate": 1.0805018942014078e-07, "loss": 0.6089, "step": 25160 }, { "epoch": 1.8177614824715083, "grad_norm": 6.886705455886204, "learning_rate": 1.0796514230745376e-07, "loss": 0.6019, "step": 25161 }, { "epoch": 1.8178337276717178, "grad_norm": 7.218822660534614, "learning_rate": 1.0788012793986696e-07, "loss": 0.6282, "step": 25162 }, { "epoch": 1.8179059728719273, "grad_norm": 7.169470955498619, "learning_rate": 1.0779514631854365e-07, "loss": 0.633, "step": 25163 }, { "epoch": 1.8179782180721369, "grad_norm": 8.997945339059747, "learning_rate": 1.0771019744464734e-07, "loss": 0.6468, "step": 25164 }, { "epoch": 1.8180504632723462, "grad_norm": 7.20498732449901, "learning_rate": 1.0762528131934096e-07, "loss": 0.5631, "step": 25165 }, { "epoch": 1.818122708472556, "grad_norm": 6.721032610983982, "learning_rate": 1.0754039794378696e-07, "loss": 0.5985, "step": 25166 }, { "epoch": 1.8181949536727653, "grad_norm": 6.288272405242102, "learning_rate": 1.0745554731914714e-07, "loss": 0.6598, "step": 25167 }, { "epoch": 1.8182671988729748, "grad_norm": 8.18871781705199, "learning_rate": 1.0737072944658311e-07, "loss": 0.6288, "step": 25168 }, { "epoch": 1.8183394440731844, "grad_norm": 7.770013133544819, "learning_rate": 1.0728594432725669e-07, "loss": 0.6091, "step": 25169 }, { "epoch": 1.818411689273394, "grad_norm": 7.8285852475884665, "learning_rate": 1.0720119196232726e-07, "loss": 0.6116, "step": 25170 }, { "epoch": 1.8184839344736035, "grad_norm": 8.234492668454806, "learning_rate": 1.071164723529558e-07, "loss": 0.5737, "step": 25171 }, { "epoch": 1.8185561796738128, "grad_norm": 7.710037565424303, "learning_rate": 1.070317855003017e-07, "loss": 0.6, "step": 25172 }, { "epoch": 1.8186284248740225, "grad_norm": 7.291868922743648, "learning_rate": 1.0694713140552482e-07, "loss": 0.5337, "step": 25173 }, { "epoch": 1.8187006700742319, "grad_norm": 7.641298978134685, "learning_rate": 1.0686251006978344e-07, "loss": 0.5623, "step": 25174 }, { "epoch": 1.8187729152744414, "grad_norm": 6.985662764042485, "learning_rate": 1.0677792149423604e-07, "loss": 0.6196, "step": 25175 }, { "epoch": 1.818845160474651, "grad_norm": 7.499547308929198, "learning_rate": 1.0669336568004063e-07, "loss": 0.5778, "step": 25176 }, { "epoch": 1.8189174056748605, "grad_norm": 7.699106278510056, "learning_rate": 1.0660884262835486e-07, "loss": 0.5243, "step": 25177 }, { "epoch": 1.81898965087507, "grad_norm": 8.020222852849852, "learning_rate": 1.0652435234033559e-07, "loss": 0.6319, "step": 25178 }, { "epoch": 1.8190618960752793, "grad_norm": 7.29258690249686, "learning_rate": 1.0643989481713968e-07, "loss": 0.5631, "step": 25179 }, { "epoch": 1.8191341412754891, "grad_norm": 6.724324597331929, "learning_rate": 1.0635547005992341e-07, "loss": 0.555, "step": 25180 }, { "epoch": 1.8192063864756984, "grad_norm": 6.7530513506981436, "learning_rate": 1.0627107806984172e-07, "loss": 0.5831, "step": 25181 }, { "epoch": 1.8192786316759082, "grad_norm": 6.812315212158396, "learning_rate": 1.061867188480506e-07, "loss": 0.5643, "step": 25182 }, { "epoch": 1.8193508768761175, "grad_norm": 8.61266561717121, "learning_rate": 1.0610239239570441e-07, "loss": 0.5451, "step": 25183 }, { "epoch": 1.819423122076327, "grad_norm": 7.117572325971103, "learning_rate": 1.0601809871395808e-07, "loss": 0.5934, "step": 25184 }, { "epoch": 1.8194953672765366, "grad_norm": 7.3657388243783855, "learning_rate": 1.0593383780396482e-07, "loss": 0.6156, "step": 25185 }, { "epoch": 1.819567612476746, "grad_norm": 6.399135304688055, "learning_rate": 1.0584960966687874e-07, "loss": 0.5228, "step": 25186 }, { "epoch": 1.8196398576769557, "grad_norm": 6.66052681795335, "learning_rate": 1.0576541430385223e-07, "loss": 0.6017, "step": 25187 }, { "epoch": 1.819712102877165, "grad_norm": 6.992882243330842, "learning_rate": 1.0568125171603827e-07, "loss": 0.583, "step": 25188 }, { "epoch": 1.8197843480773748, "grad_norm": 7.71249148277171, "learning_rate": 1.0559712190458899e-07, "loss": 0.5963, "step": 25189 }, { "epoch": 1.819856593277584, "grad_norm": 8.344665248549612, "learning_rate": 1.0551302487065595e-07, "loss": 0.6137, "step": 25190 }, { "epoch": 1.8199288384777936, "grad_norm": 6.040603894997566, "learning_rate": 1.0542896061539077e-07, "loss": 0.596, "step": 25191 }, { "epoch": 1.8200010836780032, "grad_norm": 6.067571661942093, "learning_rate": 1.053449291399436e-07, "loss": 0.5216, "step": 25192 }, { "epoch": 1.8200733288782125, "grad_norm": 7.713942045801636, "learning_rate": 1.0526093044546465e-07, "loss": 0.5662, "step": 25193 }, { "epoch": 1.8201455740784223, "grad_norm": 7.337091552179812, "learning_rate": 1.0517696453310494e-07, "loss": 0.5736, "step": 25194 }, { "epoch": 1.8202178192786316, "grad_norm": 7.775198548164472, "learning_rate": 1.0509303140401217e-07, "loss": 0.6611, "step": 25195 }, { "epoch": 1.8202900644788413, "grad_norm": 6.1640194890174, "learning_rate": 1.050091310593368e-07, "loss": 0.5929, "step": 25196 }, { "epoch": 1.8203623096790507, "grad_norm": 7.9777886086298615, "learning_rate": 1.049252635002268e-07, "loss": 0.5952, "step": 25197 }, { "epoch": 1.8204345548792602, "grad_norm": 7.215908003023976, "learning_rate": 1.0484142872783043e-07, "loss": 0.6556, "step": 25198 }, { "epoch": 1.8205068000794697, "grad_norm": 6.97511664398995, "learning_rate": 1.047576267432951e-07, "loss": 0.5666, "step": 25199 }, { "epoch": 1.8205790452796793, "grad_norm": 7.322115322960597, "learning_rate": 1.0467385754776793e-07, "loss": 0.584, "step": 25200 }, { "epoch": 1.8206512904798888, "grad_norm": 7.866443890579947, "learning_rate": 1.0459012114239609e-07, "loss": 0.5695, "step": 25201 }, { "epoch": 1.8207235356800981, "grad_norm": 7.283581536411781, "learning_rate": 1.045064175283253e-07, "loss": 0.6129, "step": 25202 }, { "epoch": 1.820795780880308, "grad_norm": 7.493721941083312, "learning_rate": 1.0442274670670161e-07, "loss": 0.6079, "step": 25203 }, { "epoch": 1.8208680260805172, "grad_norm": 7.608443498875833, "learning_rate": 1.0433910867867047e-07, "loss": 0.5896, "step": 25204 }, { "epoch": 1.8209402712807268, "grad_norm": 6.566375750505462, "learning_rate": 1.042555034453771e-07, "loss": 0.5837, "step": 25205 }, { "epoch": 1.8210125164809363, "grad_norm": 7.898455369817817, "learning_rate": 1.0417193100796558e-07, "loss": 0.655, "step": 25206 }, { "epoch": 1.8210847616811459, "grad_norm": 8.071230870271924, "learning_rate": 1.0408839136757998e-07, "loss": 0.6788, "step": 25207 }, { "epoch": 1.8211570068813554, "grad_norm": 7.186187093488935, "learning_rate": 1.0400488452536383e-07, "loss": 0.685, "step": 25208 }, { "epoch": 1.8212292520815647, "grad_norm": 9.301665077047572, "learning_rate": 1.0392141048246097e-07, "loss": 0.5516, "step": 25209 }, { "epoch": 1.8213014972817745, "grad_norm": 7.447188022068139, "learning_rate": 1.0383796924001322e-07, "loss": 0.5798, "step": 25210 }, { "epoch": 1.8213737424819838, "grad_norm": 8.012924721064664, "learning_rate": 1.037545607991633e-07, "loss": 0.6768, "step": 25211 }, { "epoch": 1.8214459876821933, "grad_norm": 7.470453682518636, "learning_rate": 1.0367118516105307e-07, "loss": 0.5288, "step": 25212 }, { "epoch": 1.8215182328824029, "grad_norm": 7.219286349629665, "learning_rate": 1.0358784232682328e-07, "loss": 0.6183, "step": 25213 }, { "epoch": 1.8215904780826124, "grad_norm": 8.53999925171181, "learning_rate": 1.0350453229761553e-07, "loss": 0.6481, "step": 25214 }, { "epoch": 1.821662723282822, "grad_norm": 7.104746894596548, "learning_rate": 1.0342125507456945e-07, "loss": 0.5249, "step": 25215 }, { "epoch": 1.8217349684830313, "grad_norm": 6.5688521387233365, "learning_rate": 1.033380106588261e-07, "loss": 0.5606, "step": 25216 }, { "epoch": 1.821807213683241, "grad_norm": 7.241813773478046, "learning_rate": 1.0325479905152425e-07, "loss": 0.5942, "step": 25217 }, { "epoch": 1.8218794588834504, "grad_norm": 7.164467703405783, "learning_rate": 1.0317162025380329e-07, "loss": 0.6246, "step": 25218 }, { "epoch": 1.82195170408366, "grad_norm": 7.961734810417027, "learning_rate": 1.0308847426680229e-07, "loss": 0.6107, "step": 25219 }, { "epoch": 1.8220239492838695, "grad_norm": 7.319757495469401, "learning_rate": 1.0300536109165843e-07, "loss": 0.6205, "step": 25220 }, { "epoch": 1.822096194484079, "grad_norm": 8.012718580957227, "learning_rate": 1.0292228072951022e-07, "loss": 0.6137, "step": 25221 }, { "epoch": 1.8221684396842885, "grad_norm": 7.512019381009614, "learning_rate": 1.028392331814948e-07, "loss": 0.5341, "step": 25222 }, { "epoch": 1.8222406848844979, "grad_norm": 6.633407727819548, "learning_rate": 1.0275621844874878e-07, "loss": 0.5538, "step": 25223 }, { "epoch": 1.8223129300847076, "grad_norm": 7.393637174388794, "learning_rate": 1.0267323653240901e-07, "loss": 0.6285, "step": 25224 }, { "epoch": 1.822385175284917, "grad_norm": 7.233436342317487, "learning_rate": 1.0259028743361099e-07, "loss": 0.6217, "step": 25225 }, { "epoch": 1.8224574204851265, "grad_norm": 5.962969312451661, "learning_rate": 1.0250737115349101e-07, "loss": 0.6108, "step": 25226 }, { "epoch": 1.822529665685336, "grad_norm": 6.206586587773766, "learning_rate": 1.024244876931832e-07, "loss": 0.5508, "step": 25227 }, { "epoch": 1.8226019108855456, "grad_norm": 8.913534404547653, "learning_rate": 1.0234163705382244e-07, "loss": 0.6325, "step": 25228 }, { "epoch": 1.8226741560857551, "grad_norm": 6.4345497852391125, "learning_rate": 1.0225881923654313e-07, "loss": 0.5772, "step": 25229 }, { "epoch": 1.8227464012859644, "grad_norm": 6.9987298630569335, "learning_rate": 1.0217603424247907e-07, "loss": 0.608, "step": 25230 }, { "epoch": 1.8228186464861742, "grad_norm": 6.167901954989816, "learning_rate": 1.0209328207276326e-07, "loss": 0.5904, "step": 25231 }, { "epoch": 1.8228908916863835, "grad_norm": 6.930122645670364, "learning_rate": 1.0201056272852838e-07, "loss": 0.5694, "step": 25232 }, { "epoch": 1.822963136886593, "grad_norm": 5.880024161775733, "learning_rate": 1.0192787621090689e-07, "loss": 0.5595, "step": 25233 }, { "epoch": 1.8230353820868026, "grad_norm": 6.487176423281345, "learning_rate": 1.0184522252103091e-07, "loss": 0.559, "step": 25234 }, { "epoch": 1.8231076272870121, "grad_norm": 5.969047818568014, "learning_rate": 1.0176260166003177e-07, "loss": 0.6131, "step": 25235 }, { "epoch": 1.8231798724872217, "grad_norm": 8.411315526232263, "learning_rate": 1.0168001362904051e-07, "loss": 0.6219, "step": 25236 }, { "epoch": 1.823252117687431, "grad_norm": 6.852572672200069, "learning_rate": 1.0159745842918789e-07, "loss": 0.6152, "step": 25237 }, { "epoch": 1.8233243628876408, "grad_norm": 7.990205968462705, "learning_rate": 1.0151493606160357e-07, "loss": 0.6343, "step": 25238 }, { "epoch": 1.82339660808785, "grad_norm": 7.056415198447499, "learning_rate": 1.0143244652741746e-07, "loss": 0.5907, "step": 25239 }, { "epoch": 1.8234688532880596, "grad_norm": 7.293625951658412, "learning_rate": 1.0134998982775895e-07, "loss": 0.6455, "step": 25240 }, { "epoch": 1.8235410984882692, "grad_norm": 7.269736424932357, "learning_rate": 1.0126756596375687e-07, "loss": 0.604, "step": 25241 }, { "epoch": 1.8236133436884787, "grad_norm": 7.916943461615351, "learning_rate": 1.011851749365389e-07, "loss": 0.6991, "step": 25242 }, { "epoch": 1.8236855888886883, "grad_norm": 6.95829145529322, "learning_rate": 1.0110281674723305e-07, "loss": 0.5854, "step": 25243 }, { "epoch": 1.8237578340888976, "grad_norm": 8.184071063049188, "learning_rate": 1.0102049139696785e-07, "loss": 0.5954, "step": 25244 }, { "epoch": 1.8238300792891073, "grad_norm": 7.232595929413852, "learning_rate": 1.0093819888686879e-07, "loss": 0.6496, "step": 25245 }, { "epoch": 1.8239023244893167, "grad_norm": 9.313793937344986, "learning_rate": 1.008559392180633e-07, "loss": 0.6633, "step": 25246 }, { "epoch": 1.8239745696895262, "grad_norm": 7.021539790288974, "learning_rate": 1.0077371239167715e-07, "loss": 0.5851, "step": 25247 }, { "epoch": 1.8240468148897357, "grad_norm": 8.359231708760213, "learning_rate": 1.0069151840883612e-07, "loss": 0.6465, "step": 25248 }, { "epoch": 1.8241190600899453, "grad_norm": 7.508753436744885, "learning_rate": 1.0060935727066513e-07, "loss": 0.6394, "step": 25249 }, { "epoch": 1.8241913052901548, "grad_norm": 7.57777413411331, "learning_rate": 1.0052722897828882e-07, "loss": 0.6293, "step": 25250 }, { "epoch": 1.8242635504903641, "grad_norm": 6.645357631132752, "learning_rate": 1.0044513353283186e-07, "loss": 0.5499, "step": 25251 }, { "epoch": 1.824335795690574, "grad_norm": 7.9503418375093755, "learning_rate": 1.0036307093541808e-07, "loss": 0.6141, "step": 25252 }, { "epoch": 1.8244080408907832, "grad_norm": 7.453820793894245, "learning_rate": 1.002810411871702e-07, "loss": 0.5697, "step": 25253 }, { "epoch": 1.8244802860909928, "grad_norm": 6.930760039339148, "learning_rate": 1.0019904428921201e-07, "loss": 0.5951, "step": 25254 }, { "epoch": 1.8245525312912023, "grad_norm": 6.9786654012404625, "learning_rate": 1.001170802426657e-07, "loss": 0.6146, "step": 25255 }, { "epoch": 1.8246247764914119, "grad_norm": 6.861209980213266, "learning_rate": 1.0003514904865285e-07, "loss": 0.5555, "step": 25256 }, { "epoch": 1.8246970216916214, "grad_norm": 7.297160957826927, "learning_rate": 9.995325070829537e-08, "loss": 0.5559, "step": 25257 }, { "epoch": 1.8247692668918307, "grad_norm": 7.522666249895991, "learning_rate": 9.987138522271456e-08, "loss": 0.5745, "step": 25258 }, { "epoch": 1.8248415120920405, "grad_norm": 7.329322440563257, "learning_rate": 9.978955259303091e-08, "loss": 0.6422, "step": 25259 }, { "epoch": 1.8249137572922498, "grad_norm": 7.316714906062799, "learning_rate": 9.970775282036438e-08, "loss": 0.5839, "step": 25260 }, { "epoch": 1.8249860024924596, "grad_norm": 6.828558796735847, "learning_rate": 9.962598590583516e-08, "loss": 0.6263, "step": 25261 }, { "epoch": 1.825058247692669, "grad_norm": 8.572538630948335, "learning_rate": 9.954425185056238e-08, "loss": 0.6079, "step": 25262 }, { "epoch": 1.8251304928928784, "grad_norm": 7.340020973944776, "learning_rate": 9.946255065566513e-08, "loss": 0.5827, "step": 25263 }, { "epoch": 1.825202738093088, "grad_norm": 7.676553267076042, "learning_rate": 9.938088232226139e-08, "loss": 0.6435, "step": 25264 }, { "epoch": 1.8252749832932973, "grad_norm": 7.916530273333357, "learning_rate": 9.929924685146947e-08, "loss": 0.5977, "step": 25265 }, { "epoch": 1.825347228493507, "grad_norm": 7.343725066447035, "learning_rate": 9.921764424440706e-08, "loss": 0.6158, "step": 25266 }, { "epoch": 1.8254194736937164, "grad_norm": 6.941966500586392, "learning_rate": 9.913607450219104e-08, "loss": 0.5441, "step": 25267 }, { "epoch": 1.8254917188939261, "grad_norm": 7.588684401853047, "learning_rate": 9.905453762593776e-08, "loss": 0.6259, "step": 25268 }, { "epoch": 1.8255639640941355, "grad_norm": 6.83852880573056, "learning_rate": 9.897303361676381e-08, "loss": 0.5809, "step": 25269 }, { "epoch": 1.825636209294345, "grad_norm": 9.198099470788067, "learning_rate": 9.889156247578469e-08, "loss": 0.5998, "step": 25270 }, { "epoch": 1.8257084544945545, "grad_norm": 6.652469044713334, "learning_rate": 9.881012420411535e-08, "loss": 0.6175, "step": 25271 }, { "epoch": 1.8257806996947639, "grad_norm": 7.500104267667144, "learning_rate": 9.872871880287128e-08, "loss": 0.6078, "step": 25272 }, { "epoch": 1.8258529448949736, "grad_norm": 6.79685283087261, "learning_rate": 9.864734627316713e-08, "loss": 0.63, "step": 25273 }, { "epoch": 1.825925190095183, "grad_norm": 7.993435550616235, "learning_rate": 9.856600661611565e-08, "loss": 0.5893, "step": 25274 }, { "epoch": 1.8259974352953927, "grad_norm": 8.12799251907485, "learning_rate": 9.848469983283094e-08, "loss": 0.5704, "step": 25275 }, { "epoch": 1.826069680495602, "grad_norm": 7.3525465206372385, "learning_rate": 9.840342592442654e-08, "loss": 0.5964, "step": 25276 }, { "epoch": 1.8261419256958116, "grad_norm": 7.769120532994785, "learning_rate": 9.832218489201435e-08, "loss": 0.6519, "step": 25277 }, { "epoch": 1.8262141708960211, "grad_norm": 6.777082859261706, "learning_rate": 9.824097673670624e-08, "loss": 0.5162, "step": 25278 }, { "epoch": 1.8262864160962307, "grad_norm": 7.469068959340566, "learning_rate": 9.815980145961468e-08, "loss": 0.6027, "step": 25279 }, { "epoch": 1.8263586612964402, "grad_norm": 8.151050766684882, "learning_rate": 9.807865906185043e-08, "loss": 0.6013, "step": 25280 }, { "epoch": 1.8264309064966495, "grad_norm": 7.332030209443187, "learning_rate": 9.799754954452428e-08, "loss": 0.6384, "step": 25281 }, { "epoch": 1.8265031516968593, "grad_norm": 8.36892707757019, "learning_rate": 9.791647290874673e-08, "loss": 0.6531, "step": 25282 }, { "epoch": 1.8265753968970686, "grad_norm": 7.184641394004938, "learning_rate": 9.783542915562772e-08, "loss": 0.6006, "step": 25283 }, { "epoch": 1.8266476420972781, "grad_norm": 7.8047170705301925, "learning_rate": 9.775441828627635e-08, "loss": 0.5597, "step": 25284 }, { "epoch": 1.8267198872974877, "grad_norm": 8.670569299224605, "learning_rate": 9.767344030180176e-08, "loss": 0.6668, "step": 25285 }, { "epoch": 1.8267921324976972, "grad_norm": 7.795739100702428, "learning_rate": 9.759249520331248e-08, "loss": 0.5665, "step": 25286 }, { "epoch": 1.8268643776979068, "grad_norm": 7.764996938809236, "learning_rate": 9.751158299191682e-08, "loss": 0.558, "step": 25287 }, { "epoch": 1.826936622898116, "grad_norm": 8.565051958468759, "learning_rate": 9.743070366872193e-08, "loss": 0.6324, "step": 25288 }, { "epoch": 1.8270088680983259, "grad_norm": 8.199760284641615, "learning_rate": 9.734985723483526e-08, "loss": 0.6036, "step": 25289 }, { "epoch": 1.8270811132985352, "grad_norm": 7.940038080004029, "learning_rate": 9.726904369136341e-08, "loss": 0.6127, "step": 25290 }, { "epoch": 1.8271533584987447, "grad_norm": 7.904641451440418, "learning_rate": 9.718826303941304e-08, "loss": 0.6089, "step": 25291 }, { "epoch": 1.8272256036989543, "grad_norm": 7.131911288733974, "learning_rate": 9.710751528008932e-08, "loss": 0.5394, "step": 25292 }, { "epoch": 1.8272978488991638, "grad_norm": 7.158080820493357, "learning_rate": 9.702680041449808e-08, "loss": 0.5851, "step": 25293 }, { "epoch": 1.8273700940993733, "grad_norm": 9.023959586492635, "learning_rate": 9.69461184437448e-08, "loss": 0.5862, "step": 25294 }, { "epoch": 1.8274423392995827, "grad_norm": 7.021764978311688, "learning_rate": 9.686546936893249e-08, "loss": 0.6478, "step": 25295 }, { "epoch": 1.8275145844997924, "grad_norm": 9.16864445475422, "learning_rate": 9.678485319116638e-08, "loss": 0.6186, "step": 25296 }, { "epoch": 1.8275868297000017, "grad_norm": 6.809841652260163, "learning_rate": 9.670426991154946e-08, "loss": 0.5811, "step": 25297 }, { "epoch": 1.8276590749002113, "grad_norm": 6.807948840835512, "learning_rate": 9.662371953118532e-08, "loss": 0.5063, "step": 25298 }, { "epoch": 1.8277313201004208, "grad_norm": 6.8210812339715945, "learning_rate": 9.654320205117613e-08, "loss": 0.6093, "step": 25299 }, { "epoch": 1.8278035653006304, "grad_norm": 6.887150517785906, "learning_rate": 9.646271747262403e-08, "loss": 0.6074, "step": 25300 }, { "epoch": 1.82787581050084, "grad_norm": 6.408588559442613, "learning_rate": 9.638226579663179e-08, "loss": 0.5817, "step": 25301 }, { "epoch": 1.8279480557010492, "grad_norm": 8.196537533560283, "learning_rate": 9.630184702429963e-08, "loss": 0.6265, "step": 25302 }, { "epoch": 1.828020300901259, "grad_norm": 7.901570378926268, "learning_rate": 9.622146115672887e-08, "loss": 0.5984, "step": 25303 }, { "epoch": 1.8280925461014683, "grad_norm": 7.365879950078256, "learning_rate": 9.614110819502004e-08, "loss": 0.5383, "step": 25304 }, { "epoch": 1.8281647913016779, "grad_norm": 7.446455236363017, "learning_rate": 9.606078814027309e-08, "loss": 0.5621, "step": 25305 }, { "epoch": 1.8282370365018874, "grad_norm": 8.358940555299531, "learning_rate": 9.598050099358713e-08, "loss": 0.6485, "step": 25306 }, { "epoch": 1.828309281702097, "grad_norm": 8.380143137872436, "learning_rate": 9.590024675606158e-08, "loss": 0.6034, "step": 25307 }, { "epoch": 1.8283815269023065, "grad_norm": 7.3187917393798045, "learning_rate": 9.582002542879471e-08, "loss": 0.5876, "step": 25308 }, { "epoch": 1.8284537721025158, "grad_norm": 7.829467414681448, "learning_rate": 9.573983701288536e-08, "loss": 0.6389, "step": 25309 }, { "epoch": 1.8285260173027256, "grad_norm": 8.00318177846171, "learning_rate": 9.565968150943073e-08, "loss": 0.607, "step": 25310 }, { "epoch": 1.828598262502935, "grad_norm": 7.949268417765596, "learning_rate": 9.557955891952797e-08, "loss": 0.6366, "step": 25311 }, { "epoch": 1.8286705077031444, "grad_norm": 8.073073440178023, "learning_rate": 9.549946924427483e-08, "loss": 0.5911, "step": 25312 }, { "epoch": 1.828742752903354, "grad_norm": 7.365135967089861, "learning_rate": 9.541941248476627e-08, "loss": 0.5858, "step": 25313 }, { "epoch": 1.8288149981035635, "grad_norm": 7.716950859400768, "learning_rate": 9.533938864209918e-08, "loss": 0.6195, "step": 25314 }, { "epoch": 1.828887243303773, "grad_norm": 7.730768275999937, "learning_rate": 9.52593977173688e-08, "loss": 0.619, "step": 25315 }, { "epoch": 1.8289594885039824, "grad_norm": 8.035203250196766, "learning_rate": 9.517943971167037e-08, "loss": 0.5486, "step": 25316 }, { "epoch": 1.8290317337041921, "grad_norm": 6.272575803795078, "learning_rate": 9.509951462609774e-08, "loss": 0.5845, "step": 25317 }, { "epoch": 1.8291039789044015, "grad_norm": 7.9364825219852495, "learning_rate": 9.501962246174556e-08, "loss": 0.5898, "step": 25318 }, { "epoch": 1.829176224104611, "grad_norm": 8.087880484402602, "learning_rate": 9.493976321970743e-08, "loss": 0.5723, "step": 25319 }, { "epoch": 1.8292484693048205, "grad_norm": 7.425927399025371, "learning_rate": 9.485993690107637e-08, "loss": 0.5756, "step": 25320 }, { "epoch": 1.82932071450503, "grad_norm": 7.052915838585618, "learning_rate": 9.478014350694536e-08, "loss": 0.6309, "step": 25321 }, { "epoch": 1.8293929597052396, "grad_norm": 7.934917600572854, "learning_rate": 9.470038303840689e-08, "loss": 0.5352, "step": 25322 }, { "epoch": 1.829465204905449, "grad_norm": 7.830428159658591, "learning_rate": 9.462065549655258e-08, "loss": 0.6382, "step": 25323 }, { "epoch": 1.8295374501056587, "grad_norm": 7.711671618422987, "learning_rate": 9.454096088247377e-08, "loss": 0.6543, "step": 25324 }, { "epoch": 1.829609695305868, "grad_norm": 8.144632418436409, "learning_rate": 9.446129919726155e-08, "loss": 0.6521, "step": 25325 }, { "epoch": 1.8296819405060776, "grad_norm": 8.727916643200945, "learning_rate": 9.438167044200641e-08, "loss": 0.5873, "step": 25326 }, { "epoch": 1.8297541857062871, "grad_norm": 7.613013429662699, "learning_rate": 9.430207461779806e-08, "loss": 0.6095, "step": 25327 }, { "epoch": 1.8298264309064967, "grad_norm": 7.740580833780312, "learning_rate": 9.422251172572616e-08, "loss": 0.6317, "step": 25328 }, { "epoch": 1.8298986761067062, "grad_norm": 7.619344646476945, "learning_rate": 9.414298176688069e-08, "loss": 0.599, "step": 25329 }, { "epoch": 1.8299709213069155, "grad_norm": 6.277661565344528, "learning_rate": 9.406348474234966e-08, "loss": 0.6079, "step": 25330 }, { "epoch": 1.8300431665071253, "grad_norm": 7.662809839254029, "learning_rate": 9.398402065322138e-08, "loss": 0.6261, "step": 25331 }, { "epoch": 1.8301154117073346, "grad_norm": 6.839455567939623, "learning_rate": 9.390458950058357e-08, "loss": 0.5834, "step": 25332 }, { "epoch": 1.8301876569075444, "grad_norm": 6.116515218884979, "learning_rate": 9.382519128552426e-08, "loss": 0.5453, "step": 25333 }, { "epoch": 1.8302599021077537, "grad_norm": 7.0376208117128, "learning_rate": 9.374582600912924e-08, "loss": 0.6443, "step": 25334 }, { "epoch": 1.8303321473079632, "grad_norm": 7.11980529947564, "learning_rate": 9.36664936724857e-08, "loss": 0.5573, "step": 25335 }, { "epoch": 1.8304043925081728, "grad_norm": 7.023963238109415, "learning_rate": 9.358719427667917e-08, "loss": 0.6254, "step": 25336 }, { "epoch": 1.830476637708382, "grad_norm": 6.547322770058176, "learning_rate": 9.350792782279571e-08, "loss": 0.5945, "step": 25337 }, { "epoch": 1.8305488829085919, "grad_norm": 7.742240466755625, "learning_rate": 9.34286943119203e-08, "loss": 0.6357, "step": 25338 }, { "epoch": 1.8306211281088012, "grad_norm": 7.1217397625926875, "learning_rate": 9.334949374513736e-08, "loss": 0.5125, "step": 25339 }, { "epoch": 1.830693373309011, "grad_norm": 6.299379518016971, "learning_rate": 9.327032612353126e-08, "loss": 0.5288, "step": 25340 }, { "epoch": 1.8307656185092203, "grad_norm": 9.109673994074669, "learning_rate": 9.319119144818589e-08, "loss": 0.6179, "step": 25341 }, { "epoch": 1.8308378637094298, "grad_norm": 8.445988207242241, "learning_rate": 9.311208972018426e-08, "loss": 0.6106, "step": 25342 }, { "epoch": 1.8309101089096393, "grad_norm": 8.279686891836882, "learning_rate": 9.303302094060911e-08, "loss": 0.6921, "step": 25343 }, { "epoch": 1.8309823541098487, "grad_norm": 6.582169474160648, "learning_rate": 9.295398511054321e-08, "loss": 0.5809, "step": 25344 }, { "epoch": 1.8310545993100584, "grad_norm": 7.475291942394185, "learning_rate": 9.287498223106817e-08, "loss": 0.6346, "step": 25345 }, { "epoch": 1.8311268445102677, "grad_norm": 8.023999930841022, "learning_rate": 9.279601230326562e-08, "loss": 0.6278, "step": 25346 }, { "epoch": 1.8311990897104775, "grad_norm": 8.201889787954359, "learning_rate": 9.271707532821667e-08, "loss": 0.6148, "step": 25347 }, { "epoch": 1.8312713349106868, "grad_norm": 6.23689819368323, "learning_rate": 9.263817130700153e-08, "loss": 0.5219, "step": 25348 }, { "epoch": 1.8313435801108964, "grad_norm": 7.6739596081093975, "learning_rate": 9.255930024070076e-08, "loss": 0.6456, "step": 25349 }, { "epoch": 1.831415825311106, "grad_norm": 6.355735243337189, "learning_rate": 9.248046213039403e-08, "loss": 0.5583, "step": 25350 }, { "epoch": 1.8314880705113155, "grad_norm": 7.202908659559622, "learning_rate": 9.240165697716074e-08, "loss": 0.627, "step": 25351 }, { "epoch": 1.831560315711525, "grad_norm": 6.2010396147312905, "learning_rate": 9.232288478207896e-08, "loss": 0.5701, "step": 25352 }, { "epoch": 1.8316325609117343, "grad_norm": 7.256718909385563, "learning_rate": 9.224414554622751e-08, "loss": 0.5875, "step": 25353 }, { "epoch": 1.831704806111944, "grad_norm": 5.679786565827065, "learning_rate": 9.216543927068417e-08, "loss": 0.591, "step": 25354 }, { "epoch": 1.8317770513121534, "grad_norm": 6.996426351320588, "learning_rate": 9.208676595652694e-08, "loss": 0.609, "step": 25355 }, { "epoch": 1.831849296512363, "grad_norm": 7.317846707340139, "learning_rate": 9.200812560483136e-08, "loss": 0.6353, "step": 25356 }, { "epoch": 1.8319215417125725, "grad_norm": 8.429348094708025, "learning_rate": 9.192951821667517e-08, "loss": 0.5691, "step": 25357 }, { "epoch": 1.831993786912782, "grad_norm": 9.259560670256032, "learning_rate": 9.185094379313448e-08, "loss": 0.6911, "step": 25358 }, { "epoch": 1.8320660321129916, "grad_norm": 7.881847734646883, "learning_rate": 9.17724023352845e-08, "loss": 0.5888, "step": 25359 }, { "epoch": 1.832138277313201, "grad_norm": 7.982902615050302, "learning_rate": 9.169389384420024e-08, "loss": 0.6078, "step": 25360 }, { "epoch": 1.8322105225134107, "grad_norm": 6.9345680433429955, "learning_rate": 9.161541832095666e-08, "loss": 0.6359, "step": 25361 }, { "epoch": 1.83228276771362, "grad_norm": 6.86615225460718, "learning_rate": 9.153697576662818e-08, "loss": 0.5736, "step": 25362 }, { "epoch": 1.8323550129138295, "grad_norm": 8.036049206732581, "learning_rate": 9.145856618228837e-08, "loss": 0.558, "step": 25363 }, { "epoch": 1.832427258114039, "grad_norm": 7.149196563609575, "learning_rate": 9.138018956901057e-08, "loss": 0.5257, "step": 25364 }, { "epoch": 1.8324995033142486, "grad_norm": 7.79865912625244, "learning_rate": 9.130184592786779e-08, "loss": 0.618, "step": 25365 }, { "epoch": 1.8325717485144581, "grad_norm": 6.951639149526849, "learning_rate": 9.12235352599325e-08, "loss": 0.6024, "step": 25366 }, { "epoch": 1.8326439937146675, "grad_norm": 8.236309830353763, "learning_rate": 9.114525756627691e-08, "loss": 0.6126, "step": 25367 }, { "epoch": 1.8327162389148772, "grad_norm": 7.669574959925613, "learning_rate": 9.106701284797237e-08, "loss": 0.6242, "step": 25368 }, { "epoch": 1.8327884841150865, "grad_norm": 7.025517682858553, "learning_rate": 9.098880110608999e-08, "loss": 0.5862, "step": 25369 }, { "epoch": 1.832860729315296, "grad_norm": 7.399679238868044, "learning_rate": 9.091062234170028e-08, "loss": 0.5907, "step": 25370 }, { "epoch": 1.8329329745155056, "grad_norm": 7.304190323989546, "learning_rate": 9.08324765558738e-08, "loss": 0.5511, "step": 25371 }, { "epoch": 1.8330052197157152, "grad_norm": 6.4839417094505665, "learning_rate": 9.075436374967994e-08, "loss": 0.6311, "step": 25372 }, { "epoch": 1.8330774649159247, "grad_norm": 7.001953125, "learning_rate": 9.067628392418871e-08, "loss": 0.5771, "step": 25373 }, { "epoch": 1.833149710116134, "grad_norm": 5.9707107918253035, "learning_rate": 9.059823708046783e-08, "loss": 0.6182, "step": 25374 }, { "epoch": 1.8332219553163438, "grad_norm": 8.020397884554264, "learning_rate": 9.052022321958648e-08, "loss": 0.5518, "step": 25375 }, { "epoch": 1.8332942005165531, "grad_norm": 6.7126681457713895, "learning_rate": 9.04422423426124e-08, "loss": 0.6126, "step": 25376 }, { "epoch": 1.8333664457167627, "grad_norm": 6.714318805833336, "learning_rate": 9.036429445061307e-08, "loss": 0.6198, "step": 25377 }, { "epoch": 1.8334386909169722, "grad_norm": 8.731095737413426, "learning_rate": 9.028637954465569e-08, "loss": 0.6651, "step": 25378 }, { "epoch": 1.8335109361171817, "grad_norm": 6.414920057243139, "learning_rate": 9.020849762580663e-08, "loss": 0.6509, "step": 25379 }, { "epoch": 1.8335831813173913, "grad_norm": 8.145664639364439, "learning_rate": 9.013064869513255e-08, "loss": 0.5839, "step": 25380 }, { "epoch": 1.8336554265176006, "grad_norm": 8.557304198119882, "learning_rate": 9.005283275369842e-08, "loss": 0.6196, "step": 25381 }, { "epoch": 1.8337276717178104, "grad_norm": 6.862013324650119, "learning_rate": 8.997504980256977e-08, "loss": 0.6268, "step": 25382 }, { "epoch": 1.8337999169180197, "grad_norm": 6.788588811330114, "learning_rate": 8.989729984281159e-08, "loss": 0.5585, "step": 25383 }, { "epoch": 1.8338721621182292, "grad_norm": 8.229151420740097, "learning_rate": 8.981958287548803e-08, "loss": 0.6068, "step": 25384 }, { "epoch": 1.8339444073184388, "grad_norm": 8.067478742261175, "learning_rate": 8.974189890166268e-08, "loss": 0.5929, "step": 25385 }, { "epoch": 1.8340166525186483, "grad_norm": 8.140771201902112, "learning_rate": 8.966424792239942e-08, "loss": 0.5482, "step": 25386 }, { "epoch": 1.8340888977188579, "grad_norm": 6.985820304964332, "learning_rate": 8.958662993876154e-08, "loss": 0.5416, "step": 25387 }, { "epoch": 1.8341611429190672, "grad_norm": 7.487047327588285, "learning_rate": 8.950904495181074e-08, "loss": 0.5409, "step": 25388 }, { "epoch": 1.834233388119277, "grad_norm": 8.159904742713376, "learning_rate": 8.943149296260944e-08, "loss": 0.7174, "step": 25389 }, { "epoch": 1.8343056333194863, "grad_norm": 7.587151065954033, "learning_rate": 8.935397397221935e-08, "loss": 0.607, "step": 25390 }, { "epoch": 1.8343778785196958, "grad_norm": 9.194572373569773, "learning_rate": 8.927648798170207e-08, "loss": 0.6774, "step": 25391 }, { "epoch": 1.8344501237199053, "grad_norm": 7.057826293364316, "learning_rate": 8.919903499211735e-08, "loss": 0.5775, "step": 25392 }, { "epoch": 1.8345223689201149, "grad_norm": 8.200513907355232, "learning_rate": 8.912161500452599e-08, "loss": 0.6679, "step": 25393 }, { "epoch": 1.8345946141203244, "grad_norm": 7.357857646953495, "learning_rate": 8.904422801998797e-08, "loss": 0.5661, "step": 25394 }, { "epoch": 1.8346668593205337, "grad_norm": 7.359195650349105, "learning_rate": 8.896687403956245e-08, "loss": 0.6269, "step": 25395 }, { "epoch": 1.8347391045207435, "grad_norm": 8.033119310664755, "learning_rate": 8.88895530643083e-08, "loss": 0.5695, "step": 25396 }, { "epoch": 1.8348113497209528, "grad_norm": 8.029209219069694, "learning_rate": 8.881226509528384e-08, "loss": 0.6892, "step": 25397 }, { "epoch": 1.8348835949211624, "grad_norm": 6.3300103287213325, "learning_rate": 8.873501013354796e-08, "loss": 0.487, "step": 25398 }, { "epoch": 1.834955840121372, "grad_norm": 8.181374158274133, "learning_rate": 8.865778818015702e-08, "loss": 0.6695, "step": 25399 }, { "epoch": 1.8350280853215815, "grad_norm": 7.556837499858494, "learning_rate": 8.858059923616879e-08, "loss": 0.6964, "step": 25400 }, { "epoch": 1.835100330521791, "grad_norm": 7.383244190008015, "learning_rate": 8.850344330264021e-08, "loss": 0.562, "step": 25401 }, { "epoch": 1.8351725757220003, "grad_norm": 6.165567695362975, "learning_rate": 8.842632038062681e-08, "loss": 0.6287, "step": 25402 }, { "epoch": 1.83524482092221, "grad_norm": 7.320741883084665, "learning_rate": 8.834923047118444e-08, "loss": 0.6429, "step": 25403 }, { "epoch": 1.8353170661224194, "grad_norm": 5.624030305143185, "learning_rate": 8.82721735753686e-08, "loss": 0.5599, "step": 25404 }, { "epoch": 1.8353893113226292, "grad_norm": 8.002749447425035, "learning_rate": 8.819514969423404e-08, "loss": 0.6006, "step": 25405 }, { "epoch": 1.8354615565228385, "grad_norm": 7.38863857422115, "learning_rate": 8.811815882883545e-08, "loss": 0.5715, "step": 25406 }, { "epoch": 1.835533801723048, "grad_norm": 7.459631808606287, "learning_rate": 8.804120098022645e-08, "loss": 0.6859, "step": 25407 }, { "epoch": 1.8356060469232576, "grad_norm": 7.404022268054683, "learning_rate": 8.79642761494609e-08, "loss": 0.6115, "step": 25408 }, { "epoch": 1.835678292123467, "grad_norm": 9.550987344402133, "learning_rate": 8.788738433759131e-08, "loss": 0.6523, "step": 25409 }, { "epoch": 1.8357505373236767, "grad_norm": 8.330871930597867, "learning_rate": 8.781052554567071e-08, "loss": 0.6136, "step": 25410 }, { "epoch": 1.835822782523886, "grad_norm": 8.525010059160579, "learning_rate": 8.773369977475105e-08, "loss": 0.5324, "step": 25411 }, { "epoch": 1.8358950277240957, "grad_norm": 7.414080765503712, "learning_rate": 8.765690702588397e-08, "loss": 0.5843, "step": 25412 }, { "epoch": 1.835967272924305, "grad_norm": 7.266955575828468, "learning_rate": 8.758014730012059e-08, "loss": 0.6507, "step": 25413 }, { "epoch": 1.8360395181245146, "grad_norm": 6.646004542075714, "learning_rate": 8.750342059851203e-08, "loss": 0.6025, "step": 25414 }, { "epoch": 1.8361117633247241, "grad_norm": 7.049471290188464, "learning_rate": 8.742672692210851e-08, "loss": 0.6799, "step": 25415 }, { "epoch": 1.8361840085249335, "grad_norm": 6.95892517253094, "learning_rate": 8.73500662719598e-08, "loss": 0.6106, "step": 25416 }, { "epoch": 1.8362562537251432, "grad_norm": 7.15060668718907, "learning_rate": 8.727343864911558e-08, "loss": 0.5477, "step": 25417 }, { "epoch": 1.8363284989253525, "grad_norm": 7.808980896871568, "learning_rate": 8.71968440546242e-08, "loss": 0.6202, "step": 25418 }, { "epoch": 1.8364007441255623, "grad_norm": 6.031893839096707, "learning_rate": 8.712028248953507e-08, "loss": 0.5669, "step": 25419 }, { "epoch": 1.8364729893257716, "grad_norm": 7.019789379470648, "learning_rate": 8.704375395489572e-08, "loss": 0.6112, "step": 25420 }, { "epoch": 1.8365452345259812, "grad_norm": 8.63804414086834, "learning_rate": 8.696725845175364e-08, "loss": 0.6725, "step": 25421 }, { "epoch": 1.8366174797261907, "grad_norm": 6.465455031523183, "learning_rate": 8.689079598115601e-08, "loss": 0.5856, "step": 25422 }, { "epoch": 1.8366897249264, "grad_norm": 7.540123488328177, "learning_rate": 8.68143665441501e-08, "loss": 0.6125, "step": 25423 }, { "epoch": 1.8367619701266098, "grad_norm": 7.621077482283381, "learning_rate": 8.67379701417817e-08, "loss": 0.5533, "step": 25424 }, { "epoch": 1.8368342153268191, "grad_norm": 6.8600784670954775, "learning_rate": 8.666160677509638e-08, "loss": 0.581, "step": 25425 }, { "epoch": 1.8369064605270289, "grad_norm": 7.22399130779094, "learning_rate": 8.658527644514054e-08, "loss": 0.5951, "step": 25426 }, { "epoch": 1.8369787057272382, "grad_norm": 7.727542209516136, "learning_rate": 8.650897915295775e-08, "loss": 0.6514, "step": 25427 }, { "epoch": 1.8370509509274477, "grad_norm": 7.280978349194259, "learning_rate": 8.643271489959331e-08, "loss": 0.6354, "step": 25428 }, { "epoch": 1.8371231961276573, "grad_norm": 6.3599814235967775, "learning_rate": 8.635648368609084e-08, "loss": 0.6123, "step": 25429 }, { "epoch": 1.8371954413278668, "grad_norm": 7.785232444041968, "learning_rate": 8.628028551349448e-08, "loss": 0.5568, "step": 25430 }, { "epoch": 1.8372676865280764, "grad_norm": 6.375662974870976, "learning_rate": 8.620412038284649e-08, "loss": 0.6061, "step": 25431 }, { "epoch": 1.8373399317282857, "grad_norm": 7.218991230158374, "learning_rate": 8.612798829518987e-08, "loss": 0.637, "step": 25432 }, { "epoch": 1.8374121769284955, "grad_norm": 7.456174854242036, "learning_rate": 8.605188925156688e-08, "loss": 0.5941, "step": 25433 }, { "epoch": 1.8374844221287048, "grad_norm": 6.797825678844739, "learning_rate": 8.597582325301918e-08, "loss": 0.6226, "step": 25434 }, { "epoch": 1.8375566673289143, "grad_norm": 6.770799591762882, "learning_rate": 8.589979030058814e-08, "loss": 0.6089, "step": 25435 }, { "epoch": 1.8376289125291239, "grad_norm": 7.497119604765824, "learning_rate": 8.582379039531464e-08, "loss": 0.6159, "step": 25436 }, { "epoch": 1.8377011577293334, "grad_norm": 7.375902540940806, "learning_rate": 8.574782353823919e-08, "loss": 0.6649, "step": 25437 }, { "epoch": 1.837773402929543, "grad_norm": 7.70749427421523, "learning_rate": 8.567188973040097e-08, "loss": 0.6136, "step": 25438 }, { "epoch": 1.8378456481297523, "grad_norm": 7.556621442047247, "learning_rate": 8.559598897284027e-08, "loss": 0.5495, "step": 25439 }, { "epoch": 1.837917893329962, "grad_norm": 6.0088502461384055, "learning_rate": 8.552012126659597e-08, "loss": 0.6045, "step": 25440 }, { "epoch": 1.8379901385301713, "grad_norm": 7.718985488326438, "learning_rate": 8.544428661270587e-08, "loss": 0.567, "step": 25441 }, { "epoch": 1.8380623837303809, "grad_norm": 8.434942917644777, "learning_rate": 8.536848501220912e-08, "loss": 0.5687, "step": 25442 }, { "epoch": 1.8381346289305904, "grad_norm": 6.2027194181207035, "learning_rate": 8.529271646614295e-08, "loss": 0.5868, "step": 25443 }, { "epoch": 1.8382068741308, "grad_norm": 7.111064198789562, "learning_rate": 8.521698097554487e-08, "loss": 0.6388, "step": 25444 }, { "epoch": 1.8382791193310095, "grad_norm": 7.539786285285758, "learning_rate": 8.514127854145099e-08, "loss": 0.6277, "step": 25445 }, { "epoch": 1.8383513645312188, "grad_norm": 7.928860986790698, "learning_rate": 8.506560916489798e-08, "loss": 0.565, "step": 25446 }, { "epoch": 1.8384236097314286, "grad_norm": 8.001681627914955, "learning_rate": 8.498997284692196e-08, "loss": 0.6098, "step": 25447 }, { "epoch": 1.838495854931638, "grad_norm": 6.33902718767124, "learning_rate": 8.49143695885582e-08, "loss": 0.5162, "step": 25448 }, { "epoch": 1.8385681001318475, "grad_norm": 8.685793043439213, "learning_rate": 8.483879939084117e-08, "loss": 0.5803, "step": 25449 }, { "epoch": 1.838640345332057, "grad_norm": 7.993908708919136, "learning_rate": 8.476326225480558e-08, "loss": 0.5992, "step": 25450 }, { "epoch": 1.8387125905322665, "grad_norm": 9.153690748615503, "learning_rate": 8.46877581814859e-08, "loss": 0.6046, "step": 25451 }, { "epoch": 1.838784835732476, "grad_norm": 6.245469854804792, "learning_rate": 8.461228717191545e-08, "loss": 0.5941, "step": 25452 }, { "epoch": 1.8388570809326854, "grad_norm": 8.21370643800392, "learning_rate": 8.45368492271273e-08, "loss": 0.6393, "step": 25453 }, { "epoch": 1.8389293261328952, "grad_norm": 8.072711953469627, "learning_rate": 8.446144434815395e-08, "loss": 0.5808, "step": 25454 }, { "epoch": 1.8390015713331045, "grad_norm": 6.719117620302459, "learning_rate": 8.438607253602849e-08, "loss": 0.5491, "step": 25455 }, { "epoch": 1.839073816533314, "grad_norm": 8.083921850220273, "learning_rate": 8.431073379178173e-08, "loss": 0.6168, "step": 25456 }, { "epoch": 1.8391460617335236, "grad_norm": 8.625709393947151, "learning_rate": 8.423542811644536e-08, "loss": 0.585, "step": 25457 }, { "epoch": 1.8392183069337331, "grad_norm": 7.370106512203247, "learning_rate": 8.41601555110505e-08, "loss": 0.6205, "step": 25458 }, { "epoch": 1.8392905521339427, "grad_norm": 7.809757331077099, "learning_rate": 8.408491597662688e-08, "loss": 0.6217, "step": 25459 }, { "epoch": 1.839362797334152, "grad_norm": 7.554647104338743, "learning_rate": 8.400970951420534e-08, "loss": 0.5719, "step": 25460 }, { "epoch": 1.8394350425343617, "grad_norm": 6.340257406695275, "learning_rate": 8.393453612481478e-08, "loss": 0.6058, "step": 25461 }, { "epoch": 1.839507287734571, "grad_norm": 6.726935364139494, "learning_rate": 8.385939580948437e-08, "loss": 0.6066, "step": 25462 }, { "epoch": 1.8395795329347806, "grad_norm": 6.724177949147157, "learning_rate": 8.378428856924275e-08, "loss": 0.5919, "step": 25463 }, { "epoch": 1.8396517781349901, "grad_norm": 8.430742284420074, "learning_rate": 8.370921440511825e-08, "loss": 0.6138, "step": 25464 }, { "epoch": 1.8397240233351997, "grad_norm": 6.420580805475364, "learning_rate": 8.363417331813894e-08, "loss": 0.5884, "step": 25465 }, { "epoch": 1.8397962685354092, "grad_norm": 7.576599345055673, "learning_rate": 8.355916530933122e-08, "loss": 0.6134, "step": 25466 }, { "epoch": 1.8398685137356185, "grad_norm": 6.2955935856908205, "learning_rate": 8.348419037972205e-08, "loss": 0.7014, "step": 25467 }, { "epoch": 1.8399407589358283, "grad_norm": 8.199214097485578, "learning_rate": 8.340924853033838e-08, "loss": 0.5317, "step": 25468 }, { "epoch": 1.8400130041360376, "grad_norm": 6.814435211534208, "learning_rate": 8.33343397622055e-08, "loss": 0.5008, "step": 25469 }, { "epoch": 1.8400852493362472, "grad_norm": 7.604025336916158, "learning_rate": 8.325946407634927e-08, "loss": 0.6152, "step": 25470 }, { "epoch": 1.8401574945364567, "grad_norm": 6.895809515444218, "learning_rate": 8.318462147379414e-08, "loss": 0.6262, "step": 25471 }, { "epoch": 1.8402297397366663, "grad_norm": 7.2451947829321295, "learning_rate": 8.31098119555654e-08, "loss": 0.6379, "step": 25472 }, { "epoch": 1.8403019849368758, "grad_norm": 5.803755550607808, "learning_rate": 8.303503552268693e-08, "loss": 0.5308, "step": 25473 }, { "epoch": 1.8403742301370851, "grad_norm": 7.09889740240229, "learning_rate": 8.296029217618184e-08, "loss": 0.6588, "step": 25474 }, { "epoch": 1.8404464753372949, "grad_norm": 8.098132504536782, "learning_rate": 8.288558191707346e-08, "loss": 0.6319, "step": 25475 }, { "epoch": 1.8405187205375042, "grad_norm": 7.743439143039925, "learning_rate": 8.281090474638514e-08, "loss": 0.6368, "step": 25476 }, { "epoch": 1.8405909657377137, "grad_norm": 7.855506627519323, "learning_rate": 8.273626066513856e-08, "loss": 0.5732, "step": 25477 }, { "epoch": 1.8406632109379233, "grad_norm": 6.476599606912698, "learning_rate": 8.266164967435542e-08, "loss": 0.5892, "step": 25478 }, { "epoch": 1.8407354561381328, "grad_norm": 7.823582227681321, "learning_rate": 8.258707177505765e-08, "loss": 0.5726, "step": 25479 }, { "epoch": 1.8408077013383424, "grad_norm": 7.7499913861626775, "learning_rate": 8.251252696826556e-08, "loss": 0.601, "step": 25480 }, { "epoch": 1.8408799465385517, "grad_norm": 7.093795977876191, "learning_rate": 8.2438015255e-08, "loss": 0.5755, "step": 25481 }, { "epoch": 1.8409521917387615, "grad_norm": 6.600355554887004, "learning_rate": 8.236353663628099e-08, "loss": 0.528, "step": 25482 }, { "epoch": 1.8410244369389708, "grad_norm": 7.077833695975122, "learning_rate": 8.228909111312827e-08, "loss": 0.6148, "step": 25483 }, { "epoch": 1.8410966821391805, "grad_norm": 6.7833006961921525, "learning_rate": 8.221467868656047e-08, "loss": 0.6293, "step": 25484 }, { "epoch": 1.8411689273393899, "grad_norm": 6.976712591558082, "learning_rate": 8.21402993575962e-08, "loss": 0.5844, "step": 25485 }, { "epoch": 1.8412411725395994, "grad_norm": 7.48980172112569, "learning_rate": 8.206595312725413e-08, "loss": 0.617, "step": 25486 }, { "epoch": 1.841313417739809, "grad_norm": 7.344963070936963, "learning_rate": 8.199163999655174e-08, "loss": 0.5928, "step": 25487 }, { "epoch": 1.8413856629400183, "grad_norm": 7.26181540750626, "learning_rate": 8.191735996650629e-08, "loss": 0.5937, "step": 25488 }, { "epoch": 1.841457908140228, "grad_norm": 6.372292336290029, "learning_rate": 8.184311303813447e-08, "loss": 0.5553, "step": 25489 }, { "epoch": 1.8415301533404373, "grad_norm": 7.816604634617003, "learning_rate": 8.176889921245296e-08, "loss": 0.5859, "step": 25490 }, { "epoch": 1.8416023985406471, "grad_norm": 7.0963960661511525, "learning_rate": 8.16947184904776e-08, "loss": 0.5687, "step": 25491 }, { "epoch": 1.8416746437408564, "grad_norm": 7.467911549514827, "learning_rate": 8.1620570873224e-08, "loss": 0.532, "step": 25492 }, { "epoch": 1.841746888941066, "grad_norm": 7.629332265370914, "learning_rate": 8.154645636170689e-08, "loss": 0.6988, "step": 25493 }, { "epoch": 1.8418191341412755, "grad_norm": 7.411700983067405, "learning_rate": 8.147237495694127e-08, "loss": 0.6226, "step": 25494 }, { "epoch": 1.8418913793414848, "grad_norm": 6.704382007175943, "learning_rate": 8.139832665994079e-08, "loss": 0.608, "step": 25495 }, { "epoch": 1.8419636245416946, "grad_norm": 7.3972955968527305, "learning_rate": 8.132431147171909e-08, "loss": 0.5189, "step": 25496 }, { "epoch": 1.842035869741904, "grad_norm": 7.724601473605314, "learning_rate": 8.125032939328953e-08, "loss": 0.6295, "step": 25497 }, { "epoch": 1.8421081149421137, "grad_norm": 8.144688154214041, "learning_rate": 8.117638042566517e-08, "loss": 0.5299, "step": 25498 }, { "epoch": 1.842180360142323, "grad_norm": 7.529178065291536, "learning_rate": 8.110246456985798e-08, "loss": 0.601, "step": 25499 }, { "epoch": 1.8422526053425325, "grad_norm": 7.377938929614496, "learning_rate": 8.102858182687995e-08, "loss": 0.6081, "step": 25500 }, { "epoch": 1.842324850542742, "grad_norm": 8.6304503095949, "learning_rate": 8.095473219774275e-08, "loss": 0.6274, "step": 25501 }, { "epoch": 1.8423970957429516, "grad_norm": 7.997891148132913, "learning_rate": 8.088091568345641e-08, "loss": 0.6257, "step": 25502 }, { "epoch": 1.8424693409431612, "grad_norm": 7.485601718850778, "learning_rate": 8.080713228503234e-08, "loss": 0.6073, "step": 25503 }, { "epoch": 1.8425415861433705, "grad_norm": 8.490528888741082, "learning_rate": 8.073338200348002e-08, "loss": 0.5996, "step": 25504 }, { "epoch": 1.8426138313435803, "grad_norm": 8.93482386931514, "learning_rate": 8.065966483980975e-08, "loss": 0.607, "step": 25505 }, { "epoch": 1.8426860765437896, "grad_norm": 7.061005560826963, "learning_rate": 8.058598079502961e-08, "loss": 0.5515, "step": 25506 }, { "epoch": 1.8427583217439991, "grad_norm": 7.34091768229374, "learning_rate": 8.051232987014906e-08, "loss": 0.5293, "step": 25507 }, { "epoch": 1.8428305669442087, "grad_norm": 6.043178009769667, "learning_rate": 8.043871206617621e-08, "loss": 0.6012, "step": 25508 }, { "epoch": 1.8429028121444182, "grad_norm": 6.438969120421392, "learning_rate": 8.036512738411855e-08, "loss": 0.6002, "step": 25509 }, { "epoch": 1.8429750573446277, "grad_norm": 7.413418805203687, "learning_rate": 8.029157582498364e-08, "loss": 0.5442, "step": 25510 }, { "epoch": 1.843047302544837, "grad_norm": 6.953533717797792, "learning_rate": 8.021805738977816e-08, "loss": 0.5554, "step": 25511 }, { "epoch": 1.8431195477450468, "grad_norm": 8.080600027334468, "learning_rate": 8.014457207950882e-08, "loss": 0.6267, "step": 25512 }, { "epoch": 1.8431917929452561, "grad_norm": 7.554761978934188, "learning_rate": 8.007111989518146e-08, "loss": 0.6095, "step": 25513 }, { "epoch": 1.8432640381454657, "grad_norm": 6.526082161800321, "learning_rate": 7.99977008378014e-08, "loss": 0.5571, "step": 25514 }, { "epoch": 1.8433362833456752, "grad_norm": 8.683929368233077, "learning_rate": 7.992431490837393e-08, "loss": 0.5579, "step": 25515 }, { "epoch": 1.8434085285458848, "grad_norm": 6.558917421007502, "learning_rate": 7.985096210790327e-08, "loss": 0.6563, "step": 25516 }, { "epoch": 1.8434807737460943, "grad_norm": 6.692730720769545, "learning_rate": 7.977764243739389e-08, "loss": 0.6616, "step": 25517 }, { "epoch": 1.8435530189463036, "grad_norm": 6.859428110112587, "learning_rate": 7.970435589784914e-08, "loss": 0.5034, "step": 25518 }, { "epoch": 1.8436252641465134, "grad_norm": 7.390913782264358, "learning_rate": 7.963110249027295e-08, "loss": 0.5696, "step": 25519 }, { "epoch": 1.8436975093467227, "grad_norm": 7.848910028873909, "learning_rate": 7.955788221566757e-08, "loss": 0.6221, "step": 25520 }, { "epoch": 1.8437697545469323, "grad_norm": 7.178571379769331, "learning_rate": 7.948469507503525e-08, "loss": 0.6322, "step": 25521 }, { "epoch": 1.8438419997471418, "grad_norm": 7.417832986514344, "learning_rate": 7.941154106937826e-08, "loss": 0.6576, "step": 25522 }, { "epoch": 1.8439142449473513, "grad_norm": 7.905673643070046, "learning_rate": 7.9338420199698e-08, "loss": 0.6358, "step": 25523 }, { "epoch": 1.8439864901475609, "grad_norm": 5.919607407723275, "learning_rate": 7.926533246699481e-08, "loss": 0.6087, "step": 25524 }, { "epoch": 1.8440587353477702, "grad_norm": 7.066084500690494, "learning_rate": 7.91922778722698e-08, "loss": 0.5118, "step": 25525 }, { "epoch": 1.84413098054798, "grad_norm": 7.120649649310283, "learning_rate": 7.911925641652273e-08, "loss": 0.6069, "step": 25526 }, { "epoch": 1.8442032257481893, "grad_norm": 6.067015234746207, "learning_rate": 7.904626810075367e-08, "loss": 0.6009, "step": 25527 }, { "epoch": 1.8442754709483988, "grad_norm": 6.622424218740369, "learning_rate": 7.897331292596122e-08, "loss": 0.5879, "step": 25528 }, { "epoch": 1.8443477161486084, "grad_norm": 6.6613056880193, "learning_rate": 7.890039089314433e-08, "loss": 0.5679, "step": 25529 }, { "epoch": 1.844419961348818, "grad_norm": 7.9265010050067515, "learning_rate": 7.882750200330135e-08, "loss": 0.6209, "step": 25530 }, { "epoch": 1.8444922065490275, "grad_norm": 7.475265406359015, "learning_rate": 7.875464625742985e-08, "loss": 0.5356, "step": 25531 }, { "epoch": 1.8445644517492368, "grad_norm": 7.208574265292161, "learning_rate": 7.868182365652732e-08, "loss": 0.5749, "step": 25532 }, { "epoch": 1.8446366969494465, "grad_norm": 7.026047880275081, "learning_rate": 7.860903420159077e-08, "loss": 0.6596, "step": 25533 }, { "epoch": 1.8447089421496559, "grad_norm": 8.679862857561416, "learning_rate": 7.853627789361606e-08, "loss": 0.6267, "step": 25534 }, { "epoch": 1.8447811873498654, "grad_norm": 8.017902846271342, "learning_rate": 7.846355473359962e-08, "loss": 0.7103, "step": 25535 }, { "epoch": 1.844853432550075, "grad_norm": 7.115549613007137, "learning_rate": 7.839086472253704e-08, "loss": 0.5496, "step": 25536 }, { "epoch": 1.8449256777502845, "grad_norm": 7.7835371340659005, "learning_rate": 7.831820786142336e-08, "loss": 0.64, "step": 25537 }, { "epoch": 1.844997922950494, "grad_norm": 6.123241561398528, "learning_rate": 7.824558415125278e-08, "loss": 0.6377, "step": 25538 }, { "epoch": 1.8450701681507033, "grad_norm": 8.302673957010825, "learning_rate": 7.817299359302006e-08, "loss": 0.5793, "step": 25539 }, { "epoch": 1.8451424133509131, "grad_norm": 6.501047196763073, "learning_rate": 7.810043618771884e-08, "loss": 0.6285, "step": 25540 }, { "epoch": 1.8452146585511224, "grad_norm": 8.352231633186099, "learning_rate": 7.802791193634168e-08, "loss": 0.5759, "step": 25541 }, { "epoch": 1.845286903751332, "grad_norm": 6.595897740595629, "learning_rate": 7.795542083988223e-08, "loss": 0.5581, "step": 25542 }, { "epoch": 1.8453591489515415, "grad_norm": 7.065873412228436, "learning_rate": 7.788296289933217e-08, "loss": 0.5919, "step": 25543 }, { "epoch": 1.845431394151751, "grad_norm": 6.922977402778718, "learning_rate": 7.781053811568407e-08, "loss": 0.5663, "step": 25544 }, { "epoch": 1.8455036393519606, "grad_norm": 6.736681479026117, "learning_rate": 7.773814648992878e-08, "loss": 0.5701, "step": 25545 }, { "epoch": 1.84557588455217, "grad_norm": 7.827260226607347, "learning_rate": 7.766578802305719e-08, "loss": 0.575, "step": 25546 }, { "epoch": 1.8456481297523797, "grad_norm": 8.851378476821642, "learning_rate": 7.759346271606072e-08, "loss": 0.6155, "step": 25547 }, { "epoch": 1.845720374952589, "grad_norm": 6.745649277844757, "learning_rate": 7.752117056992831e-08, "loss": 0.5647, "step": 25548 }, { "epoch": 1.8457926201527985, "grad_norm": 7.251263738526138, "learning_rate": 7.744891158565055e-08, "loss": 0.5695, "step": 25549 }, { "epoch": 1.845864865353008, "grad_norm": 6.990641195988726, "learning_rate": 7.737668576421609e-08, "loss": 0.5808, "step": 25550 }, { "epoch": 1.8459371105532176, "grad_norm": 5.936004450320815, "learning_rate": 7.730449310661387e-08, "loss": 0.5834, "step": 25551 }, { "epoch": 1.8460093557534272, "grad_norm": 8.079556661094948, "learning_rate": 7.72323336138317e-08, "loss": 0.5957, "step": 25552 }, { "epoch": 1.8460816009536365, "grad_norm": 8.81560012721921, "learning_rate": 7.716020728685797e-08, "loss": 0.5855, "step": 25553 }, { "epoch": 1.8461538461538463, "grad_norm": 6.975298486154357, "learning_rate": 7.708811412667965e-08, "loss": 0.5613, "step": 25554 }, { "epoch": 1.8462260913540556, "grad_norm": 6.93203079875393, "learning_rate": 7.701605413428404e-08, "loss": 0.5982, "step": 25555 }, { "epoch": 1.8462983365542653, "grad_norm": 7.3682766025409725, "learning_rate": 7.694402731065698e-08, "loss": 0.5664, "step": 25556 }, { "epoch": 1.8463705817544747, "grad_norm": 7.584194120522211, "learning_rate": 7.687203365678492e-08, "loss": 0.5431, "step": 25557 }, { "epoch": 1.8464428269546842, "grad_norm": 7.584515769353137, "learning_rate": 7.680007317365373e-08, "loss": 0.5611, "step": 25558 }, { "epoch": 1.8465150721548937, "grad_norm": 7.629084509326099, "learning_rate": 7.672814586224736e-08, "loss": 0.5443, "step": 25559 }, { "epoch": 1.846587317355103, "grad_norm": 7.11484808217448, "learning_rate": 7.66562517235514e-08, "loss": 0.5586, "step": 25560 }, { "epoch": 1.8466595625553128, "grad_norm": 7.335692835293698, "learning_rate": 7.658439075854979e-08, "loss": 0.5734, "step": 25561 }, { "epoch": 1.8467318077555221, "grad_norm": 6.6976062313140154, "learning_rate": 7.651256296822645e-08, "loss": 0.5748, "step": 25562 }, { "epoch": 1.846804052955732, "grad_norm": 7.514898887659637, "learning_rate": 7.644076835356423e-08, "loss": 0.5773, "step": 25563 }, { "epoch": 1.8468762981559412, "grad_norm": 6.27311612608889, "learning_rate": 7.636900691554594e-08, "loss": 0.635, "step": 25564 }, { "epoch": 1.8469485433561508, "grad_norm": 7.641519630651026, "learning_rate": 7.629727865515412e-08, "loss": 0.5976, "step": 25565 }, { "epoch": 1.8470207885563603, "grad_norm": 8.005286377472798, "learning_rate": 7.622558357337078e-08, "loss": 0.5889, "step": 25566 }, { "epoch": 1.8470930337565696, "grad_norm": 5.993495912705308, "learning_rate": 7.615392167117707e-08, "loss": 0.5805, "step": 25567 }, { "epoch": 1.8471652789567794, "grad_norm": 7.102971607570946, "learning_rate": 7.608229294955443e-08, "loss": 0.5604, "step": 25568 }, { "epoch": 1.8472375241569887, "grad_norm": 7.144604665338356, "learning_rate": 7.601069740948319e-08, "loss": 0.6082, "step": 25569 }, { "epoch": 1.8473097693571985, "grad_norm": 7.192945192393133, "learning_rate": 7.593913505194338e-08, "loss": 0.5664, "step": 25570 }, { "epoch": 1.8473820145574078, "grad_norm": 7.48169891361803, "learning_rate": 7.586760587791454e-08, "loss": 0.6563, "step": 25571 }, { "epoch": 1.8474542597576173, "grad_norm": 7.666533372245497, "learning_rate": 7.579610988837611e-08, "loss": 0.6079, "step": 25572 }, { "epoch": 1.8475265049578269, "grad_norm": 8.166895908589645, "learning_rate": 7.572464708430649e-08, "loss": 0.6222, "step": 25573 }, { "epoch": 1.8475987501580364, "grad_norm": 6.88177905080167, "learning_rate": 7.565321746668408e-08, "loss": 0.5736, "step": 25574 }, { "epoch": 1.847670995358246, "grad_norm": 5.909991013056962, "learning_rate": 7.558182103648643e-08, "loss": 0.5647, "step": 25575 }, { "epoch": 1.8477432405584553, "grad_norm": 7.669237231109183, "learning_rate": 7.55104577946919e-08, "loss": 0.59, "step": 25576 }, { "epoch": 1.847815485758665, "grad_norm": 7.464418823348629, "learning_rate": 7.543912774227641e-08, "loss": 0.6034, "step": 25577 }, { "epoch": 1.8478877309588744, "grad_norm": 5.497411812512552, "learning_rate": 7.536783088021665e-08, "loss": 0.4974, "step": 25578 }, { "epoch": 1.847959976159084, "grad_norm": 8.096409658931197, "learning_rate": 7.52965672094888e-08, "loss": 0.5671, "step": 25579 }, { "epoch": 1.8480322213592935, "grad_norm": 9.755064993621783, "learning_rate": 7.522533673106846e-08, "loss": 0.6393, "step": 25580 }, { "epoch": 1.848104466559503, "grad_norm": 7.708298004344363, "learning_rate": 7.515413944593014e-08, "loss": 0.6077, "step": 25581 }, { "epoch": 1.8481767117597125, "grad_norm": 7.661064782700661, "learning_rate": 7.508297535504888e-08, "loss": 0.6521, "step": 25582 }, { "epoch": 1.8482489569599219, "grad_norm": 7.8637724266769355, "learning_rate": 7.501184445939918e-08, "loss": 0.5953, "step": 25583 }, { "epoch": 1.8483212021601316, "grad_norm": 7.116138234348833, "learning_rate": 7.494074675995416e-08, "loss": 0.589, "step": 25584 }, { "epoch": 1.848393447360341, "grad_norm": 8.599421956352558, "learning_rate": 7.486968225768748e-08, "loss": 0.6021, "step": 25585 }, { "epoch": 1.8484656925605505, "grad_norm": 7.974439078667839, "learning_rate": 7.479865095357198e-08, "loss": 0.637, "step": 25586 }, { "epoch": 1.84853793776076, "grad_norm": 7.4777252026631045, "learning_rate": 7.472765284857991e-08, "loss": 0.5624, "step": 25587 }, { "epoch": 1.8486101829609696, "grad_norm": 8.400575218487413, "learning_rate": 7.465668794368303e-08, "loss": 0.5921, "step": 25588 }, { "epoch": 1.8486824281611791, "grad_norm": 7.846234296728601, "learning_rate": 7.458575623985304e-08, "loss": 0.6001, "step": 25589 }, { "epoch": 1.8487546733613884, "grad_norm": 6.75336683158939, "learning_rate": 7.451485773806084e-08, "loss": 0.5504, "step": 25590 }, { "epoch": 1.8488269185615982, "grad_norm": 8.234010402418065, "learning_rate": 7.444399243927703e-08, "loss": 0.556, "step": 25591 }, { "epoch": 1.8488991637618075, "grad_norm": 6.959857552923222, "learning_rate": 7.43731603444714e-08, "loss": 0.6144, "step": 25592 }, { "epoch": 1.848971408962017, "grad_norm": 8.23976980610014, "learning_rate": 7.430236145461373e-08, "loss": 0.5549, "step": 25593 }, { "epoch": 1.8490436541622266, "grad_norm": 7.331918088581324, "learning_rate": 7.423159577067351e-08, "loss": 0.5737, "step": 25594 }, { "epoch": 1.8491158993624361, "grad_norm": 7.55140010154755, "learning_rate": 7.416086329361916e-08, "loss": 0.5425, "step": 25595 }, { "epoch": 1.8491881445626457, "grad_norm": 7.893823077542403, "learning_rate": 7.409016402441932e-08, "loss": 0.5856, "step": 25596 }, { "epoch": 1.849260389762855, "grad_norm": 6.195248770656613, "learning_rate": 7.401949796404156e-08, "loss": 0.555, "step": 25597 }, { "epoch": 1.8493326349630648, "grad_norm": 7.204567986057364, "learning_rate": 7.39488651134529e-08, "loss": 0.5727, "step": 25598 }, { "epoch": 1.849404880163274, "grad_norm": 6.664087400579945, "learning_rate": 7.387826547362059e-08, "loss": 0.5993, "step": 25599 }, { "epoch": 1.8494771253634836, "grad_norm": 7.766883180189901, "learning_rate": 7.380769904551111e-08, "loss": 0.5818, "step": 25600 }, { "epoch": 1.8495493705636932, "grad_norm": 8.331603620941431, "learning_rate": 7.373716583009088e-08, "loss": 0.5641, "step": 25601 }, { "epoch": 1.8496216157639027, "grad_norm": 8.165254847140412, "learning_rate": 7.366666582832443e-08, "loss": 0.6395, "step": 25602 }, { "epoch": 1.8496938609641123, "grad_norm": 8.491804772309337, "learning_rate": 7.359619904117709e-08, "loss": 0.5921, "step": 25603 }, { "epoch": 1.8497661061643216, "grad_norm": 6.608217972289285, "learning_rate": 7.35257654696142e-08, "loss": 0.6059, "step": 25604 }, { "epoch": 1.8498383513645313, "grad_norm": 7.37196209942187, "learning_rate": 7.34553651146e-08, "loss": 0.6119, "step": 25605 }, { "epoch": 1.8499105965647407, "grad_norm": 8.699502900051693, "learning_rate": 7.33849979770973e-08, "loss": 0.6536, "step": 25606 }, { "epoch": 1.8499828417649502, "grad_norm": 6.953307691595699, "learning_rate": 7.33146640580698e-08, "loss": 0.5946, "step": 25607 }, { "epoch": 1.8500550869651597, "grad_norm": 8.716309127136594, "learning_rate": 7.324436335848062e-08, "loss": 0.5842, "step": 25608 }, { "epoch": 1.8501273321653693, "grad_norm": 8.25145685452987, "learning_rate": 7.317409587929175e-08, "loss": 0.6047, "step": 25609 }, { "epoch": 1.8501995773655788, "grad_norm": 8.234100742586437, "learning_rate": 7.310386162146494e-08, "loss": 0.6289, "step": 25610 }, { "epoch": 1.8502718225657881, "grad_norm": 6.262580112698659, "learning_rate": 7.303366058596217e-08, "loss": 0.6819, "step": 25611 }, { "epoch": 1.850344067765998, "grad_norm": 8.490487554095466, "learning_rate": 7.29634927737441e-08, "loss": 0.5778, "step": 25612 }, { "epoch": 1.8504163129662072, "grad_norm": 7.521257411451652, "learning_rate": 7.289335818577103e-08, "loss": 0.6185, "step": 25613 }, { "epoch": 1.8504885581664168, "grad_norm": 7.308707533521018, "learning_rate": 7.282325682300362e-08, "loss": 0.6062, "step": 25614 }, { "epoch": 1.8505608033666263, "grad_norm": 8.329238902992579, "learning_rate": 7.275318868640163e-08, "loss": 0.6347, "step": 25615 }, { "epoch": 1.8506330485668359, "grad_norm": 6.812257814914614, "learning_rate": 7.268315377692348e-08, "loss": 0.5297, "step": 25616 }, { "epoch": 1.8507052937670454, "grad_norm": 6.253499386554437, "learning_rate": 7.261315209552811e-08, "loss": 0.6257, "step": 25617 }, { "epoch": 1.8507775389672547, "grad_norm": 7.74740212490991, "learning_rate": 7.25431836431742e-08, "loss": 0.562, "step": 25618 }, { "epoch": 1.8508497841674645, "grad_norm": 8.189960605028212, "learning_rate": 7.247324842081933e-08, "loss": 0.6263, "step": 25619 }, { "epoch": 1.8509220293676738, "grad_norm": 7.179743022517224, "learning_rate": 7.240334642942076e-08, "loss": 0.6007, "step": 25620 }, { "epoch": 1.8509942745678833, "grad_norm": 8.211024841947449, "learning_rate": 7.233347766993554e-08, "loss": 0.6502, "step": 25621 }, { "epoch": 1.8510665197680929, "grad_norm": 7.904007544664497, "learning_rate": 7.226364214331982e-08, "loss": 0.674, "step": 25622 }, { "epoch": 1.8511387649683024, "grad_norm": 6.915028787968112, "learning_rate": 7.21938398505298e-08, "loss": 0.524, "step": 25623 }, { "epoch": 1.851211010168512, "grad_norm": 6.831876863160567, "learning_rate": 7.212407079252109e-08, "loss": 0.5803, "step": 25624 }, { "epoch": 1.8512832553687213, "grad_norm": 7.4571645094333245, "learning_rate": 7.205433497024878e-08, "loss": 0.594, "step": 25625 }, { "epoch": 1.851355500568931, "grad_norm": 8.987419660737064, "learning_rate": 7.198463238466791e-08, "loss": 0.5765, "step": 25626 }, { "epoch": 1.8514277457691404, "grad_norm": 6.858838592618987, "learning_rate": 7.191496303673163e-08, "loss": 0.6177, "step": 25627 }, { "epoch": 1.8514999909693501, "grad_norm": 6.030315425587626, "learning_rate": 7.184532692739443e-08, "loss": 0.5508, "step": 25628 }, { "epoch": 1.8515722361695595, "grad_norm": 7.317109831276211, "learning_rate": 7.177572405760918e-08, "loss": 0.5783, "step": 25629 }, { "epoch": 1.851644481369769, "grad_norm": 6.572084530380212, "learning_rate": 7.170615442832928e-08, "loss": 0.497, "step": 25630 }, { "epoch": 1.8517167265699785, "grad_norm": 6.781377975732945, "learning_rate": 7.163661804050592e-08, "loss": 0.5935, "step": 25631 }, { "epoch": 1.8517889717701879, "grad_norm": 7.85220161228124, "learning_rate": 7.156711489509222e-08, "loss": 0.6532, "step": 25632 }, { "epoch": 1.8518612169703976, "grad_norm": 7.98470083269845, "learning_rate": 7.149764499303935e-08, "loss": 0.5756, "step": 25633 }, { "epoch": 1.851933462170607, "grad_norm": 6.944515770439914, "learning_rate": 7.142820833529795e-08, "loss": 0.5602, "step": 25634 }, { "epoch": 1.8520057073708167, "grad_norm": 7.320128545414125, "learning_rate": 7.135880492281865e-08, "loss": 0.5779, "step": 25635 }, { "epoch": 1.852077952571026, "grad_norm": 7.387412020035146, "learning_rate": 7.128943475655153e-08, "loss": 0.5903, "step": 25636 }, { "epoch": 1.8521501977712356, "grad_norm": 7.94751450174022, "learning_rate": 7.122009783744666e-08, "loss": 0.583, "step": 25637 }, { "epoch": 1.8522224429714451, "grad_norm": 6.019381373357226, "learning_rate": 7.115079416645271e-08, "loss": 0.5604, "step": 25638 }, { "epoch": 1.8522946881716544, "grad_norm": 6.794709380210712, "learning_rate": 7.10815237445181e-08, "loss": 0.6254, "step": 25639 }, { "epoch": 1.8523669333718642, "grad_norm": 7.3164886290744, "learning_rate": 7.101228657259179e-08, "loss": 0.6225, "step": 25640 }, { "epoch": 1.8524391785720735, "grad_norm": 7.554558990553191, "learning_rate": 7.094308265162109e-08, "loss": 0.5966, "step": 25641 }, { "epoch": 1.8525114237722833, "grad_norm": 7.420801731608503, "learning_rate": 7.087391198255384e-08, "loss": 0.6102, "step": 25642 }, { "epoch": 1.8525836689724926, "grad_norm": 8.158800917393926, "learning_rate": 7.08047745663365e-08, "loss": 0.6044, "step": 25643 }, { "epoch": 1.8526559141727021, "grad_norm": 6.817006991346595, "learning_rate": 7.073567040391583e-08, "loss": 0.5549, "step": 25644 }, { "epoch": 1.8527281593729117, "grad_norm": 6.97014627880572, "learning_rate": 7.066659949623744e-08, "loss": 0.5695, "step": 25645 }, { "epoch": 1.852800404573121, "grad_norm": 7.3089387487051924, "learning_rate": 7.059756184424699e-08, "loss": 0.6451, "step": 25646 }, { "epoch": 1.8528726497733308, "grad_norm": 8.472265886149081, "learning_rate": 7.052855744888981e-08, "loss": 0.5861, "step": 25647 }, { "epoch": 1.85294489497354, "grad_norm": 6.769580556928771, "learning_rate": 7.045958631111016e-08, "loss": 0.6213, "step": 25648 }, { "epoch": 1.8530171401737499, "grad_norm": 8.071162338785129, "learning_rate": 7.039064843185228e-08, "loss": 0.613, "step": 25649 }, { "epoch": 1.8530893853739592, "grad_norm": 6.947735632997659, "learning_rate": 7.032174381206014e-08, "loss": 0.6043, "step": 25650 }, { "epoch": 1.8531616305741687, "grad_norm": 7.692045775135969, "learning_rate": 7.025287245267659e-08, "loss": 0.6137, "step": 25651 }, { "epoch": 1.8532338757743783, "grad_norm": 7.420362458951503, "learning_rate": 7.018403435464477e-08, "loss": 0.5825, "step": 25652 }, { "epoch": 1.8533061209745878, "grad_norm": 8.63909114861184, "learning_rate": 7.01152295189067e-08, "loss": 0.6178, "step": 25653 }, { "epoch": 1.8533783661747973, "grad_norm": 7.861064873309647, "learning_rate": 7.004645794640497e-08, "loss": 0.6321, "step": 25654 }, { "epoch": 1.8534506113750067, "grad_norm": 7.368944947541173, "learning_rate": 6.99777196380802e-08, "loss": 0.6126, "step": 25655 }, { "epoch": 1.8535228565752164, "grad_norm": 7.514942288865595, "learning_rate": 6.990901459487359e-08, "loss": 0.5754, "step": 25656 }, { "epoch": 1.8535951017754257, "grad_norm": 6.912551973502763, "learning_rate": 6.98403428177255e-08, "loss": 0.5752, "step": 25657 }, { "epoch": 1.8536673469756353, "grad_norm": 7.105162997218362, "learning_rate": 6.977170430757685e-08, "loss": 0.5892, "step": 25658 }, { "epoch": 1.8537395921758448, "grad_norm": 7.476273454362427, "learning_rate": 6.970309906536576e-08, "loss": 0.5679, "step": 25659 }, { "epoch": 1.8538118373760544, "grad_norm": 6.773051989011276, "learning_rate": 6.963452709203289e-08, "loss": 0.5492, "step": 25660 }, { "epoch": 1.853884082576264, "grad_norm": 7.218583274336135, "learning_rate": 6.956598838851608e-08, "loss": 0.5911, "step": 25661 }, { "epoch": 1.8539563277764732, "grad_norm": 6.641423435455945, "learning_rate": 6.949748295575404e-08, "loss": 0.6976, "step": 25662 }, { "epoch": 1.854028572976683, "grad_norm": 7.237129362693023, "learning_rate": 6.942901079468406e-08, "loss": 0.6258, "step": 25663 }, { "epoch": 1.8541008181768923, "grad_norm": 8.05802473906059, "learning_rate": 6.936057190624346e-08, "loss": 0.5831, "step": 25664 }, { "epoch": 1.8541730633771019, "grad_norm": 8.034933588765927, "learning_rate": 6.929216629136981e-08, "loss": 0.6405, "step": 25665 }, { "epoch": 1.8542453085773114, "grad_norm": 7.380748771167848, "learning_rate": 6.922379395099904e-08, "loss": 0.5652, "step": 25666 }, { "epoch": 1.854317553777521, "grad_norm": 7.617299428264192, "learning_rate": 6.915545488606679e-08, "loss": 0.5509, "step": 25667 }, { "epoch": 1.8543897989777305, "grad_norm": 6.989686725456266, "learning_rate": 6.908714909750924e-08, "loss": 0.5547, "step": 25668 }, { "epoch": 1.8544620441779398, "grad_norm": 7.114464985508699, "learning_rate": 6.901887658626094e-08, "loss": 0.6401, "step": 25669 }, { "epoch": 1.8545342893781496, "grad_norm": 8.070126260859302, "learning_rate": 6.895063735325669e-08, "loss": 0.6115, "step": 25670 }, { "epoch": 1.854606534578359, "grad_norm": 7.799534896286359, "learning_rate": 6.888243139943074e-08, "loss": 0.6176, "step": 25671 }, { "epoch": 1.8546787797785684, "grad_norm": 6.861220821819648, "learning_rate": 6.881425872571679e-08, "loss": 0.5889, "step": 25672 }, { "epoch": 1.854751024978778, "grad_norm": 7.6835401307229265, "learning_rate": 6.874611933304798e-08, "loss": 0.5517, "step": 25673 }, { "epoch": 1.8548232701789875, "grad_norm": 7.792829239822669, "learning_rate": 6.867801322235663e-08, "loss": 0.5728, "step": 25674 }, { "epoch": 1.854895515379197, "grad_norm": 6.764421386785277, "learning_rate": 6.860994039457586e-08, "loss": 0.579, "step": 25675 }, { "epoch": 1.8549677605794064, "grad_norm": 7.187743075033728, "learning_rate": 6.854190085063717e-08, "loss": 0.5842, "step": 25676 }, { "epoch": 1.8550400057796161, "grad_norm": 7.032175503325433, "learning_rate": 6.847389459147175e-08, "loss": 0.5881, "step": 25677 }, { "epoch": 1.8551122509798255, "grad_norm": 6.4658677318766, "learning_rate": 6.840592161801079e-08, "loss": 0.6121, "step": 25678 }, { "epoch": 1.855184496180035, "grad_norm": 7.299596111673213, "learning_rate": 6.833798193118469e-08, "loss": 0.62, "step": 25679 }, { "epoch": 1.8552567413802445, "grad_norm": 7.183956168213072, "learning_rate": 6.827007553192349e-08, "loss": 0.5595, "step": 25680 }, { "epoch": 1.855328986580454, "grad_norm": 6.3570687425925225, "learning_rate": 6.820220242115705e-08, "loss": 0.5299, "step": 25681 }, { "epoch": 1.8554012317806636, "grad_norm": 8.102466499708978, "learning_rate": 6.813436259981432e-08, "loss": 0.5617, "step": 25682 }, { "epoch": 1.855473476980873, "grad_norm": 8.975636460202844, "learning_rate": 6.806655606882401e-08, "loss": 0.6443, "step": 25683 }, { "epoch": 1.8555457221810827, "grad_norm": 7.415924578055801, "learning_rate": 6.799878282911398e-08, "loss": 0.6236, "step": 25684 }, { "epoch": 1.855617967381292, "grad_norm": 6.781045515080097, "learning_rate": 6.79310428816124e-08, "loss": 0.5772, "step": 25685 }, { "epoch": 1.8556902125815016, "grad_norm": 6.518447228163884, "learning_rate": 6.786333622724656e-08, "loss": 0.5779, "step": 25686 }, { "epoch": 1.8557624577817111, "grad_norm": 7.549880919875284, "learning_rate": 6.779566286694322e-08, "loss": 0.5915, "step": 25687 }, { "epoch": 1.8558347029819207, "grad_norm": 7.585114769365927, "learning_rate": 6.772802280162832e-08, "loss": 0.6752, "step": 25688 }, { "epoch": 1.8559069481821302, "grad_norm": 7.3811113284761785, "learning_rate": 6.766041603222861e-08, "loss": 0.6034, "step": 25689 }, { "epoch": 1.8559791933823395, "grad_norm": 7.997026129155787, "learning_rate": 6.759284255966947e-08, "loss": 0.5699, "step": 25690 }, { "epoch": 1.8560514385825493, "grad_norm": 8.486162029085158, "learning_rate": 6.752530238487543e-08, "loss": 0.6176, "step": 25691 }, { "epoch": 1.8561236837827586, "grad_norm": 6.657745050152644, "learning_rate": 6.74577955087713e-08, "loss": 0.5861, "step": 25692 }, { "epoch": 1.8561959289829681, "grad_norm": 8.266878375552157, "learning_rate": 6.739032193228107e-08, "loss": 0.6391, "step": 25693 }, { "epoch": 1.8562681741831777, "grad_norm": 8.538441515903695, "learning_rate": 6.7322881656329e-08, "loss": 0.5697, "step": 25694 }, { "epoch": 1.8563404193833872, "grad_norm": 7.798826168450794, "learning_rate": 6.725547468183741e-08, "loss": 0.6065, "step": 25695 }, { "epoch": 1.8564126645835968, "grad_norm": 6.269602134042435, "learning_rate": 6.718810100972945e-08, "loss": 0.5484, "step": 25696 }, { "epoch": 1.856484909783806, "grad_norm": 7.364947952532038, "learning_rate": 6.712076064092742e-08, "loss": 0.5649, "step": 25697 }, { "epoch": 1.8565571549840159, "grad_norm": 7.325746455256897, "learning_rate": 6.70534535763534e-08, "loss": 0.6071, "step": 25698 }, { "epoch": 1.8566294001842252, "grad_norm": 8.752544360120599, "learning_rate": 6.698617981692829e-08, "loss": 0.5687, "step": 25699 }, { "epoch": 1.8567016453844347, "grad_norm": 7.00595221180126, "learning_rate": 6.69189393635733e-08, "loss": 0.6052, "step": 25700 }, { "epoch": 1.8567738905846443, "grad_norm": 6.93278411866303, "learning_rate": 6.685173221720908e-08, "loss": 0.5492, "step": 25701 }, { "epoch": 1.8568461357848538, "grad_norm": 6.732979118673992, "learning_rate": 6.678455837875546e-08, "loss": 0.6033, "step": 25702 }, { "epoch": 1.8569183809850633, "grad_norm": 8.098451405122637, "learning_rate": 6.671741784913171e-08, "loss": 0.5921, "step": 25703 }, { "epoch": 1.8569906261852727, "grad_norm": 8.140462394159355, "learning_rate": 6.665031062925737e-08, "loss": 0.6309, "step": 25704 }, { "epoch": 1.8570628713854824, "grad_norm": 7.957147266888626, "learning_rate": 6.658323672005085e-08, "loss": 0.651, "step": 25705 }, { "epoch": 1.8571351165856917, "grad_norm": 8.687593528189216, "learning_rate": 6.651619612243032e-08, "loss": 0.6297, "step": 25706 }, { "epoch": 1.8572073617859015, "grad_norm": 7.676876760642684, "learning_rate": 6.644918883731366e-08, "loss": 0.6573, "step": 25707 }, { "epoch": 1.8572796069861108, "grad_norm": 8.883378063707179, "learning_rate": 6.638221486561791e-08, "loss": 0.6107, "step": 25708 }, { "epoch": 1.8573518521863204, "grad_norm": 7.097034114261685, "learning_rate": 6.631527420826012e-08, "loss": 0.5372, "step": 25709 }, { "epoch": 1.85742409738653, "grad_norm": 6.4331107870293485, "learning_rate": 6.624836686615677e-08, "loss": 0.5627, "step": 25710 }, { "epoch": 1.8574963425867392, "grad_norm": 6.950588492307993, "learning_rate": 6.618149284022352e-08, "loss": 0.6015, "step": 25711 }, { "epoch": 1.857568587786949, "grad_norm": 8.290950980681439, "learning_rate": 6.611465213137602e-08, "loss": 0.6313, "step": 25712 }, { "epoch": 1.8576408329871583, "grad_norm": 7.082369779984624, "learning_rate": 6.604784474052884e-08, "loss": 0.629, "step": 25713 }, { "epoch": 1.857713078187368, "grad_norm": 7.056044337178759, "learning_rate": 6.598107066859704e-08, "loss": 0.6453, "step": 25714 }, { "epoch": 1.8577853233875774, "grad_norm": 6.409804535026485, "learning_rate": 6.591432991649466e-08, "loss": 0.5772, "step": 25715 }, { "epoch": 1.857857568587787, "grad_norm": 6.782539772397959, "learning_rate": 6.584762248513455e-08, "loss": 0.5442, "step": 25716 }, { "epoch": 1.8579298137879965, "grad_norm": 8.336029938861877, "learning_rate": 6.57809483754307e-08, "loss": 0.6195, "step": 25717 }, { "epoch": 1.8580020589882058, "grad_norm": 7.56083774425953, "learning_rate": 6.571430758829573e-08, "loss": 0.5822, "step": 25718 }, { "epoch": 1.8580743041884156, "grad_norm": 7.742407987132232, "learning_rate": 6.564770012464223e-08, "loss": 0.585, "step": 25719 }, { "epoch": 1.858146549388625, "grad_norm": 7.561384891234454, "learning_rate": 6.558112598538113e-08, "loss": 0.6541, "step": 25720 }, { "epoch": 1.8582187945888347, "grad_norm": 7.498982932747265, "learning_rate": 6.551458517142423e-08, "loss": 0.6892, "step": 25721 }, { "epoch": 1.858291039789044, "grad_norm": 6.276592292743426, "learning_rate": 6.544807768368244e-08, "loss": 0.6366, "step": 25722 }, { "epoch": 1.8583632849892535, "grad_norm": 8.072227583158002, "learning_rate": 6.538160352306616e-08, "loss": 0.5712, "step": 25723 }, { "epoch": 1.858435530189463, "grad_norm": 7.552134069513481, "learning_rate": 6.531516269048549e-08, "loss": 0.706, "step": 25724 }, { "epoch": 1.8585077753896726, "grad_norm": 8.792791743553796, "learning_rate": 6.524875518684942e-08, "loss": 0.6681, "step": 25725 }, { "epoch": 1.8585800205898821, "grad_norm": 7.307127454294251, "learning_rate": 6.518238101306779e-08, "loss": 0.5158, "step": 25726 }, { "epoch": 1.8586522657900915, "grad_norm": 7.3020074670816655, "learning_rate": 6.511604017004874e-08, "loss": 0.557, "step": 25727 }, { "epoch": 1.8587245109903012, "grad_norm": 6.595731464626709, "learning_rate": 6.504973265870046e-08, "loss": 0.6053, "step": 25728 }, { "epoch": 1.8587967561905105, "grad_norm": 6.28899395828333, "learning_rate": 6.498345847993109e-08, "loss": 0.5785, "step": 25729 }, { "epoch": 1.85886900139072, "grad_norm": 7.339151963973288, "learning_rate": 6.491721763464714e-08, "loss": 0.5861, "step": 25730 }, { "epoch": 1.8589412465909296, "grad_norm": 8.134403831615492, "learning_rate": 6.485101012375595e-08, "loss": 0.6091, "step": 25731 }, { "epoch": 1.8590134917911392, "grad_norm": 7.598205013119001, "learning_rate": 6.478483594816343e-08, "loss": 0.5926, "step": 25732 }, { "epoch": 1.8590857369913487, "grad_norm": 7.3207137446730774, "learning_rate": 6.47186951087761e-08, "loss": 0.6526, "step": 25733 }, { "epoch": 1.859157982191558, "grad_norm": 6.54799218307709, "learning_rate": 6.46525876064985e-08, "loss": 0.5511, "step": 25734 }, { "epoch": 1.8592302273917678, "grad_norm": 7.082993742338328, "learning_rate": 6.458651344223633e-08, "loss": 0.5898, "step": 25735 }, { "epoch": 1.8593024725919771, "grad_norm": 6.821638225338, "learning_rate": 6.452047261689326e-08, "loss": 0.6255, "step": 25736 }, { "epoch": 1.8593747177921867, "grad_norm": 7.951489953107231, "learning_rate": 6.445446513137471e-08, "loss": 0.6809, "step": 25737 }, { "epoch": 1.8594469629923962, "grad_norm": 10.025863009095449, "learning_rate": 6.438849098658301e-08, "loss": 0.575, "step": 25738 }, { "epoch": 1.8595192081926057, "grad_norm": 8.151566487114465, "learning_rate": 6.432255018342159e-08, "loss": 0.5452, "step": 25739 }, { "epoch": 1.8595914533928153, "grad_norm": 6.4442207290287135, "learning_rate": 6.425664272279392e-08, "loss": 0.5809, "step": 25740 }, { "epoch": 1.8596636985930246, "grad_norm": 7.342491788823827, "learning_rate": 6.419076860560091e-08, "loss": 0.6431, "step": 25741 }, { "epoch": 1.8597359437932344, "grad_norm": 7.258623979123901, "learning_rate": 6.412492783274521e-08, "loss": 0.6489, "step": 25742 }, { "epoch": 1.8598081889934437, "grad_norm": 8.575138760436602, "learning_rate": 6.405912040512801e-08, "loss": 0.6619, "step": 25743 }, { "epoch": 1.8598804341936532, "grad_norm": 7.104882197735665, "learning_rate": 6.399334632365001e-08, "loss": 0.6212, "step": 25744 }, { "epoch": 1.8599526793938628, "grad_norm": 7.020663145211794, "learning_rate": 6.392760558921157e-08, "loss": 0.5653, "step": 25745 }, { "epoch": 1.8600249245940723, "grad_norm": 9.973529877519464, "learning_rate": 6.386189820271282e-08, "loss": 0.5408, "step": 25746 }, { "epoch": 1.8600971697942819, "grad_norm": 8.182968166721542, "learning_rate": 6.379622416505332e-08, "loss": 0.601, "step": 25747 }, { "epoch": 1.8601694149944912, "grad_norm": 7.218024675664794, "learning_rate": 6.373058347713179e-08, "loss": 0.6519, "step": 25748 }, { "epoch": 1.860241660194701, "grad_norm": 9.496279339249794, "learning_rate": 6.366497613984668e-08, "loss": 0.7144, "step": 25749 }, { "epoch": 1.8603139053949103, "grad_norm": 6.9135540619022935, "learning_rate": 6.359940215409643e-08, "loss": 0.5788, "step": 25750 }, { "epoch": 1.8603861505951198, "grad_norm": 7.653394516934034, "learning_rate": 6.353386152077895e-08, "loss": 0.6189, "step": 25751 }, { "epoch": 1.8604583957953293, "grad_norm": 7.56822101688049, "learning_rate": 6.3468354240791e-08, "loss": 0.5779, "step": 25752 }, { "epoch": 1.8605306409955389, "grad_norm": 7.357290438416499, "learning_rate": 6.34028803150294e-08, "loss": 0.5468, "step": 25753 }, { "epoch": 1.8606028861957484, "grad_norm": 6.963635676302673, "learning_rate": 6.333743974439033e-08, "loss": 0.5736, "step": 25754 }, { "epoch": 1.8606751313959577, "grad_norm": 8.087568713785084, "learning_rate": 6.327203252976976e-08, "loss": 0.5833, "step": 25755 }, { "epoch": 1.8607473765961675, "grad_norm": 7.066950923072913, "learning_rate": 6.32066586720631e-08, "loss": 0.6764, "step": 25756 }, { "epoch": 1.8608196217963768, "grad_norm": 7.084529311994536, "learning_rate": 6.314131817216518e-08, "loss": 0.6427, "step": 25757 }, { "epoch": 1.8608918669965864, "grad_norm": 6.896421317150119, "learning_rate": 6.307601103097084e-08, "loss": 0.5483, "step": 25758 }, { "epoch": 1.860964112196796, "grad_norm": 9.102520570551032, "learning_rate": 6.301073724937351e-08, "loss": 0.6029, "step": 25759 }, { "epoch": 1.8610363573970055, "grad_norm": 6.939964887546684, "learning_rate": 6.294549682826694e-08, "loss": 0.5693, "step": 25760 }, { "epoch": 1.861108602597215, "grad_norm": 6.790229723363188, "learning_rate": 6.288028976854433e-08, "loss": 0.5543, "step": 25761 }, { "epoch": 1.8611808477974243, "grad_norm": 7.619618752621599, "learning_rate": 6.281511607109852e-08, "loss": 0.6482, "step": 25762 }, { "epoch": 1.861253092997634, "grad_norm": 8.051584350864246, "learning_rate": 6.274997573682106e-08, "loss": 0.5943, "step": 25763 }, { "epoch": 1.8613253381978434, "grad_norm": 7.3775601873266305, "learning_rate": 6.268486876660429e-08, "loss": 0.6064, "step": 25764 }, { "epoch": 1.861397583398053, "grad_norm": 8.031080904713068, "learning_rate": 6.261979516133886e-08, "loss": 0.6285, "step": 25765 }, { "epoch": 1.8614698285982625, "grad_norm": 6.976924737323974, "learning_rate": 6.255475492191603e-08, "loss": 0.5495, "step": 25766 }, { "epoch": 1.861542073798472, "grad_norm": 7.4953339367071505, "learning_rate": 6.248974804922619e-08, "loss": 0.6099, "step": 25767 }, { "epoch": 1.8616143189986816, "grad_norm": 6.665772982142648, "learning_rate": 6.242477454415918e-08, "loss": 0.5869, "step": 25768 }, { "epoch": 1.861686564198891, "grad_norm": 6.437710360451035, "learning_rate": 6.235983440760429e-08, "loss": 0.6027, "step": 25769 }, { "epoch": 1.8617588093991007, "grad_norm": 8.212122576676231, "learning_rate": 6.229492764045053e-08, "loss": 0.5995, "step": 25770 }, { "epoch": 1.86183105459931, "grad_norm": 7.592474936355054, "learning_rate": 6.223005424358635e-08, "loss": 0.5644, "step": 25771 }, { "epoch": 1.8619032997995195, "grad_norm": 7.324766125379288, "learning_rate": 6.216521421790023e-08, "loss": 0.5388, "step": 25772 }, { "epoch": 1.861975544999729, "grad_norm": 6.363712565473617, "learning_rate": 6.210040756427892e-08, "loss": 0.5893, "step": 25773 }, { "epoch": 1.8620477901999386, "grad_norm": 8.91331913462126, "learning_rate": 6.203563428361037e-08, "loss": 0.5894, "step": 25774 }, { "epoch": 1.8621200354001481, "grad_norm": 7.4919270618348595, "learning_rate": 6.197089437678105e-08, "loss": 0.5548, "step": 25775 }, { "epoch": 1.8621922806003575, "grad_norm": 6.475528721965791, "learning_rate": 6.19061878446775e-08, "loss": 0.6508, "step": 25776 }, { "epoch": 1.8622645258005672, "grad_norm": 6.269026520279117, "learning_rate": 6.184151468818484e-08, "loss": 0.592, "step": 25777 }, { "epoch": 1.8623367710007765, "grad_norm": 8.191377842553269, "learning_rate": 6.177687490818873e-08, "loss": 0.6437, "step": 25778 }, { "epoch": 1.8624090162009863, "grad_norm": 7.6005570157366025, "learning_rate": 6.171226850557432e-08, "loss": 0.5798, "step": 25779 }, { "epoch": 1.8624812614011956, "grad_norm": 21.767745572623113, "learning_rate": 6.164769548122562e-08, "loss": 0.6306, "step": 25780 }, { "epoch": 1.8625535066014052, "grad_norm": 8.001971002008954, "learning_rate": 6.158315583602664e-08, "loss": 0.6616, "step": 25781 }, { "epoch": 1.8626257518016147, "grad_norm": 7.100958434412629, "learning_rate": 6.151864957086084e-08, "loss": 0.5913, "step": 25782 }, { "epoch": 1.862697997001824, "grad_norm": 8.570577333960784, "learning_rate": 6.145417668661141e-08, "loss": 0.6013, "step": 25783 }, { "epoch": 1.8627702422020338, "grad_norm": 6.982514203935019, "learning_rate": 6.138973718416097e-08, "loss": 0.595, "step": 25784 }, { "epoch": 1.8628424874022431, "grad_norm": 7.635564520750061, "learning_rate": 6.132533106439159e-08, "loss": 0.5644, "step": 25785 }, { "epoch": 1.8629147326024529, "grad_norm": 6.44750465798182, "learning_rate": 6.126095832818507e-08, "loss": 0.6119, "step": 25786 }, { "epoch": 1.8629869778026622, "grad_norm": 7.100146128265573, "learning_rate": 6.119661897642209e-08, "loss": 0.6187, "step": 25787 }, { "epoch": 1.8630592230028717, "grad_norm": 7.872651491715467, "learning_rate": 6.113231300998418e-08, "loss": 0.6117, "step": 25788 }, { "epoch": 1.8631314682030813, "grad_norm": 7.428243566970214, "learning_rate": 6.10680404297509e-08, "loss": 0.6528, "step": 25789 }, { "epoch": 1.8632037134032906, "grad_norm": 7.333189818394016, "learning_rate": 6.100380123660293e-08, "loss": 0.5947, "step": 25790 }, { "epoch": 1.8632759586035004, "grad_norm": 7.0556272301158005, "learning_rate": 6.093959543141875e-08, "loss": 0.5389, "step": 25791 }, { "epoch": 1.8633482038037097, "grad_norm": 6.974828421283756, "learning_rate": 6.087542301507766e-08, "loss": 0.6341, "step": 25792 }, { "epoch": 1.8634204490039195, "grad_norm": 9.104304425370684, "learning_rate": 6.081128398845809e-08, "loss": 0.6491, "step": 25793 }, { "epoch": 1.8634926942041288, "grad_norm": 6.604892662729776, "learning_rate": 6.074717835243854e-08, "loss": 0.5865, "step": 25794 }, { "epoch": 1.8635649394043383, "grad_norm": 8.042474050625247, "learning_rate": 6.06831061078958e-08, "loss": 0.6267, "step": 25795 }, { "epoch": 1.8636371846045479, "grad_norm": 7.392005065718067, "learning_rate": 6.061906725570748e-08, "loss": 0.6004, "step": 25796 }, { "epoch": 1.8637094298047574, "grad_norm": 6.854535526758736, "learning_rate": 6.055506179675041e-08, "loss": 0.6724, "step": 25797 }, { "epoch": 1.863781675004967, "grad_norm": 7.1671339666322895, "learning_rate": 6.049108973189998e-08, "loss": 0.595, "step": 25798 }, { "epoch": 1.8638539202051763, "grad_norm": 7.9822400368480135, "learning_rate": 6.042715106203245e-08, "loss": 0.5855, "step": 25799 }, { "epoch": 1.863926165405386, "grad_norm": 8.011889682324531, "learning_rate": 6.036324578802294e-08, "loss": 0.6031, "step": 25800 }, { "epoch": 1.8639984106055953, "grad_norm": 6.217382232691873, "learning_rate": 6.029937391074659e-08, "loss": 0.642, "step": 25801 }, { "epoch": 1.8640706558058049, "grad_norm": 6.82631744971741, "learning_rate": 6.023553543107713e-08, "loss": 0.6629, "step": 25802 }, { "epoch": 1.8641429010060144, "grad_norm": 6.851200594418539, "learning_rate": 6.017173034988916e-08, "loss": 0.5898, "step": 25803 }, { "epoch": 1.864215146206224, "grad_norm": 8.685561588207124, "learning_rate": 6.010795866805586e-08, "loss": 0.5291, "step": 25804 }, { "epoch": 1.8642873914064335, "grad_norm": 9.514865688352243, "learning_rate": 6.004422038645014e-08, "loss": 0.6449, "step": 25805 }, { "epoch": 1.8643596366066428, "grad_norm": 7.98686164609463, "learning_rate": 5.998051550594436e-08, "loss": 0.5605, "step": 25806 }, { "epoch": 1.8644318818068526, "grad_norm": 7.07553895440915, "learning_rate": 5.99168440274106e-08, "loss": 0.5985, "step": 25807 }, { "epoch": 1.864504127007062, "grad_norm": 7.2769948944329705, "learning_rate": 5.985320595172123e-08, "loss": 0.6237, "step": 25808 }, { "epoch": 1.8645763722072715, "grad_norm": 6.418945535357709, "learning_rate": 5.978960127974637e-08, "loss": 0.584, "step": 25809 }, { "epoch": 1.864648617407481, "grad_norm": 7.991942162351226, "learning_rate": 5.972603001235728e-08, "loss": 0.5942, "step": 25810 }, { "epoch": 1.8647208626076905, "grad_norm": 6.940807756802844, "learning_rate": 5.966249215042381e-08, "loss": 0.5161, "step": 25811 }, { "epoch": 1.8647931078079, "grad_norm": 7.59072074594179, "learning_rate": 5.959898769481637e-08, "loss": 0.5421, "step": 25812 }, { "epoch": 1.8648653530081094, "grad_norm": 7.555135119963653, "learning_rate": 5.9535516646403727e-08, "loss": 0.5891, "step": 25813 }, { "epoch": 1.8649375982083192, "grad_norm": 6.85680436820235, "learning_rate": 5.9472079006055174e-08, "loss": 0.5911, "step": 25814 }, { "epoch": 1.8650098434085285, "grad_norm": 7.365215729391673, "learning_rate": 5.940867477463891e-08, "loss": 0.6705, "step": 25815 }, { "epoch": 1.865082088608738, "grad_norm": 8.43195437690799, "learning_rate": 5.934530395302285e-08, "loss": 0.6534, "step": 25816 }, { "epoch": 1.8651543338089476, "grad_norm": 7.6929634342928725, "learning_rate": 5.9281966542074355e-08, "loss": 0.6471, "step": 25817 }, { "epoch": 1.8652265790091571, "grad_norm": 7.78221963869453, "learning_rate": 5.921866254266079e-08, "loss": 0.5822, "step": 25818 }, { "epoch": 1.8652988242093667, "grad_norm": 6.130264199865798, "learning_rate": 5.9155391955648675e-08, "loss": 0.588, "step": 25819 }, { "epoch": 1.865371069409576, "grad_norm": 8.201645606926764, "learning_rate": 5.9092154781904e-08, "loss": 0.6539, "step": 25820 }, { "epoch": 1.8654433146097857, "grad_norm": 6.992476916583642, "learning_rate": 5.9028951022292166e-08, "loss": 0.5983, "step": 25821 }, { "epoch": 1.865515559809995, "grad_norm": 8.218086709065245, "learning_rate": 5.896578067767916e-08, "loss": 0.6593, "step": 25822 }, { "epoch": 1.8655878050102046, "grad_norm": 5.965125776117579, "learning_rate": 5.890264374892901e-08, "loss": 0.57, "step": 25823 }, { "epoch": 1.8656600502104141, "grad_norm": 7.3677599007982435, "learning_rate": 5.88395402369063e-08, "loss": 0.6193, "step": 25824 }, { "epoch": 1.8657322954106237, "grad_norm": 7.234411647366006, "learning_rate": 5.877647014247479e-08, "loss": 0.5728, "step": 25825 }, { "epoch": 1.8658045406108332, "grad_norm": 7.642950971610596, "learning_rate": 5.8713433466498214e-08, "loss": 0.6143, "step": 25826 }, { "epoch": 1.8658767858110425, "grad_norm": 6.068521557013656, "learning_rate": 5.865043020983896e-08, "loss": 0.5725, "step": 25827 }, { "epoch": 1.8659490310112523, "grad_norm": 8.471336504666366, "learning_rate": 5.858746037335994e-08, "loss": 0.5978, "step": 25828 }, { "epoch": 1.8660212762114616, "grad_norm": 8.27180789103161, "learning_rate": 5.852452395792268e-08, "loss": 0.6086, "step": 25829 }, { "epoch": 1.8660935214116712, "grad_norm": 6.61499938457368, "learning_rate": 5.846162096438901e-08, "loss": 0.5526, "step": 25830 }, { "epoch": 1.8661657666118807, "grad_norm": 7.746705370142644, "learning_rate": 5.839875139362017e-08, "loss": 0.6787, "step": 25831 }, { "epoch": 1.8662380118120903, "grad_norm": 6.854970156370235, "learning_rate": 5.8335915246476585e-08, "loss": 0.5775, "step": 25832 }, { "epoch": 1.8663102570122998, "grad_norm": 7.166071785774071, "learning_rate": 5.827311252381868e-08, "loss": 0.6369, "step": 25833 }, { "epoch": 1.8663825022125091, "grad_norm": 7.594799832587447, "learning_rate": 5.821034322650576e-08, "loss": 0.5814, "step": 25834 }, { "epoch": 1.8664547474127189, "grad_norm": 6.456959174204507, "learning_rate": 5.8147607355397154e-08, "loss": 0.6055, "step": 25835 }, { "epoch": 1.8665269926129282, "grad_norm": 7.910125144614767, "learning_rate": 5.8084904911352437e-08, "loss": 0.5964, "step": 25836 }, { "epoch": 1.8665992378131377, "grad_norm": 5.723028531512818, "learning_rate": 5.8022235895228706e-08, "loss": 0.6016, "step": 25837 }, { "epoch": 1.8666714830133473, "grad_norm": 7.148366792256176, "learning_rate": 5.795960030788472e-08, "loss": 0.5582, "step": 25838 }, { "epoch": 1.8667437282135568, "grad_norm": 7.258823156052215, "learning_rate": 5.7896998150177577e-08, "loss": 0.6122, "step": 25839 }, { "epoch": 1.8668159734137664, "grad_norm": 8.453696748079865, "learning_rate": 5.783442942296463e-08, "loss": 0.6051, "step": 25840 }, { "epoch": 1.8668882186139757, "grad_norm": 6.698267459335071, "learning_rate": 5.777189412710188e-08, "loss": 0.5521, "step": 25841 }, { "epoch": 1.8669604638141855, "grad_norm": 6.666515761892748, "learning_rate": 5.7709392263445574e-08, "loss": 0.6254, "step": 25842 }, { "epoch": 1.8670327090143948, "grad_norm": 7.266313286451078, "learning_rate": 5.7646923832851695e-08, "loss": 0.5785, "step": 25843 }, { "epoch": 1.8671049542146043, "grad_norm": 7.897759141477227, "learning_rate": 5.758448883617484e-08, "loss": 0.6353, "step": 25844 }, { "epoch": 1.8671771994148139, "grad_norm": 8.05046993803123, "learning_rate": 5.7522087274270163e-08, "loss": 0.5433, "step": 25845 }, { "epoch": 1.8672494446150234, "grad_norm": 9.290998175369886, "learning_rate": 5.745971914799142e-08, "loss": 0.5859, "step": 25846 }, { "epoch": 1.867321689815233, "grad_norm": 7.1932334257063575, "learning_rate": 5.739738445819265e-08, "loss": 0.5602, "step": 25847 }, { "epoch": 1.8673939350154423, "grad_norm": 7.645342846624449, "learning_rate": 5.733508320572706e-08, "loss": 0.6117, "step": 25848 }, { "epoch": 1.867466180215652, "grad_norm": 6.821293466466903, "learning_rate": 5.727281539144758e-08, "loss": 0.5966, "step": 25849 }, { "epoch": 1.8675384254158613, "grad_norm": 8.51240352626362, "learning_rate": 5.7210581016206577e-08, "loss": 0.5462, "step": 25850 }, { "epoch": 1.8676106706160711, "grad_norm": 9.154523354689882, "learning_rate": 5.7148380080856156e-08, "loss": 0.6554, "step": 25851 }, { "epoch": 1.8676829158162804, "grad_norm": 7.345185614331633, "learning_rate": 5.708621258624758e-08, "loss": 0.59, "step": 25852 }, { "epoch": 1.86775516101649, "grad_norm": 7.400674129400651, "learning_rate": 5.702407853323211e-08, "loss": 0.5325, "step": 25853 }, { "epoch": 1.8678274062166995, "grad_norm": 8.240778074842726, "learning_rate": 5.696197792266017e-08, "loss": 0.599, "step": 25854 }, { "epoch": 1.8678996514169088, "grad_norm": 9.683481478146053, "learning_rate": 5.689991075538165e-08, "loss": 0.6977, "step": 25855 }, { "epoch": 1.8679718966171186, "grad_norm": 7.058498362924248, "learning_rate": 5.683787703224641e-08, "loss": 0.5372, "step": 25856 }, { "epoch": 1.868044141817328, "grad_norm": 7.9557297793490624, "learning_rate": 5.677587675410351e-08, "loss": 0.5798, "step": 25857 }, { "epoch": 1.8681163870175377, "grad_norm": 7.548202745359891, "learning_rate": 5.6713909921801976e-08, "loss": 0.5992, "step": 25858 }, { "epoch": 1.868188632217747, "grad_norm": 7.580125179012032, "learning_rate": 5.6651976536190036e-08, "loss": 0.6154, "step": 25859 }, { "epoch": 1.8682608774179565, "grad_norm": 8.35875800877252, "learning_rate": 5.6590076598115326e-08, "loss": 0.6272, "step": 25860 }, { "epoch": 1.868333122618166, "grad_norm": 9.600457546138873, "learning_rate": 5.652821010842552e-08, "loss": 0.5751, "step": 25861 }, { "epoch": 1.8684053678183754, "grad_norm": 8.444058956025357, "learning_rate": 5.646637706796687e-08, "loss": 0.6277, "step": 25862 }, { "epoch": 1.8684776130185852, "grad_norm": 6.052666308305561, "learning_rate": 5.6404577477586486e-08, "loss": 0.5952, "step": 25863 }, { "epoch": 1.8685498582187945, "grad_norm": 6.886149572781636, "learning_rate": 5.63428113381298e-08, "loss": 0.5418, "step": 25864 }, { "epoch": 1.8686221034190043, "grad_norm": 7.8333841957984145, "learning_rate": 5.6281078650443077e-08, "loss": 0.5929, "step": 25865 }, { "epoch": 1.8686943486192136, "grad_norm": 7.92971107216324, "learning_rate": 5.6219379415370644e-08, "loss": 0.6142, "step": 25866 }, { "epoch": 1.8687665938194231, "grad_norm": 7.415516139025947, "learning_rate": 5.615771363375766e-08, "loss": 0.6483, "step": 25867 }, { "epoch": 1.8688388390196327, "grad_norm": 7.059269529165209, "learning_rate": 5.609608130644789e-08, "loss": 0.6504, "step": 25868 }, { "epoch": 1.868911084219842, "grad_norm": 7.2239480067818524, "learning_rate": 5.603448243428511e-08, "loss": 0.5954, "step": 25869 }, { "epoch": 1.8689833294200517, "grad_norm": 9.301966092238368, "learning_rate": 5.5972917018112805e-08, "loss": 0.6188, "step": 25870 }, { "epoch": 1.869055574620261, "grad_norm": 8.648181666834834, "learning_rate": 5.5911385058773635e-08, "loss": 0.606, "step": 25871 }, { "epoch": 1.8691278198204708, "grad_norm": 7.187033198581301, "learning_rate": 5.584988655710999e-08, "loss": 0.6176, "step": 25872 }, { "epoch": 1.8692000650206801, "grad_norm": 7.678157432776434, "learning_rate": 5.578842151396341e-08, "loss": 0.5996, "step": 25873 }, { "epoch": 1.8692723102208897, "grad_norm": 7.43501996918224, "learning_rate": 5.572698993017572e-08, "loss": 0.5915, "step": 25874 }, { "epoch": 1.8693445554210992, "grad_norm": 6.561362322330809, "learning_rate": 5.566559180658737e-08, "loss": 0.5606, "step": 25875 }, { "epoch": 1.8694168006213088, "grad_norm": 7.329416905378733, "learning_rate": 5.560422714403962e-08, "loss": 0.6795, "step": 25876 }, { "epoch": 1.8694890458215183, "grad_norm": 8.16546134098575, "learning_rate": 5.5542895943371814e-08, "loss": 0.5778, "step": 25877 }, { "epoch": 1.8695612910217276, "grad_norm": 6.555236284200533, "learning_rate": 5.548159820542354e-08, "loss": 0.6259, "step": 25878 }, { "epoch": 1.8696335362219374, "grad_norm": 5.785750003818818, "learning_rate": 5.542033393103441e-08, "loss": 0.4748, "step": 25879 }, { "epoch": 1.8697057814221467, "grad_norm": 6.867977925748904, "learning_rate": 5.535910312104292e-08, "loss": 0.5997, "step": 25880 }, { "epoch": 1.8697780266223563, "grad_norm": 8.721839712817143, "learning_rate": 5.529790577628674e-08, "loss": 0.664, "step": 25881 }, { "epoch": 1.8698502718225658, "grad_norm": 8.516607777291764, "learning_rate": 5.523674189760436e-08, "loss": 0.5841, "step": 25882 }, { "epoch": 1.8699225170227753, "grad_norm": 6.295193355338938, "learning_rate": 5.5175611485832895e-08, "loss": 0.5656, "step": 25883 }, { "epoch": 1.8699947622229849, "grad_norm": 6.390674245779848, "learning_rate": 5.5114514541808884e-08, "loss": 0.6585, "step": 25884 }, { "epoch": 1.8700670074231942, "grad_norm": 7.3316222997966145, "learning_rate": 5.505345106636861e-08, "loss": 0.6396, "step": 25885 }, { "epoch": 1.870139252623404, "grad_norm": 6.522339213309636, "learning_rate": 5.499242106034836e-08, "loss": 0.6163, "step": 25886 }, { "epoch": 1.8702114978236133, "grad_norm": 6.534636719113102, "learning_rate": 5.493142452458355e-08, "loss": 0.5621, "step": 25887 }, { "epoch": 1.8702837430238228, "grad_norm": 6.176361539289428, "learning_rate": 5.487046145990882e-08, "loss": 0.5608, "step": 25888 }, { "epoch": 1.8703559882240324, "grad_norm": 7.043902732459165, "learning_rate": 5.480953186715904e-08, "loss": 0.5791, "step": 25889 }, { "epoch": 1.870428233424242, "grad_norm": 8.09210023304461, "learning_rate": 5.4748635747168546e-08, "loss": 0.5916, "step": 25890 }, { "epoch": 1.8705004786244515, "grad_norm": 7.269143973093565, "learning_rate": 5.468777310077028e-08, "loss": 0.6709, "step": 25891 }, { "epoch": 1.8705727238246608, "grad_norm": 6.393432895705915, "learning_rate": 5.462694392879775e-08, "loss": 0.5727, "step": 25892 }, { "epoch": 1.8706449690248705, "grad_norm": 7.434964557247687, "learning_rate": 5.4566148232083614e-08, "loss": 0.5753, "step": 25893 }, { "epoch": 1.8707172142250799, "grad_norm": 7.873435455400732, "learning_rate": 5.450538601146055e-08, "loss": 0.6072, "step": 25894 }, { "epoch": 1.8707894594252894, "grad_norm": 8.854843676246448, "learning_rate": 5.444465726775955e-08, "loss": 0.692, "step": 25895 }, { "epoch": 1.870861704625499, "grad_norm": 8.236805392488673, "learning_rate": 5.4383962001812174e-08, "loss": 0.6341, "step": 25896 }, { "epoch": 1.8709339498257085, "grad_norm": 6.298545294037737, "learning_rate": 5.43233002144497e-08, "loss": 0.5918, "step": 25897 }, { "epoch": 1.871006195025918, "grad_norm": 6.912461745309777, "learning_rate": 5.426267190650231e-08, "loss": 0.5586, "step": 25898 }, { "epoch": 1.8710784402261273, "grad_norm": 6.335101466401277, "learning_rate": 5.420207707879988e-08, "loss": 0.5875, "step": 25899 }, { "epoch": 1.8711506854263371, "grad_norm": 8.009804916006411, "learning_rate": 5.414151573217202e-08, "loss": 0.6368, "step": 25900 }, { "epoch": 1.8712229306265464, "grad_norm": 7.151355652237897, "learning_rate": 5.408098786744809e-08, "loss": 0.6041, "step": 25901 }, { "epoch": 1.871295175826756, "grad_norm": 8.057617128321567, "learning_rate": 5.4020493485455736e-08, "loss": 0.6735, "step": 25902 }, { "epoch": 1.8713674210269655, "grad_norm": 7.966356344310805, "learning_rate": 5.396003258702403e-08, "loss": 0.5505, "step": 25903 }, { "epoch": 1.871439666227175, "grad_norm": 7.174484011076707, "learning_rate": 5.38996051729801e-08, "loss": 0.6258, "step": 25904 }, { "epoch": 1.8715119114273846, "grad_norm": 8.310746775283834, "learning_rate": 5.383921124415131e-08, "loss": 0.6262, "step": 25905 }, { "epoch": 1.871584156627594, "grad_norm": 6.721072057404985, "learning_rate": 5.377885080136397e-08, "loss": 0.5943, "step": 25906 }, { "epoch": 1.8716564018278037, "grad_norm": 7.7322361310255845, "learning_rate": 5.371852384544518e-08, "loss": 0.6352, "step": 25907 }, { "epoch": 1.871728647028013, "grad_norm": 6.754174248677194, "learning_rate": 5.3658230377220674e-08, "loss": 0.604, "step": 25908 }, { "epoch": 1.8718008922282225, "grad_norm": 6.34534573333762, "learning_rate": 5.359797039751535e-08, "loss": 0.6106, "step": 25909 }, { "epoch": 1.871873137428432, "grad_norm": 7.280308478390689, "learning_rate": 5.35377439071541e-08, "loss": 0.5627, "step": 25910 }, { "epoch": 1.8719453826286416, "grad_norm": 7.278044293727201, "learning_rate": 5.3477550906961825e-08, "loss": 0.6059, "step": 25911 }, { "epoch": 1.8720176278288512, "grad_norm": 8.196921947837817, "learning_rate": 5.341739139776203e-08, "loss": 0.5823, "step": 25912 }, { "epoch": 1.8720898730290605, "grad_norm": 6.990812266556744, "learning_rate": 5.3357265380378774e-08, "loss": 0.5523, "step": 25913 }, { "epoch": 1.8721621182292703, "grad_norm": 7.895561139758841, "learning_rate": 5.329717285563446e-08, "loss": 0.6169, "step": 25914 }, { "epoch": 1.8722343634294796, "grad_norm": 6.86168699468913, "learning_rate": 5.323711382435232e-08, "loss": 0.6159, "step": 25915 }, { "epoch": 1.8723066086296891, "grad_norm": 6.51927730369709, "learning_rate": 5.317708828735446e-08, "loss": 0.522, "step": 25916 }, { "epoch": 1.8723788538298987, "grad_norm": 7.360938681576269, "learning_rate": 5.311709624546218e-08, "loss": 0.5991, "step": 25917 }, { "epoch": 1.8724510990301082, "grad_norm": 6.713096901835079, "learning_rate": 5.305713769949733e-08, "loss": 0.5794, "step": 25918 }, { "epoch": 1.8725233442303177, "grad_norm": 7.989722325700492, "learning_rate": 5.2997212650280074e-08, "loss": 0.6428, "step": 25919 }, { "epoch": 1.872595589430527, "grad_norm": 8.07655139109799, "learning_rate": 5.2937321098630865e-08, "loss": 0.6118, "step": 25920 }, { "epoch": 1.8726678346307368, "grad_norm": 6.546266659534607, "learning_rate": 5.2877463045369893e-08, "loss": 0.5725, "step": 25921 }, { "epoch": 1.8727400798309461, "grad_norm": 7.815800328291948, "learning_rate": 5.281763849131649e-08, "loss": 0.6413, "step": 25922 }, { "epoch": 1.8728123250311557, "grad_norm": 7.885427202010245, "learning_rate": 5.275784743728946e-08, "loss": 0.6026, "step": 25923 }, { "epoch": 1.8728845702313652, "grad_norm": 5.863966300128521, "learning_rate": 5.2698089884107015e-08, "loss": 0.5863, "step": 25924 }, { "epoch": 1.8729568154315748, "grad_norm": 8.390517250093769, "learning_rate": 5.263836583258769e-08, "loss": 0.5678, "step": 25925 }, { "epoch": 1.8730290606317843, "grad_norm": 6.279285332415519, "learning_rate": 5.257867528354915e-08, "loss": 0.5915, "step": 25926 }, { "epoch": 1.8731013058319936, "grad_norm": 8.610934966005116, "learning_rate": 5.2519018237807975e-08, "loss": 0.6939, "step": 25927 }, { "epoch": 1.8731735510322034, "grad_norm": 6.963117435822035, "learning_rate": 5.2459394696181e-08, "loss": 0.5478, "step": 25928 }, { "epoch": 1.8732457962324127, "grad_norm": 6.403026830647685, "learning_rate": 5.23998046594848e-08, "loss": 0.5801, "step": 25929 }, { "epoch": 1.8733180414326225, "grad_norm": 7.111407247311138, "learning_rate": 5.234024812853483e-08, "loss": 0.6137, "step": 25930 }, { "epoch": 1.8733902866328318, "grad_norm": 9.271705934984361, "learning_rate": 5.2280725104146e-08, "loss": 0.6116, "step": 25931 }, { "epoch": 1.8734625318330413, "grad_norm": 7.037643577506851, "learning_rate": 5.2221235587133765e-08, "loss": 0.5618, "step": 25932 }, { "epoch": 1.8735347770332509, "grad_norm": 7.064194467548426, "learning_rate": 5.216177957831248e-08, "loss": 0.6063, "step": 25933 }, { "epoch": 1.8736070222334602, "grad_norm": 8.501200871627821, "learning_rate": 5.210235707849537e-08, "loss": 0.6986, "step": 25934 }, { "epoch": 1.87367926743367, "grad_norm": 8.482689677803961, "learning_rate": 5.2042968088496515e-08, "loss": 0.5674, "step": 25935 }, { "epoch": 1.8737515126338793, "grad_norm": 7.299344480163126, "learning_rate": 5.1983612609128874e-08, "loss": 0.5725, "step": 25936 }, { "epoch": 1.873823757834089, "grad_norm": 6.755096771879803, "learning_rate": 5.192429064120458e-08, "loss": 0.5105, "step": 25937 }, { "epoch": 1.8738960030342984, "grad_norm": 6.733186196460125, "learning_rate": 5.186500218553603e-08, "loss": 0.575, "step": 25938 }, { "epoch": 1.873968248234508, "grad_norm": 7.90511075583459, "learning_rate": 5.1805747242934525e-08, "loss": 0.6445, "step": 25939 }, { "epoch": 1.8740404934347175, "grad_norm": 7.095988857187427, "learning_rate": 5.1746525814211914e-08, "loss": 0.6155, "step": 25940 }, { "epoch": 1.8741127386349268, "grad_norm": 7.273302710049385, "learning_rate": 5.16873379001781e-08, "loss": 0.6364, "step": 25941 }, { "epoch": 1.8741849838351365, "grad_norm": 6.830317444245191, "learning_rate": 5.1628183501643556e-08, "loss": 0.5772, "step": 25942 }, { "epoch": 1.8742572290353459, "grad_norm": 6.415186162136922, "learning_rate": 5.156906261941846e-08, "loss": 0.6149, "step": 25943 }, { "epoch": 1.8743294742355556, "grad_norm": 6.833210672657754, "learning_rate": 5.1509975254311615e-08, "loss": 0.5432, "step": 25944 }, { "epoch": 1.874401719435765, "grad_norm": 6.887230276929266, "learning_rate": 5.145092140713181e-08, "loss": 0.6061, "step": 25945 }, { "epoch": 1.8744739646359745, "grad_norm": 7.797463801900164, "learning_rate": 5.1391901078688136e-08, "loss": 0.5425, "step": 25946 }, { "epoch": 1.874546209836184, "grad_norm": 6.598823800907244, "learning_rate": 5.133291426978798e-08, "loss": 0.5659, "step": 25947 }, { "epoch": 1.8746184550363936, "grad_norm": 8.017719196003128, "learning_rate": 5.1273960981239045e-08, "loss": 0.5865, "step": 25948 }, { "epoch": 1.8746907002366031, "grad_norm": 6.897362146773231, "learning_rate": 5.121504121384818e-08, "loss": 0.5073, "step": 25949 }, { "epoch": 1.8747629454368124, "grad_norm": 7.663074674728088, "learning_rate": 5.115615496842197e-08, "loss": 0.5824, "step": 25950 }, { "epoch": 1.8748351906370222, "grad_norm": 7.571112402512236, "learning_rate": 5.1097302245766975e-08, "loss": 0.5971, "step": 25951 }, { "epoch": 1.8749074358372315, "grad_norm": 7.938089423935244, "learning_rate": 5.103848304668813e-08, "loss": 0.6491, "step": 25952 }, { "epoch": 1.874979681037441, "grad_norm": 8.454649274828114, "learning_rate": 5.097969737199088e-08, "loss": 0.5592, "step": 25953 }, { "epoch": 1.8750519262376506, "grad_norm": 7.891869293909298, "learning_rate": 5.092094522248015e-08, "loss": 0.6093, "step": 25954 }, { "epoch": 1.8751241714378601, "grad_norm": 8.88007134623691, "learning_rate": 5.086222659896001e-08, "loss": 0.6023, "step": 25955 }, { "epoch": 1.8751964166380697, "grad_norm": 7.170227792438604, "learning_rate": 5.080354150223427e-08, "loss": 0.5793, "step": 25956 }, { "epoch": 1.875268661838279, "grad_norm": 7.146421654804712, "learning_rate": 5.0744889933106447e-08, "loss": 0.58, "step": 25957 }, { "epoch": 1.8753409070384888, "grad_norm": 7.067583533253195, "learning_rate": 5.068627189237979e-08, "loss": 0.5272, "step": 25958 }, { "epoch": 1.875413152238698, "grad_norm": 7.530006654288099, "learning_rate": 5.062768738085588e-08, "loss": 0.6239, "step": 25959 }, { "epoch": 1.8754853974389076, "grad_norm": 6.429332403525991, "learning_rate": 5.0569136399337135e-08, "loss": 0.579, "step": 25960 }, { "epoch": 1.8755576426391172, "grad_norm": 7.5882669127258255, "learning_rate": 5.051061894862513e-08, "loss": 0.614, "step": 25961 }, { "epoch": 1.8756298878393267, "grad_norm": 8.155290686071, "learning_rate": 5.04521350295209e-08, "loss": 0.5928, "step": 25962 }, { "epoch": 1.8757021330395363, "grad_norm": 7.91894067596439, "learning_rate": 5.0393684642824626e-08, "loss": 0.6474, "step": 25963 }, { "epoch": 1.8757743782397456, "grad_norm": 8.460522698742524, "learning_rate": 5.033526778933706e-08, "loss": 0.6047, "step": 25964 }, { "epoch": 1.8758466234399553, "grad_norm": 7.997930020513877, "learning_rate": 5.027688446985785e-08, "loss": 0.6053, "step": 25965 }, { "epoch": 1.8759188686401647, "grad_norm": 6.776984917103652, "learning_rate": 5.021853468518578e-08, "loss": 0.5939, "step": 25966 }, { "epoch": 1.8759911138403742, "grad_norm": 9.014421987680223, "learning_rate": 5.016021843611968e-08, "loss": 0.6804, "step": 25967 }, { "epoch": 1.8760633590405837, "grad_norm": 9.089480312083305, "learning_rate": 5.010193572345834e-08, "loss": 0.5669, "step": 25968 }, { "epoch": 1.8761356042407933, "grad_norm": 7.753925682831442, "learning_rate": 5.004368654799918e-08, "loss": 0.5757, "step": 25969 }, { "epoch": 1.8762078494410028, "grad_norm": 6.4574043179998455, "learning_rate": 4.9985470910539624e-08, "loss": 0.5743, "step": 25970 }, { "epoch": 1.8762800946412121, "grad_norm": 7.111734992051375, "learning_rate": 4.9927288811876805e-08, "loss": 0.5556, "step": 25971 }, { "epoch": 1.876352339841422, "grad_norm": 6.987264765019598, "learning_rate": 4.986914025280676e-08, "loss": 0.5807, "step": 25972 }, { "epoch": 1.8764245850416312, "grad_norm": 8.257881792223472, "learning_rate": 4.981102523412579e-08, "loss": 0.5702, "step": 25973 }, { "epoch": 1.8764968302418408, "grad_norm": 8.544676558978614, "learning_rate": 4.9752943756629644e-08, "loss": 0.6299, "step": 25974 }, { "epoch": 1.8765690754420503, "grad_norm": 5.923822651353389, "learning_rate": 4.9694895821113255e-08, "loss": 0.5559, "step": 25975 }, { "epoch": 1.8766413206422599, "grad_norm": 8.032824885105216, "learning_rate": 4.963688142837125e-08, "loss": 0.606, "step": 25976 }, { "epoch": 1.8767135658424694, "grad_norm": 7.664261094999584, "learning_rate": 4.9578900579197455e-08, "loss": 0.6687, "step": 25977 }, { "epoch": 1.8767858110426787, "grad_norm": 7.516384792188215, "learning_rate": 4.952095327438594e-08, "loss": 0.6841, "step": 25978 }, { "epoch": 1.8768580562428885, "grad_norm": 7.335056824883334, "learning_rate": 4.946303951473025e-08, "loss": 0.616, "step": 25979 }, { "epoch": 1.8769303014430978, "grad_norm": 8.035036611894945, "learning_rate": 4.940515930102252e-08, "loss": 0.5979, "step": 25980 }, { "epoch": 1.8770025466433073, "grad_norm": 6.822183149283173, "learning_rate": 4.934731263405546e-08, "loss": 0.6068, "step": 25981 }, { "epoch": 1.8770747918435169, "grad_norm": 7.678026021553097, "learning_rate": 4.928949951462064e-08, "loss": 0.5474, "step": 25982 }, { "epoch": 1.8771470370437264, "grad_norm": 7.529399977001612, "learning_rate": 4.923171994350995e-08, "loss": 0.5843, "step": 25983 }, { "epoch": 1.877219282243936, "grad_norm": 6.771940952573104, "learning_rate": 4.917397392151413e-08, "loss": 0.5141, "step": 25984 }, { "epoch": 1.8772915274441453, "grad_norm": 7.721046754701156, "learning_rate": 4.9116261449423666e-08, "loss": 0.6687, "step": 25985 }, { "epoch": 1.877363772644355, "grad_norm": 6.751737017893178, "learning_rate": 4.9058582528028754e-08, "loss": 0.5366, "step": 25986 }, { "epoch": 1.8774360178445644, "grad_norm": 7.5434458901232135, "learning_rate": 4.900093715811849e-08, "loss": 0.6233, "step": 25987 }, { "epoch": 1.877508263044774, "grad_norm": 6.934892318978722, "learning_rate": 4.894332534048252e-08, "loss": 0.5792, "step": 25988 }, { "epoch": 1.8775805082449835, "grad_norm": 8.409653212284018, "learning_rate": 4.8885747075909095e-08, "loss": 0.5978, "step": 25989 }, { "epoch": 1.877652753445193, "grad_norm": 6.614509770342253, "learning_rate": 4.8828202365187036e-08, "loss": 0.5572, "step": 25990 }, { "epoch": 1.8777249986454025, "grad_norm": 7.277863726055817, "learning_rate": 4.8770691209103217e-08, "loss": 0.5756, "step": 25991 }, { "epoch": 1.8777972438456119, "grad_norm": 7.270790282142946, "learning_rate": 4.871321360844533e-08, "loss": 0.5969, "step": 25992 }, { "epoch": 1.8778694890458216, "grad_norm": 8.781139658170177, "learning_rate": 4.865576956400081e-08, "loss": 0.5209, "step": 25993 }, { "epoch": 1.877941734246031, "grad_norm": 7.243087103646872, "learning_rate": 4.859835907655514e-08, "loss": 0.5663, "step": 25994 }, { "epoch": 1.8780139794462405, "grad_norm": 7.359317722624109, "learning_rate": 4.8540982146894345e-08, "loss": 0.5516, "step": 25995 }, { "epoch": 1.87808622464645, "grad_norm": 6.607168419983654, "learning_rate": 4.8483638775804196e-08, "loss": 0.5813, "step": 25996 }, { "epoch": 1.8781584698466596, "grad_norm": 6.787513196170125, "learning_rate": 4.84263289640699e-08, "loss": 0.5878, "step": 25997 }, { "epoch": 1.8782307150468691, "grad_norm": 7.322588881151479, "learning_rate": 4.836905271247527e-08, "loss": 0.6521, "step": 25998 }, { "epoch": 1.8783029602470784, "grad_norm": 7.886567596604403, "learning_rate": 4.831181002180496e-08, "loss": 0.6425, "step": 25999 }, { "epoch": 1.8783752054472882, "grad_norm": 7.618174765747673, "learning_rate": 4.8254600892841954e-08, "loss": 0.6556, "step": 26000 }, { "epoch": 1.8784474506474975, "grad_norm": 6.6554180932784295, "learning_rate": 4.8197425326370064e-08, "loss": 0.6325, "step": 26001 }, { "epoch": 1.8785196958477073, "grad_norm": 6.575943900117323, "learning_rate": 4.8140283323171723e-08, "loss": 0.5753, "step": 26002 }, { "epoch": 1.8785919410479166, "grad_norm": 6.9551263907690615, "learning_rate": 4.808317488402908e-08, "loss": 0.665, "step": 26003 }, { "epoch": 1.8786641862481261, "grad_norm": 7.165916344450587, "learning_rate": 4.80261000097243e-08, "loss": 0.5421, "step": 26004 }, { "epoch": 1.8787364314483357, "grad_norm": 7.061287564883235, "learning_rate": 4.7969058701038126e-08, "loss": 0.5826, "step": 26005 }, { "epoch": 1.878808676648545, "grad_norm": 7.659580549503735, "learning_rate": 4.791205095875162e-08, "loss": 0.6022, "step": 26006 }, { "epoch": 1.8788809218487548, "grad_norm": 6.832033762596022, "learning_rate": 4.785507678364526e-08, "loss": 0.6105, "step": 26007 }, { "epoch": 1.878953167048964, "grad_norm": 7.394536150866706, "learning_rate": 4.779813617649925e-08, "loss": 0.5751, "step": 26008 }, { "epoch": 1.8790254122491739, "grad_norm": 7.4801099885412, "learning_rate": 4.7741229138092424e-08, "loss": 0.5151, "step": 26009 }, { "epoch": 1.8790976574493832, "grad_norm": 7.80327678222675, "learning_rate": 4.7684355669203875e-08, "loss": 0.5608, "step": 26010 }, { "epoch": 1.8791699026495927, "grad_norm": 6.508577702518178, "learning_rate": 4.76275157706127e-08, "loss": 0.5832, "step": 26011 }, { "epoch": 1.8792421478498023, "grad_norm": 7.028075726625991, "learning_rate": 4.7570709443096606e-08, "loss": 0.6003, "step": 26012 }, { "epoch": 1.8793143930500116, "grad_norm": 6.057914021851299, "learning_rate": 4.7513936687433316e-08, "loss": 0.5589, "step": 26013 }, { "epoch": 1.8793866382502213, "grad_norm": 7.795295264797919, "learning_rate": 4.7457197504399973e-08, "loss": 0.547, "step": 26014 }, { "epoch": 1.8794588834504307, "grad_norm": 7.626227608338194, "learning_rate": 4.740049189477347e-08, "loss": 0.5812, "step": 26015 }, { "epoch": 1.8795311286506404, "grad_norm": 7.728683196076322, "learning_rate": 4.734381985932984e-08, "loss": 0.6365, "step": 26016 }, { "epoch": 1.8796033738508497, "grad_norm": 6.968148107152494, "learning_rate": 4.728718139884486e-08, "loss": 0.5688, "step": 26017 }, { "epoch": 1.8796756190510593, "grad_norm": 7.627494529039489, "learning_rate": 4.72305765140943e-08, "loss": 0.592, "step": 26018 }, { "epoch": 1.8797478642512688, "grad_norm": 8.85635308291168, "learning_rate": 4.717400520585225e-08, "loss": 0.6124, "step": 26019 }, { "epoch": 1.8798201094514784, "grad_norm": 7.4278083294043835, "learning_rate": 4.711746747489365e-08, "loss": 0.6461, "step": 26020 }, { "epoch": 1.879892354651688, "grad_norm": 7.689879506609442, "learning_rate": 4.7060963321992334e-08, "loss": 0.5847, "step": 26021 }, { "epoch": 1.8799645998518972, "grad_norm": 6.70603669779242, "learning_rate": 4.7004492747922125e-08, "loss": 0.5685, "step": 26022 }, { "epoch": 1.880036845052107, "grad_norm": 7.103229121507817, "learning_rate": 4.694805575345574e-08, "loss": 0.5678, "step": 26023 }, { "epoch": 1.8801090902523163, "grad_norm": 7.674229029598378, "learning_rate": 4.68916523393656e-08, "loss": 0.6684, "step": 26024 }, { "epoch": 1.8801813354525259, "grad_norm": 7.917157877356019, "learning_rate": 4.683528250642444e-08, "loss": 0.6069, "step": 26025 }, { "epoch": 1.8802535806527354, "grad_norm": 7.992258856521019, "learning_rate": 4.6778946255403026e-08, "loss": 0.5932, "step": 26026 }, { "epoch": 1.880325825852945, "grad_norm": 7.549504487406514, "learning_rate": 4.672264358707324e-08, "loss": 0.5626, "step": 26027 }, { "epoch": 1.8803980710531545, "grad_norm": 7.487762894894978, "learning_rate": 4.666637450220557e-08, "loss": 0.6479, "step": 26028 }, { "epoch": 1.8804703162533638, "grad_norm": 7.571247432834895, "learning_rate": 4.661013900157024e-08, "loss": 0.628, "step": 26029 }, { "epoch": 1.8805425614535736, "grad_norm": 6.889614039713033, "learning_rate": 4.6553937085937194e-08, "loss": 0.6148, "step": 26030 }, { "epoch": 1.8806148066537829, "grad_norm": 7.480873387799523, "learning_rate": 4.649776875607581e-08, "loss": 0.5803, "step": 26031 }, { "epoch": 1.8806870518539924, "grad_norm": 9.165150557476453, "learning_rate": 4.64416340127552e-08, "loss": 0.614, "step": 26032 }, { "epoch": 1.880759297054202, "grad_norm": 8.252777094587568, "learning_rate": 4.638553285674335e-08, "loss": 0.6635, "step": 26033 }, { "epoch": 1.8808315422544115, "grad_norm": 7.843947936216445, "learning_rate": 4.632946528880855e-08, "loss": 0.5733, "step": 26034 }, { "epoch": 1.880903787454621, "grad_norm": 7.198013974832556, "learning_rate": 4.6273431309717956e-08, "loss": 0.5688, "step": 26035 }, { "epoch": 1.8809760326548304, "grad_norm": 6.900788922749895, "learning_rate": 4.621743092023928e-08, "loss": 0.662, "step": 26036 }, { "epoch": 1.8810482778550401, "grad_norm": 7.089513109972688, "learning_rate": 4.616146412113859e-08, "loss": 0.6223, "step": 26037 }, { "epoch": 1.8811205230552495, "grad_norm": 6.222628663427551, "learning_rate": 4.610553091318193e-08, "loss": 0.6076, "step": 26038 }, { "epoch": 1.881192768255459, "grad_norm": 8.281860329071838, "learning_rate": 4.6049631297135356e-08, "loss": 0.6216, "step": 26039 }, { "epoch": 1.8812650134556685, "grad_norm": 8.286607384115442, "learning_rate": 4.599376527376409e-08, "loss": 0.609, "step": 26040 }, { "epoch": 1.881337258655878, "grad_norm": 6.617968012188602, "learning_rate": 4.593793284383252e-08, "loss": 0.6142, "step": 26041 }, { "epoch": 1.8814095038560876, "grad_norm": 8.19510117250172, "learning_rate": 4.5882134008105314e-08, "loss": 0.5258, "step": 26042 }, { "epoch": 1.881481749056297, "grad_norm": 8.658599865078369, "learning_rate": 4.5826368767346305e-08, "loss": 0.5603, "step": 26043 }, { "epoch": 1.8815539942565067, "grad_norm": 7.499763485040243, "learning_rate": 4.577063712231877e-08, "loss": 0.5629, "step": 26044 }, { "epoch": 1.881626239456716, "grad_norm": 7.2380778185794945, "learning_rate": 4.571493907378544e-08, "loss": 0.5841, "step": 26045 }, { "epoch": 1.8816984846569256, "grad_norm": 8.231556013555206, "learning_rate": 4.565927462250902e-08, "loss": 0.6048, "step": 26046 }, { "epoch": 1.8817707298571351, "grad_norm": 8.30287473600776, "learning_rate": 4.560364376925142e-08, "loss": 0.579, "step": 26047 }, { "epoch": 1.8818429750573447, "grad_norm": 8.178890448123287, "learning_rate": 4.5548046514774235e-08, "loss": 0.6039, "step": 26048 }, { "epoch": 1.8819152202575542, "grad_norm": 9.39839731716784, "learning_rate": 4.549248285983854e-08, "loss": 0.6454, "step": 26049 }, { "epoch": 1.8819874654577635, "grad_norm": 5.7026437216662735, "learning_rate": 4.543695280520538e-08, "loss": 0.5507, "step": 26050 }, { "epoch": 1.8820597106579733, "grad_norm": 7.345688064993868, "learning_rate": 4.5381456351634155e-08, "loss": 0.534, "step": 26051 }, { "epoch": 1.8821319558581826, "grad_norm": 8.216514370366397, "learning_rate": 4.532599349988481e-08, "loss": 0.6219, "step": 26052 }, { "epoch": 1.8822042010583921, "grad_norm": 6.70740377805709, "learning_rate": 4.5270564250716745e-08, "loss": 0.5367, "step": 26053 }, { "epoch": 1.8822764462586017, "grad_norm": 7.632533619841305, "learning_rate": 4.521516860488878e-08, "loss": 0.5758, "step": 26054 }, { "epoch": 1.8823486914588112, "grad_norm": 7.059301951968925, "learning_rate": 4.5159806563158934e-08, "loss": 0.5722, "step": 26055 }, { "epoch": 1.8824209366590208, "grad_norm": 7.75686144881632, "learning_rate": 4.510447812628549e-08, "loss": 0.5562, "step": 26056 }, { "epoch": 1.88249318185923, "grad_norm": 7.438976750303279, "learning_rate": 4.504918329502533e-08, "loss": 0.5987, "step": 26057 }, { "epoch": 1.8825654270594399, "grad_norm": 8.021493172855536, "learning_rate": 4.4993922070135924e-08, "loss": 0.6047, "step": 26058 }, { "epoch": 1.8826376722596492, "grad_norm": 7.040194235202944, "learning_rate": 4.493869445237359e-08, "loss": 0.5718, "step": 26059 }, { "epoch": 1.8827099174598587, "grad_norm": 6.767251187088476, "learning_rate": 4.488350044249412e-08, "loss": 0.5097, "step": 26060 }, { "epoch": 1.8827821626600683, "grad_norm": 6.408482306856665, "learning_rate": 4.482834004125358e-08, "loss": 0.5455, "step": 26061 }, { "epoch": 1.8828544078602778, "grad_norm": 6.921419494658948, "learning_rate": 4.477321324940637e-08, "loss": 0.6119, "step": 26062 }, { "epoch": 1.8829266530604873, "grad_norm": 7.034226800415771, "learning_rate": 4.471812006770771e-08, "loss": 0.597, "step": 26063 }, { "epoch": 1.8829988982606967, "grad_norm": 7.813002425250658, "learning_rate": 4.4663060496911435e-08, "loss": 0.5808, "step": 26064 }, { "epoch": 1.8830711434609064, "grad_norm": 8.434254113794218, "learning_rate": 4.4608034537771685e-08, "loss": 0.6299, "step": 26065 }, { "epoch": 1.8831433886611157, "grad_norm": 7.795729558740948, "learning_rate": 4.4553042191041176e-08, "loss": 0.5746, "step": 26066 }, { "epoch": 1.8832156338613253, "grad_norm": 6.863527470505156, "learning_rate": 4.449808345747319e-08, "loss": 0.547, "step": 26067 }, { "epoch": 1.8832878790615348, "grad_norm": 7.923176494881518, "learning_rate": 4.444315833781965e-08, "loss": 0.6334, "step": 26068 }, { "epoch": 1.8833601242617444, "grad_norm": 8.59742508421058, "learning_rate": 4.438826683283298e-08, "loss": 0.621, "step": 26069 }, { "epoch": 1.883432369461954, "grad_norm": 8.511935661531675, "learning_rate": 4.433340894326399e-08, "loss": 0.6287, "step": 26070 }, { "epoch": 1.8835046146621632, "grad_norm": 7.981837875474628, "learning_rate": 4.427858466986401e-08, "loss": 0.5789, "step": 26071 }, { "epoch": 1.883576859862373, "grad_norm": 6.862286551513853, "learning_rate": 4.422379401338356e-08, "loss": 0.5645, "step": 26072 }, { "epoch": 1.8836491050625823, "grad_norm": 7.058729937811825, "learning_rate": 4.4169036974572586e-08, "loss": 0.6337, "step": 26073 }, { "epoch": 1.8837213502627919, "grad_norm": 7.6736799863214715, "learning_rate": 4.41143135541805e-08, "loss": 0.6166, "step": 26074 }, { "epoch": 1.8837935954630014, "grad_norm": 8.12796952195348, "learning_rate": 4.405962375295669e-08, "loss": 0.6533, "step": 26075 }, { "epoch": 1.883865840663211, "grad_norm": 6.439186245701854, "learning_rate": 4.400496757164946e-08, "loss": 0.5824, "step": 26076 }, { "epoch": 1.8839380858634205, "grad_norm": 8.097673209099462, "learning_rate": 4.3950345011007644e-08, "loss": 0.5599, "step": 26077 }, { "epoch": 1.8840103310636298, "grad_norm": 7.726163570338451, "learning_rate": 4.3895756071778425e-08, "loss": 0.6394, "step": 26078 }, { "epoch": 1.8840825762638396, "grad_norm": 8.234526949499362, "learning_rate": 4.384120075470927e-08, "loss": 0.6049, "step": 26079 }, { "epoch": 1.884154821464049, "grad_norm": 6.707513257559898, "learning_rate": 4.378667906054707e-08, "loss": 0.6175, "step": 26080 }, { "epoch": 1.8842270666642587, "grad_norm": 7.588926188183494, "learning_rate": 4.3732190990037895e-08, "loss": 0.6767, "step": 26081 }, { "epoch": 1.884299311864468, "grad_norm": 6.509822759368986, "learning_rate": 4.3677736543928094e-08, "loss": 0.6387, "step": 26082 }, { "epoch": 1.8843715570646775, "grad_norm": 9.109507747880311, "learning_rate": 4.3623315722962914e-08, "loss": 0.5898, "step": 26083 }, { "epoch": 1.884443802264887, "grad_norm": 7.814518538060387, "learning_rate": 4.35689285278873e-08, "loss": 0.6337, "step": 26084 }, { "epoch": 1.8845160474650964, "grad_norm": 8.125658683486979, "learning_rate": 4.351457495944539e-08, "loss": 0.6023, "step": 26085 }, { "epoch": 1.8845882926653061, "grad_norm": 6.800964668513084, "learning_rate": 4.346025501838186e-08, "loss": 0.5978, "step": 26086 }, { "epoch": 1.8846605378655155, "grad_norm": 7.01958722414383, "learning_rate": 4.340596870544e-08, "loss": 0.6424, "step": 26087 }, { "epoch": 1.8847327830657252, "grad_norm": 7.528020522491988, "learning_rate": 4.335171602136312e-08, "loss": 0.6064, "step": 26088 }, { "epoch": 1.8848050282659345, "grad_norm": 6.857745348483599, "learning_rate": 4.329749696689367e-08, "loss": 0.5989, "step": 26089 }, { "epoch": 1.884877273466144, "grad_norm": 8.690332184336862, "learning_rate": 4.3243311542774116e-08, "loss": 0.5441, "step": 26090 }, { "epoch": 1.8849495186663536, "grad_norm": 7.049925556195639, "learning_rate": 4.3189159749746076e-08, "loss": 0.505, "step": 26091 }, { "epoch": 1.885021763866563, "grad_norm": 8.638162493107213, "learning_rate": 4.3135041588550645e-08, "loss": 0.648, "step": 26092 }, { "epoch": 1.8850940090667727, "grad_norm": 7.180570761859008, "learning_rate": 4.308095705992915e-08, "loss": 0.5537, "step": 26093 }, { "epoch": 1.885166254266982, "grad_norm": 7.078934983145803, "learning_rate": 4.3026906164621576e-08, "loss": 0.6486, "step": 26094 }, { "epoch": 1.8852384994671918, "grad_norm": 6.882739617679532, "learning_rate": 4.297288890336787e-08, "loss": 0.6269, "step": 26095 }, { "epoch": 1.8853107446674011, "grad_norm": 8.184257972000033, "learning_rate": 4.2918905276907175e-08, "loss": 0.6006, "step": 26096 }, { "epoch": 1.8853829898676107, "grad_norm": 7.6976667956387725, "learning_rate": 4.286495528597945e-08, "loss": 0.5914, "step": 26097 }, { "epoch": 1.8854552350678202, "grad_norm": 6.603059533476638, "learning_rate": 4.281103893132244e-08, "loss": 0.6143, "step": 26098 }, { "epoch": 1.8855274802680297, "grad_norm": 7.766144702491647, "learning_rate": 4.275715621367443e-08, "loss": 0.5654, "step": 26099 }, { "epoch": 1.8855997254682393, "grad_norm": 6.895981002409715, "learning_rate": 4.270330713377319e-08, "loss": 0.6837, "step": 26100 }, { "epoch": 1.8856719706684486, "grad_norm": 7.023454608832749, "learning_rate": 4.264949169235561e-08, "loss": 0.6549, "step": 26101 }, { "epoch": 1.8857442158686584, "grad_norm": 6.628577076388053, "learning_rate": 4.259570989015832e-08, "loss": 0.6169, "step": 26102 }, { "epoch": 1.8858164610688677, "grad_norm": 7.97807621954033, "learning_rate": 4.254196172791769e-08, "loss": 0.6812, "step": 26103 }, { "epoch": 1.8858887062690772, "grad_norm": 6.243049418845135, "learning_rate": 4.248824720636952e-08, "loss": 0.5784, "step": 26104 }, { "epoch": 1.8859609514692868, "grad_norm": 6.568849525459901, "learning_rate": 4.2434566326249314e-08, "loss": 0.6502, "step": 26105 }, { "epoch": 1.8860331966694963, "grad_norm": 7.268777930221624, "learning_rate": 4.23809190882915e-08, "loss": 0.539, "step": 26106 }, { "epoch": 1.8861054418697059, "grad_norm": 7.804893758223006, "learning_rate": 4.232730549323077e-08, "loss": 0.5877, "step": 26107 }, { "epoch": 1.8861776870699152, "grad_norm": 6.348481617013058, "learning_rate": 4.2273725541801257e-08, "loss": 0.5855, "step": 26108 }, { "epoch": 1.886249932270125, "grad_norm": 7.2713159730701165, "learning_rate": 4.2220179234735704e-08, "loss": 0.6114, "step": 26109 }, { "epoch": 1.8863221774703343, "grad_norm": 6.996228155552177, "learning_rate": 4.216666657276769e-08, "loss": 0.5198, "step": 26110 }, { "epoch": 1.8863944226705438, "grad_norm": 7.181931171033247, "learning_rate": 4.211318755662996e-08, "loss": 0.6594, "step": 26111 }, { "epoch": 1.8864666678707533, "grad_norm": 8.250902531125003, "learning_rate": 4.205974218705389e-08, "loss": 0.5834, "step": 26112 }, { "epoch": 1.8865389130709629, "grad_norm": 7.218981454284578, "learning_rate": 4.200633046477165e-08, "loss": 0.5864, "step": 26113 }, { "epoch": 1.8866111582711724, "grad_norm": 6.5801398664178175, "learning_rate": 4.1952952390514055e-08, "loss": 0.6172, "step": 26114 }, { "epoch": 1.8866834034713817, "grad_norm": 7.913317992729266, "learning_rate": 4.189960796501219e-08, "loss": 0.548, "step": 26115 }, { "epoch": 1.8867556486715915, "grad_norm": 8.217615085465557, "learning_rate": 4.1846297188995743e-08, "loss": 0.5947, "step": 26116 }, { "epoch": 1.8868278938718008, "grad_norm": 8.334904128486103, "learning_rate": 4.1793020063195244e-08, "loss": 0.594, "step": 26117 }, { "epoch": 1.8869001390720104, "grad_norm": 7.176440944260939, "learning_rate": 4.173977658833955e-08, "loss": 0.6045, "step": 26118 }, { "epoch": 1.88697238427222, "grad_norm": 7.272354914062827, "learning_rate": 4.1686566765157534e-08, "loss": 0.5846, "step": 26119 }, { "epoch": 1.8870446294724295, "grad_norm": 6.728901136452399, "learning_rate": 4.163339059437777e-08, "loss": 0.6123, "step": 26120 }, { "epoch": 1.887116874672639, "grad_norm": 8.24435694197666, "learning_rate": 4.1580248076728e-08, "loss": 0.5548, "step": 26121 }, { "epoch": 1.8871891198728483, "grad_norm": 6.666086584921172, "learning_rate": 4.1527139212935996e-08, "loss": 0.5689, "step": 26122 }, { "epoch": 1.887261365073058, "grad_norm": 7.3889935166576635, "learning_rate": 4.147406400372811e-08, "loss": 0.5521, "step": 26123 }, { "epoch": 1.8873336102732674, "grad_norm": 7.174293658748095, "learning_rate": 4.1421022449831535e-08, "loss": 0.537, "step": 26124 }, { "epoch": 1.887405855473477, "grad_norm": 6.45145747551277, "learning_rate": 4.136801455197237e-08, "loss": 0.5165, "step": 26125 }, { "epoch": 1.8874781006736865, "grad_norm": 7.541708874101361, "learning_rate": 4.131504031087613e-08, "loss": 0.5822, "step": 26126 }, { "epoch": 1.887550345873896, "grad_norm": 9.14616552168175, "learning_rate": 4.126209972726752e-08, "loss": 0.5535, "step": 26127 }, { "epoch": 1.8876225910741056, "grad_norm": 6.833765558979024, "learning_rate": 4.120919280187208e-08, "loss": 0.6242, "step": 26128 }, { "epoch": 1.887694836274315, "grad_norm": 7.009988742443846, "learning_rate": 4.1156319535413656e-08, "loss": 0.587, "step": 26129 }, { "epoch": 1.8877670814745247, "grad_norm": 7.780760722882937, "learning_rate": 4.110347992861585e-08, "loss": 0.5504, "step": 26130 }, { "epoch": 1.887839326674734, "grad_norm": 7.086872278904832, "learning_rate": 4.1050673982202246e-08, "loss": 0.5873, "step": 26131 }, { "epoch": 1.8879115718749435, "grad_norm": 6.6971764835311385, "learning_rate": 4.099790169689588e-08, "loss": 0.6461, "step": 26132 }, { "epoch": 1.887983817075153, "grad_norm": 7.71348608504796, "learning_rate": 4.094516307341867e-08, "loss": 0.5721, "step": 26133 }, { "epoch": 1.8880560622753626, "grad_norm": 7.784649332389448, "learning_rate": 4.089245811249282e-08, "loss": 0.5984, "step": 26134 }, { "epoch": 1.8881283074755721, "grad_norm": 8.624146350135454, "learning_rate": 4.083978681483996e-08, "loss": 0.6677, "step": 26135 }, { "epoch": 1.8882005526757815, "grad_norm": 7.68931000033488, "learning_rate": 4.0787149181181205e-08, "loss": 0.6851, "step": 26136 }, { "epoch": 1.8882727978759912, "grad_norm": 6.589720448424114, "learning_rate": 4.0734545212236795e-08, "loss": 0.6091, "step": 26137 }, { "epoch": 1.8883450430762005, "grad_norm": 7.472318044408115, "learning_rate": 4.0681974908726994e-08, "loss": 0.6148, "step": 26138 }, { "epoch": 1.88841728827641, "grad_norm": 7.493580932237156, "learning_rate": 4.0629438271371225e-08, "loss": 0.615, "step": 26139 }, { "epoch": 1.8884895334766196, "grad_norm": 7.162621206486311, "learning_rate": 4.057693530088919e-08, "loss": 0.6332, "step": 26140 }, { "epoch": 1.8885617786768292, "grad_norm": 6.796249466096718, "learning_rate": 4.052446599799892e-08, "loss": 0.4996, "step": 26141 }, { "epoch": 1.8886340238770387, "grad_norm": 6.110043384254379, "learning_rate": 4.047203036341929e-08, "loss": 0.5965, "step": 26142 }, { "epoch": 1.888706269077248, "grad_norm": 7.378064828035157, "learning_rate": 4.041962839786778e-08, "loss": 0.6181, "step": 26143 }, { "epoch": 1.8887785142774578, "grad_norm": 7.1993204855806745, "learning_rate": 4.03672601020616e-08, "loss": 0.5249, "step": 26144 }, { "epoch": 1.8888507594776671, "grad_norm": 7.849776790871439, "learning_rate": 4.031492547671822e-08, "loss": 0.623, "step": 26145 }, { "epoch": 1.8889230046778767, "grad_norm": 7.815456471843551, "learning_rate": 4.026262452255347e-08, "loss": 0.5974, "step": 26146 }, { "epoch": 1.8889952498780862, "grad_norm": 6.7470088971956965, "learning_rate": 4.021035724028372e-08, "loss": 0.6012, "step": 26147 }, { "epoch": 1.8890674950782957, "grad_norm": 7.595960256158188, "learning_rate": 4.015812363062421e-08, "loss": 0.5866, "step": 26148 }, { "epoch": 1.8891397402785053, "grad_norm": 7.952561394107611, "learning_rate": 4.010592369428995e-08, "loss": 0.6109, "step": 26149 }, { "epoch": 1.8892119854787146, "grad_norm": 7.679387968630958, "learning_rate": 4.005375743199563e-08, "loss": 0.561, "step": 26150 }, { "epoch": 1.8892842306789244, "grad_norm": 7.442150809617855, "learning_rate": 4.000162484445541e-08, "loss": 0.5916, "step": 26151 }, { "epoch": 1.8893564758791337, "grad_norm": 8.590992544116565, "learning_rate": 3.994952593238288e-08, "loss": 0.6318, "step": 26152 }, { "epoch": 1.8894287210793435, "grad_norm": 7.289555736343056, "learning_rate": 3.989746069649081e-08, "loss": 0.6133, "step": 26153 }, { "epoch": 1.8895009662795528, "grad_norm": 7.298507475144907, "learning_rate": 3.9845429137493066e-08, "loss": 0.5876, "step": 26154 }, { "epoch": 1.8895732114797623, "grad_norm": 7.958331069812819, "learning_rate": 3.979343125610074e-08, "loss": 0.6616, "step": 26155 }, { "epoch": 1.8896454566799719, "grad_norm": 9.254228604822366, "learning_rate": 3.974146705302606e-08, "loss": 0.5797, "step": 26156 }, { "epoch": 1.8897177018801812, "grad_norm": 7.971902380836087, "learning_rate": 3.968953652898094e-08, "loss": 0.5907, "step": 26157 }, { "epoch": 1.889789947080391, "grad_norm": 6.75996595701415, "learning_rate": 3.963763968467538e-08, "loss": 0.5442, "step": 26158 }, { "epoch": 1.8898621922806003, "grad_norm": 7.1084794035822, "learning_rate": 3.958577652082019e-08, "loss": 0.6522, "step": 26159 }, { "epoch": 1.88993443748081, "grad_norm": 8.466361859859868, "learning_rate": 3.953394703812535e-08, "loss": 0.5859, "step": 26160 }, { "epoch": 1.8900066826810193, "grad_norm": 7.43093017711236, "learning_rate": 3.948215123730032e-08, "loss": 0.6095, "step": 26161 }, { "epoch": 1.8900789278812289, "grad_norm": 9.298789768023704, "learning_rate": 3.9430389119054224e-08, "loss": 0.6661, "step": 26162 }, { "epoch": 1.8901511730814384, "grad_norm": 7.094523786783047, "learning_rate": 3.937866068409568e-08, "loss": 0.6069, "step": 26163 }, { "epoch": 1.8902234182816477, "grad_norm": 5.776991476857279, "learning_rate": 3.9326965933132454e-08, "loss": 0.5775, "step": 26164 }, { "epoch": 1.8902956634818575, "grad_norm": 7.534256331127496, "learning_rate": 3.9275304866872865e-08, "loss": 0.6851, "step": 26165 }, { "epoch": 1.8903679086820668, "grad_norm": 6.082683824676927, "learning_rate": 3.922367748602357e-08, "loss": 0.6123, "step": 26166 }, { "epoch": 1.8904401538822766, "grad_norm": 8.929913909257905, "learning_rate": 3.917208379129123e-08, "loss": 0.5693, "step": 26167 }, { "epoch": 1.890512399082486, "grad_norm": 7.787708956883221, "learning_rate": 3.912052378338277e-08, "loss": 0.583, "step": 26168 }, { "epoch": 1.8905846442826955, "grad_norm": 7.66468713608344, "learning_rate": 3.906899746300319e-08, "loss": 0.6308, "step": 26169 }, { "epoch": 1.890656889482905, "grad_norm": 7.78494432374306, "learning_rate": 3.901750483085859e-08, "loss": 0.5676, "step": 26170 }, { "epoch": 1.8907291346831145, "grad_norm": 7.405956664895795, "learning_rate": 3.8966045887653124e-08, "loss": 0.5433, "step": 26171 }, { "epoch": 1.890801379883324, "grad_norm": 8.173137618309877, "learning_rate": 3.8914620634091795e-08, "loss": 0.6224, "step": 26172 }, { "epoch": 1.8908736250835334, "grad_norm": 8.649965724987414, "learning_rate": 3.886322907087847e-08, "loss": 0.5754, "step": 26173 }, { "epoch": 1.8909458702837432, "grad_norm": 6.73247570329511, "learning_rate": 3.881187119871649e-08, "loss": 0.5724, "step": 26174 }, { "epoch": 1.8910181154839525, "grad_norm": 9.29943340585446, "learning_rate": 3.876054701830917e-08, "loss": 0.5916, "step": 26175 }, { "epoch": 1.891090360684162, "grad_norm": 8.369507767982554, "learning_rate": 3.870925653035901e-08, "loss": 0.6404, "step": 26176 }, { "epoch": 1.8911626058843716, "grad_norm": 7.603752173706125, "learning_rate": 3.8657999735567664e-08, "loss": 0.6702, "step": 26177 }, { "epoch": 1.8912348510845811, "grad_norm": 7.1487480721468595, "learning_rate": 3.8606776634637355e-08, "loss": 0.6043, "step": 26178 }, { "epoch": 1.8913070962847907, "grad_norm": 7.174443601507432, "learning_rate": 3.8555587228269465e-08, "loss": 0.5266, "step": 26179 }, { "epoch": 1.891379341485, "grad_norm": 8.358523887016428, "learning_rate": 3.8504431517163987e-08, "loss": 0.58, "step": 26180 }, { "epoch": 1.8914515866852097, "grad_norm": 7.4579456022554345, "learning_rate": 3.845330950202175e-08, "loss": 0.5557, "step": 26181 }, { "epoch": 1.891523831885419, "grad_norm": 6.760799105210539, "learning_rate": 3.8402221183542474e-08, "loss": 0.5582, "step": 26182 }, { "epoch": 1.8915960770856286, "grad_norm": 8.53383544199642, "learning_rate": 3.835116656242532e-08, "loss": 0.5552, "step": 26183 }, { "epoch": 1.8916683222858381, "grad_norm": 8.088465316683951, "learning_rate": 3.8300145639369455e-08, "loss": 0.6569, "step": 26184 }, { "epoch": 1.8917405674860477, "grad_norm": 7.639266129243581, "learning_rate": 3.824915841507321e-08, "loss": 0.5997, "step": 26185 }, { "epoch": 1.8918128126862572, "grad_norm": 7.398818524870363, "learning_rate": 3.8198204890234904e-08, "loss": 0.5639, "step": 26186 }, { "epoch": 1.8918850578864665, "grad_norm": 7.538616708808336, "learning_rate": 3.814728506555121e-08, "loss": 0.5895, "step": 26187 }, { "epoch": 1.8919573030866763, "grad_norm": 7.9134069323739284, "learning_rate": 3.80963989417199e-08, "loss": 0.6053, "step": 26188 }, { "epoch": 1.8920295482868856, "grad_norm": 6.842014597859626, "learning_rate": 3.8045546519437095e-08, "loss": 0.5306, "step": 26189 }, { "epoch": 1.8921017934870952, "grad_norm": 8.049606541086874, "learning_rate": 3.799472779939945e-08, "loss": 0.5385, "step": 26190 }, { "epoch": 1.8921740386873047, "grad_norm": 6.722710156487823, "learning_rate": 3.794394278230223e-08, "loss": 0.6055, "step": 26191 }, { "epoch": 1.8922462838875143, "grad_norm": 6.214170677235261, "learning_rate": 3.789319146884074e-08, "loss": 0.611, "step": 26192 }, { "epoch": 1.8923185290877238, "grad_norm": 8.545351105290836, "learning_rate": 3.784247385970996e-08, "loss": 0.6002, "step": 26193 }, { "epoch": 1.8923907742879331, "grad_norm": 7.052078254581428, "learning_rate": 3.7791789955603785e-08, "loss": 0.6212, "step": 26194 }, { "epoch": 1.8924630194881429, "grad_norm": 7.21926759127568, "learning_rate": 3.774113975721638e-08, "loss": 0.6228, "step": 26195 }, { "epoch": 1.8925352646883522, "grad_norm": 7.316042311089805, "learning_rate": 3.769052326524081e-08, "loss": 0.5735, "step": 26196 }, { "epoch": 1.8926075098885617, "grad_norm": 7.929384663629254, "learning_rate": 3.7639940480370396e-08, "loss": 0.5786, "step": 26197 }, { "epoch": 1.8926797550887713, "grad_norm": 7.221313285688993, "learning_rate": 3.7589391403296813e-08, "loss": 0.651, "step": 26198 }, { "epoch": 1.8927520002889808, "grad_norm": 6.262636456554292, "learning_rate": 3.753887603471284e-08, "loss": 0.6021, "step": 26199 }, { "epoch": 1.8928242454891904, "grad_norm": 7.44695315942316, "learning_rate": 3.74883943753096e-08, "loss": 0.5379, "step": 26200 }, { "epoch": 1.8928964906893997, "grad_norm": 8.340756683053309, "learning_rate": 3.7437946425778194e-08, "loss": 0.5501, "step": 26201 }, { "epoch": 1.8929687358896095, "grad_norm": 8.308476212732335, "learning_rate": 3.7387532186809186e-08, "loss": 0.5803, "step": 26202 }, { "epoch": 1.8930409810898188, "grad_norm": 7.188420510433043, "learning_rate": 3.733715165909285e-08, "loss": 0.6315, "step": 26203 }, { "epoch": 1.8931132262900283, "grad_norm": 8.02418201116115, "learning_rate": 3.7286804843318925e-08, "loss": 0.6463, "step": 26204 }, { "epoch": 1.8931854714902379, "grad_norm": 7.459465352962689, "learning_rate": 3.72364917401763e-08, "loss": 0.5559, "step": 26205 }, { "epoch": 1.8932577166904474, "grad_norm": 8.28828747260347, "learning_rate": 3.718621235035386e-08, "loss": 0.691, "step": 26206 }, { "epoch": 1.893329961890657, "grad_norm": 7.207579321555446, "learning_rate": 3.7135966674540225e-08, "loss": 0.5882, "step": 26207 }, { "epoch": 1.8934022070908663, "grad_norm": 7.281158839625157, "learning_rate": 3.7085754713422625e-08, "loss": 0.5789, "step": 26208 }, { "epoch": 1.893474452291076, "grad_norm": 8.301493535616046, "learning_rate": 3.703557646768857e-08, "loss": 0.6026, "step": 26209 }, { "epoch": 1.8935466974912853, "grad_norm": 8.85289450687753, "learning_rate": 3.698543193802528e-08, "loss": 0.6308, "step": 26210 }, { "epoch": 1.8936189426914949, "grad_norm": 7.625413727089, "learning_rate": 3.693532112511916e-08, "loss": 0.5876, "step": 26211 }, { "epoch": 1.8936911878917044, "grad_norm": 6.275735588577197, "learning_rate": 3.688524402965604e-08, "loss": 0.5114, "step": 26212 }, { "epoch": 1.893763433091914, "grad_norm": 7.436509779719073, "learning_rate": 3.683520065232121e-08, "loss": 0.5723, "step": 26213 }, { "epoch": 1.8938356782921235, "grad_norm": 6.707417427547154, "learning_rate": 3.6785190993799957e-08, "loss": 0.583, "step": 26214 }, { "epoch": 1.8939079234923328, "grad_norm": 9.086783031111295, "learning_rate": 3.6735215054777287e-08, "loss": 0.6529, "step": 26215 }, { "epoch": 1.8939801686925426, "grad_norm": 8.44094620015471, "learning_rate": 3.668527283593654e-08, "loss": 0.5674, "step": 26216 }, { "epoch": 1.894052413892752, "grad_norm": 7.146602874632469, "learning_rate": 3.6635364337961885e-08, "loss": 0.6069, "step": 26217 }, { "epoch": 1.8941246590929615, "grad_norm": 7.148051133684315, "learning_rate": 3.65854895615364e-08, "loss": 0.5653, "step": 26218 }, { "epoch": 1.894196904293171, "grad_norm": 9.014605221517174, "learning_rate": 3.653564850734287e-08, "loss": 0.6121, "step": 26219 }, { "epoch": 1.8942691494933805, "grad_norm": 7.000245771180723, "learning_rate": 3.648584117606324e-08, "loss": 0.6287, "step": 26220 }, { "epoch": 1.89434139469359, "grad_norm": 7.1408547128669015, "learning_rate": 3.6436067568380026e-08, "loss": 0.6092, "step": 26221 }, { "epoch": 1.8944136398937994, "grad_norm": 7.8140691783489755, "learning_rate": 3.6386327684974356e-08, "loss": 0.5939, "step": 26222 }, { "epoch": 1.8944858850940092, "grad_norm": 9.114050579668186, "learning_rate": 3.6336621526526504e-08, "loss": 0.6457, "step": 26223 }, { "epoch": 1.8945581302942185, "grad_norm": 8.752468959706455, "learning_rate": 3.628694909371788e-08, "loss": 0.5947, "step": 26224 }, { "epoch": 1.8946303754944283, "grad_norm": 8.661504039234739, "learning_rate": 3.6237310387227665e-08, "loss": 0.6282, "step": 26225 }, { "epoch": 1.8947026206946376, "grad_norm": 7.506392488548588, "learning_rate": 3.618770540773586e-08, "loss": 0.5378, "step": 26226 }, { "epoch": 1.8947748658948471, "grad_norm": 8.011345447787722, "learning_rate": 3.61381341559211e-08, "loss": 0.5953, "step": 26227 }, { "epoch": 1.8948471110950567, "grad_norm": 6.803454632323349, "learning_rate": 3.608859663246228e-08, "loss": 0.6266, "step": 26228 }, { "epoch": 1.894919356295266, "grad_norm": 7.418634417950332, "learning_rate": 3.603909283803747e-08, "loss": 0.5923, "step": 26229 }, { "epoch": 1.8949916014954757, "grad_norm": 8.320743918539385, "learning_rate": 3.598962277332446e-08, "loss": 0.5399, "step": 26230 }, { "epoch": 1.895063846695685, "grad_norm": 7.252330273993055, "learning_rate": 3.594018643900021e-08, "loss": 0.616, "step": 26231 }, { "epoch": 1.8951360918958948, "grad_norm": 9.251720371584108, "learning_rate": 3.589078383574196e-08, "loss": 0.6723, "step": 26232 }, { "epoch": 1.8952083370961041, "grad_norm": 6.660390506327489, "learning_rate": 3.584141496422527e-08, "loss": 0.6018, "step": 26233 }, { "epoch": 1.8952805822963137, "grad_norm": 7.785058985139394, "learning_rate": 3.579207982512628e-08, "loss": 0.5345, "step": 26234 }, { "epoch": 1.8953528274965232, "grad_norm": 7.5676105980404555, "learning_rate": 3.574277841912055e-08, "loss": 0.5929, "step": 26235 }, { "epoch": 1.8954250726967325, "grad_norm": 8.645832499921521, "learning_rate": 3.569351074688282e-08, "loss": 0.6393, "step": 26236 }, { "epoch": 1.8954973178969423, "grad_norm": 8.506408519515091, "learning_rate": 3.5644276809086996e-08, "loss": 0.6617, "step": 26237 }, { "epoch": 1.8955695630971516, "grad_norm": 8.385092959266872, "learning_rate": 3.55950766064081e-08, "loss": 0.6099, "step": 26238 }, { "epoch": 1.8956418082973614, "grad_norm": 6.937078239748672, "learning_rate": 3.554591013951891e-08, "loss": 0.5898, "step": 26239 }, { "epoch": 1.8957140534975707, "grad_norm": 6.866515872126706, "learning_rate": 3.549677740909307e-08, "loss": 0.5394, "step": 26240 }, { "epoch": 1.8957862986977803, "grad_norm": 7.09739799638514, "learning_rate": 3.544767841580254e-08, "loss": 0.6219, "step": 26241 }, { "epoch": 1.8958585438979898, "grad_norm": 7.408389526886399, "learning_rate": 3.5398613160319564e-08, "loss": 0.5864, "step": 26242 }, { "epoch": 1.8959307890981993, "grad_norm": 7.94561400915133, "learning_rate": 3.534958164331609e-08, "loss": 0.5461, "step": 26243 }, { "epoch": 1.8960030342984089, "grad_norm": 6.181911526449534, "learning_rate": 3.5300583865463255e-08, "loss": 0.6008, "step": 26244 }, { "epoch": 1.8960752794986182, "grad_norm": 6.446882502911594, "learning_rate": 3.525161982743136e-08, "loss": 0.5984, "step": 26245 }, { "epoch": 1.896147524698828, "grad_norm": 7.177234251283898, "learning_rate": 3.5202689529891256e-08, "loss": 0.5847, "step": 26246 }, { "epoch": 1.8962197698990373, "grad_norm": 7.411505914581395, "learning_rate": 3.51537929735124e-08, "loss": 0.5606, "step": 26247 }, { "epoch": 1.8962920150992468, "grad_norm": 7.058821538850093, "learning_rate": 3.5104930158964546e-08, "loss": 0.5578, "step": 26248 }, { "epoch": 1.8963642602994564, "grad_norm": 6.837784627677443, "learning_rate": 3.505610108691604e-08, "loss": 0.5967, "step": 26249 }, { "epoch": 1.896436505499666, "grad_norm": 6.777015313097284, "learning_rate": 3.500730575803607e-08, "loss": 0.619, "step": 26250 }, { "epoch": 1.8965087506998755, "grad_norm": 7.107231605123558, "learning_rate": 3.4958544172991606e-08, "loss": 0.6099, "step": 26251 }, { "epoch": 1.8965809959000848, "grad_norm": 8.529031430134987, "learning_rate": 3.4909816332450996e-08, "loss": 0.6031, "step": 26252 }, { "epoch": 1.8966532411002945, "grad_norm": 6.961094230488637, "learning_rate": 3.486112223708094e-08, "loss": 0.6778, "step": 26253 }, { "epoch": 1.8967254863005039, "grad_norm": 7.955112651072879, "learning_rate": 3.481246188754811e-08, "loss": 0.6076, "step": 26254 }, { "epoch": 1.8967977315007134, "grad_norm": 7.353419654088216, "learning_rate": 3.476383528451838e-08, "loss": 0.5942, "step": 26255 }, { "epoch": 1.896869976700923, "grad_norm": 8.319831082023146, "learning_rate": 3.471524242865787e-08, "loss": 0.5852, "step": 26256 }, { "epoch": 1.8969422219011325, "grad_norm": 6.7022846824496805, "learning_rate": 3.4666683320631336e-08, "loss": 0.5912, "step": 26257 }, { "epoch": 1.897014467101342, "grad_norm": 7.538281968581554, "learning_rate": 3.4618157961103794e-08, "loss": 0.686, "step": 26258 }, { "epoch": 1.8970867123015513, "grad_norm": 8.436602057253912, "learning_rate": 3.456966635073916e-08, "loss": 0.5446, "step": 26259 }, { "epoch": 1.8971589575017611, "grad_norm": 6.5770741255313805, "learning_rate": 3.452120849020163e-08, "loss": 0.5371, "step": 26260 }, { "epoch": 1.8972312027019704, "grad_norm": 8.20480591037079, "learning_rate": 3.4472784380154836e-08, "loss": 0.5474, "step": 26261 }, { "epoch": 1.89730344790218, "grad_norm": 7.156228944172763, "learning_rate": 3.442439402126102e-08, "loss": 0.5555, "step": 26262 }, { "epoch": 1.8973756931023895, "grad_norm": 7.880584432161846, "learning_rate": 3.437603741418272e-08, "loss": 0.524, "step": 26263 }, { "epoch": 1.897447938302599, "grad_norm": 7.7873868833014095, "learning_rate": 3.432771455958189e-08, "loss": 0.609, "step": 26264 }, { "epoch": 1.8975201835028086, "grad_norm": 6.9193648012610005, "learning_rate": 3.42794254581208e-08, "loss": 0.5839, "step": 26265 }, { "epoch": 1.897592428703018, "grad_norm": 8.139966121668692, "learning_rate": 3.4231170110459176e-08, "loss": 0.648, "step": 26266 }, { "epoch": 1.8976646739032277, "grad_norm": 8.123245284529867, "learning_rate": 3.418294851725845e-08, "loss": 0.6178, "step": 26267 }, { "epoch": 1.897736919103437, "grad_norm": 8.622967024333004, "learning_rate": 3.413476067917893e-08, "loss": 0.6028, "step": 26268 }, { "epoch": 1.8978091643036465, "grad_norm": 7.865252562758335, "learning_rate": 3.4086606596879514e-08, "loss": 0.6067, "step": 26269 }, { "epoch": 1.897881409503856, "grad_norm": 7.062110552557427, "learning_rate": 3.4038486271019963e-08, "loss": 0.5534, "step": 26270 }, { "epoch": 1.8979536547040656, "grad_norm": 6.691907624514052, "learning_rate": 3.399039970225892e-08, "loss": 0.6144, "step": 26271 }, { "epoch": 1.8980258999042752, "grad_norm": 6.930300989701769, "learning_rate": 3.394234689125475e-08, "loss": 0.5555, "step": 26272 }, { "epoch": 1.8980981451044845, "grad_norm": 7.304353006629607, "learning_rate": 3.38943278386647e-08, "loss": 0.6169, "step": 26273 }, { "epoch": 1.8981703903046943, "grad_norm": 7.3664149084555115, "learning_rate": 3.3846342545146574e-08, "loss": 0.6892, "step": 26274 }, { "epoch": 1.8982426355049036, "grad_norm": 6.766197268486503, "learning_rate": 3.379839101135707e-08, "loss": 0.5502, "step": 26275 }, { "epoch": 1.8983148807051131, "grad_norm": 9.170089729893425, "learning_rate": 3.3750473237952886e-08, "loss": 0.6402, "step": 26276 }, { "epoch": 1.8983871259053227, "grad_norm": 6.52401522516641, "learning_rate": 3.3702589225589887e-08, "loss": 0.6109, "step": 26277 }, { "epoch": 1.8984593711055322, "grad_norm": 8.800555853194462, "learning_rate": 3.365473897492338e-08, "loss": 0.6201, "step": 26278 }, { "epoch": 1.8985316163057417, "grad_norm": 7.3129906122978605, "learning_rate": 3.360692248660868e-08, "loss": 0.6414, "step": 26279 }, { "epoch": 1.898603861505951, "grad_norm": 8.044168139232676, "learning_rate": 3.355913976129999e-08, "loss": 0.6383, "step": 26280 }, { "epoch": 1.8986761067061608, "grad_norm": 8.088034714440349, "learning_rate": 3.3511390799651486e-08, "loss": 0.5542, "step": 26281 }, { "epoch": 1.8987483519063701, "grad_norm": 6.282512480984854, "learning_rate": 3.3463675602317115e-08, "loss": 0.569, "step": 26282 }, { "epoch": 1.8988205971065797, "grad_norm": 6.497104880342619, "learning_rate": 3.341599416994995e-08, "loss": 0.5463, "step": 26283 }, { "epoch": 1.8988928423067892, "grad_norm": 7.41683268271956, "learning_rate": 3.336834650320253e-08, "loss": 0.5927, "step": 26284 }, { "epoch": 1.8989650875069988, "grad_norm": 7.245117121685031, "learning_rate": 3.332073260272711e-08, "loss": 0.5647, "step": 26285 }, { "epoch": 1.8990373327072083, "grad_norm": 7.43525803057306, "learning_rate": 3.327315246917567e-08, "loss": 0.6057, "step": 26286 }, { "epoch": 1.8991095779074176, "grad_norm": 8.500851756784158, "learning_rate": 3.322560610319964e-08, "loss": 0.5963, "step": 26287 }, { "epoch": 1.8991818231076274, "grad_norm": 7.311603458578566, "learning_rate": 3.3178093505449594e-08, "loss": 0.6185, "step": 26288 }, { "epoch": 1.8992540683078367, "grad_norm": 6.96594893608029, "learning_rate": 3.313061467657641e-08, "loss": 0.5951, "step": 26289 }, { "epoch": 1.8993263135080463, "grad_norm": 7.996358042469045, "learning_rate": 3.308316961722929e-08, "loss": 0.5663, "step": 26290 }, { "epoch": 1.8993985587082558, "grad_norm": 7.146326906231789, "learning_rate": 3.303575832805828e-08, "loss": 0.5418, "step": 26291 }, { "epoch": 1.8994708039084653, "grad_norm": 8.109354302117131, "learning_rate": 3.2988380809712285e-08, "loss": 0.5511, "step": 26292 }, { "epoch": 1.8995430491086749, "grad_norm": 6.5425276786967945, "learning_rate": 3.2941037062839964e-08, "loss": 0.597, "step": 26293 }, { "epoch": 1.8996152943088842, "grad_norm": 6.81594761395807, "learning_rate": 3.289372708808886e-08, "loss": 0.5635, "step": 26294 }, { "epoch": 1.899687539509094, "grad_norm": 9.974780515775302, "learning_rate": 3.284645088610733e-08, "loss": 0.6059, "step": 26295 }, { "epoch": 1.8997597847093033, "grad_norm": 5.508399964514867, "learning_rate": 3.2799208457542364e-08, "loss": 0.5284, "step": 26296 }, { "epoch": 1.8998320299095128, "grad_norm": 8.357028639016372, "learning_rate": 3.275199980304067e-08, "loss": 0.581, "step": 26297 }, { "epoch": 1.8999042751097224, "grad_norm": 8.261872043495103, "learning_rate": 3.2704824923248116e-08, "loss": 0.6332, "step": 26298 }, { "epoch": 1.899976520309932, "grad_norm": 7.047173538143429, "learning_rate": 3.265768381881085e-08, "loss": 0.5715, "step": 26299 }, { "epoch": 1.9000487655101415, "grad_norm": 7.415808067200147, "learning_rate": 3.261057649037447e-08, "loss": 0.6068, "step": 26300 }, { "epoch": 1.9001210107103508, "grad_norm": 7.092104279777221, "learning_rate": 3.256350293858318e-08, "loss": 0.5676, "step": 26301 }, { "epoch": 1.9001932559105605, "grad_norm": 7.729581453310352, "learning_rate": 3.251646316408147e-08, "loss": 0.5919, "step": 26302 }, { "epoch": 1.9002655011107699, "grad_norm": 7.121574247537099, "learning_rate": 3.2469457167513816e-08, "loss": 0.5668, "step": 26303 }, { "epoch": 1.9003377463109796, "grad_norm": 6.705438530311724, "learning_rate": 3.2422484949523325e-08, "loss": 0.6237, "step": 26304 }, { "epoch": 1.900409991511189, "grad_norm": 7.464508767748808, "learning_rate": 3.2375546510753084e-08, "loss": 0.5763, "step": 26305 }, { "epoch": 1.9004822367113985, "grad_norm": 8.035512779979074, "learning_rate": 3.2328641851845364e-08, "loss": 0.5512, "step": 26306 }, { "epoch": 1.900554481911608, "grad_norm": 8.60195808297902, "learning_rate": 3.228177097344298e-08, "loss": 0.6085, "step": 26307 }, { "epoch": 1.9006267271118173, "grad_norm": 6.674882054804912, "learning_rate": 3.2234933876186814e-08, "loss": 0.6523, "step": 26308 }, { "epoch": 1.9006989723120271, "grad_norm": 6.750594783998232, "learning_rate": 3.2188130560718287e-08, "loss": 0.585, "step": 26309 }, { "epoch": 1.9007712175122364, "grad_norm": 7.023353584645385, "learning_rate": 3.2141361027678286e-08, "loss": 0.628, "step": 26310 }, { "epoch": 1.9008434627124462, "grad_norm": 8.936893042382042, "learning_rate": 3.2094625277706846e-08, "loss": 0.5871, "step": 26311 }, { "epoch": 1.9009157079126555, "grad_norm": 7.305305528250272, "learning_rate": 3.204792331144374e-08, "loss": 0.6328, "step": 26312 }, { "epoch": 1.900987953112865, "grad_norm": 6.256603262242502, "learning_rate": 3.2001255129528174e-08, "loss": 0.6021, "step": 26313 }, { "epoch": 1.9010601983130746, "grad_norm": 9.049106988000165, "learning_rate": 3.195462073259936e-08, "loss": 0.606, "step": 26314 }, { "epoch": 1.901132443513284, "grad_norm": 8.448392943202506, "learning_rate": 3.190802012129512e-08, "loss": 0.5378, "step": 26315 }, { "epoch": 1.9012046887134937, "grad_norm": 8.56260736774039, "learning_rate": 3.186145329625412e-08, "loss": 0.6376, "step": 26316 }, { "epoch": 1.901276933913703, "grad_norm": 6.348662480342347, "learning_rate": 3.181492025811334e-08, "loss": 0.5871, "step": 26317 }, { "epoch": 1.9013491791139128, "grad_norm": 7.364956757728649, "learning_rate": 3.176842100751004e-08, "loss": 0.6263, "step": 26318 }, { "epoch": 1.901421424314122, "grad_norm": 7.348072873704737, "learning_rate": 3.17219555450804e-08, "loss": 0.5769, "step": 26319 }, { "epoch": 1.9014936695143316, "grad_norm": 6.643532509173072, "learning_rate": 3.167552387146056e-08, "loss": 0.5459, "step": 26320 }, { "epoch": 1.9015659147145412, "grad_norm": 8.092164344555595, "learning_rate": 3.1629125987286405e-08, "loss": 0.5901, "step": 26321 }, { "epoch": 1.9016381599147507, "grad_norm": 7.141402254436307, "learning_rate": 3.158276189319298e-08, "loss": 0.6048, "step": 26322 }, { "epoch": 1.9017104051149603, "grad_norm": 8.87635900273912, "learning_rate": 3.1536431589814783e-08, "loss": 0.6244, "step": 26323 }, { "epoch": 1.9017826503151696, "grad_norm": 7.354653433304892, "learning_rate": 3.149013507778631e-08, "loss": 0.6073, "step": 26324 }, { "epoch": 1.9018548955153793, "grad_norm": 6.654581496609651, "learning_rate": 3.144387235774149e-08, "loss": 0.5941, "step": 26325 }, { "epoch": 1.9019271407155887, "grad_norm": 8.165994369964565, "learning_rate": 3.139764343031315e-08, "loss": 0.6621, "step": 26326 }, { "epoch": 1.9019993859157982, "grad_norm": 6.607283313097828, "learning_rate": 3.1351448296134125e-08, "loss": 0.5627, "step": 26327 }, { "epoch": 1.9020716311160077, "grad_norm": 6.859976983212486, "learning_rate": 3.1305286955837234e-08, "loss": 0.5773, "step": 26328 }, { "epoch": 1.9021438763162173, "grad_norm": 6.709370724410608, "learning_rate": 3.12591594100542e-08, "loss": 0.6248, "step": 26329 }, { "epoch": 1.9022161215164268, "grad_norm": 8.388234627355997, "learning_rate": 3.1213065659416176e-08, "loss": 0.6938, "step": 26330 }, { "epoch": 1.9022883667166361, "grad_norm": 7.936825941405665, "learning_rate": 3.116700570455433e-08, "loss": 0.5967, "step": 26331 }, { "epoch": 1.902360611916846, "grad_norm": 7.590446350080938, "learning_rate": 3.112097954609955e-08, "loss": 0.6292, "step": 26332 }, { "epoch": 1.9024328571170552, "grad_norm": 8.438702984988135, "learning_rate": 3.1074987184681324e-08, "loss": 0.6197, "step": 26333 }, { "epoch": 1.9025051023172648, "grad_norm": 8.379491711772037, "learning_rate": 3.102902862092971e-08, "loss": 0.6033, "step": 26334 }, { "epoch": 1.9025773475174743, "grad_norm": 7.041142671790689, "learning_rate": 3.0983103855473654e-08, "loss": 0.5574, "step": 26335 }, { "epoch": 1.9026495927176839, "grad_norm": 6.265913274131357, "learning_rate": 3.0937212888941804e-08, "loss": 0.5659, "step": 26336 }, { "epoch": 1.9027218379178934, "grad_norm": 7.225170896416352, "learning_rate": 3.0891355721962556e-08, "loss": 0.6478, "step": 26337 }, { "epoch": 1.9027940831181027, "grad_norm": 6.623682718851331, "learning_rate": 3.084553235516319e-08, "loss": 0.6115, "step": 26338 }, { "epoch": 1.9028663283183125, "grad_norm": 8.416852086608838, "learning_rate": 3.0799742789171526e-08, "loss": 0.623, "step": 26339 }, { "epoch": 1.9029385735185218, "grad_norm": 7.156056230745248, "learning_rate": 3.07539870246143e-08, "loss": 0.5793, "step": 26340 }, { "epoch": 1.9030108187187313, "grad_norm": 7.780097355172172, "learning_rate": 3.070826506211738e-08, "loss": 0.585, "step": 26341 }, { "epoch": 1.9030830639189409, "grad_norm": 8.094194436850586, "learning_rate": 3.0662576902307226e-08, "loss": 0.5498, "step": 26342 }, { "epoch": 1.9031553091191504, "grad_norm": 7.485851930540475, "learning_rate": 3.061692254580917e-08, "loss": 0.6218, "step": 26343 }, { "epoch": 1.90322755431936, "grad_norm": 5.6153457869382155, "learning_rate": 3.057130199324798e-08, "loss": 0.5363, "step": 26344 }, { "epoch": 1.9032997995195693, "grad_norm": 7.8970814492608445, "learning_rate": 3.052571524524817e-08, "loss": 0.5856, "step": 26345 }, { "epoch": 1.903372044719779, "grad_norm": 8.680294424240683, "learning_rate": 3.0480162302433945e-08, "loss": 0.6518, "step": 26346 }, { "epoch": 1.9034442899199884, "grad_norm": 7.225825553640264, "learning_rate": 3.043464316542927e-08, "loss": 0.6098, "step": 26347 }, { "epoch": 1.903516535120198, "grad_norm": 8.531387272104174, "learning_rate": 3.038915783485641e-08, "loss": 0.6784, "step": 26348 }, { "epoch": 1.9035887803204075, "grad_norm": 9.307026528266242, "learning_rate": 3.034370631133848e-08, "loss": 0.5689, "step": 26349 }, { "epoch": 1.903661025520617, "grad_norm": 7.154434973414627, "learning_rate": 3.029828859549777e-08, "loss": 0.5466, "step": 26350 }, { "epoch": 1.9037332707208265, "grad_norm": 10.268826570416932, "learning_rate": 3.025290468795572e-08, "loss": 0.6037, "step": 26351 }, { "epoch": 1.9038055159210359, "grad_norm": 6.187475955800821, "learning_rate": 3.020755458933378e-08, "loss": 0.6344, "step": 26352 }, { "epoch": 1.9038777611212456, "grad_norm": 8.690638572136336, "learning_rate": 3.016223830025283e-08, "loss": 0.5362, "step": 26353 }, { "epoch": 1.903950006321455, "grad_norm": 6.818117398044391, "learning_rate": 3.0116955821333226e-08, "loss": 0.6008, "step": 26354 }, { "epoch": 1.9040222515216645, "grad_norm": 8.575589387173476, "learning_rate": 3.007170715319446e-08, "loss": 0.6003, "step": 26355 }, { "epoch": 1.904094496721874, "grad_norm": 6.906038997412994, "learning_rate": 3.00264922964566e-08, "loss": 0.5991, "step": 26356 }, { "epoch": 1.9041667419220836, "grad_norm": 7.892071098537141, "learning_rate": 2.998131125173831e-08, "loss": 0.6154, "step": 26357 }, { "epoch": 1.9042389871222931, "grad_norm": 8.458699586452262, "learning_rate": 2.993616401965771e-08, "loss": 0.5908, "step": 26358 }, { "epoch": 1.9043112323225024, "grad_norm": 6.46420674023639, "learning_rate": 2.9891050600833195e-08, "loss": 0.5852, "step": 26359 }, { "epoch": 1.9043834775227122, "grad_norm": 7.742642016883599, "learning_rate": 2.9845970995882323e-08, "loss": 0.6495, "step": 26360 }, { "epoch": 1.9044557227229215, "grad_norm": 6.733283642626422, "learning_rate": 2.9800925205422105e-08, "loss": 0.5412, "step": 26361 }, { "epoch": 1.904527967923131, "grad_norm": 6.77905350371643, "learning_rate": 2.9755913230069267e-08, "loss": 0.5924, "step": 26362 }, { "epoch": 1.9046002131233406, "grad_norm": 9.587440030980963, "learning_rate": 2.9710935070439984e-08, "loss": 0.689, "step": 26363 }, { "epoch": 1.9046724583235501, "grad_norm": 7.370738980681509, "learning_rate": 2.966599072715015e-08, "loss": 0.571, "step": 26364 }, { "epoch": 1.9047447035237597, "grad_norm": 8.378804723306816, "learning_rate": 2.9621080200814555e-08, "loss": 0.5934, "step": 26365 }, { "epoch": 1.904816948723969, "grad_norm": 7.839033068760776, "learning_rate": 2.957620349204826e-08, "loss": 0.6083, "step": 26366 }, { "epoch": 1.9048891939241788, "grad_norm": 8.208283813120637, "learning_rate": 2.9531360601465497e-08, "loss": 0.6938, "step": 26367 }, { "epoch": 1.904961439124388, "grad_norm": 7.622128055214494, "learning_rate": 2.9486551529680497e-08, "loss": 0.705, "step": 26368 }, { "epoch": 1.9050336843245976, "grad_norm": 7.181054451108497, "learning_rate": 2.94417762773061e-08, "loss": 0.5829, "step": 26369 }, { "epoch": 1.9051059295248072, "grad_norm": 7.508817702690373, "learning_rate": 2.9397034844955708e-08, "loss": 0.6167, "step": 26370 }, { "epoch": 1.9051781747250167, "grad_norm": 6.518630105486001, "learning_rate": 2.935232723324133e-08, "loss": 0.6231, "step": 26371 }, { "epoch": 1.9052504199252263, "grad_norm": 8.63228399117504, "learning_rate": 2.930765344277553e-08, "loss": 0.6672, "step": 26372 }, { "epoch": 1.9053226651254356, "grad_norm": 7.728915667164231, "learning_rate": 2.9263013474169487e-08, "loss": 0.641, "step": 26373 }, { "epoch": 1.9053949103256453, "grad_norm": 6.341307221434026, "learning_rate": 2.921840732803438e-08, "loss": 0.6197, "step": 26374 }, { "epoch": 1.9054671555258547, "grad_norm": 6.4745053842516125, "learning_rate": 2.917383500498111e-08, "loss": 0.5652, "step": 26375 }, { "epoch": 1.9055394007260644, "grad_norm": 8.973096266284225, "learning_rate": 2.9129296505619186e-08, "loss": 0.6358, "step": 26376 }, { "epoch": 1.9056116459262737, "grad_norm": 6.949094688676181, "learning_rate": 2.9084791830558955e-08, "loss": 0.61, "step": 26377 }, { "epoch": 1.9056838911264833, "grad_norm": 8.260397168233082, "learning_rate": 2.9040320980409098e-08, "loss": 0.6098, "step": 26378 }, { "epoch": 1.9057561363266928, "grad_norm": 6.7632756548145965, "learning_rate": 2.8995883955778848e-08, "loss": 0.5814, "step": 26379 }, { "epoch": 1.9058283815269021, "grad_norm": 6.878651429568295, "learning_rate": 2.895148075727633e-08, "loss": 0.5057, "step": 26380 }, { "epoch": 1.905900626727112, "grad_norm": 8.309597340363995, "learning_rate": 2.8907111385509388e-08, "loss": 0.632, "step": 26381 }, { "epoch": 1.9059728719273212, "grad_norm": 7.048698781370789, "learning_rate": 2.88627758410856e-08, "loss": 0.6058, "step": 26382 }, { "epoch": 1.906045117127531, "grad_norm": 7.988615996195334, "learning_rate": 2.8818474124611417e-08, "loss": 0.6169, "step": 26383 }, { "epoch": 1.9061173623277403, "grad_norm": 7.414591409611481, "learning_rate": 2.877420623669358e-08, "loss": 0.628, "step": 26384 }, { "epoch": 1.9061896075279499, "grad_norm": 7.170882677894469, "learning_rate": 2.872997217793827e-08, "loss": 0.6189, "step": 26385 }, { "epoch": 1.9062618527281594, "grad_norm": 7.728010668682056, "learning_rate": 2.8685771948950836e-08, "loss": 0.6104, "step": 26386 }, { "epoch": 1.9063340979283687, "grad_norm": 7.968258592001998, "learning_rate": 2.8641605550336072e-08, "loss": 0.6892, "step": 26387 }, { "epoch": 1.9064063431285785, "grad_norm": 7.923217659655412, "learning_rate": 2.8597472982699047e-08, "loss": 0.6196, "step": 26388 }, { "epoch": 1.9064785883287878, "grad_norm": 7.892070615178928, "learning_rate": 2.8553374246643727e-08, "loss": 0.5165, "step": 26389 }, { "epoch": 1.9065508335289976, "grad_norm": 7.068464613060179, "learning_rate": 2.8509309342773518e-08, "loss": 0.5534, "step": 26390 }, { "epoch": 1.9066230787292069, "grad_norm": 8.161239325711689, "learning_rate": 2.8465278271692098e-08, "loss": 0.601, "step": 26391 }, { "epoch": 1.9066953239294164, "grad_norm": 7.3426549216770445, "learning_rate": 2.842128103400177e-08, "loss": 0.6907, "step": 26392 }, { "epoch": 1.906767569129626, "grad_norm": 6.769735237477672, "learning_rate": 2.8377317630305378e-08, "loss": 0.5755, "step": 26393 }, { "epoch": 1.9068398143298355, "grad_norm": 7.235490684083086, "learning_rate": 2.8333388061204114e-08, "loss": 0.595, "step": 26394 }, { "epoch": 1.906912059530045, "grad_norm": 7.168135588572674, "learning_rate": 2.8289492327299716e-08, "loss": 0.6293, "step": 26395 }, { "epoch": 1.9069843047302544, "grad_norm": 8.819958716055858, "learning_rate": 2.8245630429193373e-08, "loss": 0.5932, "step": 26396 }, { "epoch": 1.9070565499304641, "grad_norm": 7.282871311411058, "learning_rate": 2.82018023674846e-08, "loss": 0.5285, "step": 26397 }, { "epoch": 1.9071287951306735, "grad_norm": 7.1568168848716525, "learning_rate": 2.8158008142774308e-08, "loss": 0.6208, "step": 26398 }, { "epoch": 1.907201040330883, "grad_norm": 7.256563569882786, "learning_rate": 2.8114247755661183e-08, "loss": 0.584, "step": 26399 }, { "epoch": 1.9072732855310925, "grad_norm": 8.335898602152685, "learning_rate": 2.8070521206745304e-08, "loss": 0.6487, "step": 26400 }, { "epoch": 1.907345530731302, "grad_norm": 7.982159510613707, "learning_rate": 2.8026828496624246e-08, "loss": 0.6266, "step": 26401 }, { "epoch": 1.9074177759315116, "grad_norm": 7.761909931748385, "learning_rate": 2.79831696258967e-08, "loss": 0.5569, "step": 26402 }, { "epoch": 1.907490021131721, "grad_norm": 7.999622097626486, "learning_rate": 2.7939544595159962e-08, "loss": 0.6047, "step": 26403 }, { "epoch": 1.9075622663319307, "grad_norm": 6.9530868700675015, "learning_rate": 2.789595340501189e-08, "loss": 0.5764, "step": 26404 }, { "epoch": 1.90763451153214, "grad_norm": 6.833914879344416, "learning_rate": 2.7852396056048392e-08, "loss": 0.5605, "step": 26405 }, { "epoch": 1.9077067567323496, "grad_norm": 8.802571649192078, "learning_rate": 2.7808872548865938e-08, "loss": 0.6004, "step": 26406 }, { "epoch": 1.9077790019325591, "grad_norm": 6.877110105422161, "learning_rate": 2.776538288406072e-08, "loss": 0.5892, "step": 26407 }, { "epoch": 1.9078512471327687, "grad_norm": 6.260884686926304, "learning_rate": 2.7721927062227816e-08, "loss": 0.5419, "step": 26408 }, { "epoch": 1.9079234923329782, "grad_norm": 8.30757488537264, "learning_rate": 2.7678505083962303e-08, "loss": 0.5476, "step": 26409 }, { "epoch": 1.9079957375331875, "grad_norm": 8.610047135883107, "learning_rate": 2.7635116949858154e-08, "loss": 0.6346, "step": 26410 }, { "epoch": 1.9080679827333973, "grad_norm": 8.616182104341561, "learning_rate": 2.759176266050989e-08, "loss": 0.6604, "step": 26411 }, { "epoch": 1.9081402279336066, "grad_norm": 7.449930270079897, "learning_rate": 2.7548442216510374e-08, "loss": 0.5381, "step": 26412 }, { "epoch": 1.9082124731338161, "grad_norm": 6.806073158285731, "learning_rate": 2.7505155618453017e-08, "loss": 0.6685, "step": 26413 }, { "epoch": 1.9082847183340257, "grad_norm": 8.02039836017869, "learning_rate": 2.74619028669304e-08, "loss": 0.6585, "step": 26414 }, { "epoch": 1.9083569635342352, "grad_norm": 7.881230871432972, "learning_rate": 2.7418683962534552e-08, "loss": 0.5215, "step": 26415 }, { "epoch": 1.9084292087344448, "grad_norm": 7.107019323410489, "learning_rate": 2.7375498905856945e-08, "loss": 0.6471, "step": 26416 }, { "epoch": 1.908501453934654, "grad_norm": 7.775699704199782, "learning_rate": 2.7332347697489046e-08, "loss": 0.5963, "step": 26417 }, { "epoch": 1.9085736991348639, "grad_norm": 6.894402054827054, "learning_rate": 2.728923033802122e-08, "loss": 0.6348, "step": 26418 }, { "epoch": 1.9086459443350732, "grad_norm": 7.788488246368377, "learning_rate": 2.7246146828044108e-08, "loss": 0.5889, "step": 26419 }, { "epoch": 1.9087181895352827, "grad_norm": 7.2562378981010465, "learning_rate": 2.720309716814723e-08, "loss": 0.6205, "step": 26420 }, { "epoch": 1.9087904347354923, "grad_norm": 6.98737531902194, "learning_rate": 2.7160081358919844e-08, "loss": 0.596, "step": 26421 }, { "epoch": 1.9088626799357018, "grad_norm": 7.196889300711068, "learning_rate": 2.711709940095092e-08, "loss": 0.6165, "step": 26422 }, { "epoch": 1.9089349251359113, "grad_norm": 7.229076842996838, "learning_rate": 2.7074151294828875e-08, "loss": 0.5733, "step": 26423 }, { "epoch": 1.9090071703361207, "grad_norm": 6.5183313544372865, "learning_rate": 2.7031237041141577e-08, "loss": 0.5488, "step": 26424 }, { "epoch": 1.9090794155363304, "grad_norm": 7.3086529907239655, "learning_rate": 2.698835664047661e-08, "loss": 0.6871, "step": 26425 }, { "epoch": 1.9091516607365397, "grad_norm": 8.949165893558444, "learning_rate": 2.6945510093420724e-08, "loss": 0.6741, "step": 26426 }, { "epoch": 1.9092239059367493, "grad_norm": 6.942772863326171, "learning_rate": 2.6902697400560672e-08, "loss": 0.6243, "step": 26427 }, { "epoch": 1.9092961511369588, "grad_norm": 7.33136110083389, "learning_rate": 2.685991856248238e-08, "loss": 0.6583, "step": 26428 }, { "epoch": 1.9093683963371684, "grad_norm": 6.7501226343388225, "learning_rate": 2.681717357977176e-08, "loss": 0.5621, "step": 26429 }, { "epoch": 1.909440641537378, "grad_norm": 7.406740067762367, "learning_rate": 2.677446245301363e-08, "loss": 0.6152, "step": 26430 }, { "epoch": 1.9095128867375872, "grad_norm": 7.783137203688838, "learning_rate": 2.6731785182792514e-08, "loss": 0.6889, "step": 26431 }, { "epoch": 1.909585131937797, "grad_norm": 8.037654949966848, "learning_rate": 2.6689141769693228e-08, "loss": 0.572, "step": 26432 }, { "epoch": 1.9096573771380063, "grad_norm": 9.223674118571486, "learning_rate": 2.6646532214299193e-08, "loss": 0.633, "step": 26433 }, { "epoch": 1.9097296223382159, "grad_norm": 7.63262233282488, "learning_rate": 2.6603956517193553e-08, "loss": 0.6132, "step": 26434 }, { "epoch": 1.9098018675384254, "grad_norm": 6.455440373723637, "learning_rate": 2.6561414678959176e-08, "loss": 0.5806, "step": 26435 }, { "epoch": 1.909874112738635, "grad_norm": 7.701349699950505, "learning_rate": 2.6518906700178647e-08, "loss": 0.6234, "step": 26436 }, { "epoch": 1.9099463579388445, "grad_norm": 7.990643274721232, "learning_rate": 2.647643258143373e-08, "loss": 0.6152, "step": 26437 }, { "epoch": 1.9100186031390538, "grad_norm": 7.80806123903367, "learning_rate": 2.6433992323305903e-08, "loss": 0.6128, "step": 26438 }, { "epoch": 1.9100908483392636, "grad_norm": 7.9244296377094425, "learning_rate": 2.6391585926376084e-08, "loss": 0.6445, "step": 26439 }, { "epoch": 1.910163093539473, "grad_norm": 6.958574606848879, "learning_rate": 2.6349213391224926e-08, "loss": 0.5383, "step": 26440 }, { "epoch": 1.9102353387396824, "grad_norm": 7.618426882343959, "learning_rate": 2.630687471843224e-08, "loss": 0.5555, "step": 26441 }, { "epoch": 1.910307583939892, "grad_norm": 7.724944189172507, "learning_rate": 2.626456990857784e-08, "loss": 0.603, "step": 26442 }, { "epoch": 1.9103798291401015, "grad_norm": 8.00053070215428, "learning_rate": 2.622229896224071e-08, "loss": 0.6155, "step": 26443 }, { "epoch": 1.910452074340311, "grad_norm": 7.922501939873835, "learning_rate": 2.6180061879999552e-08, "loss": 0.5368, "step": 26444 }, { "epoch": 1.9105243195405204, "grad_norm": 8.868117121561841, "learning_rate": 2.6137858662432512e-08, "loss": 0.6334, "step": 26445 }, { "epoch": 1.9105965647407301, "grad_norm": 10.854287955301423, "learning_rate": 2.6095689310117467e-08, "loss": 0.6924, "step": 26446 }, { "epoch": 1.9106688099409395, "grad_norm": 7.492806608935416, "learning_rate": 2.605355382363145e-08, "loss": 0.5548, "step": 26447 }, { "epoch": 1.9107410551411492, "grad_norm": 8.237576928018306, "learning_rate": 2.6011452203551224e-08, "loss": 0.569, "step": 26448 }, { "epoch": 1.9108133003413585, "grad_norm": 6.345380000525911, "learning_rate": 2.596938445045355e-08, "loss": 0.6163, "step": 26449 }, { "epoch": 1.910885545541568, "grad_norm": 6.678489533017698, "learning_rate": 2.592735056491408e-08, "loss": 0.5364, "step": 26450 }, { "epoch": 1.9109577907417776, "grad_norm": 7.717530930348687, "learning_rate": 2.5885350547508183e-08, "loss": 0.5432, "step": 26451 }, { "epoch": 1.911030035941987, "grad_norm": 8.389249152503067, "learning_rate": 2.584338439881068e-08, "loss": 0.5828, "step": 26452 }, { "epoch": 1.9111022811421967, "grad_norm": 5.886369782187192, "learning_rate": 2.580145211939611e-08, "loss": 0.5178, "step": 26453 }, { "epoch": 1.911174526342406, "grad_norm": 7.782846064974802, "learning_rate": 2.5759553709838736e-08, "loss": 0.5572, "step": 26454 }, { "epoch": 1.9112467715426158, "grad_norm": 7.4644172901960495, "learning_rate": 2.571768917071199e-08, "loss": 0.6197, "step": 26455 }, { "epoch": 1.9113190167428251, "grad_norm": 6.710113794581316, "learning_rate": 2.5675858502588745e-08, "loss": 0.5881, "step": 26456 }, { "epoch": 1.9113912619430347, "grad_norm": 8.540224825113011, "learning_rate": 2.563406170604188e-08, "loss": 0.5947, "step": 26457 }, { "epoch": 1.9114635071432442, "grad_norm": 8.578040506471398, "learning_rate": 2.559229878164343e-08, "loss": 0.6098, "step": 26458 }, { "epoch": 1.9115357523434535, "grad_norm": 6.688388943637844, "learning_rate": 2.555056972996517e-08, "loss": 0.5747, "step": 26459 }, { "epoch": 1.9116079975436633, "grad_norm": 7.26704560204672, "learning_rate": 2.5508874551578577e-08, "loss": 0.6248, "step": 26460 }, { "epoch": 1.9116802427438726, "grad_norm": 7.940586241019879, "learning_rate": 2.5467213247054034e-08, "loss": 0.5682, "step": 26461 }, { "epoch": 1.9117524879440824, "grad_norm": 6.926809235697497, "learning_rate": 2.5425585816961917e-08, "loss": 0.5971, "step": 26462 }, { "epoch": 1.9118247331442917, "grad_norm": 8.309967343556565, "learning_rate": 2.538399226187205e-08, "loss": 0.586, "step": 26463 }, { "epoch": 1.9118969783445012, "grad_norm": 8.431855298619366, "learning_rate": 2.534243258235397e-08, "loss": 0.5806, "step": 26464 }, { "epoch": 1.9119692235447108, "grad_norm": 7.254954782525139, "learning_rate": 2.5300906778976675e-08, "loss": 0.6331, "step": 26465 }, { "epoch": 1.9120414687449203, "grad_norm": 6.818262305507116, "learning_rate": 2.5259414852308316e-08, "loss": 0.5509, "step": 26466 }, { "epoch": 1.9121137139451299, "grad_norm": 7.00845779628964, "learning_rate": 2.5217956802917053e-08, "loss": 0.6097, "step": 26467 }, { "epoch": 1.9121859591453392, "grad_norm": 6.100541150212779, "learning_rate": 2.5176532631370766e-08, "loss": 0.6112, "step": 26468 }, { "epoch": 1.912258204345549, "grad_norm": 7.076860260126479, "learning_rate": 2.513514233823594e-08, "loss": 0.5425, "step": 26469 }, { "epoch": 1.9123304495457583, "grad_norm": 7.930345810497312, "learning_rate": 2.509378592407935e-08, "loss": 0.5483, "step": 26470 }, { "epoch": 1.9124026947459678, "grad_norm": 6.809377812266371, "learning_rate": 2.505246338946721e-08, "loss": 0.632, "step": 26471 }, { "epoch": 1.9124749399461773, "grad_norm": 7.929416896167865, "learning_rate": 2.5011174734965172e-08, "loss": 0.59, "step": 26472 }, { "epoch": 1.9125471851463869, "grad_norm": 8.405494400724177, "learning_rate": 2.4969919961138624e-08, "loss": 0.6215, "step": 26473 }, { "epoch": 1.9126194303465964, "grad_norm": 6.505064238598341, "learning_rate": 2.492869906855183e-08, "loss": 0.6287, "step": 26474 }, { "epoch": 1.9126916755468057, "grad_norm": 8.263859522435203, "learning_rate": 2.4887512057769626e-08, "loss": 0.5746, "step": 26475 }, { "epoch": 1.9127639207470155, "grad_norm": 9.70441323787811, "learning_rate": 2.4846358929355442e-08, "loss": 0.5902, "step": 26476 }, { "epoch": 1.9128361659472248, "grad_norm": 7.73391901078495, "learning_rate": 2.4805239683872716e-08, "loss": 0.5491, "step": 26477 }, { "epoch": 1.9129084111474344, "grad_norm": 7.769601460058587, "learning_rate": 2.4764154321884615e-08, "loss": 0.5709, "step": 26478 }, { "epoch": 1.912980656347644, "grad_norm": 7.034711062225933, "learning_rate": 2.4723102843953184e-08, "loss": 0.6948, "step": 26479 }, { "epoch": 1.9130529015478535, "grad_norm": 7.1484559105808, "learning_rate": 2.4682085250640476e-08, "loss": 0.69, "step": 26480 }, { "epoch": 1.913125146748063, "grad_norm": 6.249374663540563, "learning_rate": 2.4641101542508262e-08, "loss": 0.5856, "step": 26481 }, { "epoch": 1.9131973919482723, "grad_norm": 7.136922665309974, "learning_rate": 2.460015172011748e-08, "loss": 0.5745, "step": 26482 }, { "epoch": 1.913269637148482, "grad_norm": 8.905350060755042, "learning_rate": 2.4559235784028245e-08, "loss": 0.6757, "step": 26483 }, { "epoch": 1.9133418823486914, "grad_norm": 7.152527753128774, "learning_rate": 2.4518353734800938e-08, "loss": 0.5766, "step": 26484 }, { "epoch": 1.913414127548901, "grad_norm": 8.059691420486086, "learning_rate": 2.4477505572995664e-08, "loss": 0.4955, "step": 26485 }, { "epoch": 1.9134863727491105, "grad_norm": 7.8539275142238, "learning_rate": 2.4436691299171143e-08, "loss": 0.6386, "step": 26486 }, { "epoch": 1.91355861794932, "grad_norm": 8.390638184492667, "learning_rate": 2.43959109138861e-08, "loss": 0.5327, "step": 26487 }, { "epoch": 1.9136308631495296, "grad_norm": 7.912941012514203, "learning_rate": 2.4355164417698695e-08, "loss": 0.6022, "step": 26488 }, { "epoch": 1.913703108349739, "grad_norm": 7.961534532016563, "learning_rate": 2.4314451811167094e-08, "loss": 0.5591, "step": 26489 }, { "epoch": 1.9137753535499487, "grad_norm": 8.962041279693247, "learning_rate": 2.427377309484835e-08, "loss": 0.5312, "step": 26490 }, { "epoch": 1.913847598750158, "grad_norm": 8.336883805823184, "learning_rate": 2.423312826929952e-08, "loss": 0.6446, "step": 26491 }, { "epoch": 1.9139198439503675, "grad_norm": 7.860396635419442, "learning_rate": 2.4192517335076548e-08, "loss": 0.6286, "step": 26492 }, { "epoch": 1.913992089150577, "grad_norm": 9.107446378036414, "learning_rate": 2.4151940292735653e-08, "loss": 0.6305, "step": 26493 }, { "epoch": 1.9140643343507866, "grad_norm": 7.738713567713013, "learning_rate": 2.4111397142832503e-08, "loss": 0.6539, "step": 26494 }, { "epoch": 1.9141365795509961, "grad_norm": 7.293748859840179, "learning_rate": 2.4070887885921935e-08, "loss": 0.6028, "step": 26495 }, { "epoch": 1.9142088247512055, "grad_norm": 5.795991156719404, "learning_rate": 2.40304125225585e-08, "loss": 0.5774, "step": 26496 }, { "epoch": 1.9142810699514152, "grad_norm": 7.230425751577307, "learning_rate": 2.3989971053296202e-08, "loss": 0.6568, "step": 26497 }, { "epoch": 1.9143533151516245, "grad_norm": 8.890151723590382, "learning_rate": 2.3949563478688764e-08, "loss": 0.6008, "step": 26498 }, { "epoch": 1.914425560351834, "grad_norm": 9.286872573545176, "learning_rate": 2.3909189799289078e-08, "loss": 0.6034, "step": 26499 }, { "epoch": 1.9144978055520436, "grad_norm": 5.647511513180288, "learning_rate": 2.3868850015650312e-08, "loss": 0.5467, "step": 26500 }, { "epoch": 1.9145700507522532, "grad_norm": 6.537299024362856, "learning_rate": 2.382854412832425e-08, "loss": 0.5455, "step": 26501 }, { "epoch": 1.9146422959524627, "grad_norm": 6.293633395708721, "learning_rate": 2.3788272137862945e-08, "loss": 0.6009, "step": 26502 }, { "epoch": 1.914714541152672, "grad_norm": 6.8376019179090095, "learning_rate": 2.3748034044817347e-08, "loss": 0.6129, "step": 26503 }, { "epoch": 1.9147867863528818, "grad_norm": 7.981356593341031, "learning_rate": 2.3707829849738405e-08, "loss": 0.6361, "step": 26504 }, { "epoch": 1.9148590315530911, "grad_norm": 8.176813270984894, "learning_rate": 2.3667659553176513e-08, "loss": 0.578, "step": 26505 }, { "epoch": 1.9149312767533007, "grad_norm": 7.622620007904901, "learning_rate": 2.3627523155681785e-08, "loss": 0.5857, "step": 26506 }, { "epoch": 1.9150035219535102, "grad_norm": 8.465498521774602, "learning_rate": 2.3587420657803507e-08, "loss": 0.6755, "step": 26507 }, { "epoch": 1.9150757671537197, "grad_norm": 8.35295734230127, "learning_rate": 2.3547352060090677e-08, "loss": 0.6415, "step": 26508 }, { "epoch": 1.9151480123539293, "grad_norm": 8.65793237966319, "learning_rate": 2.3507317363091752e-08, "loss": 0.6368, "step": 26509 }, { "epoch": 1.9152202575541386, "grad_norm": 7.512080064357045, "learning_rate": 2.3467316567354625e-08, "loss": 0.5557, "step": 26510 }, { "epoch": 1.9152925027543484, "grad_norm": 7.6176755152163675, "learning_rate": 2.3427349673427193e-08, "loss": 0.6302, "step": 26511 }, { "epoch": 1.9153647479545577, "grad_norm": 7.122385448794608, "learning_rate": 2.3387416681856235e-08, "loss": 0.6711, "step": 26512 }, { "epoch": 1.9154369931547672, "grad_norm": 8.50893829715695, "learning_rate": 2.334751759318854e-08, "loss": 0.6514, "step": 26513 }, { "epoch": 1.9155092383549768, "grad_norm": 6.360318500533024, "learning_rate": 2.3307652407970614e-08, "loss": 0.5216, "step": 26514 }, { "epoch": 1.9155814835551863, "grad_norm": 7.305421712766977, "learning_rate": 2.326782112674758e-08, "loss": 0.5204, "step": 26515 }, { "epoch": 1.9156537287553959, "grad_norm": 7.35679137280503, "learning_rate": 2.322802375006539e-08, "loss": 0.6241, "step": 26516 }, { "epoch": 1.9157259739556052, "grad_norm": 8.010290203576137, "learning_rate": 2.3188260278468043e-08, "loss": 0.5809, "step": 26517 }, { "epoch": 1.915798219155815, "grad_norm": 6.217690535733065, "learning_rate": 2.3148530712500673e-08, "loss": 0.6216, "step": 26518 }, { "epoch": 1.9158704643560243, "grad_norm": 6.479327855042142, "learning_rate": 2.3108835052706445e-08, "loss": 0.5443, "step": 26519 }, { "epoch": 1.9159427095562338, "grad_norm": 6.848094101446447, "learning_rate": 2.3069173299629377e-08, "loss": 0.556, "step": 26520 }, { "epoch": 1.9160149547564433, "grad_norm": 6.945030455553414, "learning_rate": 2.3029545453811807e-08, "loss": 0.6154, "step": 26521 }, { "epoch": 1.9160871999566529, "grad_norm": 6.016720679437845, "learning_rate": 2.2989951515796637e-08, "loss": 0.5437, "step": 26522 }, { "epoch": 1.9161594451568624, "grad_norm": 9.211051390172695, "learning_rate": 2.2950391486125932e-08, "loss": 0.6082, "step": 26523 }, { "epoch": 1.9162316903570717, "grad_norm": 8.542743249052863, "learning_rate": 2.291086536534093e-08, "loss": 0.5852, "step": 26524 }, { "epoch": 1.9163039355572815, "grad_norm": 6.332270114730435, "learning_rate": 2.2871373153982857e-08, "loss": 0.561, "step": 26525 }, { "epoch": 1.9163761807574908, "grad_norm": 7.733618373660163, "learning_rate": 2.2831914852592396e-08, "loss": 0.5823, "step": 26526 }, { "epoch": 1.9164484259577006, "grad_norm": 6.714194949385918, "learning_rate": 2.2792490461709395e-08, "loss": 0.5915, "step": 26527 }, { "epoch": 1.91652067115791, "grad_norm": 6.936922891343067, "learning_rate": 2.2753099981873972e-08, "loss": 0.6035, "step": 26528 }, { "epoch": 1.9165929163581195, "grad_norm": 7.197928649921812, "learning_rate": 2.2713743413625145e-08, "loss": 0.6102, "step": 26529 }, { "epoch": 1.916665161558329, "grad_norm": 7.59561498526581, "learning_rate": 2.2674420757501647e-08, "loss": 0.5724, "step": 26530 }, { "epoch": 1.9167374067585383, "grad_norm": 6.397490975186123, "learning_rate": 2.2635132014041662e-08, "loss": 0.5875, "step": 26531 }, { "epoch": 1.916809651958748, "grad_norm": 6.940570873339711, "learning_rate": 2.2595877183783365e-08, "loss": 0.6443, "step": 26532 }, { "epoch": 1.9168818971589574, "grad_norm": 7.861835921099662, "learning_rate": 2.2556656267263834e-08, "loss": 0.5797, "step": 26533 }, { "epoch": 1.9169541423591672, "grad_norm": 9.089345592951423, "learning_rate": 2.251746926502013e-08, "loss": 0.5453, "step": 26534 }, { "epoch": 1.9170263875593765, "grad_norm": 9.494461051024121, "learning_rate": 2.24783161775885e-08, "loss": 0.5908, "step": 26535 }, { "epoch": 1.917098632759586, "grad_norm": 8.539430601626785, "learning_rate": 2.2439197005505175e-08, "loss": 0.6044, "step": 26536 }, { "epoch": 1.9171708779597956, "grad_norm": 8.203501200450987, "learning_rate": 2.2400111749305566e-08, "loss": 0.5303, "step": 26537 }, { "epoch": 1.917243123160005, "grad_norm": 6.876547067144265, "learning_rate": 2.2361060409524793e-08, "loss": 0.4996, "step": 26538 }, { "epoch": 1.9173153683602147, "grad_norm": 9.765540624635497, "learning_rate": 2.2322042986697156e-08, "loss": 0.6038, "step": 26539 }, { "epoch": 1.917387613560424, "grad_norm": 7.627837356713797, "learning_rate": 2.228305948135695e-08, "loss": 0.6138, "step": 26540 }, { "epoch": 1.9174598587606337, "grad_norm": 7.82394596106155, "learning_rate": 2.2244109894037915e-08, "loss": 0.5758, "step": 26541 }, { "epoch": 1.917532103960843, "grad_norm": 7.269391140005243, "learning_rate": 2.2205194225272953e-08, "loss": 0.5922, "step": 26542 }, { "epoch": 1.9176043491610526, "grad_norm": 7.415450550057735, "learning_rate": 2.2166312475595254e-08, "loss": 0.569, "step": 26543 }, { "epoch": 1.9176765943612621, "grad_norm": 10.496982322478111, "learning_rate": 2.2127464645536888e-08, "loss": 0.6822, "step": 26544 }, { "epoch": 1.9177488395614717, "grad_norm": 7.976040246737608, "learning_rate": 2.208865073562938e-08, "loss": 0.6297, "step": 26545 }, { "epoch": 1.9178210847616812, "grad_norm": 8.067228128529301, "learning_rate": 2.2049870746404524e-08, "loss": 0.6043, "step": 26546 }, { "epoch": 1.9178933299618905, "grad_norm": 7.96303697628877, "learning_rate": 2.2011124678392725e-08, "loss": 0.6406, "step": 26547 }, { "epoch": 1.9179655751621003, "grad_norm": 6.864613958897288, "learning_rate": 2.19724125321244e-08, "loss": 0.567, "step": 26548 }, { "epoch": 1.9180378203623096, "grad_norm": 7.0386769044116, "learning_rate": 2.193373430812995e-08, "loss": 0.6463, "step": 26549 }, { "epoch": 1.9181100655625192, "grad_norm": 6.973412839729251, "learning_rate": 2.18950900069384e-08, "loss": 0.5613, "step": 26550 }, { "epoch": 1.9181823107627287, "grad_norm": 7.876453810420369, "learning_rate": 2.1856479629079053e-08, "loss": 0.6486, "step": 26551 }, { "epoch": 1.9182545559629383, "grad_norm": 7.566838052744031, "learning_rate": 2.181790317508037e-08, "loss": 0.6316, "step": 26552 }, { "epoch": 1.9183268011631478, "grad_norm": 8.171415181106129, "learning_rate": 2.1779360645470537e-08, "loss": 0.63, "step": 26553 }, { "epoch": 1.9183990463633571, "grad_norm": 7.27315113393034, "learning_rate": 2.1740852040776916e-08, "loss": 0.5681, "step": 26554 }, { "epoch": 1.9184712915635669, "grad_norm": 6.746421713220808, "learning_rate": 2.170237736152686e-08, "loss": 0.5675, "step": 26555 }, { "epoch": 1.9185435367637762, "grad_norm": 7.223310607020045, "learning_rate": 2.1663936608246892e-08, "loss": 0.641, "step": 26556 }, { "epoch": 1.9186157819639857, "grad_norm": 7.417692334979661, "learning_rate": 2.162552978146326e-08, "loss": 0.6315, "step": 26557 }, { "epoch": 1.9186880271641953, "grad_norm": 8.096073715507634, "learning_rate": 2.1587156881701933e-08, "loss": 0.6482, "step": 26558 }, { "epoch": 1.9187602723644048, "grad_norm": 7.800049405063762, "learning_rate": 2.154881790948804e-08, "loss": 0.574, "step": 26559 }, { "epoch": 1.9188325175646144, "grad_norm": 7.929932598967656, "learning_rate": 2.1510512865346445e-08, "loss": 0.6952, "step": 26560 }, { "epoch": 1.9189047627648237, "grad_norm": 7.530521088461909, "learning_rate": 2.147224174980145e-08, "loss": 0.6408, "step": 26561 }, { "epoch": 1.9189770079650335, "grad_norm": 7.304867926832545, "learning_rate": 2.1434004563376797e-08, "loss": 0.634, "step": 26562 }, { "epoch": 1.9190492531652428, "grad_norm": 9.030491358166612, "learning_rate": 2.139580130659652e-08, "loss": 0.5862, "step": 26563 }, { "epoch": 1.9191214983654523, "grad_norm": 6.130892974382286, "learning_rate": 2.1357631979982974e-08, "loss": 0.6061, "step": 26564 }, { "epoch": 1.9191937435656619, "grad_norm": 7.233816038746208, "learning_rate": 2.1319496584059075e-08, "loss": 0.5995, "step": 26565 }, { "epoch": 1.9192659887658714, "grad_norm": 7.651606930772573, "learning_rate": 2.1281395119346625e-08, "loss": 0.6342, "step": 26566 }, { "epoch": 1.919338233966081, "grad_norm": 6.5143511874032605, "learning_rate": 2.1243327586367156e-08, "loss": 0.6141, "step": 26567 }, { "epoch": 1.9194104791662903, "grad_norm": 7.124209443789157, "learning_rate": 2.1205293985642194e-08, "loss": 0.5542, "step": 26568 }, { "epoch": 1.9194827243665, "grad_norm": 8.848506874810813, "learning_rate": 2.11672943176916e-08, "loss": 0.5988, "step": 26569 }, { "epoch": 1.9195549695667093, "grad_norm": 6.399566289988031, "learning_rate": 2.1129328583036344e-08, "loss": 0.6212, "step": 26570 }, { "epoch": 1.9196272147669189, "grad_norm": 7.75462892177024, "learning_rate": 2.1091396782196017e-08, "loss": 0.6131, "step": 26571 }, { "epoch": 1.9196994599671284, "grad_norm": 7.229844324485751, "learning_rate": 2.105349891568964e-08, "loss": 0.5951, "step": 26572 }, { "epoch": 1.919771705167338, "grad_norm": 8.126468467058354, "learning_rate": 2.101563498403597e-08, "loss": 0.5838, "step": 26573 }, { "epoch": 1.9198439503675475, "grad_norm": 7.46881103490686, "learning_rate": 2.097780498775348e-08, "loss": 0.6472, "step": 26574 }, { "epoch": 1.9199161955677568, "grad_norm": 7.093543768082161, "learning_rate": 2.0940008927360367e-08, "loss": 0.6175, "step": 26575 }, { "epoch": 1.9199884407679666, "grad_norm": 7.459295825194472, "learning_rate": 2.090224680337316e-08, "loss": 0.5821, "step": 26576 }, { "epoch": 1.920060685968176, "grad_norm": 8.019868972735386, "learning_rate": 2.0864518616309504e-08, "loss": 0.5522, "step": 26577 }, { "epoch": 1.9201329311683855, "grad_norm": 7.478733939259272, "learning_rate": 2.082682436668565e-08, "loss": 0.5659, "step": 26578 }, { "epoch": 1.920205176368595, "grad_norm": 7.236654429215261, "learning_rate": 2.0789164055017574e-08, "loss": 0.5971, "step": 26579 }, { "epoch": 1.9202774215688045, "grad_norm": 7.686126089990993, "learning_rate": 2.07515376818207e-08, "loss": 0.6194, "step": 26580 }, { "epoch": 1.920349666769014, "grad_norm": 8.377702874687248, "learning_rate": 2.0713945247610447e-08, "loss": 0.632, "step": 26581 }, { "epoch": 1.9204219119692234, "grad_norm": 8.197129970806904, "learning_rate": 2.06763867529014e-08, "loss": 0.6108, "step": 26582 }, { "epoch": 1.9204941571694332, "grad_norm": 7.349593803963043, "learning_rate": 2.063886219820732e-08, "loss": 0.6053, "step": 26583 }, { "epoch": 1.9205664023696425, "grad_norm": 7.698746061403035, "learning_rate": 2.060137158404224e-08, "loss": 0.6034, "step": 26584 }, { "epoch": 1.920638647569852, "grad_norm": 8.073040363635663, "learning_rate": 2.056391491091936e-08, "loss": 0.6654, "step": 26585 }, { "epoch": 1.9207108927700616, "grad_norm": 8.104334446233462, "learning_rate": 2.0526492179351043e-08, "loss": 0.6155, "step": 26586 }, { "epoch": 1.9207831379702711, "grad_norm": 7.0214506909831815, "learning_rate": 2.0489103389850218e-08, "loss": 0.6005, "step": 26587 }, { "epoch": 1.9208553831704807, "grad_norm": 6.624094775272836, "learning_rate": 2.0451748542927862e-08, "loss": 0.6737, "step": 26588 }, { "epoch": 1.92092762837069, "grad_norm": 7.110013492963089, "learning_rate": 2.0414427639096067e-08, "loss": 0.608, "step": 26589 }, { "epoch": 1.9209998735708997, "grad_norm": 7.34265310333719, "learning_rate": 2.0377140678865537e-08, "loss": 0.563, "step": 26590 }, { "epoch": 1.921072118771109, "grad_norm": 7.7726267636263495, "learning_rate": 2.0339887662746695e-08, "loss": 0.6294, "step": 26591 }, { "epoch": 1.9211443639713186, "grad_norm": 6.990006261121486, "learning_rate": 2.030266859124941e-08, "loss": 0.5375, "step": 26592 }, { "epoch": 1.9212166091715281, "grad_norm": 6.658964794454845, "learning_rate": 2.0265483464883275e-08, "loss": 0.5951, "step": 26593 }, { "epoch": 1.9212888543717377, "grad_norm": 7.613351898883334, "learning_rate": 2.022833228415705e-08, "loss": 0.6225, "step": 26594 }, { "epoch": 1.9213610995719472, "grad_norm": 10.04534739127651, "learning_rate": 2.019121504957977e-08, "loss": 0.6776, "step": 26595 }, { "epoch": 1.9214333447721565, "grad_norm": 8.149858681793392, "learning_rate": 2.0154131761659367e-08, "loss": 0.5765, "step": 26596 }, { "epoch": 1.9215055899723663, "grad_norm": 8.625409102758049, "learning_rate": 2.0117082420902934e-08, "loss": 0.7393, "step": 26597 }, { "epoch": 1.9215778351725756, "grad_norm": 8.478900077309326, "learning_rate": 2.0080067027818673e-08, "loss": 0.5691, "step": 26598 }, { "epoch": 1.9216500803727854, "grad_norm": 7.6792642781399065, "learning_rate": 2.004308558291257e-08, "loss": 0.5921, "step": 26599 }, { "epoch": 1.9217223255729947, "grad_norm": 6.622280498364852, "learning_rate": 2.0006138086691164e-08, "loss": 0.5671, "step": 26600 }, { "epoch": 1.9217945707732043, "grad_norm": 8.29206435689136, "learning_rate": 1.996922453965988e-08, "loss": 0.5771, "step": 26601 }, { "epoch": 1.9218668159734138, "grad_norm": 7.345012150514861, "learning_rate": 1.993234494232443e-08, "loss": 0.5365, "step": 26602 }, { "epoch": 1.9219390611736231, "grad_norm": 8.377261183934023, "learning_rate": 1.9895499295189692e-08, "loss": 0.6317, "step": 26603 }, { "epoch": 1.9220113063738329, "grad_norm": 7.5664947299776335, "learning_rate": 1.9858687598759695e-08, "loss": 0.6163, "step": 26604 }, { "epoch": 1.9220835515740422, "grad_norm": 6.352944912213326, "learning_rate": 1.9821909853538767e-08, "loss": 0.6074, "step": 26605 }, { "epoch": 1.922155796774252, "grad_norm": 6.340150610642069, "learning_rate": 1.9785166060029835e-08, "loss": 0.5797, "step": 26606 }, { "epoch": 1.9222280419744613, "grad_norm": 6.309240424963233, "learning_rate": 1.9748456218736388e-08, "loss": 0.5768, "step": 26607 }, { "epoch": 1.9223002871746708, "grad_norm": 6.598084963776155, "learning_rate": 1.97117803301608e-08, "loss": 0.5888, "step": 26608 }, { "epoch": 1.9223725323748804, "grad_norm": 6.474183385307509, "learning_rate": 1.967513839480517e-08, "loss": 0.657, "step": 26609 }, { "epoch": 1.9224447775750897, "grad_norm": 8.597360303431032, "learning_rate": 1.9638530413171042e-08, "loss": 0.6289, "step": 26610 }, { "epoch": 1.9225170227752995, "grad_norm": 7.495264974621973, "learning_rate": 1.960195638575968e-08, "loss": 0.5707, "step": 26611 }, { "epoch": 1.9225892679755088, "grad_norm": 8.248241815198691, "learning_rate": 1.956541631307124e-08, "loss": 0.607, "step": 26612 }, { "epoch": 1.9226615131757185, "grad_norm": 7.26105025458167, "learning_rate": 1.9528910195606708e-08, "loss": 0.6386, "step": 26613 }, { "epoch": 1.9227337583759279, "grad_norm": 6.589094932355118, "learning_rate": 1.9492438033865136e-08, "loss": 0.5668, "step": 26614 }, { "epoch": 1.9228060035761374, "grad_norm": 7.978993970896184, "learning_rate": 1.9455999828346393e-08, "loss": 0.6585, "step": 26615 }, { "epoch": 1.922878248776347, "grad_norm": 7.211511194422108, "learning_rate": 1.94195955795487e-08, "loss": 0.5623, "step": 26616 }, { "epoch": 1.9229504939765565, "grad_norm": 8.38802270298088, "learning_rate": 1.9383225287970818e-08, "loss": 0.6091, "step": 26617 }, { "epoch": 1.923022739176766, "grad_norm": 7.463861501637407, "learning_rate": 1.934688895411041e-08, "loss": 0.5707, "step": 26618 }, { "epoch": 1.9230949843769753, "grad_norm": 6.67792374375028, "learning_rate": 1.9310586578465128e-08, "loss": 0.6417, "step": 26619 }, { "epoch": 1.9231672295771851, "grad_norm": 8.021540253107664, "learning_rate": 1.9274318161531803e-08, "loss": 0.6599, "step": 26620 }, { "epoch": 1.9232394747773944, "grad_norm": 8.066951025792772, "learning_rate": 1.9238083703806975e-08, "loss": 0.6132, "step": 26621 }, { "epoch": 1.923311719977604, "grad_norm": 8.306664734784551, "learning_rate": 1.9201883205786365e-08, "loss": 0.6297, "step": 26622 }, { "epoch": 1.9233839651778135, "grad_norm": 7.437747983244381, "learning_rate": 1.916571666796596e-08, "loss": 0.6117, "step": 26623 }, { "epoch": 1.923456210378023, "grad_norm": 7.893934949337687, "learning_rate": 1.912958409084037e-08, "loss": 0.6055, "step": 26624 }, { "epoch": 1.9235284555782326, "grad_norm": 7.25521768065921, "learning_rate": 1.9093485474904748e-08, "loss": 0.6072, "step": 26625 }, { "epoch": 1.923600700778442, "grad_norm": 5.627343431393594, "learning_rate": 1.9057420820652872e-08, "loss": 0.5897, "step": 26626 }, { "epoch": 1.9236729459786517, "grad_norm": 7.527893078035576, "learning_rate": 1.902139012857851e-08, "loss": 0.5992, "step": 26627 }, { "epoch": 1.923745191178861, "grad_norm": 6.694631036643981, "learning_rate": 1.8985393399175434e-08, "loss": 0.6288, "step": 26628 }, { "epoch": 1.9238174363790705, "grad_norm": 8.126691906175022, "learning_rate": 1.894943063293547e-08, "loss": 0.5942, "step": 26629 }, { "epoch": 1.92388968157928, "grad_norm": 8.078324748950845, "learning_rate": 1.8913501830351566e-08, "loss": 0.6093, "step": 26630 }, { "epoch": 1.9239619267794896, "grad_norm": 8.209426058409692, "learning_rate": 1.8877606991915265e-08, "loss": 0.6066, "step": 26631 }, { "epoch": 1.9240341719796992, "grad_norm": 8.550847275898581, "learning_rate": 1.8841746118118396e-08, "loss": 0.5806, "step": 26632 }, { "epoch": 1.9241064171799085, "grad_norm": 8.292226290068495, "learning_rate": 1.8805919209451128e-08, "loss": 0.6034, "step": 26633 }, { "epoch": 1.9241786623801183, "grad_norm": 7.462738299119337, "learning_rate": 1.8770126266404454e-08, "loss": 0.5722, "step": 26634 }, { "epoch": 1.9242509075803276, "grad_norm": 6.275192756275935, "learning_rate": 1.873436728946798e-08, "loss": 0.5733, "step": 26635 }, { "epoch": 1.9243231527805371, "grad_norm": 7.929702894073728, "learning_rate": 1.8698642279131872e-08, "loss": 0.5583, "step": 26636 }, { "epoch": 1.9243953979807467, "grad_norm": 8.863334618614116, "learning_rate": 1.8662951235884353e-08, "loss": 0.5938, "step": 26637 }, { "epoch": 1.9244676431809562, "grad_norm": 6.17399464195323, "learning_rate": 1.8627294160214472e-08, "loss": 0.5732, "step": 26638 }, { "epoch": 1.9245398883811657, "grad_norm": 6.281206273168694, "learning_rate": 1.859167105261045e-08, "loss": 0.5588, "step": 26639 }, { "epoch": 1.924612133581375, "grad_norm": 7.433689455587928, "learning_rate": 1.8556081913559676e-08, "loss": 0.5834, "step": 26640 }, { "epoch": 1.9246843787815848, "grad_norm": 7.025974583476391, "learning_rate": 1.8520526743549538e-08, "loss": 0.5633, "step": 26641 }, { "epoch": 1.9247566239817941, "grad_norm": 7.486493473829481, "learning_rate": 1.8485005543066593e-08, "loss": 0.697, "step": 26642 }, { "epoch": 1.9248288691820037, "grad_norm": 6.931493960215779, "learning_rate": 1.8449518312597114e-08, "loss": 0.5729, "step": 26643 }, { "epoch": 1.9249011143822132, "grad_norm": 8.990859582391076, "learning_rate": 1.8414065052626828e-08, "loss": 0.6506, "step": 26644 }, { "epoch": 1.9249733595824228, "grad_norm": 7.382808366400959, "learning_rate": 1.837864576364118e-08, "loss": 0.6488, "step": 26645 }, { "epoch": 1.9250456047826323, "grad_norm": 6.688258333081139, "learning_rate": 1.8343260446125055e-08, "loss": 0.5828, "step": 26646 }, { "epoch": 1.9251178499828416, "grad_norm": 7.605579095244717, "learning_rate": 1.8307909100562794e-08, "loss": 0.658, "step": 26647 }, { "epoch": 1.9251900951830514, "grad_norm": 7.6705858192384975, "learning_rate": 1.827259172743845e-08, "loss": 0.5888, "step": 26648 }, { "epoch": 1.9252623403832607, "grad_norm": 6.834071452896217, "learning_rate": 1.8237308327235246e-08, "loss": 0.5027, "step": 26649 }, { "epoch": 1.9253345855834703, "grad_norm": 8.043241461956244, "learning_rate": 1.8202058900436692e-08, "loss": 0.586, "step": 26650 }, { "epoch": 1.9254068307836798, "grad_norm": 6.145044963724934, "learning_rate": 1.816684344752462e-08, "loss": 0.5529, "step": 26651 }, { "epoch": 1.9254790759838893, "grad_norm": 6.51855110311231, "learning_rate": 1.8131661968981417e-08, "loss": 0.5995, "step": 26652 }, { "epoch": 1.9255513211840989, "grad_norm": 7.624644536972562, "learning_rate": 1.809651446528893e-08, "loss": 0.5747, "step": 26653 }, { "epoch": 1.9256235663843082, "grad_norm": 8.464329089972777, "learning_rate": 1.8061400936927876e-08, "loss": 0.5917, "step": 26654 }, { "epoch": 1.925695811584518, "grad_norm": 6.647872309077015, "learning_rate": 1.8026321384379265e-08, "loss": 0.6097, "step": 26655 }, { "epoch": 1.9257680567847273, "grad_norm": 6.901097926264387, "learning_rate": 1.7991275808122987e-08, "loss": 0.5601, "step": 26656 }, { "epoch": 1.9258403019849368, "grad_norm": 7.0860047931272705, "learning_rate": 1.7956264208639217e-08, "loss": 0.5728, "step": 26657 }, { "epoch": 1.9259125471851464, "grad_norm": 7.402685152499802, "learning_rate": 1.7921286586407015e-08, "loss": 0.5586, "step": 26658 }, { "epoch": 1.925984792385356, "grad_norm": 6.630422712058121, "learning_rate": 1.7886342941904888e-08, "loss": 0.5745, "step": 26659 }, { "epoch": 1.9260570375855655, "grad_norm": 8.023674267745594, "learning_rate": 1.7851433275611896e-08, "loss": 0.6235, "step": 26660 }, { "epoch": 1.9261292827857748, "grad_norm": 7.401733051160689, "learning_rate": 1.7816557588005436e-08, "loss": 0.6195, "step": 26661 }, { "epoch": 1.9262015279859845, "grad_norm": 7.094522980238958, "learning_rate": 1.77817158795629e-08, "loss": 0.613, "step": 26662 }, { "epoch": 1.9262737731861939, "grad_norm": 7.908376294720458, "learning_rate": 1.774690815076113e-08, "loss": 0.6064, "step": 26663 }, { "epoch": 1.9263460183864034, "grad_norm": 6.596199340047463, "learning_rate": 1.7712134402077252e-08, "loss": 0.6267, "step": 26664 }, { "epoch": 1.926418263586613, "grad_norm": 6.110411417044427, "learning_rate": 1.767739463398671e-08, "loss": 0.6549, "step": 26665 }, { "epoch": 1.9264905087868225, "grad_norm": 7.1609308517861345, "learning_rate": 1.764268884696524e-08, "loss": 0.62, "step": 26666 }, { "epoch": 1.926562753987032, "grad_norm": 6.736822475699638, "learning_rate": 1.7608017041487733e-08, "loss": 0.5062, "step": 26667 }, { "epoch": 1.9266349991872413, "grad_norm": 8.473737648542592, "learning_rate": 1.7573379218029374e-08, "loss": 0.7134, "step": 26668 }, { "epoch": 1.9267072443874511, "grad_norm": 8.780077438534365, "learning_rate": 1.7538775377063665e-08, "loss": 0.6597, "step": 26669 }, { "epoch": 1.9267794895876604, "grad_norm": 7.0625658960559665, "learning_rate": 1.7504205519064955e-08, "loss": 0.6212, "step": 26670 }, { "epoch": 1.9268517347878702, "grad_norm": 7.301691136368871, "learning_rate": 1.7469669644505914e-08, "loss": 0.5676, "step": 26671 }, { "epoch": 1.9269239799880795, "grad_norm": 8.61083528904136, "learning_rate": 1.7435167753859506e-08, "loss": 0.5444, "step": 26672 }, { "epoch": 1.926996225188289, "grad_norm": 7.830580153134758, "learning_rate": 1.7400699847598125e-08, "loss": 0.5091, "step": 26673 }, { "epoch": 1.9270684703884986, "grad_norm": 6.3876420873078725, "learning_rate": 1.7366265926193616e-08, "loss": 0.6245, "step": 26674 }, { "epoch": 1.927140715588708, "grad_norm": 7.435985763659298, "learning_rate": 1.733186599011727e-08, "loss": 0.61, "step": 26675 }, { "epoch": 1.9272129607889177, "grad_norm": 8.354035056988835, "learning_rate": 1.729750003983982e-08, "loss": 0.6324, "step": 26676 }, { "epoch": 1.927285205989127, "grad_norm": 7.110924185176916, "learning_rate": 1.7263168075832005e-08, "loss": 0.5463, "step": 26677 }, { "epoch": 1.9273574511893368, "grad_norm": 7.6537204845656195, "learning_rate": 1.7228870098564e-08, "loss": 0.609, "step": 26678 }, { "epoch": 1.927429696389546, "grad_norm": 7.87261151619721, "learning_rate": 1.7194606108504596e-08, "loss": 0.6311, "step": 26679 }, { "epoch": 1.9275019415897556, "grad_norm": 7.565516098984532, "learning_rate": 1.716037610612342e-08, "loss": 0.6598, "step": 26680 }, { "epoch": 1.9275741867899652, "grad_norm": 6.9178066310055515, "learning_rate": 1.7126180091888988e-08, "loss": 0.5886, "step": 26681 }, { "epoch": 1.9276464319901745, "grad_norm": 7.448025988507847, "learning_rate": 1.7092018066268977e-08, "loss": 0.5345, "step": 26682 }, { "epoch": 1.9277186771903843, "grad_norm": 8.831519414486786, "learning_rate": 1.7057890029731628e-08, "loss": 0.602, "step": 26683 }, { "epoch": 1.9277909223905936, "grad_norm": 6.97503652255433, "learning_rate": 1.702379598274406e-08, "loss": 0.5314, "step": 26684 }, { "epoch": 1.9278631675908033, "grad_norm": 6.523833668266012, "learning_rate": 1.6989735925772578e-08, "loss": 0.5861, "step": 26685 }, { "epoch": 1.9279354127910127, "grad_norm": 8.21028612277145, "learning_rate": 1.6955709859283743e-08, "loss": 0.7061, "step": 26686 }, { "epoch": 1.9280076579912222, "grad_norm": 6.653168587387311, "learning_rate": 1.6921717783743297e-08, "loss": 0.5395, "step": 26687 }, { "epoch": 1.9280799031914317, "grad_norm": 7.961250396359554, "learning_rate": 1.68877596996167e-08, "loss": 0.5715, "step": 26688 }, { "epoch": 1.9281521483916413, "grad_norm": 7.149039421379456, "learning_rate": 1.6853835607368585e-08, "loss": 0.6012, "step": 26689 }, { "epoch": 1.9282243935918508, "grad_norm": 6.391257107911838, "learning_rate": 1.68199455074633e-08, "loss": 0.5218, "step": 26690 }, { "epoch": 1.9282966387920601, "grad_norm": 7.548896092433348, "learning_rate": 1.6786089400364913e-08, "loss": 0.589, "step": 26691 }, { "epoch": 1.92836888399227, "grad_norm": 7.7738394015137295, "learning_rate": 1.6752267286536674e-08, "loss": 0.5818, "step": 26692 }, { "epoch": 1.9284411291924792, "grad_norm": 9.392589699969474, "learning_rate": 1.671847916644209e-08, "loss": 0.6571, "step": 26693 }, { "epoch": 1.9285133743926888, "grad_norm": 7.583769845101878, "learning_rate": 1.6684725040543303e-08, "loss": 0.6808, "step": 26694 }, { "epoch": 1.9285856195928983, "grad_norm": 7.592347317725181, "learning_rate": 1.6651004909302438e-08, "loss": 0.5121, "step": 26695 }, { "epoch": 1.9286578647931079, "grad_norm": 7.904753894003017, "learning_rate": 1.661731877318107e-08, "loss": 0.5939, "step": 26696 }, { "epoch": 1.9287301099933174, "grad_norm": 7.754580712952275, "learning_rate": 1.6583666632640495e-08, "loss": 0.5999, "step": 26697 }, { "epoch": 1.9288023551935267, "grad_norm": 7.512765574402527, "learning_rate": 1.6550048488141178e-08, "loss": 0.6137, "step": 26698 }, { "epoch": 1.9288746003937365, "grad_norm": 7.225988680765476, "learning_rate": 1.651646434014359e-08, "loss": 0.6585, "step": 26699 }, { "epoch": 1.9289468455939458, "grad_norm": 7.84956466536782, "learning_rate": 1.648291418910708e-08, "loss": 0.6664, "step": 26700 }, { "epoch": 1.9290190907941553, "grad_norm": 7.4097879063861845, "learning_rate": 1.644939803549128e-08, "loss": 0.5903, "step": 26701 }, { "epoch": 1.9290913359943649, "grad_norm": 7.679595853582741, "learning_rate": 1.641591587975472e-08, "loss": 0.5779, "step": 26702 }, { "epoch": 1.9291635811945744, "grad_norm": 8.486973822889414, "learning_rate": 1.6382467722356187e-08, "loss": 0.5718, "step": 26703 }, { "epoch": 1.929235826394784, "grad_norm": 8.499072304757968, "learning_rate": 1.6349053563753105e-08, "loss": 0.5708, "step": 26704 }, { "epoch": 1.9293080715949933, "grad_norm": 6.929580154361638, "learning_rate": 1.6315673404402878e-08, "loss": 0.5607, "step": 26705 }, { "epoch": 1.929380316795203, "grad_norm": 7.572862067850017, "learning_rate": 1.628232724476292e-08, "loss": 0.733, "step": 26706 }, { "epoch": 1.9294525619954124, "grad_norm": 8.197255154344635, "learning_rate": 1.6249015085289253e-08, "loss": 0.5878, "step": 26707 }, { "epoch": 1.929524807195622, "grad_norm": 8.672292008554637, "learning_rate": 1.6215736926438186e-08, "loss": 0.6096, "step": 26708 }, { "epoch": 1.9295970523958315, "grad_norm": 7.278095134904778, "learning_rate": 1.6182492768664903e-08, "loss": 0.6345, "step": 26709 }, { "epoch": 1.929669297596041, "grad_norm": 8.431846702731221, "learning_rate": 1.6149282612424877e-08, "loss": 0.6843, "step": 26710 }, { "epoch": 1.9297415427962505, "grad_norm": 6.845219145249067, "learning_rate": 1.611610645817274e-08, "loss": 0.6244, "step": 26711 }, { "epoch": 1.9298137879964599, "grad_norm": 8.67185388510943, "learning_rate": 1.6082964306362302e-08, "loss": 0.6261, "step": 26712 }, { "epoch": 1.9298860331966696, "grad_norm": 7.007679405351914, "learning_rate": 1.6049856157447363e-08, "loss": 0.6283, "step": 26713 }, { "epoch": 1.929958278396879, "grad_norm": 7.03948085951545, "learning_rate": 1.6016782011881447e-08, "loss": 0.5737, "step": 26714 }, { "epoch": 1.9300305235970885, "grad_norm": 7.604972928994055, "learning_rate": 1.5983741870116977e-08, "loss": 0.6544, "step": 26715 }, { "epoch": 1.930102768797298, "grad_norm": 7.3950288003666325, "learning_rate": 1.595073573260636e-08, "loss": 0.581, "step": 26716 }, { "epoch": 1.9301750139975076, "grad_norm": 9.215486627770728, "learning_rate": 1.591776359980146e-08, "loss": 0.5924, "step": 26717 }, { "epoch": 1.9302472591977171, "grad_norm": 6.667938715057575, "learning_rate": 1.5884825472153863e-08, "loss": 0.6351, "step": 26718 }, { "epoch": 1.9303195043979264, "grad_norm": 6.779876991889048, "learning_rate": 1.5851921350113763e-08, "loss": 0.5467, "step": 26719 }, { "epoch": 1.9303917495981362, "grad_norm": 6.907502073825411, "learning_rate": 1.5819051234132464e-08, "loss": 0.5569, "step": 26720 }, { "epoch": 1.9304639947983455, "grad_norm": 5.330221619427004, "learning_rate": 1.5786215124659055e-08, "loss": 0.5321, "step": 26721 }, { "epoch": 1.930536239998555, "grad_norm": 8.594153932268345, "learning_rate": 1.5753413022143726e-08, "loss": 0.6135, "step": 26722 }, { "epoch": 1.9306084851987646, "grad_norm": 8.458405993921538, "learning_rate": 1.572064492703529e-08, "loss": 0.5974, "step": 26723 }, { "epoch": 1.9306807303989741, "grad_norm": 7.484424184745094, "learning_rate": 1.568791083978227e-08, "loss": 0.5895, "step": 26724 }, { "epoch": 1.9307529755991837, "grad_norm": 9.03523815759458, "learning_rate": 1.5655210760833204e-08, "loss": 0.5629, "step": 26725 }, { "epoch": 1.930825220799393, "grad_norm": 7.666945853194759, "learning_rate": 1.5622544690634955e-08, "loss": 0.5933, "step": 26726 }, { "epoch": 1.9308974659996028, "grad_norm": 6.631260918438378, "learning_rate": 1.558991262963494e-08, "loss": 0.5821, "step": 26727 }, { "epoch": 1.930969711199812, "grad_norm": 8.864435489794845, "learning_rate": 1.55573145782803e-08, "loss": 0.6071, "step": 26728 }, { "epoch": 1.9310419564000216, "grad_norm": 8.086582382949578, "learning_rate": 1.55247505370168e-08, "loss": 0.5987, "step": 26729 }, { "epoch": 1.9311142016002312, "grad_norm": 7.86933937683356, "learning_rate": 1.5492220506290457e-08, "loss": 0.5942, "step": 26730 }, { "epoch": 1.9311864468004407, "grad_norm": 7.187811537915406, "learning_rate": 1.5459724486546202e-08, "loss": 0.5767, "step": 26731 }, { "epoch": 1.9312586920006503, "grad_norm": 6.40212567314682, "learning_rate": 1.5427262478229787e-08, "loss": 0.5681, "step": 26732 }, { "epoch": 1.9313309372008596, "grad_norm": 7.505680730293399, "learning_rate": 1.5394834481784472e-08, "loss": 0.6499, "step": 26733 }, { "epoch": 1.9314031824010693, "grad_norm": 6.904356545887831, "learning_rate": 1.5362440497654895e-08, "loss": 0.6005, "step": 26734 }, { "epoch": 1.9314754276012787, "grad_norm": 8.612221359817006, "learning_rate": 1.5330080526284597e-08, "loss": 0.6203, "step": 26735 }, { "epoch": 1.9315476728014882, "grad_norm": 7.962128644634608, "learning_rate": 1.5297754568115996e-08, "loss": 0.6525, "step": 26736 }, { "epoch": 1.9316199180016977, "grad_norm": 8.46454180789357, "learning_rate": 1.5265462623591797e-08, "loss": 0.684, "step": 26737 }, { "epoch": 1.9316921632019073, "grad_norm": 6.901794929900086, "learning_rate": 1.5233204693154424e-08, "loss": 0.5776, "step": 26738 }, { "epoch": 1.9317644084021168, "grad_norm": 8.737257043519508, "learning_rate": 1.5200980777244912e-08, "loss": 0.6133, "step": 26739 }, { "epoch": 1.9318366536023261, "grad_norm": 6.467023093766753, "learning_rate": 1.5168790876304852e-08, "loss": 0.621, "step": 26740 }, { "epoch": 1.931908898802536, "grad_norm": 8.099312892074886, "learning_rate": 1.5136634990774446e-08, "loss": 0.6044, "step": 26741 }, { "epoch": 1.9319811440027452, "grad_norm": 8.103834078386571, "learning_rate": 1.5104513121094456e-08, "loss": 0.5693, "step": 26742 }, { "epoch": 1.9320533892029548, "grad_norm": 8.480435463142872, "learning_rate": 1.5072425267703973e-08, "loss": 0.6595, "step": 26743 }, { "epoch": 1.9321256344031643, "grad_norm": 8.038545730930547, "learning_rate": 1.504037143104292e-08, "loss": 0.6133, "step": 26744 }, { "epoch": 1.9321978796033739, "grad_norm": 7.014596162728962, "learning_rate": 1.5008351611549288e-08, "loss": 0.6058, "step": 26745 }, { "epoch": 1.9322701248035834, "grad_norm": 6.480727748742445, "learning_rate": 1.497636580966244e-08, "loss": 0.5945, "step": 26746 }, { "epoch": 1.9323423700037927, "grad_norm": 8.141854982205807, "learning_rate": 1.4944414025819253e-08, "loss": 0.5412, "step": 26747 }, { "epoch": 1.9324146152040025, "grad_norm": 6.540446692094425, "learning_rate": 1.491249626045743e-08, "loss": 0.546, "step": 26748 }, { "epoch": 1.9324868604042118, "grad_norm": 6.780143120079381, "learning_rate": 1.4880612514014125e-08, "loss": 0.544, "step": 26749 }, { "epoch": 1.9325591056044216, "grad_norm": 7.469338110589367, "learning_rate": 1.4848762786925653e-08, "loss": 0.5835, "step": 26750 }, { "epoch": 1.9326313508046309, "grad_norm": 7.392157817064163, "learning_rate": 1.4816947079628053e-08, "loss": 0.6163, "step": 26751 }, { "epoch": 1.9327035960048404, "grad_norm": 8.24205050241595, "learning_rate": 1.478516539255681e-08, "loss": 0.6147, "step": 26752 }, { "epoch": 1.93277584120505, "grad_norm": 7.164448269074468, "learning_rate": 1.4753417726147135e-08, "loss": 0.6452, "step": 26753 }, { "epoch": 1.9328480864052593, "grad_norm": 8.216031977634918, "learning_rate": 1.4721704080833399e-08, "loss": 0.6196, "step": 26754 }, { "epoch": 1.932920331605469, "grad_norm": 8.299897930942434, "learning_rate": 1.4690024457049701e-08, "loss": 0.6552, "step": 26755 }, { "epoch": 1.9329925768056784, "grad_norm": 7.232132566199137, "learning_rate": 1.465837885522986e-08, "loss": 0.5785, "step": 26756 }, { "epoch": 1.9330648220058881, "grad_norm": 6.794773101274445, "learning_rate": 1.4626767275807418e-08, "loss": 0.5501, "step": 26757 }, { "epoch": 1.9331370672060975, "grad_norm": 7.613443591137304, "learning_rate": 1.459518971921453e-08, "loss": 0.5807, "step": 26758 }, { "epoch": 1.933209312406307, "grad_norm": 7.331245327400872, "learning_rate": 1.4563646185883629e-08, "loss": 0.6286, "step": 26759 }, { "epoch": 1.9332815576065165, "grad_norm": 7.840747273701569, "learning_rate": 1.4532136676246588e-08, "loss": 0.6268, "step": 26760 }, { "epoch": 1.9333538028067259, "grad_norm": 8.090548673868682, "learning_rate": 1.4500661190734733e-08, "loss": 0.6318, "step": 26761 }, { "epoch": 1.9334260480069356, "grad_norm": 7.352890494676838, "learning_rate": 1.4469219729779105e-08, "loss": 0.5626, "step": 26762 }, { "epoch": 1.933498293207145, "grad_norm": 6.4648711881026495, "learning_rate": 1.4437812293809916e-08, "loss": 0.5893, "step": 26763 }, { "epoch": 1.9335705384073547, "grad_norm": 6.680486977009984, "learning_rate": 1.4406438883257379e-08, "loss": 0.5767, "step": 26764 }, { "epoch": 1.933642783607564, "grad_norm": 7.356807447134184, "learning_rate": 1.4375099498550316e-08, "loss": 0.6178, "step": 26765 }, { "epoch": 1.9337150288077736, "grad_norm": 8.994028865687302, "learning_rate": 1.4343794140118384e-08, "loss": 0.6612, "step": 26766 }, { "epoch": 1.9337872740079831, "grad_norm": 7.459263606811882, "learning_rate": 1.4312522808390128e-08, "loss": 0.567, "step": 26767 }, { "epoch": 1.9338595192081927, "grad_norm": 7.087099966234178, "learning_rate": 1.4281285503793262e-08, "loss": 0.5394, "step": 26768 }, { "epoch": 1.9339317644084022, "grad_norm": 9.117150261679521, "learning_rate": 1.4250082226755501e-08, "loss": 0.6177, "step": 26769 }, { "epoch": 1.9340040096086115, "grad_norm": 8.196448175600972, "learning_rate": 1.4218912977704002e-08, "loss": 0.6061, "step": 26770 }, { "epoch": 1.9340762548088213, "grad_norm": 7.699795202529004, "learning_rate": 1.4187777757065647e-08, "loss": 0.5603, "step": 26771 }, { "epoch": 1.9341485000090306, "grad_norm": 8.468750900887867, "learning_rate": 1.4156676565266481e-08, "loss": 0.6832, "step": 26772 }, { "epoch": 1.9342207452092401, "grad_norm": 7.412158267295927, "learning_rate": 1.4125609402732277e-08, "loss": 0.5272, "step": 26773 }, { "epoch": 1.9342929904094497, "grad_norm": 8.284454992275773, "learning_rate": 1.4094576269888249e-08, "loss": 0.6019, "step": 26774 }, { "epoch": 1.9343652356096592, "grad_norm": 6.793464313525934, "learning_rate": 1.4063577167159337e-08, "loss": 0.5832, "step": 26775 }, { "epoch": 1.9344374808098688, "grad_norm": 7.951138531233819, "learning_rate": 1.4032612094969922e-08, "loss": 0.6325, "step": 26776 }, { "epoch": 1.934509726010078, "grad_norm": 8.966688178319075, "learning_rate": 1.4001681053743555e-08, "loss": 0.6369, "step": 26777 }, { "epoch": 1.9345819712102879, "grad_norm": 7.096788201080549, "learning_rate": 1.397078404390434e-08, "loss": 0.5591, "step": 26778 }, { "epoch": 1.9346542164104972, "grad_norm": 7.739115054381293, "learning_rate": 1.393992106587444e-08, "loss": 0.5982, "step": 26779 }, { "epoch": 1.9347264616107067, "grad_norm": 8.382932180384019, "learning_rate": 1.390909212007685e-08, "loss": 0.6212, "step": 26780 }, { "epoch": 1.9347987068109163, "grad_norm": 7.5746349989574835, "learning_rate": 1.3878297206933455e-08, "loss": 0.6749, "step": 26781 }, { "epoch": 1.9348709520111258, "grad_norm": 6.780569578568931, "learning_rate": 1.3847536326865862e-08, "loss": 0.6443, "step": 26782 }, { "epoch": 1.9349431972113353, "grad_norm": 7.468676450997694, "learning_rate": 1.3816809480295123e-08, "loss": 0.5381, "step": 26783 }, { "epoch": 1.9350154424115447, "grad_norm": 7.428282595877647, "learning_rate": 1.3786116667641736e-08, "loss": 0.5784, "step": 26784 }, { "epoch": 1.9350876876117544, "grad_norm": 6.753614799957941, "learning_rate": 1.3755457889326473e-08, "loss": 0.5772, "step": 26785 }, { "epoch": 1.9351599328119637, "grad_norm": 7.112049044239361, "learning_rate": 1.372483314576789e-08, "loss": 0.6091, "step": 26786 }, { "epoch": 1.9352321780121733, "grad_norm": 7.876072644039633, "learning_rate": 1.3694242437386207e-08, "loss": 0.6122, "step": 26787 }, { "epoch": 1.9353044232123828, "grad_norm": 8.901057714124528, "learning_rate": 1.3663685764599699e-08, "loss": 0.6955, "step": 26788 }, { "epoch": 1.9353766684125924, "grad_norm": 6.476439692272791, "learning_rate": 1.3633163127827197e-08, "loss": 0.5876, "step": 26789 }, { "epoch": 1.935448913612802, "grad_norm": 8.729856137622988, "learning_rate": 1.360267452748587e-08, "loss": 0.6261, "step": 26790 }, { "epoch": 1.9355211588130112, "grad_norm": 7.657061599999794, "learning_rate": 1.357221996399316e-08, "loss": 0.5482, "step": 26791 }, { "epoch": 1.935593404013221, "grad_norm": 6.883331244640018, "learning_rate": 1.3541799437766511e-08, "loss": 0.5675, "step": 26792 }, { "epoch": 1.9356656492134303, "grad_norm": 7.738438011095165, "learning_rate": 1.351141294922198e-08, "loss": 0.6051, "step": 26793 }, { "epoch": 1.9357378944136399, "grad_norm": 7.903852136842782, "learning_rate": 1.3481060498775345e-08, "loss": 0.5815, "step": 26794 }, { "epoch": 1.9358101396138494, "grad_norm": 8.42694335620059, "learning_rate": 1.3450742086842661e-08, "loss": 0.5865, "step": 26795 }, { "epoch": 1.935882384814059, "grad_norm": 6.990885932019056, "learning_rate": 1.342045771383832e-08, "loss": 0.6898, "step": 26796 }, { "epoch": 1.9359546300142685, "grad_norm": 7.275938790171363, "learning_rate": 1.3390207380177266e-08, "loss": 0.5949, "step": 26797 }, { "epoch": 1.9360268752144778, "grad_norm": 6.83951357355562, "learning_rate": 1.3359991086273617e-08, "loss": 0.6469, "step": 26798 }, { "epoch": 1.9360991204146876, "grad_norm": 7.379021017172604, "learning_rate": 1.3329808832540924e-08, "loss": 0.5492, "step": 26799 }, { "epoch": 1.9361713656148969, "grad_norm": 9.025340539314067, "learning_rate": 1.3299660619392475e-08, "loss": 0.6855, "step": 26800 }, { "epoch": 1.9362436108151064, "grad_norm": 6.8700407694977, "learning_rate": 1.3269546447240712e-08, "loss": 0.6185, "step": 26801 }, { "epoch": 1.936315856015316, "grad_norm": 8.035134411431189, "learning_rate": 1.3239466316498362e-08, "loss": 0.5879, "step": 26802 }, { "epoch": 1.9363881012155255, "grad_norm": 8.064919751803322, "learning_rate": 1.3209420227576485e-08, "loss": 0.6186, "step": 26803 }, { "epoch": 1.936460346415735, "grad_norm": 7.870125609271613, "learning_rate": 1.3179408180886977e-08, "loss": 0.5915, "step": 26804 }, { "epoch": 1.9365325916159444, "grad_norm": 7.581948234186179, "learning_rate": 1.3149430176840338e-08, "loss": 0.5634, "step": 26805 }, { "epoch": 1.9366048368161541, "grad_norm": 8.078453190111595, "learning_rate": 1.3119486215847354e-08, "loss": 0.6115, "step": 26806 }, { "epoch": 1.9366770820163635, "grad_norm": 7.087304232684039, "learning_rate": 1.308957629831742e-08, "loss": 0.5487, "step": 26807 }, { "epoch": 1.936749327216573, "grad_norm": 7.872978314248115, "learning_rate": 1.3059700424660204e-08, "loss": 0.6121, "step": 26808 }, { "epoch": 1.9368215724167825, "grad_norm": 6.317041859350245, "learning_rate": 1.3029858595284828e-08, "loss": 0.5835, "step": 26809 }, { "epoch": 1.936893817616992, "grad_norm": 6.814902065858051, "learning_rate": 1.3000050810599574e-08, "loss": 0.5691, "step": 26810 }, { "epoch": 1.9369660628172016, "grad_norm": 7.771243359859109, "learning_rate": 1.2970277071012726e-08, "loss": 0.6556, "step": 26811 }, { "epoch": 1.937038308017411, "grad_norm": 7.539906192671571, "learning_rate": 1.2940537376931739e-08, "loss": 0.553, "step": 26812 }, { "epoch": 1.9371105532176207, "grad_norm": 6.888594901398701, "learning_rate": 1.2910831728763507e-08, "loss": 0.5998, "step": 26813 }, { "epoch": 1.93718279841783, "grad_norm": 7.661312251423927, "learning_rate": 1.2881160126915204e-08, "loss": 0.56, "step": 26814 }, { "epoch": 1.9372550436180396, "grad_norm": 5.193059309331483, "learning_rate": 1.285152257179234e-08, "loss": 0.5551, "step": 26815 }, { "epoch": 1.9373272888182491, "grad_norm": 7.083185471132111, "learning_rate": 1.2821919063801257e-08, "loss": 0.5902, "step": 26816 }, { "epoch": 1.9373995340184587, "grad_norm": 7.970673691672561, "learning_rate": 1.279234960334691e-08, "loss": 0.5814, "step": 26817 }, { "epoch": 1.9374717792186682, "grad_norm": 7.983920388614137, "learning_rate": 1.2762814190833972e-08, "loss": 0.5956, "step": 26818 }, { "epoch": 1.9375440244188775, "grad_norm": 7.823214576490566, "learning_rate": 1.2733312826666843e-08, "loss": 0.5657, "step": 26819 }, { "epoch": 1.9376162696190873, "grad_norm": 6.768734682018246, "learning_rate": 1.2703845511249646e-08, "loss": 0.5901, "step": 26820 }, { "epoch": 1.9376885148192966, "grad_norm": 6.160726246318698, "learning_rate": 1.267441224498539e-08, "loss": 0.4802, "step": 26821 }, { "epoch": 1.9377607600195064, "grad_norm": 7.531481220922184, "learning_rate": 1.2645013028277364e-08, "loss": 0.6326, "step": 26822 }, { "epoch": 1.9378330052197157, "grad_norm": 8.327543256241322, "learning_rate": 1.261564786152747e-08, "loss": 0.6228, "step": 26823 }, { "epoch": 1.9379052504199252, "grad_norm": 6.9041115052025, "learning_rate": 1.258631674513816e-08, "loss": 0.5628, "step": 26824 }, { "epoch": 1.9379774956201348, "grad_norm": 7.711263013962617, "learning_rate": 1.2557019679510785e-08, "loss": 0.6775, "step": 26825 }, { "epoch": 1.938049740820344, "grad_norm": 6.003518979966855, "learning_rate": 1.2527756665046686e-08, "loss": 0.6085, "step": 26826 }, { "epoch": 1.9381219860205539, "grad_norm": 9.113423149747172, "learning_rate": 1.2498527702145824e-08, "loss": 0.6324, "step": 26827 }, { "epoch": 1.9381942312207632, "grad_norm": 9.38798986139713, "learning_rate": 1.2469332791208988e-08, "loss": 0.6231, "step": 26828 }, { "epoch": 1.938266476420973, "grad_norm": 6.476828722664695, "learning_rate": 1.244017193263558e-08, "loss": 0.5637, "step": 26829 }, { "epoch": 1.9383387216211823, "grad_norm": 7.260854553707687, "learning_rate": 1.241104512682445e-08, "loss": 0.5075, "step": 26830 }, { "epoch": 1.9384109668213918, "grad_norm": 8.345448296118285, "learning_rate": 1.2381952374174721e-08, "loss": 0.6038, "step": 26831 }, { "epoch": 1.9384832120216013, "grad_norm": 6.9730597200318645, "learning_rate": 1.2352893675084965e-08, "loss": 0.5593, "step": 26832 }, { "epoch": 1.9385554572218107, "grad_norm": 8.554965302995052, "learning_rate": 1.2323869029952084e-08, "loss": 0.597, "step": 26833 }, { "epoch": 1.9386277024220204, "grad_norm": 8.797247490864818, "learning_rate": 1.2294878439174095e-08, "loss": 0.6807, "step": 26834 }, { "epoch": 1.9386999476222297, "grad_norm": 6.747647758638428, "learning_rate": 1.2265921903147348e-08, "loss": 0.5106, "step": 26835 }, { "epoch": 1.9387721928224395, "grad_norm": 6.329517653951955, "learning_rate": 1.2236999422268746e-08, "loss": 0.4608, "step": 26836 }, { "epoch": 1.9388444380226488, "grad_norm": 6.759160642712536, "learning_rate": 1.2208110996934086e-08, "loss": 0.5526, "step": 26837 }, { "epoch": 1.9389166832228584, "grad_norm": 8.154968394879551, "learning_rate": 1.2179256627538605e-08, "loss": 0.5897, "step": 26838 }, { "epoch": 1.938988928423068, "grad_norm": 6.394916315433415, "learning_rate": 1.2150436314477543e-08, "loss": 0.5381, "step": 26839 }, { "epoch": 1.9390611736232775, "grad_norm": 6.257197393887636, "learning_rate": 1.2121650058145306e-08, "loss": 0.5801, "step": 26840 }, { "epoch": 1.939133418823487, "grad_norm": 7.037974486269808, "learning_rate": 1.2092897858935748e-08, "loss": 0.6117, "step": 26841 }, { "epoch": 1.9392056640236963, "grad_norm": 7.3978525190294615, "learning_rate": 1.2064179717242996e-08, "loss": 0.591, "step": 26842 }, { "epoch": 1.939277909223906, "grad_norm": 7.7128480903803105, "learning_rate": 1.2035495633459792e-08, "loss": 0.5991, "step": 26843 }, { "epoch": 1.9393501544241154, "grad_norm": 7.931812080685017, "learning_rate": 1.20068456079786e-08, "loss": 0.6191, "step": 26844 }, { "epoch": 1.939422399624325, "grad_norm": 7.5893526885339195, "learning_rate": 1.1978229641192162e-08, "loss": 0.594, "step": 26845 }, { "epoch": 1.9394946448245345, "grad_norm": 6.02304198553534, "learning_rate": 1.1949647733491831e-08, "loss": 0.5658, "step": 26846 }, { "epoch": 1.939566890024744, "grad_norm": 7.989359455047264, "learning_rate": 1.192109988526896e-08, "loss": 0.5843, "step": 26847 }, { "epoch": 1.9396391352249536, "grad_norm": 8.68716978913803, "learning_rate": 1.1892586096914626e-08, "loss": 0.6395, "step": 26848 }, { "epoch": 1.939711380425163, "grad_norm": 8.58095505242956, "learning_rate": 1.1864106368818517e-08, "loss": 0.6257, "step": 26849 }, { "epoch": 1.9397836256253727, "grad_norm": 6.809573043457254, "learning_rate": 1.1835660701371155e-08, "loss": 0.6126, "step": 26850 }, { "epoch": 1.939855870825582, "grad_norm": 7.771528061261982, "learning_rate": 1.180724909496167e-08, "loss": 0.5341, "step": 26851 }, { "epoch": 1.9399281160257915, "grad_norm": 7.090865973139207, "learning_rate": 1.1778871549978643e-08, "loss": 0.5864, "step": 26852 }, { "epoch": 1.940000361226001, "grad_norm": 7.442746659952781, "learning_rate": 1.1750528066811207e-08, "loss": 0.5752, "step": 26853 }, { "epoch": 1.9400726064262106, "grad_norm": 7.940401282944061, "learning_rate": 1.1722218645846827e-08, "loss": 0.5926, "step": 26854 }, { "epoch": 1.9401448516264201, "grad_norm": 7.125148570870815, "learning_rate": 1.169394328747353e-08, "loss": 0.5926, "step": 26855 }, { "epoch": 1.9402170968266295, "grad_norm": 7.724650363355224, "learning_rate": 1.1665701992077948e-08, "loss": 0.5709, "step": 26856 }, { "epoch": 1.9402893420268392, "grad_norm": 7.462343668618087, "learning_rate": 1.1637494760046997e-08, "loss": 0.5864, "step": 26857 }, { "epoch": 1.9403615872270485, "grad_norm": 8.573509100046154, "learning_rate": 1.1609321591766476e-08, "loss": 0.5919, "step": 26858 }, { "epoch": 1.940433832427258, "grad_norm": 8.403029721969379, "learning_rate": 1.1581182487622188e-08, "loss": 0.6217, "step": 26859 }, { "epoch": 1.9405060776274676, "grad_norm": 6.960857763379593, "learning_rate": 1.1553077447999384e-08, "loss": 0.6022, "step": 26860 }, { "epoch": 1.9405783228276772, "grad_norm": 8.720807888858406, "learning_rate": 1.1525006473283028e-08, "loss": 0.6545, "step": 26861 }, { "epoch": 1.9406505680278867, "grad_norm": 7.503198068970793, "learning_rate": 1.1496969563856708e-08, "loss": 0.4864, "step": 26862 }, { "epoch": 1.940722813228096, "grad_norm": 7.152281614540997, "learning_rate": 1.1468966720105112e-08, "loss": 0.5712, "step": 26863 }, { "epoch": 1.9407950584283058, "grad_norm": 6.791160829893552, "learning_rate": 1.1440997942410992e-08, "loss": 0.6293, "step": 26864 }, { "epoch": 1.9408673036285151, "grad_norm": 6.1332939201968335, "learning_rate": 1.1413063231157374e-08, "loss": 0.5918, "step": 26865 }, { "epoch": 1.9409395488287247, "grad_norm": 7.512799086623227, "learning_rate": 1.138516258672645e-08, "loss": 0.5361, "step": 26866 }, { "epoch": 1.9410117940289342, "grad_norm": 8.268584156175343, "learning_rate": 1.1357296009500696e-08, "loss": 0.6648, "step": 26867 }, { "epoch": 1.9410840392291437, "grad_norm": 7.5248162738258255, "learning_rate": 1.1329463499861193e-08, "loss": 0.587, "step": 26868 }, { "epoch": 1.9411562844293533, "grad_norm": 6.478873913724622, "learning_rate": 1.1301665058188748e-08, "loss": 0.5818, "step": 26869 }, { "epoch": 1.9412285296295626, "grad_norm": 8.657488682668165, "learning_rate": 1.1273900684864448e-08, "loss": 0.6758, "step": 26870 }, { "epoch": 1.9413007748297724, "grad_norm": 8.344094258610818, "learning_rate": 1.1246170380267984e-08, "loss": 0.6267, "step": 26871 }, { "epoch": 1.9413730200299817, "grad_norm": 7.667680700938385, "learning_rate": 1.1218474144779057e-08, "loss": 0.6378, "step": 26872 }, { "epoch": 1.9414452652301912, "grad_norm": 8.208599828534677, "learning_rate": 1.1190811978776806e-08, "loss": 0.5647, "step": 26873 }, { "epoch": 1.9415175104304008, "grad_norm": 7.269334203149562, "learning_rate": 1.1163183882639816e-08, "loss": 0.598, "step": 26874 }, { "epoch": 1.9415897556306103, "grad_norm": 9.667093947164307, "learning_rate": 1.1135589856746398e-08, "loss": 0.5755, "step": 26875 }, { "epoch": 1.9416620008308199, "grad_norm": 7.767386591647884, "learning_rate": 1.1108029901474304e-08, "loss": 0.6035, "step": 26876 }, { "epoch": 1.9417342460310292, "grad_norm": 6.84414824977459, "learning_rate": 1.108050401720101e-08, "loss": 0.5875, "step": 26877 }, { "epoch": 1.941806491231239, "grad_norm": 7.280066398066292, "learning_rate": 1.1053012204302881e-08, "loss": 0.5297, "step": 26878 }, { "epoch": 1.9418787364314483, "grad_norm": 8.820202646846914, "learning_rate": 1.1025554463156562e-08, "loss": 0.6278, "step": 26879 }, { "epoch": 1.9419509816316578, "grad_norm": 7.252651650474795, "learning_rate": 1.0998130794137585e-08, "loss": 0.5745, "step": 26880 }, { "epoch": 1.9420232268318673, "grad_norm": 8.61025492469091, "learning_rate": 1.097074119762176e-08, "loss": 0.6231, "step": 26881 }, { "epoch": 1.9420954720320769, "grad_norm": 9.591208055957093, "learning_rate": 1.0943385673984064e-08, "loss": 0.6597, "step": 26882 }, { "epoch": 1.9421677172322864, "grad_norm": 8.141432357847258, "learning_rate": 1.0916064223598643e-08, "loss": 0.5415, "step": 26883 }, { "epoch": 1.9422399624324957, "grad_norm": 7.675975566083655, "learning_rate": 1.0888776846839366e-08, "loss": 0.5889, "step": 26884 }, { "epoch": 1.9423122076327055, "grad_norm": 7.650392619510845, "learning_rate": 1.0861523544080654e-08, "loss": 0.5718, "step": 26885 }, { "epoch": 1.9423844528329148, "grad_norm": 7.602927607785603, "learning_rate": 1.083430431569471e-08, "loss": 0.6107, "step": 26886 }, { "epoch": 1.9424566980331244, "grad_norm": 7.116322101527273, "learning_rate": 1.0807119162054291e-08, "loss": 0.5988, "step": 26887 }, { "epoch": 1.942528943233334, "grad_norm": 8.473477441186061, "learning_rate": 1.07799680835316e-08, "loss": 0.622, "step": 26888 }, { "epoch": 1.9426011884335435, "grad_norm": 6.775386120818624, "learning_rate": 1.0752851080498839e-08, "loss": 0.5794, "step": 26889 }, { "epoch": 1.942673433633753, "grad_norm": 7.353207476113235, "learning_rate": 1.0725768153326544e-08, "loss": 0.5689, "step": 26890 }, { "epoch": 1.9427456788339623, "grad_norm": 7.23841036801325, "learning_rate": 1.069871930238553e-08, "loss": 0.6043, "step": 26891 }, { "epoch": 1.942817924034172, "grad_norm": 7.52882947844017, "learning_rate": 1.0671704528046334e-08, "loss": 0.5702, "step": 26892 }, { "epoch": 1.9428901692343814, "grad_norm": 7.528261216872252, "learning_rate": 1.064472383067866e-08, "loss": 0.561, "step": 26893 }, { "epoch": 1.9429624144345912, "grad_norm": 5.899272692836242, "learning_rate": 1.0617777210651937e-08, "loss": 0.6437, "step": 26894 }, { "epoch": 1.9430346596348005, "grad_norm": 7.408622265199394, "learning_rate": 1.0590864668334755e-08, "loss": 0.5673, "step": 26895 }, { "epoch": 1.94310690483501, "grad_norm": 7.9096898975182555, "learning_rate": 1.0563986204095988e-08, "loss": 0.5593, "step": 26896 }, { "epoch": 1.9431791500352196, "grad_norm": 7.790898358277042, "learning_rate": 1.0537141818303398e-08, "loss": 0.6401, "step": 26897 }, { "epoch": 1.943251395235429, "grad_norm": 7.56999705063877, "learning_rate": 1.051033151132419e-08, "loss": 0.6381, "step": 26898 }, { "epoch": 1.9433236404356387, "grad_norm": 6.754032484632185, "learning_rate": 1.048355528352557e-08, "loss": 0.6365, "step": 26899 }, { "epoch": 1.943395885635848, "grad_norm": 7.0341625369645495, "learning_rate": 1.0456813135274191e-08, "loss": 0.628, "step": 26900 }, { "epoch": 1.9434681308360577, "grad_norm": 9.92117600269077, "learning_rate": 1.0430105066935869e-08, "loss": 0.6216, "step": 26901 }, { "epoch": 1.943540376036267, "grad_norm": 7.2323620098622845, "learning_rate": 1.0403431078876425e-08, "loss": 0.6083, "step": 26902 }, { "epoch": 1.9436126212364766, "grad_norm": 6.911260246038819, "learning_rate": 1.037679117146112e-08, "loss": 0.5854, "step": 26903 }, { "epoch": 1.9436848664366861, "grad_norm": 7.059692364871227, "learning_rate": 1.0350185345054387e-08, "loss": 0.585, "step": 26904 }, { "epoch": 1.9437571116368955, "grad_norm": 8.50341773042725, "learning_rate": 1.0323613600020654e-08, "loss": 0.5712, "step": 26905 }, { "epoch": 1.9438293568371052, "grad_norm": 7.042821695376174, "learning_rate": 1.0297075936723245e-08, "loss": 0.6068, "step": 26906 }, { "epoch": 1.9439016020373145, "grad_norm": 6.3600533988198436, "learning_rate": 1.0270572355526031e-08, "loss": 0.5323, "step": 26907 }, { "epoch": 1.9439738472375243, "grad_norm": 7.0632164515782545, "learning_rate": 1.0244102856791505e-08, "loss": 0.6669, "step": 26908 }, { "epoch": 1.9440460924377336, "grad_norm": 7.220828595153463, "learning_rate": 1.0217667440881874e-08, "loss": 0.5894, "step": 26909 }, { "epoch": 1.9441183376379432, "grad_norm": 7.565241293175956, "learning_rate": 1.019126610815907e-08, "loss": 0.6294, "step": 26910 }, { "epoch": 1.9441905828381527, "grad_norm": 9.71696391973248, "learning_rate": 1.0164898858985028e-08, "loss": 0.6521, "step": 26911 }, { "epoch": 1.9442628280383623, "grad_norm": 7.477245144188403, "learning_rate": 1.0138565693720015e-08, "loss": 0.6752, "step": 26912 }, { "epoch": 1.9443350732385718, "grad_norm": 7.872397098589531, "learning_rate": 1.0112266612724852e-08, "loss": 0.5922, "step": 26913 }, { "epoch": 1.9444073184387811, "grad_norm": 6.603326433163073, "learning_rate": 1.008600161635953e-08, "loss": 0.5539, "step": 26914 }, { "epoch": 1.9444795636389909, "grad_norm": 8.148476824569538, "learning_rate": 1.0059770704983485e-08, "loss": 0.6667, "step": 26915 }, { "epoch": 1.9445518088392002, "grad_norm": 7.1209726830767295, "learning_rate": 1.003357387895587e-08, "loss": 0.6498, "step": 26916 }, { "epoch": 1.9446240540394097, "grad_norm": 5.930425987446275, "learning_rate": 1.0007411138635292e-08, "loss": 0.5546, "step": 26917 }, { "epoch": 1.9446962992396193, "grad_norm": 6.875964010449593, "learning_rate": 9.981282484379795e-09, "loss": 0.6134, "step": 26918 }, { "epoch": 1.9447685444398288, "grad_norm": 5.767450472681493, "learning_rate": 9.955187916547426e-09, "loss": 0.5335, "step": 26919 }, { "epoch": 1.9448407896400384, "grad_norm": 7.288282411951215, "learning_rate": 9.929127435494845e-09, "loss": 0.6251, "step": 26920 }, { "epoch": 1.9449130348402477, "grad_norm": 7.035575383995734, "learning_rate": 9.903101041579266e-09, "loss": 0.6295, "step": 26921 }, { "epoch": 1.9449852800404575, "grad_norm": 7.606725838665355, "learning_rate": 9.877108735156794e-09, "loss": 0.6107, "step": 26922 }, { "epoch": 1.9450575252406668, "grad_norm": 7.513599273434008, "learning_rate": 9.851150516583252e-09, "loss": 0.5782, "step": 26923 }, { "epoch": 1.9451297704408763, "grad_norm": 6.246181389609519, "learning_rate": 9.825226386213915e-09, "loss": 0.5009, "step": 26924 }, { "epoch": 1.9452020156410859, "grad_norm": 8.689483635665136, "learning_rate": 9.799336344403775e-09, "loss": 0.6707, "step": 26925 }, { "epoch": 1.9452742608412954, "grad_norm": 7.228607976983681, "learning_rate": 9.773480391507272e-09, "loss": 0.5655, "step": 26926 }, { "epoch": 1.945346506041505, "grad_norm": 6.415336900458675, "learning_rate": 9.74765852787829e-09, "loss": 0.5714, "step": 26927 }, { "epoch": 1.9454187512417143, "grad_norm": 7.747679578356151, "learning_rate": 9.721870753870155e-09, "loss": 0.5784, "step": 26928 }, { "epoch": 1.945490996441924, "grad_norm": 7.060376143336868, "learning_rate": 9.696117069836198e-09, "loss": 0.5831, "step": 26929 }, { "epoch": 1.9455632416421333, "grad_norm": 7.059273852214308, "learning_rate": 9.670397476128634e-09, "loss": 0.6406, "step": 26930 }, { "epoch": 1.9456354868423429, "grad_norm": 7.422893638978397, "learning_rate": 9.644711973099685e-09, "loss": 0.5848, "step": 26931 }, { "epoch": 1.9457077320425524, "grad_norm": 8.051624622249673, "learning_rate": 9.61906056110129e-09, "loss": 0.6094, "step": 26932 }, { "epoch": 1.945779977242762, "grad_norm": 7.699823689609667, "learning_rate": 9.593443240484002e-09, "loss": 0.6464, "step": 26933 }, { "epoch": 1.9458522224429715, "grad_norm": 5.6553205706516705, "learning_rate": 9.567860011598928e-09, "loss": 0.53, "step": 26934 }, { "epoch": 1.9459244676431808, "grad_norm": 8.858160672399597, "learning_rate": 9.542310874796346e-09, "loss": 0.5883, "step": 26935 }, { "epoch": 1.9459967128433906, "grad_norm": 5.860540248717343, "learning_rate": 9.516795830425418e-09, "loss": 0.5117, "step": 26936 }, { "epoch": 1.9460689580436, "grad_norm": 8.38708307676877, "learning_rate": 9.491314878836144e-09, "loss": 0.6776, "step": 26937 }, { "epoch": 1.9461412032438095, "grad_norm": 7.274685106311894, "learning_rate": 9.465868020376856e-09, "loss": 0.5638, "step": 26938 }, { "epoch": 1.946213448444019, "grad_norm": 7.764252385922593, "learning_rate": 9.440455255396163e-09, "loss": 0.6026, "step": 26939 }, { "epoch": 1.9462856936442285, "grad_norm": 7.2903670833143, "learning_rate": 9.415076584241567e-09, "loss": 0.566, "step": 26940 }, { "epoch": 1.946357938844438, "grad_norm": 7.561272639781313, "learning_rate": 9.389732007261121e-09, "loss": 0.5849, "step": 26941 }, { "epoch": 1.9464301840446474, "grad_norm": 7.530944564767268, "learning_rate": 9.364421524801215e-09, "loss": 0.5769, "step": 26942 }, { "epoch": 1.9465024292448572, "grad_norm": 7.534321898467477, "learning_rate": 9.339145137208516e-09, "loss": 0.5197, "step": 26943 }, { "epoch": 1.9465746744450665, "grad_norm": 7.603033223442953, "learning_rate": 9.313902844829136e-09, "loss": 0.5702, "step": 26944 }, { "epoch": 1.946646919645276, "grad_norm": 7.206041651655949, "learning_rate": 9.288694648008357e-09, "loss": 0.6363, "step": 26945 }, { "epoch": 1.9467191648454856, "grad_norm": 6.490252889399563, "learning_rate": 9.263520547091732e-09, "loss": 0.6398, "step": 26946 }, { "epoch": 1.9467914100456951, "grad_norm": 7.064838933600315, "learning_rate": 9.23838054242343e-09, "loss": 0.5933, "step": 26947 }, { "epoch": 1.9468636552459047, "grad_norm": 8.062890331666944, "learning_rate": 9.213274634347624e-09, "loss": 0.5472, "step": 26948 }, { "epoch": 1.946935900446114, "grad_norm": 7.218670733128109, "learning_rate": 9.188202823208203e-09, "loss": 0.6327, "step": 26949 }, { "epoch": 1.9470081456463237, "grad_norm": 7.363985532880059, "learning_rate": 9.163165109348227e-09, "loss": 0.5861, "step": 26950 }, { "epoch": 1.947080390846533, "grad_norm": 8.288452701267731, "learning_rate": 9.138161493110753e-09, "loss": 0.649, "step": 26951 }, { "epoch": 1.9471526360467426, "grad_norm": 5.89369829788265, "learning_rate": 9.113191974837454e-09, "loss": 0.5801, "step": 26952 }, { "epoch": 1.9472248812469521, "grad_norm": 7.6291797626021145, "learning_rate": 9.088256554870833e-09, "loss": 0.6037, "step": 26953 }, { "epoch": 1.9472971264471617, "grad_norm": 7.522551138727182, "learning_rate": 9.063355233552006e-09, "loss": 0.6019, "step": 26954 }, { "epoch": 1.9473693716473712, "grad_norm": 6.55724538514578, "learning_rate": 9.038488011221535e-09, "loss": 0.5801, "step": 26955 }, { "epoch": 1.9474416168475805, "grad_norm": 7.101903858940282, "learning_rate": 9.01365488821998e-09, "loss": 0.6119, "step": 26956 }, { "epoch": 1.9475138620477903, "grad_norm": 7.841189753116855, "learning_rate": 8.988855864887347e-09, "loss": 0.5863, "step": 26957 }, { "epoch": 1.9475861072479996, "grad_norm": 7.393569069564832, "learning_rate": 8.964090941563363e-09, "loss": 0.5915, "step": 26958 }, { "epoch": 1.9476583524482092, "grad_norm": 7.148290480472439, "learning_rate": 8.939360118586648e-09, "loss": 0.5447, "step": 26959 }, { "epoch": 1.9477305976484187, "grad_norm": 6.513136280922363, "learning_rate": 8.91466339629582e-09, "loss": 0.5637, "step": 26960 }, { "epoch": 1.9478028428486283, "grad_norm": 9.239944353472776, "learning_rate": 8.890000775029218e-09, "loss": 0.5795, "step": 26961 }, { "epoch": 1.9478750880488378, "grad_norm": 7.449582071550825, "learning_rate": 8.86537225512435e-09, "loss": 0.6193, "step": 26962 }, { "epoch": 1.9479473332490471, "grad_norm": 7.980438396678082, "learning_rate": 8.840777836918169e-09, "loss": 0.6124, "step": 26963 }, { "epoch": 1.9480195784492569, "grad_norm": 8.375492366153066, "learning_rate": 8.816217520747628e-09, "loss": 0.54, "step": 26964 }, { "epoch": 1.9480918236494662, "grad_norm": 8.72551549574602, "learning_rate": 8.791691306948568e-09, "loss": 0.6296, "step": 26965 }, { "epoch": 1.9481640688496757, "grad_norm": 7.575010434385862, "learning_rate": 8.767199195857113e-09, "loss": 0.5813, "step": 26966 }, { "epoch": 1.9482363140498853, "grad_norm": 7.186297241416624, "learning_rate": 8.742741187808267e-09, "loss": 0.566, "step": 26967 }, { "epoch": 1.9483085592500948, "grad_norm": 8.019398535342432, "learning_rate": 8.718317283136768e-09, "loss": 0.7046, "step": 26968 }, { "epoch": 1.9483808044503044, "grad_norm": 6.976460523383341, "learning_rate": 8.693927482177623e-09, "loss": 0.6149, "step": 26969 }, { "epoch": 1.9484530496505137, "grad_norm": 8.451679437540292, "learning_rate": 8.669571785263897e-09, "loss": 0.536, "step": 26970 }, { "epoch": 1.9485252948507235, "grad_norm": 7.9931482537450576, "learning_rate": 8.645250192729494e-09, "loss": 0.621, "step": 26971 }, { "epoch": 1.9485975400509328, "grad_norm": 7.047659887345254, "learning_rate": 8.620962704907199e-09, "loss": 0.596, "step": 26972 }, { "epoch": 1.9486697852511425, "grad_norm": 7.941109625009342, "learning_rate": 8.59670932212925e-09, "loss": 0.5835, "step": 26973 }, { "epoch": 1.9487420304513519, "grad_norm": 6.232606412282881, "learning_rate": 8.572490044728155e-09, "loss": 0.5562, "step": 26974 }, { "epoch": 1.9488142756515614, "grad_norm": 6.372236962020989, "learning_rate": 8.548304873035318e-09, "loss": 0.5949, "step": 26975 }, { "epoch": 1.948886520851771, "grad_norm": 7.353051062267717, "learning_rate": 8.524153807381586e-09, "loss": 0.5648, "step": 26976 }, { "epoch": 1.9489587660519803, "grad_norm": 7.065659078105015, "learning_rate": 8.500036848097249e-09, "loss": 0.5129, "step": 26977 }, { "epoch": 1.94903101125219, "grad_norm": 6.698681191989515, "learning_rate": 8.475953995513431e-09, "loss": 0.656, "step": 26978 }, { "epoch": 1.9491032564523993, "grad_norm": 8.240273030026069, "learning_rate": 8.451905249959035e-09, "loss": 0.5856, "step": 26979 }, { "epoch": 1.949175501652609, "grad_norm": 6.022700916579405, "learning_rate": 8.427890611763523e-09, "loss": 0.5384, "step": 26980 }, { "epoch": 1.9492477468528184, "grad_norm": 8.50790755513933, "learning_rate": 8.403910081255794e-09, "loss": 0.5815, "step": 26981 }, { "epoch": 1.949319992053028, "grad_norm": 7.022417955581332, "learning_rate": 8.379963658763646e-09, "loss": 0.5622, "step": 26982 }, { "epoch": 1.9493922372532375, "grad_norm": 6.606057779769144, "learning_rate": 8.356051344615423e-09, "loss": 0.4744, "step": 26983 }, { "epoch": 1.9494644824534468, "grad_norm": 8.552102117173215, "learning_rate": 8.332173139138089e-09, "loss": 0.6295, "step": 26984 }, { "epoch": 1.9495367276536566, "grad_norm": 6.141260167631168, "learning_rate": 8.308329042658602e-09, "loss": 0.6432, "step": 26985 }, { "epoch": 1.949608972853866, "grad_norm": 6.986454801234834, "learning_rate": 8.28451905550337e-09, "loss": 0.5721, "step": 26986 }, { "epoch": 1.9496812180540757, "grad_norm": 8.83585590355229, "learning_rate": 8.26074317799852e-09, "loss": 0.5645, "step": 26987 }, { "epoch": 1.949753463254285, "grad_norm": 7.938993058241618, "learning_rate": 8.237001410469347e-09, "loss": 0.5511, "step": 26988 }, { "epoch": 1.9498257084544945, "grad_norm": 7.7836988647800425, "learning_rate": 8.213293753241147e-09, "loss": 0.5602, "step": 26989 }, { "epoch": 1.949897953654704, "grad_norm": 7.832597440959106, "learning_rate": 8.189620206637827e-09, "loss": 0.627, "step": 26990 }, { "epoch": 1.9499701988549136, "grad_norm": 8.177345093508059, "learning_rate": 8.16598077098385e-09, "loss": 0.6133, "step": 26991 }, { "epoch": 1.9500424440551232, "grad_norm": 7.666819722955969, "learning_rate": 8.142375446603124e-09, "loss": 0.6132, "step": 26992 }, { "epoch": 1.9501146892553325, "grad_norm": 8.721335407159827, "learning_rate": 8.118804233818168e-09, "loss": 0.64, "step": 26993 }, { "epoch": 1.9501869344555423, "grad_norm": 8.221235124372422, "learning_rate": 8.095267132952056e-09, "loss": 0.5906, "step": 26994 }, { "epoch": 1.9502591796557516, "grad_norm": 7.555754119782057, "learning_rate": 8.071764144327033e-09, "loss": 0.5611, "step": 26995 }, { "epoch": 1.9503314248559611, "grad_norm": 8.049546829641882, "learning_rate": 8.048295268264506e-09, "loss": 0.581, "step": 26996 }, { "epoch": 1.9504036700561707, "grad_norm": 7.40653121993785, "learning_rate": 8.024860505086162e-09, "loss": 0.5483, "step": 26997 }, { "epoch": 1.9504759152563802, "grad_norm": 7.524373440939182, "learning_rate": 8.001459855112304e-09, "loss": 0.6133, "step": 26998 }, { "epoch": 1.9505481604565897, "grad_norm": 6.505809828410103, "learning_rate": 7.978093318663782e-09, "loss": 0.6811, "step": 26999 }, { "epoch": 1.950620405656799, "grad_norm": 7.141919575875346, "learning_rate": 7.954760896060065e-09, "loss": 0.5112, "step": 27000 }, { "epoch": 1.9506926508570088, "grad_norm": 7.58969799300341, "learning_rate": 7.931462587620897e-09, "loss": 0.5318, "step": 27001 }, { "epoch": 1.9507648960572181, "grad_norm": 8.558824627476968, "learning_rate": 7.908198393664912e-09, "loss": 0.621, "step": 27002 }, { "epoch": 1.9508371412574277, "grad_norm": 6.623615624079259, "learning_rate": 7.884968314510744e-09, "loss": 0.5734, "step": 27003 }, { "epoch": 1.9509093864576372, "grad_norm": 9.917174083219221, "learning_rate": 7.861772350476472e-09, "loss": 0.5991, "step": 27004 }, { "epoch": 1.9509816316578468, "grad_norm": 8.139402330656285, "learning_rate": 7.838610501879341e-09, "loss": 0.5768, "step": 27005 }, { "epoch": 1.9510538768580563, "grad_norm": 6.410955719778978, "learning_rate": 7.815482769036597e-09, "loss": 0.5793, "step": 27006 }, { "epoch": 1.9511261220582656, "grad_norm": 7.060401807378998, "learning_rate": 7.792389152264933e-09, "loss": 0.5768, "step": 27007 }, { "epoch": 1.9511983672584754, "grad_norm": 7.921924598433244, "learning_rate": 7.769329651880486e-09, "loss": 0.6178, "step": 27008 }, { "epoch": 1.9512706124586847, "grad_norm": 7.830828354087557, "learning_rate": 7.746304268198556e-09, "loss": 0.6028, "step": 27009 }, { "epoch": 1.9513428576588943, "grad_norm": 6.776194573411282, "learning_rate": 7.723313001534727e-09, "loss": 0.6188, "step": 27010 }, { "epoch": 1.9514151028591038, "grad_norm": 7.891308805843899, "learning_rate": 7.700355852203744e-09, "loss": 0.6382, "step": 27011 }, { "epoch": 1.9514873480593133, "grad_norm": 7.296656211170544, "learning_rate": 7.677432820519526e-09, "loss": 0.6871, "step": 27012 }, { "epoch": 1.9515595932595229, "grad_norm": 7.411564332727221, "learning_rate": 7.654543906796263e-09, "loss": 0.6317, "step": 27013 }, { "epoch": 1.9516318384597322, "grad_norm": 7.569527631607789, "learning_rate": 7.63168911134704e-09, "loss": 0.5809, "step": 27014 }, { "epoch": 1.951704083659942, "grad_norm": 8.198122079320447, "learning_rate": 7.608868434484662e-09, "loss": 0.6631, "step": 27015 }, { "epoch": 1.9517763288601513, "grad_norm": 7.6138008296599455, "learning_rate": 7.586081876521933e-09, "loss": 0.5748, "step": 27016 }, { "epoch": 1.9518485740603608, "grad_norm": 7.037670408526726, "learning_rate": 7.563329437770273e-09, "loss": 0.6162, "step": 27017 }, { "epoch": 1.9519208192605704, "grad_norm": 8.837446682565474, "learning_rate": 7.540611118541652e-09, "loss": 0.6402, "step": 27018 }, { "epoch": 1.95199306446078, "grad_norm": 8.135516124968895, "learning_rate": 7.517926919146379e-09, "loss": 0.5974, "step": 27019 }, { "epoch": 1.9520653096609895, "grad_norm": 6.038379625646103, "learning_rate": 7.495276839895593e-09, "loss": 0.5715, "step": 27020 }, { "epoch": 1.9521375548611988, "grad_norm": 8.710524093329216, "learning_rate": 7.472660881099325e-09, "loss": 0.624, "step": 27021 }, { "epoch": 1.9522098000614085, "grad_norm": 8.269684398676345, "learning_rate": 7.450079043067049e-09, "loss": 0.6099, "step": 27022 }, { "epoch": 1.9522820452616179, "grad_norm": 8.202858068573665, "learning_rate": 7.4275313261076845e-09, "loss": 0.5828, "step": 27023 }, { "epoch": 1.9523542904618274, "grad_norm": 7.1392185153223515, "learning_rate": 7.405017730529873e-09, "loss": 0.6167, "step": 27024 }, { "epoch": 1.952426535662037, "grad_norm": 9.274619665856335, "learning_rate": 7.382538256642258e-09, "loss": 0.6691, "step": 27025 }, { "epoch": 1.9524987808622465, "grad_norm": 7.3860403926518545, "learning_rate": 7.360092904752092e-09, "loss": 0.5898, "step": 27026 }, { "epoch": 1.952571026062456, "grad_norm": 6.823568329496998, "learning_rate": 7.3376816751671856e-09, "loss": 0.576, "step": 27027 }, { "epoch": 1.9526432712626653, "grad_norm": 7.261313457372619, "learning_rate": 7.315304568193682e-09, "loss": 0.5306, "step": 27028 }, { "epoch": 1.9527155164628751, "grad_norm": 7.202823657437181, "learning_rate": 7.29296158413828e-09, "loss": 0.6172, "step": 27029 }, { "epoch": 1.9527877616630844, "grad_norm": 7.864885889427191, "learning_rate": 7.270652723306848e-09, "loss": 0.5706, "step": 27030 }, { "epoch": 1.952860006863294, "grad_norm": 6.937780151209295, "learning_rate": 7.248377986004696e-09, "loss": 0.5956, "step": 27031 }, { "epoch": 1.9529322520635035, "grad_norm": 6.570547346891592, "learning_rate": 7.226137372536856e-09, "loss": 0.5611, "step": 27032 }, { "epoch": 1.953004497263713, "grad_norm": 7.542285226426244, "learning_rate": 7.203930883207533e-09, "loss": 0.6369, "step": 27033 }, { "epoch": 1.9530767424639226, "grad_norm": 6.764968664036118, "learning_rate": 7.181758518320647e-09, "loss": 0.6171, "step": 27034 }, { "epoch": 1.953148987664132, "grad_norm": 6.924431666660449, "learning_rate": 7.159620278180401e-09, "loss": 0.5915, "step": 27035 }, { "epoch": 1.9532212328643417, "grad_norm": 8.044184736880096, "learning_rate": 7.137516163089053e-09, "loss": 0.6651, "step": 27036 }, { "epoch": 1.953293478064551, "grad_norm": 7.656069383631683, "learning_rate": 7.1154461733496935e-09, "loss": 0.6148, "step": 27037 }, { "epoch": 1.9533657232647605, "grad_norm": 7.818622600880541, "learning_rate": 7.093410309264026e-09, "loss": 0.6796, "step": 27038 }, { "epoch": 1.95343796846497, "grad_norm": 7.249389359638954, "learning_rate": 7.071408571134308e-09, "loss": 0.6079, "step": 27039 }, { "epoch": 1.9535102136651796, "grad_norm": 7.255823098832601, "learning_rate": 7.049440959261134e-09, "loss": 0.5578, "step": 27040 }, { "epoch": 1.9535824588653892, "grad_norm": 7.6367479718910225, "learning_rate": 7.02750747394565e-09, "loss": 0.624, "step": 27041 }, { "epoch": 1.9536547040655985, "grad_norm": 8.424357258987227, "learning_rate": 7.005608115487617e-09, "loss": 0.6214, "step": 27042 }, { "epoch": 1.9537269492658083, "grad_norm": 7.011771522259627, "learning_rate": 6.98374288418735e-09, "loss": 0.5339, "step": 27043 }, { "epoch": 1.9537991944660176, "grad_norm": 7.190700713964704, "learning_rate": 6.961911780344055e-09, "loss": 0.6582, "step": 27044 }, { "epoch": 1.9538714396662273, "grad_norm": 6.637423975737229, "learning_rate": 6.940114804256104e-09, "loss": 0.5792, "step": 27045 }, { "epoch": 1.9539436848664367, "grad_norm": 7.619147384081211, "learning_rate": 6.918351956222702e-09, "loss": 0.6092, "step": 27046 }, { "epoch": 1.9540159300666462, "grad_norm": 7.673471941100676, "learning_rate": 6.896623236540834e-09, "loss": 0.6447, "step": 27047 }, { "epoch": 1.9540881752668557, "grad_norm": 6.8536422513416175, "learning_rate": 6.874928645508872e-09, "loss": 0.517, "step": 27048 }, { "epoch": 1.954160420467065, "grad_norm": 7.0956724813541445, "learning_rate": 6.853268183422968e-09, "loss": 0.6195, "step": 27049 }, { "epoch": 1.9542326656672748, "grad_norm": 6.698503799768839, "learning_rate": 6.831641850580384e-09, "loss": 0.5605, "step": 27050 }, { "epoch": 1.9543049108674841, "grad_norm": 6.99827118369579, "learning_rate": 6.8100496472764396e-09, "loss": 0.6236, "step": 27051 }, { "epoch": 1.954377156067694, "grad_norm": 7.529123093000973, "learning_rate": 6.7884915738072875e-09, "loss": 0.6035, "step": 27052 }, { "epoch": 1.9544494012679032, "grad_norm": 6.052942351823777, "learning_rate": 6.7669676304676915e-09, "loss": 0.522, "step": 27053 }, { "epoch": 1.9545216464681128, "grad_norm": 7.851308152363473, "learning_rate": 6.745477817552693e-09, "loss": 0.6001, "step": 27054 }, { "epoch": 1.9545938916683223, "grad_norm": 7.9885825699588775, "learning_rate": 6.724022135355945e-09, "loss": 0.571, "step": 27055 }, { "epoch": 1.9546661368685316, "grad_norm": 7.931133450372474, "learning_rate": 6.702600584171659e-09, "loss": 0.6292, "step": 27056 }, { "epoch": 1.9547383820687414, "grad_norm": 6.798283012386577, "learning_rate": 6.681213164292655e-09, "loss": 0.603, "step": 27057 }, { "epoch": 1.9548106272689507, "grad_norm": 7.591529552953682, "learning_rate": 6.659859876012032e-09, "loss": 0.5716, "step": 27058 }, { "epoch": 1.9548828724691605, "grad_norm": 8.493612245364712, "learning_rate": 6.638540719621778e-09, "loss": 0.5793, "step": 27059 }, { "epoch": 1.9549551176693698, "grad_norm": 6.7061754946022925, "learning_rate": 6.61725569541416e-09, "loss": 0.5938, "step": 27060 }, { "epoch": 1.9550273628695793, "grad_norm": 7.804806025829859, "learning_rate": 6.596004803680334e-09, "loss": 0.6279, "step": 27061 }, { "epoch": 1.9550996080697889, "grad_norm": 5.85588502835669, "learning_rate": 6.5747880447109e-09, "loss": 0.5532, "step": 27062 }, { "epoch": 1.9551718532699984, "grad_norm": 6.600211353963765, "learning_rate": 6.5536054187970155e-09, "loss": 0.6323, "step": 27063 }, { "epoch": 1.955244098470208, "grad_norm": 8.499907549187023, "learning_rate": 6.532456926227892e-09, "loss": 0.6442, "step": 27064 }, { "epoch": 1.9553163436704173, "grad_norm": 7.746835123923566, "learning_rate": 6.511342567293577e-09, "loss": 0.6173, "step": 27065 }, { "epoch": 1.955388588870627, "grad_norm": 6.6448171510360705, "learning_rate": 6.490262342282727e-09, "loss": 0.599, "step": 27066 }, { "epoch": 1.9554608340708364, "grad_norm": 8.131626185541654, "learning_rate": 6.469216251484278e-09, "loss": 0.5608, "step": 27067 }, { "epoch": 1.955533079271046, "grad_norm": 7.779193407233084, "learning_rate": 6.448204295186333e-09, "loss": 0.5781, "step": 27068 }, { "epoch": 1.9556053244712555, "grad_norm": 7.001668186322273, "learning_rate": 6.427226473675885e-09, "loss": 0.5991, "step": 27069 }, { "epoch": 1.955677569671465, "grad_norm": 6.064311907071568, "learning_rate": 6.4062827872410364e-09, "loss": 0.543, "step": 27070 }, { "epoch": 1.9557498148716745, "grad_norm": 8.981727276293945, "learning_rate": 6.385373236167669e-09, "loss": 0.6349, "step": 27071 }, { "epoch": 1.9558220600718839, "grad_norm": 9.108695736091711, "learning_rate": 6.364497820742499e-09, "loss": 0.5474, "step": 27072 }, { "epoch": 1.9558943052720936, "grad_norm": 8.06524043860563, "learning_rate": 6.343656541251131e-09, "loss": 0.6441, "step": 27073 }, { "epoch": 1.955966550472303, "grad_norm": 7.526149177883329, "learning_rate": 6.322849397979169e-09, "loss": 0.6306, "step": 27074 }, { "epoch": 1.9560387956725125, "grad_norm": 6.994685198971776, "learning_rate": 6.302076391210832e-09, "loss": 0.6344, "step": 27075 }, { "epoch": 1.956111040872722, "grad_norm": 7.286144788829622, "learning_rate": 6.28133752123089e-09, "loss": 0.5828, "step": 27076 }, { "epoch": 1.9561832860729316, "grad_norm": 8.041605051493093, "learning_rate": 6.2606327883232846e-09, "loss": 0.5284, "step": 27077 }, { "epoch": 1.9562555312731411, "grad_norm": 6.4249667036922995, "learning_rate": 6.239962192771398e-09, "loss": 0.6557, "step": 27078 }, { "epoch": 1.9563277764733504, "grad_norm": 6.981478028428206, "learning_rate": 6.219325734858062e-09, "loss": 0.6708, "step": 27079 }, { "epoch": 1.9564000216735602, "grad_norm": 9.667889440962583, "learning_rate": 6.198723414866103e-09, "loss": 0.6017, "step": 27080 }, { "epoch": 1.9564722668737695, "grad_norm": 5.773264055137423, "learning_rate": 6.178155233076966e-09, "loss": 0.5451, "step": 27081 }, { "epoch": 1.956544512073979, "grad_norm": 7.176595360306736, "learning_rate": 6.157621189772644e-09, "loss": 0.6856, "step": 27082 }, { "epoch": 1.9566167572741886, "grad_norm": 7.8695805384761455, "learning_rate": 6.1371212852343045e-09, "loss": 0.5827, "step": 27083 }, { "epoch": 1.9566890024743981, "grad_norm": 7.443681215982185, "learning_rate": 6.116655519742276e-09, "loss": 0.6449, "step": 27084 }, { "epoch": 1.9567612476746077, "grad_norm": 7.589090055620269, "learning_rate": 6.096223893576891e-09, "loss": 0.5965, "step": 27085 }, { "epoch": 1.956833492874817, "grad_norm": 6.785795736182556, "learning_rate": 6.075826407017648e-09, "loss": 0.5512, "step": 27086 }, { "epoch": 1.9569057380750268, "grad_norm": 7.904906147370155, "learning_rate": 6.0554630603440464e-09, "loss": 0.5648, "step": 27087 }, { "epoch": 1.956977983275236, "grad_norm": 7.679964667874761, "learning_rate": 6.035133853835029e-09, "loss": 0.549, "step": 27088 }, { "epoch": 1.9570502284754456, "grad_norm": 7.339136890524087, "learning_rate": 6.014838787768151e-09, "loss": 0.5573, "step": 27089 }, { "epoch": 1.9571224736756552, "grad_norm": 8.509742090640511, "learning_rate": 5.994577862421802e-09, "loss": 0.6568, "step": 27090 }, { "epoch": 1.9571947188758647, "grad_norm": 7.410132054685064, "learning_rate": 5.97435107807326e-09, "loss": 0.6071, "step": 27091 }, { "epoch": 1.9572669640760743, "grad_norm": 7.4876227927607255, "learning_rate": 5.954158434999524e-09, "loss": 0.5426, "step": 27092 }, { "epoch": 1.9573392092762836, "grad_norm": 7.087229685531471, "learning_rate": 5.933999933476764e-09, "loss": 0.5985, "step": 27093 }, { "epoch": 1.9574114544764933, "grad_norm": 7.881537252057266, "learning_rate": 5.9138755737808695e-09, "loss": 0.5872, "step": 27094 }, { "epoch": 1.9574836996767027, "grad_norm": 10.812040991770003, "learning_rate": 5.893785356187731e-09, "loss": 0.605, "step": 27095 }, { "epoch": 1.9575559448769122, "grad_norm": 7.3393759829658505, "learning_rate": 5.8737292809718514e-09, "loss": 0.5268, "step": 27096 }, { "epoch": 1.9576281900771217, "grad_norm": 7.227494925065372, "learning_rate": 5.853707348408289e-09, "loss": 0.6195, "step": 27097 }, { "epoch": 1.9577004352773313, "grad_norm": 8.91507281262439, "learning_rate": 5.833719558770712e-09, "loss": 0.6083, "step": 27098 }, { "epoch": 1.9577726804775408, "grad_norm": 7.398713603200068, "learning_rate": 5.813765912333069e-09, "loss": 0.6226, "step": 27099 }, { "epoch": 1.9578449256777501, "grad_norm": 8.509623297165861, "learning_rate": 5.793846409368198e-09, "loss": 0.6076, "step": 27100 }, { "epoch": 1.95791717087796, "grad_norm": 6.9488838891355424, "learning_rate": 5.773961050148935e-09, "loss": 0.5995, "step": 27101 }, { "epoch": 1.9579894160781692, "grad_norm": 6.296880149365916, "learning_rate": 5.754109834947564e-09, "loss": 0.6197, "step": 27102 }, { "epoch": 1.9580616612783788, "grad_norm": 7.368570143403125, "learning_rate": 5.734292764036087e-09, "loss": 0.5433, "step": 27103 }, { "epoch": 1.9581339064785883, "grad_norm": 7.864133573559961, "learning_rate": 5.714509837685122e-09, "loss": 0.6028, "step": 27104 }, { "epoch": 1.9582061516787979, "grad_norm": 7.89746208469217, "learning_rate": 5.694761056165843e-09, "loss": 0.5758, "step": 27105 }, { "epoch": 1.9582783968790074, "grad_norm": 7.972118189447123, "learning_rate": 5.675046419748587e-09, "loss": 0.6073, "step": 27106 }, { "epoch": 1.9583506420792167, "grad_norm": 6.891376480334039, "learning_rate": 5.6553659287034154e-09, "loss": 0.6417, "step": 27107 }, { "epoch": 1.9584228872794265, "grad_norm": 8.471348662931145, "learning_rate": 5.635719583299282e-09, "loss": 0.5907, "step": 27108 }, { "epoch": 1.9584951324796358, "grad_norm": 8.661844917397614, "learning_rate": 5.616107383805691e-09, "loss": 0.6007, "step": 27109 }, { "epoch": 1.9585673776798453, "grad_norm": 7.818424999334255, "learning_rate": 5.5965293304904855e-09, "loss": 0.6236, "step": 27110 }, { "epoch": 1.9586396228800549, "grad_norm": 7.240828138588041, "learning_rate": 5.576985423622339e-09, "loss": 0.5337, "step": 27111 }, { "epoch": 1.9587118680802644, "grad_norm": 7.2669899591513305, "learning_rate": 5.557475663468259e-09, "loss": 0.5994, "step": 27112 }, { "epoch": 1.958784113280474, "grad_norm": 7.027999465655772, "learning_rate": 5.538000050295533e-09, "loss": 0.5824, "step": 27113 }, { "epoch": 1.9588563584806833, "grad_norm": 7.511067934056366, "learning_rate": 5.5185585843708926e-09, "loss": 0.5697, "step": 27114 }, { "epoch": 1.958928603680893, "grad_norm": 6.923883770932949, "learning_rate": 5.499151265960234e-09, "loss": 0.5699, "step": 27115 }, { "epoch": 1.9590008488811024, "grad_norm": 7.943049375789531, "learning_rate": 5.4797780953294575e-09, "loss": 0.5448, "step": 27116 }, { "epoch": 1.9590730940813121, "grad_norm": 8.453152076550905, "learning_rate": 5.460439072743629e-09, "loss": 0.5836, "step": 27117 }, { "epoch": 1.9591453392815215, "grad_norm": 6.521600777637409, "learning_rate": 5.441134198467535e-09, "loss": 0.5442, "step": 27118 }, { "epoch": 1.959217584481731, "grad_norm": 5.941110256228221, "learning_rate": 5.421863472765132e-09, "loss": 0.6315, "step": 27119 }, { "epoch": 1.9592898296819405, "grad_norm": 6.786164783639071, "learning_rate": 5.402626895900653e-09, "loss": 0.58, "step": 27120 }, { "epoch": 1.9593620748821499, "grad_norm": 8.2790114346142, "learning_rate": 5.383424468137499e-09, "loss": 0.6147, "step": 27121 }, { "epoch": 1.9594343200823596, "grad_norm": 8.099913382361432, "learning_rate": 5.36425618973796e-09, "loss": 0.6585, "step": 27122 }, { "epoch": 1.959506565282569, "grad_norm": 7.55761283404142, "learning_rate": 5.3451220609651576e-09, "loss": 0.6018, "step": 27123 }, { "epoch": 1.9595788104827787, "grad_norm": 7.230048515188707, "learning_rate": 5.326022082080551e-09, "loss": 0.5193, "step": 27124 }, { "epoch": 1.959651055682988, "grad_norm": 8.045840535752484, "learning_rate": 5.306956253345597e-09, "loss": 0.6112, "step": 27125 }, { "epoch": 1.9597233008831976, "grad_norm": 7.209868809621512, "learning_rate": 5.287924575021475e-09, "loss": 0.5348, "step": 27126 }, { "epoch": 1.9597955460834071, "grad_norm": 7.660725931774208, "learning_rate": 5.268927047368533e-09, "loss": 0.603, "step": 27127 }, { "epoch": 1.9598677912836164, "grad_norm": 8.558012071601429, "learning_rate": 5.249963670647118e-09, "loss": 0.6738, "step": 27128 }, { "epoch": 1.9599400364838262, "grad_norm": 6.821809340059215, "learning_rate": 5.231034445116467e-09, "loss": 0.5653, "step": 27129 }, { "epoch": 1.9600122816840355, "grad_norm": 7.106144902803395, "learning_rate": 5.212139371036096e-09, "loss": 0.5583, "step": 27130 }, { "epoch": 1.9600845268842453, "grad_norm": 8.032097797695087, "learning_rate": 5.193278448664407e-09, "loss": 0.6686, "step": 27131 }, { "epoch": 1.9601567720844546, "grad_norm": 6.871084589515639, "learning_rate": 5.1744516782595285e-09, "loss": 0.6443, "step": 27132 }, { "epoch": 1.9602290172846641, "grad_norm": 7.6778287762894, "learning_rate": 5.1556590600793096e-09, "loss": 0.5417, "step": 27133 }, { "epoch": 1.9603012624848737, "grad_norm": 7.602177719515741, "learning_rate": 5.1369005943810445e-09, "loss": 0.6198, "step": 27134 }, { "epoch": 1.9603735076850832, "grad_norm": 8.762327828668063, "learning_rate": 5.118176281421472e-09, "loss": 0.5699, "step": 27135 }, { "epoch": 1.9604457528852928, "grad_norm": 7.019070397364886, "learning_rate": 5.099486121457054e-09, "loss": 0.6264, "step": 27136 }, { "epoch": 1.960517998085502, "grad_norm": 7.133012783418252, "learning_rate": 5.080830114743418e-09, "loss": 0.5707, "step": 27137 }, { "epoch": 1.9605902432857119, "grad_norm": 6.32684901053436, "learning_rate": 5.0622082615359166e-09, "loss": 0.6104, "step": 27138 }, { "epoch": 1.9606624884859212, "grad_norm": 8.91267329364528, "learning_rate": 5.043620562089624e-09, "loss": 0.616, "step": 27139 }, { "epoch": 1.9607347336861307, "grad_norm": 8.142183414590704, "learning_rate": 5.025067016659058e-09, "loss": 0.5863, "step": 27140 }, { "epoch": 1.9608069788863403, "grad_norm": 7.317071773404864, "learning_rate": 5.006547625497904e-09, "loss": 0.5843, "step": 27141 }, { "epoch": 1.9608792240865498, "grad_norm": 7.462607695091565, "learning_rate": 4.988062388860127e-09, "loss": 0.6244, "step": 27142 }, { "epoch": 1.9609514692867593, "grad_norm": 8.505943912826167, "learning_rate": 4.9696113069985785e-09, "loss": 0.6143, "step": 27143 }, { "epoch": 1.9610237144869687, "grad_norm": 6.842713996124905, "learning_rate": 4.951194380165558e-09, "loss": 0.6438, "step": 27144 }, { "epoch": 1.9610959596871784, "grad_norm": 8.576879303193147, "learning_rate": 4.932811608613364e-09, "loss": 0.5921, "step": 27145 }, { "epoch": 1.9611682048873877, "grad_norm": 7.220281529313717, "learning_rate": 4.914462992594016e-09, "loss": 0.615, "step": 27146 }, { "epoch": 1.9612404500875973, "grad_norm": 8.800726635150113, "learning_rate": 4.896148532357869e-09, "loss": 0.642, "step": 27147 }, { "epoch": 1.9613126952878068, "grad_norm": 8.973014641705088, "learning_rate": 4.87786822815639e-09, "loss": 0.6041, "step": 27148 }, { "epoch": 1.9613849404880164, "grad_norm": 7.344408371003642, "learning_rate": 4.8596220802396565e-09, "loss": 0.5643, "step": 27149 }, { "epoch": 1.961457185688226, "grad_norm": 6.7107902750512665, "learning_rate": 4.841410088857468e-09, "loss": 0.6519, "step": 27150 }, { "epoch": 1.9615294308884352, "grad_norm": 7.718275047855931, "learning_rate": 4.823232254258514e-09, "loss": 0.6505, "step": 27151 }, { "epoch": 1.961601676088645, "grad_norm": 6.903745171115118, "learning_rate": 4.8050885766925955e-09, "loss": 0.6013, "step": 27152 }, { "epoch": 1.9616739212888543, "grad_norm": 10.570320800417104, "learning_rate": 4.78697905640757e-09, "loss": 0.6065, "step": 27153 }, { "epoch": 1.9617461664890639, "grad_norm": 7.853467294600728, "learning_rate": 4.768903693651017e-09, "loss": 0.5866, "step": 27154 }, { "epoch": 1.9618184116892734, "grad_norm": 5.494487948106689, "learning_rate": 4.750862488671071e-09, "loss": 0.5673, "step": 27155 }, { "epoch": 1.961890656889483, "grad_norm": 6.247730606054496, "learning_rate": 4.732855441714202e-09, "loss": 0.6078, "step": 27156 }, { "epoch": 1.9619629020896925, "grad_norm": 7.778233445156024, "learning_rate": 4.714882553027433e-09, "loss": 0.6392, "step": 27157 }, { "epoch": 1.9620351472899018, "grad_norm": 7.985504846826618, "learning_rate": 4.696943822856126e-09, "loss": 0.5854, "step": 27158 }, { "epoch": 1.9621073924901116, "grad_norm": 8.995268107601724, "learning_rate": 4.679039251446193e-09, "loss": 0.568, "step": 27159 }, { "epoch": 1.9621796376903209, "grad_norm": 7.712666821028087, "learning_rate": 4.661168839042996e-09, "loss": 0.6435, "step": 27160 }, { "epoch": 1.9622518828905304, "grad_norm": 7.137308297864786, "learning_rate": 4.643332585890503e-09, "loss": 0.5522, "step": 27161 }, { "epoch": 1.96232412809074, "grad_norm": 7.181921079137733, "learning_rate": 4.6255304922335206e-09, "loss": 0.5869, "step": 27162 }, { "epoch": 1.9623963732909495, "grad_norm": 7.832527795590717, "learning_rate": 4.607762558315465e-09, "loss": 0.5303, "step": 27163 }, { "epoch": 1.962468618491159, "grad_norm": 5.567036354139476, "learning_rate": 4.590028784379752e-09, "loss": 0.6072, "step": 27164 }, { "epoch": 1.9625408636913684, "grad_norm": 8.328142405880028, "learning_rate": 4.572329170668688e-09, "loss": 0.5802, "step": 27165 }, { "epoch": 1.9626131088915781, "grad_norm": 8.05892841570594, "learning_rate": 4.554663717424856e-09, "loss": 0.6229, "step": 27166 }, { "epoch": 1.9626853540917875, "grad_norm": 6.841957170991901, "learning_rate": 4.5370324248902864e-09, "loss": 0.6019, "step": 27167 }, { "epoch": 1.962757599291997, "grad_norm": 8.56483639215113, "learning_rate": 4.519435293306174e-09, "loss": 0.596, "step": 27168 }, { "epoch": 1.9628298444922065, "grad_norm": 7.498812772243621, "learning_rate": 4.501872322913159e-09, "loss": 0.6224, "step": 27169 }, { "epoch": 1.962902089692416, "grad_norm": 6.10547187399952, "learning_rate": 4.4843435139518835e-09, "loss": 0.4969, "step": 27170 }, { "epoch": 1.9629743348926256, "grad_norm": 7.114971397916092, "learning_rate": 4.466848866662432e-09, "loss": 0.6462, "step": 27171 }, { "epoch": 1.963046580092835, "grad_norm": 7.4953705805090225, "learning_rate": 4.449388381284337e-09, "loss": 0.6656, "step": 27172 }, { "epoch": 1.9631188252930447, "grad_norm": 9.221453771274989, "learning_rate": 4.431962058056016e-09, "loss": 0.7036, "step": 27173 }, { "epoch": 1.963191070493254, "grad_norm": 7.247340273509935, "learning_rate": 4.414569897216725e-09, "loss": 0.5751, "step": 27174 }, { "epoch": 1.9632633156934636, "grad_norm": 7.362487005406879, "learning_rate": 4.397211899004328e-09, "loss": 0.6359, "step": 27175 }, { "epoch": 1.9633355608936731, "grad_norm": 8.975550608627158, "learning_rate": 4.379888063656135e-09, "loss": 0.568, "step": 27176 }, { "epoch": 1.9634078060938827, "grad_norm": 7.812310056284718, "learning_rate": 4.362598391409734e-09, "loss": 0.5548, "step": 27177 }, { "epoch": 1.9634800512940922, "grad_norm": 8.35725504295683, "learning_rate": 4.345342882501602e-09, "loss": 0.5982, "step": 27178 }, { "epoch": 1.9635522964943015, "grad_norm": 6.991742304262847, "learning_rate": 4.3281215371679395e-09, "loss": 0.6352, "step": 27179 }, { "epoch": 1.9636245416945113, "grad_norm": 6.8342196502262835, "learning_rate": 4.310934355644669e-09, "loss": 0.5579, "step": 27180 }, { "epoch": 1.9636967868947206, "grad_norm": 7.886844749256635, "learning_rate": 4.29378133816688e-09, "loss": 0.6198, "step": 27181 }, { "epoch": 1.9637690320949301, "grad_norm": 7.896409495996696, "learning_rate": 4.276662484969385e-09, "loss": 0.5333, "step": 27182 }, { "epoch": 1.9638412772951397, "grad_norm": 7.425147561812982, "learning_rate": 4.2595777962864405e-09, "loss": 0.5628, "step": 27183 }, { "epoch": 1.9639135224953492, "grad_norm": 7.088089486825218, "learning_rate": 4.242527272352304e-09, "loss": 0.5782, "step": 27184 }, { "epoch": 1.9639857676955588, "grad_norm": 7.739817174679318, "learning_rate": 4.225510913400121e-09, "loss": 0.5981, "step": 27185 }, { "epoch": 1.964058012895768, "grad_norm": 7.455501793207845, "learning_rate": 4.208528719662486e-09, "loss": 0.5261, "step": 27186 }, { "epoch": 1.9641302580959779, "grad_norm": 7.551556700837056, "learning_rate": 4.191580691372543e-09, "loss": 0.5972, "step": 27187 }, { "epoch": 1.9642025032961872, "grad_norm": 7.106903652703377, "learning_rate": 4.174666828762053e-09, "loss": 0.5874, "step": 27188 }, { "epoch": 1.9642747484963967, "grad_norm": 6.731660300158388, "learning_rate": 4.1577871320624965e-09, "loss": 0.6207, "step": 27189 }, { "epoch": 1.9643469936966063, "grad_norm": 7.859141776717909, "learning_rate": 4.1409416015048e-09, "loss": 0.6445, "step": 27190 }, { "epoch": 1.9644192388968158, "grad_norm": 5.869157848872398, "learning_rate": 4.124130237319613e-09, "loss": 0.532, "step": 27191 }, { "epoch": 1.9644914840970253, "grad_norm": 8.898457648545012, "learning_rate": 4.107353039737305e-09, "loss": 0.6086, "step": 27192 }, { "epoch": 1.9645637292972347, "grad_norm": 7.393257430397572, "learning_rate": 4.090610008987416e-09, "loss": 0.5588, "step": 27193 }, { "epoch": 1.9646359744974444, "grad_norm": 6.854365507293024, "learning_rate": 4.073901145299208e-09, "loss": 0.5258, "step": 27194 }, { "epoch": 1.9647082196976537, "grad_norm": 8.362676407075258, "learning_rate": 4.0572264489011085e-09, "loss": 0.5894, "step": 27195 }, { "epoch": 1.9647804648978635, "grad_norm": 7.877813411907649, "learning_rate": 4.040585920021544e-09, "loss": 0.6327, "step": 27196 }, { "epoch": 1.9648527100980728, "grad_norm": 6.309577189696956, "learning_rate": 4.023979558888669e-09, "loss": 0.6068, "step": 27197 }, { "epoch": 1.9649249552982824, "grad_norm": 6.67317419334287, "learning_rate": 4.007407365729244e-09, "loss": 0.5749, "step": 27198 }, { "epoch": 1.964997200498492, "grad_norm": 7.991903738165734, "learning_rate": 3.990869340770586e-09, "loss": 0.5667, "step": 27199 }, { "epoch": 1.9650694456987012, "grad_norm": 8.869998435930976, "learning_rate": 3.974365484238907e-09, "loss": 0.559, "step": 27200 }, { "epoch": 1.965141690898911, "grad_norm": 6.87842072436522, "learning_rate": 3.957895796360134e-09, "loss": 0.5546, "step": 27201 }, { "epoch": 1.9652139360991203, "grad_norm": 6.642262550850696, "learning_rate": 3.941460277359643e-09, "loss": 0.5821, "step": 27202 }, { "epoch": 1.96528618129933, "grad_norm": 7.544793957626484, "learning_rate": 3.925058927462533e-09, "loss": 0.6157, "step": 27203 }, { "epoch": 1.9653584264995394, "grad_norm": 8.007145075082502, "learning_rate": 3.908691746893067e-09, "loss": 0.5726, "step": 27204 }, { "epoch": 1.965430671699749, "grad_norm": 7.140586802841628, "learning_rate": 3.89235873587579e-09, "loss": 0.5475, "step": 27205 }, { "epoch": 1.9655029168999585, "grad_norm": 6.3279158192664555, "learning_rate": 3.876059894633855e-09, "loss": 0.6379, "step": 27206 }, { "epoch": 1.9655751621001678, "grad_norm": 7.338742111064067, "learning_rate": 3.8597952233906945e-09, "loss": 0.6285, "step": 27207 }, { "epoch": 1.9656474073003776, "grad_norm": 7.564245857927274, "learning_rate": 3.843564722368909e-09, "loss": 0.5797, "step": 27208 }, { "epoch": 1.965719652500587, "grad_norm": 6.704835473416625, "learning_rate": 3.827368391790265e-09, "loss": 0.5579, "step": 27209 }, { "epoch": 1.9657918977007967, "grad_norm": 7.9558259166239305, "learning_rate": 3.811206231877085e-09, "loss": 0.626, "step": 27210 }, { "epoch": 1.965864142901006, "grad_norm": 6.986166773280039, "learning_rate": 3.795078242850026e-09, "loss": 0.6578, "step": 27211 }, { "epoch": 1.9659363881012155, "grad_norm": 8.196321118075199, "learning_rate": 3.778984424930298e-09, "loss": 0.6901, "step": 27212 }, { "epoch": 1.966008633301425, "grad_norm": 7.812843010048148, "learning_rate": 3.762924778338284e-09, "loss": 0.5526, "step": 27213 }, { "epoch": 1.9660808785016346, "grad_norm": 7.088240714952157, "learning_rate": 3.74689930329325e-09, "loss": 0.6213, "step": 27214 }, { "epoch": 1.9661531237018441, "grad_norm": 6.294780070346636, "learning_rate": 3.730908000015299e-09, "loss": 0.5633, "step": 27215 }, { "epoch": 1.9662253689020535, "grad_norm": 7.071849204980159, "learning_rate": 3.7149508687228665e-09, "loss": 0.6209, "step": 27216 }, { "epoch": 1.9662976141022632, "grad_norm": 8.289092413500454, "learning_rate": 3.6990279096343897e-09, "loss": 0.6192, "step": 27217 }, { "epoch": 1.9663698593024725, "grad_norm": 8.02963442453635, "learning_rate": 3.6831391229683045e-09, "loss": 0.5903, "step": 27218 }, { "epoch": 1.966442104502682, "grad_norm": 7.553772990311541, "learning_rate": 3.6672845089413823e-09, "loss": 0.5668, "step": 27219 }, { "epoch": 1.9665143497028916, "grad_norm": 6.9420107362005306, "learning_rate": 3.6514640677712266e-09, "loss": 0.5898, "step": 27220 }, { "epoch": 1.9665865949031012, "grad_norm": 6.497114274544074, "learning_rate": 3.635677799674331e-09, "loss": 0.6303, "step": 27221 }, { "epoch": 1.9666588401033107, "grad_norm": 9.286355820177542, "learning_rate": 3.619925704866634e-09, "loss": 0.6517, "step": 27222 }, { "epoch": 1.96673108530352, "grad_norm": 7.4167257013721715, "learning_rate": 3.6042077835637955e-09, "loss": 0.5568, "step": 27223 }, { "epoch": 1.9668033305037298, "grad_norm": 6.640163127320132, "learning_rate": 3.5885240359809227e-09, "loss": 0.5677, "step": 27224 }, { "epoch": 1.9668755757039391, "grad_norm": 7.03409962871368, "learning_rate": 3.5728744623331203e-09, "loss": 0.6065, "step": 27225 }, { "epoch": 1.9669478209041487, "grad_norm": 7.503953273294231, "learning_rate": 3.557259062834106e-09, "loss": 0.6683, "step": 27226 }, { "epoch": 1.9670200661043582, "grad_norm": 6.494427566499137, "learning_rate": 3.5416778376978765e-09, "loss": 0.5189, "step": 27227 }, { "epoch": 1.9670923113045677, "grad_norm": 7.919247042766455, "learning_rate": 3.5261307871375937e-09, "loss": 0.6103, "step": 27228 }, { "epoch": 1.9671645565047773, "grad_norm": 8.072394871409657, "learning_rate": 3.510617911366421e-09, "loss": 0.5801, "step": 27229 }, { "epoch": 1.9672368017049866, "grad_norm": 6.821958642535074, "learning_rate": 3.49513921059641e-09, "loss": 0.6012, "step": 27230 }, { "epoch": 1.9673090469051964, "grad_norm": 7.549360225425226, "learning_rate": 3.479694685039614e-09, "loss": 0.5845, "step": 27231 }, { "epoch": 1.9673812921054057, "grad_norm": 7.401813707523898, "learning_rate": 3.4642843349069756e-09, "loss": 0.6716, "step": 27232 }, { "epoch": 1.9674535373056152, "grad_norm": 7.98065445245576, "learning_rate": 3.448908160410269e-09, "loss": 0.6353, "step": 27233 }, { "epoch": 1.9675257825058248, "grad_norm": 8.054671397595447, "learning_rate": 3.4335661617593276e-09, "loss": 0.6403, "step": 27234 }, { "epoch": 1.9675980277060343, "grad_norm": 7.236516845548568, "learning_rate": 3.4182583391645374e-09, "loss": 0.6036, "step": 27235 }, { "epoch": 1.9676702729062439, "grad_norm": 7.568214968380404, "learning_rate": 3.4029846928354537e-09, "loss": 0.6359, "step": 27236 }, { "epoch": 1.9677425181064532, "grad_norm": 7.0025867042498735, "learning_rate": 3.3877452229807985e-09, "loss": 0.6267, "step": 27237 }, { "epoch": 1.967814763306663, "grad_norm": 8.165486568382653, "learning_rate": 3.3725399298095705e-09, "loss": 0.5908, "step": 27238 }, { "epoch": 1.9678870085068723, "grad_norm": 7.051614660319078, "learning_rate": 3.35736881352966e-09, "loss": 0.5144, "step": 27239 }, { "epoch": 1.9679592537070818, "grad_norm": 7.3635094564271855, "learning_rate": 3.3422318743489556e-09, "loss": 0.5751, "step": 27240 }, { "epoch": 1.9680314989072913, "grad_norm": 6.916371932893092, "learning_rate": 3.327129112474237e-09, "loss": 0.5773, "step": 27241 }, { "epoch": 1.9681037441075009, "grad_norm": 7.117467278049233, "learning_rate": 3.312060528112837e-09, "loss": 0.5577, "step": 27242 }, { "epoch": 1.9681759893077104, "grad_norm": 6.554590890602102, "learning_rate": 3.297026121470981e-09, "loss": 0.5741, "step": 27243 }, { "epoch": 1.9682482345079197, "grad_norm": 6.834602269151456, "learning_rate": 3.282025892754059e-09, "loss": 0.5332, "step": 27244 }, { "epoch": 1.9683204797081295, "grad_norm": 6.722708454184291, "learning_rate": 3.2670598421674636e-09, "loss": 0.5389, "step": 27245 }, { "epoch": 1.9683927249083388, "grad_norm": 7.706713724531667, "learning_rate": 3.2521279699165853e-09, "loss": 0.6339, "step": 27246 }, { "epoch": 1.9684649701085484, "grad_norm": 7.901296639566558, "learning_rate": 3.23723027620515e-09, "loss": 0.5574, "step": 27247 }, { "epoch": 1.968537215308758, "grad_norm": 7.928038958487459, "learning_rate": 3.2223667612374386e-09, "loss": 0.6074, "step": 27248 }, { "epoch": 1.9686094605089675, "grad_norm": 7.55168526137479, "learning_rate": 3.207537425217178e-09, "loss": 0.5283, "step": 27249 }, { "epoch": 1.968681705709177, "grad_norm": 8.111809455153564, "learning_rate": 3.1927422683469824e-09, "loss": 0.6237, "step": 27250 }, { "epoch": 1.9687539509093863, "grad_norm": 7.302861569917302, "learning_rate": 3.177981290829468e-09, "loss": 0.5651, "step": 27251 }, { "epoch": 1.968826196109596, "grad_norm": 7.811643996073216, "learning_rate": 3.1632544928666963e-09, "loss": 0.6203, "step": 27252 }, { "epoch": 1.9688984413098054, "grad_norm": 6.901091293060558, "learning_rate": 3.148561874660172e-09, "loss": 0.5696, "step": 27253 }, { "epoch": 1.968970686510015, "grad_norm": 8.125019249526476, "learning_rate": 3.1339034364114007e-09, "loss": 0.5885, "step": 27254 }, { "epoch": 1.9690429317102245, "grad_norm": 7.453365042157814, "learning_rate": 3.1192791783207775e-09, "loss": 0.6378, "step": 27255 }, { "epoch": 1.969115176910434, "grad_norm": 7.853954956547221, "learning_rate": 3.1046891005884204e-09, "loss": 0.6544, "step": 27256 }, { "epoch": 1.9691874221106436, "grad_norm": 7.678825384357119, "learning_rate": 3.0901332034141695e-09, "loss": 0.6024, "step": 27257 }, { "epoch": 1.969259667310853, "grad_norm": 8.196282023055627, "learning_rate": 3.0756114869973097e-09, "loss": 0.6346, "step": 27258 }, { "epoch": 1.9693319125110627, "grad_norm": 7.964499383341435, "learning_rate": 3.061123951536571e-09, "loss": 0.6066, "step": 27259 }, { "epoch": 1.969404157711272, "grad_norm": 7.07358268856751, "learning_rate": 3.046670597230128e-09, "loss": 0.5849, "step": 27260 }, { "epoch": 1.9694764029114815, "grad_norm": 6.775176391557547, "learning_rate": 3.0322514242764335e-09, "loss": 0.6576, "step": 27261 }, { "epoch": 1.969548648111691, "grad_norm": 8.083588219258521, "learning_rate": 3.0178664328719964e-09, "loss": 0.569, "step": 27262 }, { "epoch": 1.9696208933119006, "grad_norm": 7.9918624498598865, "learning_rate": 3.003515623214437e-09, "loss": 0.5876, "step": 27263 }, { "epoch": 1.9696931385121101, "grad_norm": 6.732376828885418, "learning_rate": 2.9891989954999866e-09, "loss": 0.5725, "step": 27264 }, { "epoch": 1.9697653837123195, "grad_norm": 6.565955714485141, "learning_rate": 2.9749165499243225e-09, "loss": 0.5355, "step": 27265 }, { "epoch": 1.9698376289125292, "grad_norm": 5.8362358864691, "learning_rate": 2.960668286683399e-09, "loss": 0.5626, "step": 27266 }, { "epoch": 1.9699098741127385, "grad_norm": 6.9115378732902695, "learning_rate": 2.946454205972338e-09, "loss": 0.6166, "step": 27267 }, { "epoch": 1.9699821193129483, "grad_norm": 6.753988147457478, "learning_rate": 2.93227430798515e-09, "loss": 0.5306, "step": 27268 }, { "epoch": 1.9700543645131576, "grad_norm": 6.535005064935334, "learning_rate": 2.9181285929164025e-09, "loss": 0.5517, "step": 27269 }, { "epoch": 1.9701266097133672, "grad_norm": 8.19021305240285, "learning_rate": 2.904017060959552e-09, "loss": 0.5854, "step": 27270 }, { "epoch": 1.9701988549135767, "grad_norm": 7.681475007352053, "learning_rate": 2.889939712308054e-09, "loss": 0.6185, "step": 27271 }, { "epoch": 1.970271100113786, "grad_norm": 7.649620455632709, "learning_rate": 2.8758965471542556e-09, "loss": 0.5875, "step": 27272 }, { "epoch": 1.9703433453139958, "grad_norm": 7.48725469375265, "learning_rate": 2.861887565690502e-09, "loss": 0.6234, "step": 27273 }, { "epoch": 1.9704155905142051, "grad_norm": 7.350315746350283, "learning_rate": 2.847912768108585e-09, "loss": 0.6499, "step": 27274 }, { "epoch": 1.9704878357144149, "grad_norm": 7.395707106968241, "learning_rate": 2.8339721546000177e-09, "loss": 0.6888, "step": 27275 }, { "epoch": 1.9705600809146242, "grad_norm": 7.522705296015731, "learning_rate": 2.8200657253552034e-09, "loss": 0.5795, "step": 27276 }, { "epoch": 1.9706323261148337, "grad_norm": 7.129461397477038, "learning_rate": 2.806193480564823e-09, "loss": 0.6008, "step": 27277 }, { "epoch": 1.9707045713150433, "grad_norm": 8.680486468909105, "learning_rate": 2.792355420418724e-09, "loss": 0.6109, "step": 27278 }, { "epoch": 1.9707768165152526, "grad_norm": 7.999987840643225, "learning_rate": 2.7785515451064782e-09, "loss": 0.6183, "step": 27279 }, { "epoch": 1.9708490617154624, "grad_norm": 7.965852816628701, "learning_rate": 2.764781854816545e-09, "loss": 0.6122, "step": 27280 }, { "epoch": 1.9709213069156717, "grad_norm": 7.616899784229673, "learning_rate": 2.751046349738218e-09, "loss": 0.5719, "step": 27281 }, { "epoch": 1.9709935521158815, "grad_norm": 6.388533046966846, "learning_rate": 2.7373450300588467e-09, "loss": 0.5449, "step": 27282 }, { "epoch": 1.9710657973160908, "grad_norm": 6.05481665688856, "learning_rate": 2.7236778959660594e-09, "loss": 0.6015, "step": 27283 }, { "epoch": 1.9711380425163003, "grad_norm": 7.723482850852465, "learning_rate": 2.7100449476472057e-09, "loss": 0.6285, "step": 27284 }, { "epoch": 1.9712102877165099, "grad_norm": 6.76827704273379, "learning_rate": 2.696446185289081e-09, "loss": 0.5806, "step": 27285 }, { "epoch": 1.9712825329167194, "grad_norm": 9.178166845458106, "learning_rate": 2.6828816090773703e-09, "loss": 0.6803, "step": 27286 }, { "epoch": 1.971354778116929, "grad_norm": 7.396042626513393, "learning_rate": 2.669351219197758e-09, "loss": 0.5859, "step": 27287 }, { "epoch": 1.9714270233171383, "grad_norm": 7.485235558971476, "learning_rate": 2.6558550158359287e-09, "loss": 0.6242, "step": 27288 }, { "epoch": 1.971499268517348, "grad_norm": 5.934932434674416, "learning_rate": 2.6423929991764575e-09, "loss": 0.5373, "step": 27289 }, { "epoch": 1.9715715137175573, "grad_norm": 6.9341626087065755, "learning_rate": 2.6289651694033634e-09, "loss": 0.5719, "step": 27290 }, { "epoch": 1.9716437589177669, "grad_norm": 7.799653255873958, "learning_rate": 2.6155715267006663e-09, "loss": 0.5814, "step": 27291 }, { "epoch": 1.9717160041179764, "grad_norm": 7.153167727148221, "learning_rate": 2.6022120712518305e-09, "loss": 0.5675, "step": 27292 }, { "epoch": 1.971788249318186, "grad_norm": 8.130247329009086, "learning_rate": 2.588886803239488e-09, "loss": 0.6394, "step": 27293 }, { "epoch": 1.9718604945183955, "grad_norm": 8.639757440482406, "learning_rate": 2.575595722846269e-09, "loss": 0.583, "step": 27294 }, { "epoch": 1.9719327397186048, "grad_norm": 6.927181508986701, "learning_rate": 2.5623388302539743e-09, "loss": 0.616, "step": 27295 }, { "epoch": 1.9720049849188146, "grad_norm": 6.880382026445478, "learning_rate": 2.5491161256441243e-09, "loss": 0.5284, "step": 27296 }, { "epoch": 1.972077230119024, "grad_norm": 8.461920768264077, "learning_rate": 2.5359276091979635e-09, "loss": 0.6241, "step": 27297 }, { "epoch": 1.9721494753192335, "grad_norm": 7.5100648103601975, "learning_rate": 2.5227732810953477e-09, "loss": 0.622, "step": 27298 }, { "epoch": 1.972221720519443, "grad_norm": 7.9431984938834574, "learning_rate": 2.5096531415169655e-09, "loss": 0.5345, "step": 27299 }, { "epoch": 1.9722939657196525, "grad_norm": 6.292298577850726, "learning_rate": 2.496567190642396e-09, "loss": 0.5, "step": 27300 }, { "epoch": 1.972366210919862, "grad_norm": 7.247603711020764, "learning_rate": 2.4835154286506623e-09, "loss": 0.5847, "step": 27301 }, { "epoch": 1.9724384561200714, "grad_norm": 6.978075023699369, "learning_rate": 2.470497855720233e-09, "loss": 0.5606, "step": 27302 }, { "epoch": 1.9725107013202812, "grad_norm": 7.4767533201223735, "learning_rate": 2.457514472029576e-09, "loss": 0.5648, "step": 27303 }, { "epoch": 1.9725829465204905, "grad_norm": 8.015723036376595, "learning_rate": 2.4445652777563276e-09, "loss": 0.6415, "step": 27304 }, { "epoch": 1.9726551917207, "grad_norm": 7.17690630852716, "learning_rate": 2.4316502730775682e-09, "loss": 0.6125, "step": 27305 }, { "epoch": 1.9727274369209096, "grad_norm": 8.251579826985454, "learning_rate": 2.4187694581706557e-09, "loss": 0.6205, "step": 27306 }, { "epoch": 1.9727996821211191, "grad_norm": 7.612742092248097, "learning_rate": 2.4059228332112825e-09, "loss": 0.6138, "step": 27307 }, { "epoch": 1.9728719273213287, "grad_norm": 8.897324972293507, "learning_rate": 2.39311039837542e-09, "loss": 0.651, "step": 27308 }, { "epoch": 1.972944172521538, "grad_norm": 7.549994856472663, "learning_rate": 2.3803321538387604e-09, "loss": 0.6325, "step": 27309 }, { "epoch": 1.9730164177217477, "grad_norm": 6.952774690525848, "learning_rate": 2.3675880997761636e-09, "loss": 0.6091, "step": 27310 }, { "epoch": 1.973088662921957, "grad_norm": 7.954707200028396, "learning_rate": 2.354878236361935e-09, "loss": 0.5611, "step": 27311 }, { "epoch": 1.9731609081221666, "grad_norm": 6.900909983020355, "learning_rate": 2.3422025637701017e-09, "loss": 0.578, "step": 27312 }, { "epoch": 1.9732331533223761, "grad_norm": 6.387848416435492, "learning_rate": 2.329561082174414e-09, "loss": 0.5629, "step": 27313 }, { "epoch": 1.9733053985225857, "grad_norm": 6.559449276951117, "learning_rate": 2.3169537917475118e-09, "loss": 0.5361, "step": 27314 }, { "epoch": 1.9733776437227952, "grad_norm": 7.10879762421954, "learning_rate": 2.3043806926623112e-09, "loss": 0.578, "step": 27315 }, { "epoch": 1.9734498889230045, "grad_norm": 8.48610404084184, "learning_rate": 2.2918417850906203e-09, "loss": 0.6207, "step": 27316 }, { "epoch": 1.9735221341232143, "grad_norm": 6.510964462630907, "learning_rate": 2.2793370692045237e-09, "loss": 0.5357, "step": 27317 }, { "epoch": 1.9735943793234236, "grad_norm": 6.916623157532895, "learning_rate": 2.2668665451747174e-09, "loss": 0.53, "step": 27318 }, { "epoch": 1.9736666245236332, "grad_norm": 7.192605237247276, "learning_rate": 2.2544302131721764e-09, "loss": 0.6305, "step": 27319 }, { "epoch": 1.9737388697238427, "grad_norm": 8.281518089705365, "learning_rate": 2.2420280733673194e-09, "loss": 0.606, "step": 27320 }, { "epoch": 1.9738111149240523, "grad_norm": 7.245923178585386, "learning_rate": 2.229660125929456e-09, "loss": 0.657, "step": 27321 }, { "epoch": 1.9738833601242618, "grad_norm": 9.747094919769685, "learning_rate": 2.2173263710281723e-09, "loss": 0.5959, "step": 27322 }, { "epoch": 1.9739556053244711, "grad_norm": 7.063623659660972, "learning_rate": 2.2050268088325e-09, "loss": 0.6972, "step": 27323 }, { "epoch": 1.9740278505246809, "grad_norm": 7.176227254613895, "learning_rate": 2.1927614395103604e-09, "loss": 0.6355, "step": 27324 }, { "epoch": 1.9741000957248902, "grad_norm": 7.769526585672934, "learning_rate": 2.1805302632299517e-09, "loss": 0.6136, "step": 27325 }, { "epoch": 1.9741723409250997, "grad_norm": 7.160018528322936, "learning_rate": 2.168333280158641e-09, "loss": 0.6319, "step": 27326 }, { "epoch": 1.9742445861253093, "grad_norm": 7.987932879399464, "learning_rate": 2.156170490463516e-09, "loss": 0.6319, "step": 27327 }, { "epoch": 1.9743168313255188, "grad_norm": 6.809668276153399, "learning_rate": 2.144041894310833e-09, "loss": 0.6228, "step": 27328 }, { "epoch": 1.9743890765257284, "grad_norm": 8.818468169663879, "learning_rate": 2.1319474918668483e-09, "loss": 0.604, "step": 27329 }, { "epoch": 1.9744613217259377, "grad_norm": 6.285800481180498, "learning_rate": 2.1198872832972615e-09, "loss": 0.5816, "step": 27330 }, { "epoch": 1.9745335669261475, "grad_norm": 7.805073618764652, "learning_rate": 2.1078612687666645e-09, "loss": 0.6074, "step": 27331 }, { "epoch": 1.9746058121263568, "grad_norm": 7.886909320072526, "learning_rate": 2.0958694484399245e-09, "loss": 0.6093, "step": 27332 }, { "epoch": 1.9746780573265663, "grad_norm": 7.762893779797955, "learning_rate": 2.083911822481355e-09, "loss": 0.6528, "step": 27333 }, { "epoch": 1.9747503025267759, "grad_norm": 5.939651902399642, "learning_rate": 2.0719883910544357e-09, "loss": 0.598, "step": 27334 }, { "epoch": 1.9748225477269854, "grad_norm": 7.0464987643812, "learning_rate": 2.060099154322648e-09, "loss": 0.5717, "step": 27335 }, { "epoch": 1.974894792927195, "grad_norm": 7.230305723952232, "learning_rate": 2.048244112448361e-09, "loss": 0.6612, "step": 27336 }, { "epoch": 1.9749670381274043, "grad_norm": 8.056045669320145, "learning_rate": 2.0364232655939453e-09, "loss": 0.5734, "step": 27337 }, { "epoch": 1.975039283327614, "grad_norm": 7.1266121629867625, "learning_rate": 2.024636613921771e-09, "loss": 0.5521, "step": 27338 }, { "epoch": 1.9751115285278233, "grad_norm": 7.441002285623116, "learning_rate": 2.012884157592543e-09, "loss": 0.6294, "step": 27339 }, { "epoch": 1.975183773728033, "grad_norm": 7.833475503916444, "learning_rate": 2.0011658967672433e-09, "loss": 0.6053, "step": 27340 }, { "epoch": 1.9752560189282424, "grad_norm": 7.748951010314487, "learning_rate": 1.9894818316065768e-09, "loss": 0.5726, "step": 27341 }, { "epoch": 1.975328264128452, "grad_norm": 6.50959949298588, "learning_rate": 1.9778319622704156e-09, "loss": 0.5516, "step": 27342 }, { "epoch": 1.9754005093286615, "grad_norm": 8.856604625679385, "learning_rate": 1.9662162889180768e-09, "loss": 0.6313, "step": 27343 }, { "epoch": 1.9754727545288708, "grad_norm": 7.840049326605255, "learning_rate": 1.9546348117086e-09, "loss": 0.5873, "step": 27344 }, { "epoch": 1.9755449997290806, "grad_norm": 8.541463362592602, "learning_rate": 1.9430875308004694e-09, "loss": 0.6087, "step": 27345 }, { "epoch": 1.97561724492929, "grad_norm": 8.079051925633962, "learning_rate": 1.931574446352169e-09, "loss": 0.5398, "step": 27346 }, { "epoch": 1.9756894901294997, "grad_norm": 6.813939213747257, "learning_rate": 1.9200955585205185e-09, "loss": 0.6397, "step": 27347 }, { "epoch": 1.975761735329709, "grad_norm": 7.071592436373977, "learning_rate": 1.9086508674634465e-09, "loss": 0.6136, "step": 27348 }, { "epoch": 1.9758339805299185, "grad_norm": 7.771154511251474, "learning_rate": 1.897240373337217e-09, "loss": 0.5931, "step": 27349 }, { "epoch": 1.975906225730128, "grad_norm": 6.8951127366330205, "learning_rate": 1.8858640762983716e-09, "loss": 0.5744, "step": 27350 }, { "epoch": 1.9759784709303374, "grad_norm": 8.41880886904065, "learning_rate": 1.874521976502064e-09, "loss": 0.6156, "step": 27351 }, { "epoch": 1.9760507161305472, "grad_norm": 7.009494609381983, "learning_rate": 1.863214074104003e-09, "loss": 0.6137, "step": 27352 }, { "epoch": 1.9761229613307565, "grad_norm": 7.137842750670596, "learning_rate": 1.8519403692587867e-09, "loss": 0.5947, "step": 27353 }, { "epoch": 1.9761952065309663, "grad_norm": 6.168322504105021, "learning_rate": 1.8407008621207368e-09, "loss": 0.6474, "step": 27354 }, { "epoch": 1.9762674517311756, "grad_norm": 8.276280938433338, "learning_rate": 1.8294955528438963e-09, "loss": 0.6374, "step": 27355 }, { "epoch": 1.9763396969313851, "grad_norm": 6.941861542908228, "learning_rate": 1.8183244415817535e-09, "loss": 0.5372, "step": 27356 }, { "epoch": 1.9764119421315947, "grad_norm": 6.287442774730265, "learning_rate": 1.807187528486687e-09, "loss": 0.6302, "step": 27357 }, { "epoch": 1.9764841873318042, "grad_norm": 7.118347542415946, "learning_rate": 1.7960848137119068e-09, "loss": 0.5402, "step": 27358 }, { "epoch": 1.9765564325320137, "grad_norm": 6.318104286291968, "learning_rate": 1.7850162974086815e-09, "loss": 0.594, "step": 27359 }, { "epoch": 1.976628677732223, "grad_norm": 6.179101040835274, "learning_rate": 1.7739819797288337e-09, "loss": 0.5406, "step": 27360 }, { "epoch": 1.9767009229324328, "grad_norm": 7.2187824991658855, "learning_rate": 1.7629818608236314e-09, "loss": 0.5227, "step": 27361 }, { "epoch": 1.9767731681326421, "grad_norm": 7.210718484282292, "learning_rate": 1.7520159408432325e-09, "loss": 0.6475, "step": 27362 }, { "epoch": 1.9768454133328517, "grad_norm": 6.531799585317503, "learning_rate": 1.7410842199383493e-09, "loss": 0.537, "step": 27363 }, { "epoch": 1.9769176585330612, "grad_norm": 7.767044029836948, "learning_rate": 1.730186698257752e-09, "loss": 0.5437, "step": 27364 }, { "epoch": 1.9769899037332708, "grad_norm": 8.44259286401862, "learning_rate": 1.7193233759513206e-09, "loss": 0.6409, "step": 27365 }, { "epoch": 1.9770621489334803, "grad_norm": 6.431083370085346, "learning_rate": 1.7084942531675474e-09, "loss": 0.5342, "step": 27366 }, { "epoch": 1.9771343941336896, "grad_norm": 8.308922019108671, "learning_rate": 1.697699330054925e-09, "loss": 0.5987, "step": 27367 }, { "epoch": 1.9772066393338994, "grad_norm": 7.4847445147887415, "learning_rate": 1.6869386067608351e-09, "loss": 0.5865, "step": 27368 }, { "epoch": 1.9772788845341087, "grad_norm": 7.724957769087135, "learning_rate": 1.67621208343266e-09, "loss": 0.5895, "step": 27369 }, { "epoch": 1.9773511297343183, "grad_norm": 6.561412030815834, "learning_rate": 1.6655197602172267e-09, "loss": 0.5169, "step": 27370 }, { "epoch": 1.9774233749345278, "grad_norm": 7.912938361055226, "learning_rate": 1.6548616372613624e-09, "loss": 0.521, "step": 27371 }, { "epoch": 1.9774956201347373, "grad_norm": 8.838470932260295, "learning_rate": 1.6442377147102285e-09, "loss": 0.6217, "step": 27372 }, { "epoch": 1.9775678653349469, "grad_norm": 6.664466335855261, "learning_rate": 1.6336479927098193e-09, "loss": 0.6279, "step": 27373 }, { "epoch": 1.9776401105351562, "grad_norm": 7.33145658001988, "learning_rate": 1.6230924714047413e-09, "loss": 0.5382, "step": 27374 }, { "epoch": 1.977712355735366, "grad_norm": 6.705819679809844, "learning_rate": 1.612571150939879e-09, "loss": 0.5844, "step": 27375 }, { "epoch": 1.9777846009355753, "grad_norm": 6.254881516515283, "learning_rate": 1.6020840314590059e-09, "loss": 0.5363, "step": 27376 }, { "epoch": 1.9778568461357848, "grad_norm": 8.874272706225177, "learning_rate": 1.5916311131056184e-09, "loss": 0.7349, "step": 27377 }, { "epoch": 1.9779290913359944, "grad_norm": 6.882488819006538, "learning_rate": 1.5812123960229354e-09, "loss": 0.614, "step": 27378 }, { "epoch": 1.978001336536204, "grad_norm": 9.46569654123468, "learning_rate": 1.5708278803536202e-09, "loss": 0.599, "step": 27379 }, { "epoch": 1.9780735817364135, "grad_norm": 7.781699294022347, "learning_rate": 1.560477566239782e-09, "loss": 0.5348, "step": 27380 }, { "epoch": 1.9781458269366228, "grad_norm": 7.469079173986268, "learning_rate": 1.5501614538229736e-09, "loss": 0.5526, "step": 27381 }, { "epoch": 1.9782180721368325, "grad_norm": 8.297052931089965, "learning_rate": 1.5398795432447488e-09, "loss": 0.6158, "step": 27382 }, { "epoch": 1.9782903173370419, "grad_norm": 9.229858326594396, "learning_rate": 1.5296318346455509e-09, "loss": 0.6485, "step": 27383 }, { "epoch": 1.9783625625372514, "grad_norm": 8.796198058295511, "learning_rate": 1.519418328165545e-09, "loss": 0.6142, "step": 27384 }, { "epoch": 1.978434807737461, "grad_norm": 7.878067630133733, "learning_rate": 1.5092390239448974e-09, "loss": 0.6818, "step": 27385 }, { "epoch": 1.9785070529376705, "grad_norm": 7.709644707831619, "learning_rate": 1.4990939221229406e-09, "loss": 0.6068, "step": 27386 }, { "epoch": 1.97857929813788, "grad_norm": 7.7571867568478226, "learning_rate": 1.4889830228384527e-09, "loss": 0.5825, "step": 27387 }, { "epoch": 1.9786515433380893, "grad_norm": 7.1024506436344685, "learning_rate": 1.4789063262296566e-09, "loss": 0.6054, "step": 27388 }, { "epoch": 1.978723788538299, "grad_norm": 5.475827939213672, "learning_rate": 1.4688638324344973e-09, "loss": 0.5773, "step": 27389 }, { "epoch": 1.9787960337385084, "grad_norm": 8.141765961160306, "learning_rate": 1.45885554159092e-09, "loss": 0.6637, "step": 27390 }, { "epoch": 1.978868278938718, "grad_norm": 7.297687101406122, "learning_rate": 1.4488814538354823e-09, "loss": 0.5926, "step": 27391 }, { "epoch": 1.9789405241389275, "grad_norm": 7.3270223800673495, "learning_rate": 1.4389415693050191e-09, "loss": 0.5953, "step": 27392 }, { "epoch": 1.979012769339137, "grad_norm": 8.636507180951169, "learning_rate": 1.429035888135255e-09, "loss": 0.535, "step": 27393 }, { "epoch": 1.9790850145393466, "grad_norm": 7.360928575983632, "learning_rate": 1.4191644104619151e-09, "loss": 0.6019, "step": 27394 }, { "epoch": 1.979157259739556, "grad_norm": 7.600326390535855, "learning_rate": 1.4093271364204464e-09, "loss": 0.6439, "step": 27395 }, { "epoch": 1.9792295049397657, "grad_norm": 9.402907680689124, "learning_rate": 1.3995240661449083e-09, "loss": 0.6733, "step": 27396 }, { "epoch": 1.979301750139975, "grad_norm": 8.472787267745641, "learning_rate": 1.389755199770193e-09, "loss": 0.5421, "step": 27397 }, { "epoch": 1.9793739953401845, "grad_norm": 6.891339115844635, "learning_rate": 1.3800205374295272e-09, "loss": 0.5797, "step": 27398 }, { "epoch": 1.979446240540394, "grad_norm": 7.515170965933962, "learning_rate": 1.3703200792564153e-09, "loss": 0.5855, "step": 27399 }, { "epoch": 1.9795184857406036, "grad_norm": 7.413244107804916, "learning_rate": 1.3606538253832512e-09, "loss": 0.5851, "step": 27400 }, { "epoch": 1.9795907309408132, "grad_norm": 7.193570964848985, "learning_rate": 1.3510217759429843e-09, "loss": 0.5846, "step": 27401 }, { "epoch": 1.9796629761410225, "grad_norm": 8.106497958000508, "learning_rate": 1.3414239310671761e-09, "loss": 0.6141, "step": 27402 }, { "epoch": 1.9797352213412323, "grad_norm": 6.829736584879951, "learning_rate": 1.3318602908871103e-09, "loss": 0.5161, "step": 27403 }, { "epoch": 1.9798074665414416, "grad_norm": 7.659188590238514, "learning_rate": 1.3223308555335157e-09, "loss": 0.6036, "step": 27404 }, { "epoch": 1.9798797117416511, "grad_norm": 6.5871302926193405, "learning_rate": 1.3128356251373987e-09, "loss": 0.6459, "step": 27405 }, { "epoch": 1.9799519569418607, "grad_norm": 8.15386904753418, "learning_rate": 1.3033745998286552e-09, "loss": 0.6506, "step": 27406 }, { "epoch": 1.9800242021420702, "grad_norm": 7.854530009020123, "learning_rate": 1.2939477797363487e-09, "loss": 0.5343, "step": 27407 }, { "epoch": 1.9800964473422797, "grad_norm": 6.85444175222309, "learning_rate": 1.2845551649895426e-09, "loss": 0.5955, "step": 27408 }, { "epoch": 1.980168692542489, "grad_norm": 7.047397907081889, "learning_rate": 1.2751967557173006e-09, "loss": 0.6467, "step": 27409 }, { "epoch": 1.9802409377426988, "grad_norm": 7.283959928164585, "learning_rate": 1.2658725520475757e-09, "loss": 0.6872, "step": 27410 }, { "epoch": 1.9803131829429081, "grad_norm": 6.595566630398437, "learning_rate": 1.256582554107766e-09, "loss": 0.5752, "step": 27411 }, { "epoch": 1.9803854281431177, "grad_norm": 7.844468771536871, "learning_rate": 1.2473267620252693e-09, "loss": 0.5919, "step": 27412 }, { "epoch": 1.9804576733433272, "grad_norm": 7.134444552469773, "learning_rate": 1.2381051759266517e-09, "loss": 0.5672, "step": 27413 }, { "epoch": 1.9805299185435368, "grad_norm": 7.141376347318015, "learning_rate": 1.2289177959382004e-09, "loss": 0.6258, "step": 27414 }, { "epoch": 1.9806021637437463, "grad_norm": 6.597604501813785, "learning_rate": 1.2197646221859261e-09, "loss": 0.6245, "step": 27415 }, { "epoch": 1.9806744089439556, "grad_norm": 6.424274079990204, "learning_rate": 1.2106456547947287e-09, "loss": 0.6127, "step": 27416 }, { "epoch": 1.9807466541441654, "grad_norm": 7.137219842065579, "learning_rate": 1.2015608938895085e-09, "loss": 0.5905, "step": 27417 }, { "epoch": 1.9808188993443747, "grad_norm": 6.669784611508532, "learning_rate": 1.1925103395948878e-09, "loss": 0.5766, "step": 27418 }, { "epoch": 1.9808911445445845, "grad_norm": 6.1614775948909974, "learning_rate": 1.1834939920343792e-09, "loss": 0.5596, "step": 27419 }, { "epoch": 1.9809633897447938, "grad_norm": 9.335103502889936, "learning_rate": 1.1745118513317722e-09, "loss": 0.5942, "step": 27420 }, { "epoch": 1.9810356349450033, "grad_norm": 7.347687659837836, "learning_rate": 1.165563917610024e-09, "loss": 0.5516, "step": 27421 }, { "epoch": 1.9811078801452129, "grad_norm": 7.345240145523505, "learning_rate": 1.1566501909912597e-09, "loss": 0.5075, "step": 27422 }, { "epoch": 1.9811801253454222, "grad_norm": 7.55266947258447, "learning_rate": 1.1477706715978809e-09, "loss": 0.6205, "step": 27423 }, { "epoch": 1.981252370545632, "grad_norm": 7.604915244127401, "learning_rate": 1.1389253595511796e-09, "loss": 0.5857, "step": 27424 }, { "epoch": 1.9813246157458413, "grad_norm": 6.546615704702591, "learning_rate": 1.1301142549724476e-09, "loss": 0.5648, "step": 27425 }, { "epoch": 1.981396860946051, "grad_norm": 7.357091334740574, "learning_rate": 1.121337357982144e-09, "loss": 0.5586, "step": 27426 }, { "epoch": 1.9814691061462604, "grad_norm": 7.721730757091299, "learning_rate": 1.1125946687004508e-09, "loss": 0.6091, "step": 27427 }, { "epoch": 1.98154135134647, "grad_norm": 6.186102815342806, "learning_rate": 1.1038861872469942e-09, "loss": 0.6514, "step": 27428 }, { "epoch": 1.9816135965466795, "grad_norm": 7.272014212331113, "learning_rate": 1.0952119137414008e-09, "loss": 0.6216, "step": 27429 }, { "epoch": 1.9816858417468888, "grad_norm": 7.484455020612149, "learning_rate": 1.0865718483016318e-09, "loss": 0.6095, "step": 27430 }, { "epoch": 1.9817580869470985, "grad_norm": 7.974652905220404, "learning_rate": 1.0779659910467589e-09, "loss": 0.6158, "step": 27431 }, { "epoch": 1.9818303321473079, "grad_norm": 7.577111371079108, "learning_rate": 1.0693943420941877e-09, "loss": 0.6557, "step": 27432 }, { "epoch": 1.9819025773475176, "grad_norm": 7.307121711724866, "learning_rate": 1.060856901561047e-09, "loss": 0.6415, "step": 27433 }, { "epoch": 1.981974822547727, "grad_norm": 8.509363290566448, "learning_rate": 1.052353669564743e-09, "loss": 0.6846, "step": 27434 }, { "epoch": 1.9820470677479365, "grad_norm": 7.855732432017847, "learning_rate": 1.0438846462215713e-09, "loss": 0.5411, "step": 27435 }, { "epoch": 1.982119312948146, "grad_norm": 8.288572363424866, "learning_rate": 1.0354498316469952e-09, "loss": 0.5851, "step": 27436 }, { "epoch": 1.9821915581483556, "grad_norm": 6.801197440237546, "learning_rate": 1.0270492259567554e-09, "loss": 0.5866, "step": 27437 }, { "epoch": 1.9822638033485651, "grad_norm": 7.405637047750032, "learning_rate": 1.0186828292660377e-09, "loss": 0.6146, "step": 27438 }, { "epoch": 1.9823360485487744, "grad_norm": 8.543754607773538, "learning_rate": 1.0103506416891951e-09, "loss": 0.6301, "step": 27439 }, { "epoch": 1.9824082937489842, "grad_norm": 7.865121609959576, "learning_rate": 1.0020526633403027e-09, "loss": 0.6151, "step": 27440 }, { "epoch": 1.9824805389491935, "grad_norm": 7.019232894848902, "learning_rate": 9.937888943331587e-10, "loss": 0.4737, "step": 27441 }, { "epoch": 1.982552784149403, "grad_norm": 7.319145899488412, "learning_rate": 9.855593347804504e-10, "loss": 0.6487, "step": 27442 }, { "epoch": 1.9826250293496126, "grad_norm": 8.44568288098552, "learning_rate": 9.773639847951432e-10, "loss": 0.6486, "step": 27443 }, { "epoch": 1.9826972745498221, "grad_norm": 7.837072683850337, "learning_rate": 9.692028444893697e-10, "loss": 0.5626, "step": 27444 }, { "epoch": 1.9827695197500317, "grad_norm": 7.118166943017149, "learning_rate": 9.610759139747072e-10, "loss": 0.5055, "step": 27445 }, { "epoch": 1.982841764950241, "grad_norm": 6.821211258615875, "learning_rate": 9.52983193362733e-10, "loss": 0.4659, "step": 27446 }, { "epoch": 1.9829140101504508, "grad_norm": 7.641423532770431, "learning_rate": 9.449246827639147e-10, "loss": 0.642, "step": 27447 }, { "epoch": 1.98298625535066, "grad_norm": 6.226676451670591, "learning_rate": 9.36900382288719e-10, "loss": 0.567, "step": 27448 }, { "epoch": 1.9830585005508696, "grad_norm": 7.300236779122304, "learning_rate": 9.289102920467808e-10, "loss": 0.5155, "step": 27449 }, { "epoch": 1.9831307457510792, "grad_norm": 6.834087640331548, "learning_rate": 9.209544121480119e-10, "loss": 0.6148, "step": 27450 }, { "epoch": 1.9832029909512887, "grad_norm": 6.924573247547067, "learning_rate": 9.130327427006591e-10, "loss": 0.6262, "step": 27451 }, { "epoch": 1.9832752361514983, "grad_norm": 7.265153241992812, "learning_rate": 9.051452838135244e-10, "loss": 0.6065, "step": 27452 }, { "epoch": 1.9833474813517076, "grad_norm": 7.177087821645307, "learning_rate": 8.972920355945769e-10, "loss": 0.6403, "step": 27453 }, { "epoch": 1.9834197265519173, "grad_norm": 7.236190007959909, "learning_rate": 8.894729981515082e-10, "loss": 0.5699, "step": 27454 }, { "epoch": 1.9834919717521267, "grad_norm": 7.770794933998066, "learning_rate": 8.816881715908998e-10, "loss": 0.5982, "step": 27455 }, { "epoch": 1.9835642169523362, "grad_norm": 7.8136315098567435, "learning_rate": 8.739375560193331e-10, "loss": 0.5964, "step": 27456 }, { "epoch": 1.9836364621525457, "grad_norm": 7.937544454615303, "learning_rate": 8.662211515433893e-10, "loss": 0.5937, "step": 27457 }, { "epoch": 1.9837087073527553, "grad_norm": 7.3378669711989, "learning_rate": 8.5853895826854e-10, "loss": 0.6264, "step": 27458 }, { "epoch": 1.9837809525529648, "grad_norm": 8.78550188292968, "learning_rate": 8.508909762994233e-10, "loss": 0.6235, "step": 27459 }, { "epoch": 1.9838531977531741, "grad_norm": 8.056860082238343, "learning_rate": 8.432772057415106e-10, "loss": 0.6575, "step": 27460 }, { "epoch": 1.983925442953384, "grad_norm": 7.531169716814699, "learning_rate": 8.356976466986077e-10, "loss": 0.5969, "step": 27461 }, { "epoch": 1.9839976881535932, "grad_norm": 8.011099744492697, "learning_rate": 8.281522992745206e-10, "loss": 0.6657, "step": 27462 }, { "epoch": 1.9840699333538028, "grad_norm": 7.412412502036148, "learning_rate": 8.206411635724998e-10, "loss": 0.6331, "step": 27463 }, { "epoch": 1.9841421785540123, "grad_norm": 6.263125865370163, "learning_rate": 8.131642396952411e-10, "loss": 0.5977, "step": 27464 }, { "epoch": 1.9842144237542219, "grad_norm": 9.306492038753529, "learning_rate": 8.0572152774544e-10, "loss": 0.6045, "step": 27465 }, { "epoch": 1.9842866689544314, "grad_norm": 7.247607921732397, "learning_rate": 7.98313027824682e-10, "loss": 0.6278, "step": 27466 }, { "epoch": 1.9843589141546407, "grad_norm": 5.973162073904643, "learning_rate": 7.9093874003483e-10, "loss": 0.5305, "step": 27467 }, { "epoch": 1.9844311593548505, "grad_norm": 9.10907641489104, "learning_rate": 7.835986644763593e-10, "loss": 0.5532, "step": 27468 }, { "epoch": 1.9845034045550598, "grad_norm": 6.683567209820917, "learning_rate": 7.762928012497451e-10, "loss": 0.5846, "step": 27469 }, { "epoch": 1.9845756497552693, "grad_norm": 7.031957158449665, "learning_rate": 7.690211504551847e-10, "loss": 0.5833, "step": 27470 }, { "epoch": 1.9846478949554789, "grad_norm": 6.870692620379712, "learning_rate": 7.61783712192321e-10, "loss": 0.571, "step": 27471 }, { "epoch": 1.9847201401556884, "grad_norm": 8.151660548756213, "learning_rate": 7.545804865602413e-10, "loss": 0.5748, "step": 27472 }, { "epoch": 1.984792385355898, "grad_norm": 7.1542504861313425, "learning_rate": 7.474114736572002e-10, "loss": 0.6294, "step": 27473 }, { "epoch": 1.9848646305561073, "grad_norm": 6.589404658515222, "learning_rate": 7.402766735814526e-10, "loss": 0.5924, "step": 27474 }, { "epoch": 1.984936875756317, "grad_norm": 7.4429644858022845, "learning_rate": 7.331760864309756e-10, "loss": 0.5793, "step": 27475 }, { "epoch": 1.9850091209565264, "grad_norm": 7.149863245796646, "learning_rate": 7.261097123029137e-10, "loss": 0.6495, "step": 27476 }, { "epoch": 1.985081366156736, "grad_norm": 7.131739591062377, "learning_rate": 7.190775512935789e-10, "loss": 0.5933, "step": 27477 }, { "epoch": 1.9851536113569455, "grad_norm": 6.876632219241548, "learning_rate": 7.120796034995603e-10, "loss": 0.5538, "step": 27478 }, { "epoch": 1.985225856557155, "grad_norm": 7.77755710954829, "learning_rate": 7.051158690166149e-10, "loss": 0.5815, "step": 27479 }, { "epoch": 1.9852981017573645, "grad_norm": 7.417885955139579, "learning_rate": 6.981863479399442e-10, "loss": 0.5844, "step": 27480 }, { "epoch": 1.9853703469575739, "grad_norm": 7.793740661174988, "learning_rate": 6.912910403647499e-10, "loss": 0.6163, "step": 27481 }, { "epoch": 1.9854425921577836, "grad_norm": 7.384702087468004, "learning_rate": 6.844299463848459e-10, "loss": 0.6038, "step": 27482 }, { "epoch": 1.985514837357993, "grad_norm": 6.295253043084514, "learning_rate": 6.776030660948785e-10, "loss": 0.6877, "step": 27483 }, { "epoch": 1.9855870825582025, "grad_norm": 7.2376767362582735, "learning_rate": 6.70810399587829e-10, "loss": 0.5525, "step": 27484 }, { "epoch": 1.985659327758412, "grad_norm": 7.482847308693051, "learning_rate": 6.640519469566787e-10, "loss": 0.5987, "step": 27485 }, { "epoch": 1.9857315729586216, "grad_norm": 8.064999688202295, "learning_rate": 6.573277082941309e-10, "loss": 0.5833, "step": 27486 }, { "epoch": 1.9858038181588311, "grad_norm": 6.297894492337982, "learning_rate": 6.506376836920569e-10, "loss": 0.5694, "step": 27487 }, { "epoch": 1.9858760633590404, "grad_norm": 7.066285865648133, "learning_rate": 6.439818732423275e-10, "loss": 0.5464, "step": 27488 }, { "epoch": 1.9859483085592502, "grad_norm": 7.050766101091759, "learning_rate": 6.373602770357034e-10, "loss": 0.6241, "step": 27489 }, { "epoch": 1.9860205537594595, "grad_norm": 8.389135473583712, "learning_rate": 6.307728951629454e-10, "loss": 0.6751, "step": 27490 }, { "epoch": 1.9860927989596693, "grad_norm": 8.241061830714306, "learning_rate": 6.242197277142592e-10, "loss": 0.5998, "step": 27491 }, { "epoch": 1.9861650441598786, "grad_norm": 8.967869093961273, "learning_rate": 6.177007747795727e-10, "loss": 0.5658, "step": 27492 }, { "epoch": 1.9862372893600881, "grad_norm": 7.54314524732011, "learning_rate": 6.112160364477038e-10, "loss": 0.568, "step": 27493 }, { "epoch": 1.9863095345602977, "grad_norm": 7.188505152202521, "learning_rate": 6.047655128077479e-10, "loss": 0.6056, "step": 27494 }, { "epoch": 1.986381779760507, "grad_norm": 8.14385676163951, "learning_rate": 5.983492039479677e-10, "loss": 0.6572, "step": 27495 }, { "epoch": 1.9864540249607168, "grad_norm": 7.646209236349418, "learning_rate": 5.919671099560708e-10, "loss": 0.6899, "step": 27496 }, { "epoch": 1.986526270160926, "grad_norm": 7.423694010232936, "learning_rate": 5.856192309192099e-10, "loss": 0.5722, "step": 27497 }, { "epoch": 1.9865985153611359, "grad_norm": 7.35533053632425, "learning_rate": 5.793055669248149e-10, "loss": 0.553, "step": 27498 }, { "epoch": 1.9866707605613452, "grad_norm": 6.577059335536957, "learning_rate": 5.730261180589281e-10, "loss": 0.5295, "step": 27499 }, { "epoch": 1.9867430057615547, "grad_norm": 7.297526099748714, "learning_rate": 5.667808844078693e-10, "loss": 0.5585, "step": 27500 }, { "epoch": 1.9868152509617643, "grad_norm": 6.8849196569648745, "learning_rate": 5.605698660568481e-10, "loss": 0.5984, "step": 27501 }, { "epoch": 1.9868874961619736, "grad_norm": 8.254007002165487, "learning_rate": 5.543930630907967e-10, "loss": 0.6448, "step": 27502 }, { "epoch": 1.9869597413621833, "grad_norm": 7.488134280231578, "learning_rate": 5.482504755943696e-10, "loss": 0.5665, "step": 27503 }, { "epoch": 1.9870319865623927, "grad_norm": 7.130217666192608, "learning_rate": 5.42142103651666e-10, "loss": 0.5403, "step": 27504 }, { "epoch": 1.9871042317626024, "grad_norm": 7.437494614543087, "learning_rate": 5.360679473465081e-10, "loss": 0.5489, "step": 27505 }, { "epoch": 1.9871764769628117, "grad_norm": 5.902991632315062, "learning_rate": 5.300280067618846e-10, "loss": 0.5698, "step": 27506 }, { "epoch": 1.9872487221630213, "grad_norm": 8.91631019331309, "learning_rate": 5.240222819805074e-10, "loss": 0.5885, "step": 27507 }, { "epoch": 1.9873209673632308, "grad_norm": 9.906645469112316, "learning_rate": 5.180507730842554e-10, "loss": 0.5777, "step": 27508 }, { "epoch": 1.9873932125634404, "grad_norm": 6.06615062796516, "learning_rate": 5.121134801552852e-10, "loss": 0.5679, "step": 27509 }, { "epoch": 1.98746545776365, "grad_norm": 8.188053956838441, "learning_rate": 5.062104032749204e-10, "loss": 0.5526, "step": 27510 }, { "epoch": 1.9875377029638592, "grad_norm": 7.517355610583855, "learning_rate": 5.003415425236525e-10, "loss": 0.5892, "step": 27511 }, { "epoch": 1.987609948164069, "grad_norm": 8.572792271715464, "learning_rate": 4.945068979819723e-10, "loss": 0.5929, "step": 27512 }, { "epoch": 1.9876821933642783, "grad_norm": 7.901631208607242, "learning_rate": 4.887064697298161e-10, "loss": 0.6221, "step": 27513 }, { "epoch": 1.9877544385644879, "grad_norm": 9.26277062642352, "learning_rate": 4.829402578465647e-10, "loss": 0.6073, "step": 27514 }, { "epoch": 1.9878266837646974, "grad_norm": 7.227613415984419, "learning_rate": 4.772082624110441e-10, "loss": 0.6082, "step": 27515 }, { "epoch": 1.987898928964907, "grad_norm": 6.990359616176879, "learning_rate": 4.715104835018025e-10, "loss": 0.6373, "step": 27516 }, { "epoch": 1.9879711741651165, "grad_norm": 8.053441362898505, "learning_rate": 4.658469211968331e-10, "loss": 0.602, "step": 27517 }, { "epoch": 1.9880434193653258, "grad_norm": 7.63267206162383, "learning_rate": 4.6021757557357385e-10, "loss": 0.5947, "step": 27518 }, { "epoch": 1.9881156645655356, "grad_norm": 6.841760076085737, "learning_rate": 4.546224467091853e-10, "loss": 0.6323, "step": 27519 }, { "epoch": 1.9881879097657449, "grad_norm": 6.906660939414302, "learning_rate": 4.4906153468027294e-10, "loss": 0.6708, "step": 27520 }, { "epoch": 1.9882601549659544, "grad_norm": 7.458409257498796, "learning_rate": 4.435348395628869e-10, "loss": 0.5437, "step": 27521 }, { "epoch": 1.988332400166164, "grad_norm": 8.838915039040867, "learning_rate": 4.380423614328e-10, "loss": 0.5761, "step": 27522 }, { "epoch": 1.9884046453663735, "grad_norm": 7.249647986975546, "learning_rate": 4.325841003652298e-10, "loss": 0.5409, "step": 27523 }, { "epoch": 1.988476890566583, "grad_norm": 7.620869753003021, "learning_rate": 4.271600564348388e-10, "loss": 0.6036, "step": 27524 }, { "epoch": 1.9885491357667924, "grad_norm": 7.416681982588938, "learning_rate": 4.2177022971545687e-10, "loss": 0.6795, "step": 27525 }, { "epoch": 1.9886213809670021, "grad_norm": 8.663472051426025, "learning_rate": 4.1641462028146895e-10, "loss": 0.6409, "step": 27526 }, { "epoch": 1.9886936261672115, "grad_norm": 7.486668500121592, "learning_rate": 4.110932282061497e-10, "loss": 0.609, "step": 27527 }, { "epoch": 1.988765871367421, "grad_norm": 9.012726157389114, "learning_rate": 4.0580605356194125e-10, "loss": 0.5821, "step": 27528 }, { "epoch": 1.9888381165676305, "grad_norm": 7.2184329272897765, "learning_rate": 4.0055309642128557e-10, "loss": 0.5376, "step": 27529 }, { "epoch": 1.98891036176784, "grad_norm": 8.842052498997024, "learning_rate": 3.953343568566248e-10, "loss": 0.5964, "step": 27530 }, { "epoch": 1.9889826069680496, "grad_norm": 6.246048250205604, "learning_rate": 3.9014983493873557e-10, "loss": 0.5914, "step": 27531 }, { "epoch": 1.989054852168259, "grad_norm": 7.397787289108977, "learning_rate": 3.849995307389498e-10, "loss": 0.5769, "step": 27532 }, { "epoch": 1.9891270973684687, "grad_norm": 8.095318852083254, "learning_rate": 3.798834443277666e-10, "loss": 0.6011, "step": 27533 }, { "epoch": 1.989199342568678, "grad_norm": 7.2061123228527535, "learning_rate": 3.7480157577513e-10, "loss": 0.6254, "step": 27534 }, { "epoch": 1.9892715877688876, "grad_norm": 7.103932336725582, "learning_rate": 3.6975392515042895e-10, "loss": 0.5614, "step": 27535 }, { "epoch": 1.9893438329690971, "grad_norm": 6.926933145448545, "learning_rate": 3.6474049252305243e-10, "loss": 0.5357, "step": 27536 }, { "epoch": 1.9894160781693067, "grad_norm": 7.854639526487798, "learning_rate": 3.597612779615567e-10, "loss": 0.5419, "step": 27537 }, { "epoch": 1.9894883233695162, "grad_norm": 6.642113516536189, "learning_rate": 3.548162815342204e-10, "loss": 0.5348, "step": 27538 }, { "epoch": 1.9895605685697255, "grad_norm": 7.91293137084094, "learning_rate": 3.499055033084897e-10, "loss": 0.5929, "step": 27539 }, { "epoch": 1.9896328137699353, "grad_norm": 7.244099550806984, "learning_rate": 3.450289433518106e-10, "loss": 0.6391, "step": 27540 }, { "epoch": 1.9897050589701446, "grad_norm": 7.991009428692543, "learning_rate": 3.4018660173051886e-10, "loss": 0.587, "step": 27541 }, { "epoch": 1.9897773041703541, "grad_norm": 5.920673500103195, "learning_rate": 3.3537847851150553e-10, "loss": 0.582, "step": 27542 }, { "epoch": 1.9898495493705637, "grad_norm": 7.284271268298308, "learning_rate": 3.3060457376027364e-10, "loss": 0.5305, "step": 27543 }, { "epoch": 1.9899217945707732, "grad_norm": 7.66408191176823, "learning_rate": 3.258648875420489e-10, "loss": 0.606, "step": 27544 }, { "epoch": 1.9899940397709828, "grad_norm": 7.353312528320835, "learning_rate": 3.211594199220569e-10, "loss": 0.559, "step": 27545 }, { "epoch": 1.990066284971192, "grad_norm": 8.242749814201067, "learning_rate": 3.1648817096441295e-10, "loss": 0.6063, "step": 27546 }, { "epoch": 1.9901385301714019, "grad_norm": 6.626013624274573, "learning_rate": 3.118511407332325e-10, "loss": 0.657, "step": 27547 }, { "epoch": 1.9902107753716112, "grad_norm": 8.824069173422416, "learning_rate": 3.0724832929179826e-10, "loss": 0.661, "step": 27548 }, { "epoch": 1.9902830205718207, "grad_norm": 7.836573262157642, "learning_rate": 3.026797367033929e-10, "loss": 0.6522, "step": 27549 }, { "epoch": 1.9903552657720303, "grad_norm": 6.383675450385881, "learning_rate": 2.9814536303018893e-10, "loss": 0.5785, "step": 27550 }, { "epoch": 1.9904275109722398, "grad_norm": 8.637040289695326, "learning_rate": 2.9364520833463637e-10, "loss": 0.5863, "step": 27551 }, { "epoch": 1.9904997561724493, "grad_norm": 7.001085469690128, "learning_rate": 2.891792726780751e-10, "loss": 0.6139, "step": 27552 }, { "epoch": 1.9905720013726587, "grad_norm": 6.210724986537275, "learning_rate": 2.8474755612184493e-10, "loss": 0.5212, "step": 27553 }, { "epoch": 1.9906442465728684, "grad_norm": 7.849967042890237, "learning_rate": 2.803500587267305e-10, "loss": 0.6104, "step": 27554 }, { "epoch": 1.9907164917730777, "grad_norm": 8.824957085091743, "learning_rate": 2.7598678055240633e-10, "loss": 0.5723, "step": 27555 }, { "epoch": 1.9907887369732873, "grad_norm": 7.406915948600261, "learning_rate": 2.716577216591021e-10, "loss": 0.6414, "step": 27556 }, { "epoch": 1.9908609821734968, "grad_norm": 6.38768717571814, "learning_rate": 2.6736288210565954e-10, "loss": 0.5785, "step": 27557 }, { "epoch": 1.9909332273737064, "grad_norm": 7.922299465868751, "learning_rate": 2.63102261951198e-10, "loss": 0.6822, "step": 27558 }, { "epoch": 1.991005472573916, "grad_norm": 6.919772205462278, "learning_rate": 2.588758612540043e-10, "loss": 0.5632, "step": 27559 }, { "epoch": 1.9910777177741252, "grad_norm": 6.92159034712035, "learning_rate": 2.5468368007181e-10, "loss": 0.6501, "step": 27560 }, { "epoch": 1.991149962974335, "grad_norm": 8.334674371263702, "learning_rate": 2.5052571846234664e-10, "loss": 0.6626, "step": 27561 }, { "epoch": 1.9912222081745443, "grad_norm": 7.43525803057306, "learning_rate": 2.464019764819581e-10, "loss": 0.6452, "step": 27562 }, { "epoch": 1.991294453374754, "grad_norm": 7.391872695867015, "learning_rate": 2.4231245418726567e-10, "loss": 0.6076, "step": 27563 }, { "epoch": 1.9913666985749634, "grad_norm": 9.423418263431701, "learning_rate": 2.3825715163461325e-10, "loss": 0.6823, "step": 27564 }, { "epoch": 1.991438943775173, "grad_norm": 6.724301054415259, "learning_rate": 2.3423606887923446e-10, "loss": 0.5914, "step": 27565 }, { "epoch": 1.9915111889753825, "grad_norm": 7.00857971822737, "learning_rate": 2.302492059763628e-10, "loss": 0.5491, "step": 27566 }, { "epoch": 1.9915834341755918, "grad_norm": 6.935580426207216, "learning_rate": 2.2629656298039925e-10, "loss": 0.5988, "step": 27567 }, { "epoch": 1.9916556793758016, "grad_norm": 6.132956494627223, "learning_rate": 2.2237813994518964e-10, "loss": 0.602, "step": 27568 }, { "epoch": 1.9917279245760109, "grad_norm": 8.329985849063938, "learning_rate": 2.1849393692485731e-10, "loss": 0.6513, "step": 27569 }, { "epoch": 1.9918001697762207, "grad_norm": 6.345838982656181, "learning_rate": 2.1464395397241543e-10, "loss": 0.6215, "step": 27570 }, { "epoch": 1.99187241497643, "grad_norm": 7.105525657757269, "learning_rate": 2.108281911405996e-10, "loss": 0.6051, "step": 27571 }, { "epoch": 1.9919446601766395, "grad_norm": 7.3682385500915135, "learning_rate": 2.070466484815903e-10, "loss": 0.6059, "step": 27572 }, { "epoch": 1.992016905376849, "grad_norm": 6.376481426371394, "learning_rate": 2.0329932604701286e-10, "loss": 0.6273, "step": 27573 }, { "epoch": 1.9920891505770584, "grad_norm": 7.312534430007086, "learning_rate": 1.995862238884927e-10, "loss": 0.6107, "step": 27574 }, { "epoch": 1.9921613957772681, "grad_norm": 7.154523749559282, "learning_rate": 1.9590734205626739e-10, "loss": 0.6391, "step": 27575 }, { "epoch": 1.9922336409774775, "grad_norm": 7.1347220498087625, "learning_rate": 1.9226268060140718e-10, "loss": 0.5803, "step": 27576 }, { "epoch": 1.9923058861776872, "grad_norm": 7.1574708655134485, "learning_rate": 1.886522395735946e-10, "loss": 0.544, "step": 27577 }, { "epoch": 1.9923781313778965, "grad_norm": 7.369126389381294, "learning_rate": 1.8507601902195692e-10, "loss": 0.5846, "step": 27578 }, { "epoch": 1.992450376578106, "grad_norm": 6.841539278443582, "learning_rate": 1.8153401899562162e-10, "loss": 0.5821, "step": 27579 }, { "epoch": 1.9925226217783156, "grad_norm": 8.526903994759166, "learning_rate": 1.780262395431609e-10, "loss": 0.5855, "step": 27580 }, { "epoch": 1.992594866978525, "grad_norm": 8.774159239569801, "learning_rate": 1.745526807125919e-10, "loss": 0.5911, "step": 27581 }, { "epoch": 1.9926671121787347, "grad_norm": 7.738850356486284, "learning_rate": 1.7111334255137667e-10, "loss": 0.6247, "step": 27582 }, { "epoch": 1.992739357378944, "grad_norm": 6.657157155576468, "learning_rate": 1.6770822510669972e-10, "loss": 0.6041, "step": 27583 }, { "epoch": 1.9928116025791538, "grad_norm": 7.738119802470919, "learning_rate": 1.6433732842491279e-10, "loss": 0.6388, "step": 27584 }, { "epoch": 1.9928838477793631, "grad_norm": 7.071471600862627, "learning_rate": 1.610006525526453e-10, "loss": 0.6076, "step": 27585 }, { "epoch": 1.9929560929795727, "grad_norm": 8.263238169235104, "learning_rate": 1.5769819753513883e-10, "loss": 0.6436, "step": 27586 }, { "epoch": 1.9930283381797822, "grad_norm": 8.101252668557171, "learning_rate": 1.5442996341763495e-10, "loss": 0.6016, "step": 27587 }, { "epoch": 1.9931005833799917, "grad_norm": 7.956477030281583, "learning_rate": 1.511959502450977e-10, "loss": 0.6292, "step": 27588 }, { "epoch": 1.9931728285802013, "grad_norm": 7.810893633681709, "learning_rate": 1.4799615806165845e-10, "loss": 0.6751, "step": 27589 }, { "epoch": 1.9932450737804106, "grad_norm": 6.753765892392781, "learning_rate": 1.4483058691089346e-10, "loss": 0.5752, "step": 27590 }, { "epoch": 1.9933173189806204, "grad_norm": 6.305876023060784, "learning_rate": 1.4169923683665655e-10, "loss": 0.6118, "step": 27591 }, { "epoch": 1.9933895641808297, "grad_norm": 7.098337446242777, "learning_rate": 1.386021078814137e-10, "loss": 0.631, "step": 27592 }, { "epoch": 1.9934618093810392, "grad_norm": 7.126404740908981, "learning_rate": 1.3553920008790855e-10, "loss": 0.6015, "step": 27593 }, { "epoch": 1.9935340545812488, "grad_norm": 6.483678720618201, "learning_rate": 1.3251051349777444e-10, "loss": 0.5273, "step": 27594 }, { "epoch": 1.9936062997814583, "grad_norm": 8.20733755303586, "learning_rate": 1.295160481523672e-10, "loss": 0.5745, "step": 27595 }, { "epoch": 1.9936785449816679, "grad_norm": 7.693639273199211, "learning_rate": 1.2655580409304258e-10, "loss": 0.6329, "step": 27596 }, { "epoch": 1.9937507901818772, "grad_norm": 6.898322755716857, "learning_rate": 1.2362978136004622e-10, "loss": 0.569, "step": 27597 }, { "epoch": 1.993823035382087, "grad_norm": 6.617454118097605, "learning_rate": 1.207379799936237e-10, "loss": 0.5381, "step": 27598 }, { "epoch": 1.9938952805822963, "grad_norm": 8.35366380911062, "learning_rate": 1.178804000331879e-10, "loss": 0.6079, "step": 27599 }, { "epoch": 1.9939675257825058, "grad_norm": 7.090311031863444, "learning_rate": 1.1505704151787423e-10, "loss": 0.5886, "step": 27600 }, { "epoch": 1.9940397709827153, "grad_norm": 7.040591667628044, "learning_rate": 1.1226790448654046e-10, "loss": 0.6939, "step": 27601 }, { "epoch": 1.9941120161829249, "grad_norm": 7.581999553160175, "learning_rate": 1.0951298897721175e-10, "loss": 0.5827, "step": 27602 }, { "epoch": 1.9941842613831344, "grad_norm": 5.5134208965305564, "learning_rate": 1.0679229502763567e-10, "loss": 0.5531, "step": 27603 }, { "epoch": 1.9942565065833437, "grad_norm": 6.351010231698164, "learning_rate": 1.0410582267472714e-10, "loss": 0.6309, "step": 27604 }, { "epoch": 1.9943287517835535, "grad_norm": 7.480760693144495, "learning_rate": 1.0145357195595618e-10, "loss": 0.621, "step": 27605 }, { "epoch": 1.9944009969837628, "grad_norm": 7.644933690943043, "learning_rate": 9.883554290712749e-11, "loss": 0.5748, "step": 27606 }, { "epoch": 1.9944732421839724, "grad_norm": 6.53833733004836, "learning_rate": 9.625173556404576e-11, "loss": 0.6048, "step": 27607 }, { "epoch": 1.994545487384182, "grad_norm": 6.474317725331753, "learning_rate": 9.370214996223814e-11, "loss": 0.5692, "step": 27608 }, { "epoch": 1.9946177325843915, "grad_norm": 8.404342037773725, "learning_rate": 9.118678613667665e-11, "loss": 0.6115, "step": 27609 }, { "epoch": 1.994689977784601, "grad_norm": 8.353586634971158, "learning_rate": 8.870564412177818e-11, "loss": 0.5993, "step": 27610 }, { "epoch": 1.9947622229848103, "grad_norm": 6.314515651271778, "learning_rate": 8.625872395140456e-11, "loss": 0.5625, "step": 27611 }, { "epoch": 1.99483446818502, "grad_norm": 6.184069674811234, "learning_rate": 8.384602565914002e-11, "loss": 0.561, "step": 27612 }, { "epoch": 1.9949067133852294, "grad_norm": 7.825332234797506, "learning_rate": 8.146754927773614e-11, "loss": 0.6064, "step": 27613 }, { "epoch": 1.994978958585439, "grad_norm": 6.497514396202399, "learning_rate": 7.912329484022208e-11, "loss": 0.603, "step": 27614 }, { "epoch": 1.9950512037856485, "grad_norm": 7.32427135397776, "learning_rate": 7.681326237851671e-11, "loss": 0.593, "step": 27615 }, { "epoch": 1.995123448985858, "grad_norm": 7.468400380398962, "learning_rate": 7.453745192398387e-11, "loss": 0.664, "step": 27616 }, { "epoch": 1.9951956941860676, "grad_norm": 7.397344071374721, "learning_rate": 7.229586350798734e-11, "loss": 0.6127, "step": 27617 }, { "epoch": 1.995267939386277, "grad_norm": 6.054099643841237, "learning_rate": 7.008849716105826e-11, "loss": 0.6353, "step": 27618 }, { "epoch": 1.9953401845864867, "grad_norm": 6.908214997250895, "learning_rate": 6.791535291372775e-11, "loss": 0.5373, "step": 27619 }, { "epoch": 1.995412429786696, "grad_norm": 6.829931513385699, "learning_rate": 6.577643079569429e-11, "loss": 0.5964, "step": 27620 }, { "epoch": 1.9954846749869055, "grad_norm": 7.420887064181474, "learning_rate": 6.367173083582367e-11, "loss": 0.5641, "step": 27621 }, { "epoch": 1.995556920187115, "grad_norm": 7.678118680362173, "learning_rate": 6.160125306325926e-11, "loss": 0.6485, "step": 27622 }, { "epoch": 1.9956291653873246, "grad_norm": 8.210991391949689, "learning_rate": 5.956499750658929e-11, "loss": 0.5813, "step": 27623 }, { "epoch": 1.9957014105875341, "grad_norm": 7.977917472988207, "learning_rate": 5.756296419301421e-11, "loss": 0.5963, "step": 27624 }, { "epoch": 1.9957736557877435, "grad_norm": 7.821870357132561, "learning_rate": 5.5595153150567183e-11, "loss": 0.6551, "step": 27625 }, { "epoch": 1.9958459009879532, "grad_norm": 7.90760061918217, "learning_rate": 5.366156440589354e-11, "loss": 0.6814, "step": 27626 }, { "epoch": 1.9959181461881625, "grad_norm": 6.806440826066881, "learning_rate": 5.176219798536109e-11, "loss": 0.6476, "step": 27627 }, { "epoch": 1.995990391388372, "grad_norm": 7.321288476328008, "learning_rate": 4.9897053915337614e-11, "loss": 0.6625, "step": 27628 }, { "epoch": 1.9960626365885816, "grad_norm": 7.289584779989079, "learning_rate": 4.8066132221080696e-11, "loss": 0.5807, "step": 27629 }, { "epoch": 1.9961348817887912, "grad_norm": 6.7420268122508515, "learning_rate": 4.6269432927570354e-11, "loss": 0.623, "step": 27630 }, { "epoch": 1.9962071269890007, "grad_norm": 7.253441355592767, "learning_rate": 4.450695605950906e-11, "loss": 0.5737, "step": 27631 }, { "epoch": 1.99627937218921, "grad_norm": 8.446851732996072, "learning_rate": 4.27787016413217e-11, "loss": 0.5672, "step": 27632 }, { "epoch": 1.9963516173894198, "grad_norm": 7.352966239437697, "learning_rate": 4.108466969632297e-11, "loss": 0.6153, "step": 27633 }, { "epoch": 1.9964238625896291, "grad_norm": 9.192511404952173, "learning_rate": 3.942486024754999e-11, "loss": 0.5589, "step": 27634 }, { "epoch": 1.9964961077898387, "grad_norm": 6.512586584998284, "learning_rate": 3.7799273318317454e-11, "loss": 0.5573, "step": 27635 }, { "epoch": 1.9965683529900482, "grad_norm": 6.705080116242786, "learning_rate": 3.62079089302747e-11, "loss": 0.5785, "step": 27636 }, { "epoch": 1.9966405981902577, "grad_norm": 6.745902901994274, "learning_rate": 3.46507671056262e-11, "loss": 0.5367, "step": 27637 }, { "epoch": 1.9967128433904673, "grad_norm": 6.825782356554649, "learning_rate": 3.3127847865188635e-11, "loss": 0.6088, "step": 27638 }, { "epoch": 1.9967850885906766, "grad_norm": 6.718835732556216, "learning_rate": 3.163915123033379e-11, "loss": 0.5379, "step": 27639 }, { "epoch": 1.9968573337908864, "grad_norm": 7.454072072377961, "learning_rate": 3.0184677221323234e-11, "loss": 0.5561, "step": 27640 }, { "epoch": 1.9969295789910957, "grad_norm": 7.175444468910341, "learning_rate": 2.876442585786343e-11, "loss": 0.5587, "step": 27641 }, { "epoch": 1.9970018241913055, "grad_norm": 7.109409340576403, "learning_rate": 2.737839715966084e-11, "loss": 0.5679, "step": 27642 }, { "epoch": 1.9970740693915148, "grad_norm": 6.514116364144506, "learning_rate": 2.6026591145311697e-11, "loss": 0.5608, "step": 27643 }, { "epoch": 1.9971463145917243, "grad_norm": 7.993768649344205, "learning_rate": 2.470900783368979e-11, "loss": 0.6221, "step": 27644 }, { "epoch": 1.9972185597919339, "grad_norm": 6.8861149497784675, "learning_rate": 2.3425647242836247e-11, "loss": 0.5943, "step": 27645 }, { "epoch": 1.9972908049921432, "grad_norm": 8.817653150361389, "learning_rate": 2.2176509389959523e-11, "loss": 0.5849, "step": 27646 }, { "epoch": 1.997363050192353, "grad_norm": 9.696438904525683, "learning_rate": 2.096159429254563e-11, "loss": 0.6698, "step": 27647 }, { "epoch": 1.9974352953925623, "grad_norm": 7.1526200195824226, "learning_rate": 1.9780901966970357e-11, "loss": 0.6028, "step": 27648 }, { "epoch": 1.997507540592772, "grad_norm": 7.455832575379483, "learning_rate": 1.863443242933194e-11, "loss": 0.5337, "step": 27649 }, { "epoch": 1.9975797857929813, "grad_norm": 7.163041136639892, "learning_rate": 1.752218569572861e-11, "loss": 0.5857, "step": 27650 }, { "epoch": 1.9976520309931909, "grad_norm": 8.39113236070805, "learning_rate": 1.6444161780870827e-11, "loss": 0.5579, "step": 27651 }, { "epoch": 1.9977242761934004, "grad_norm": 7.175254939531112, "learning_rate": 1.5400360700024154e-11, "loss": 0.6432, "step": 27652 }, { "epoch": 1.9977965213936097, "grad_norm": 7.560678562208879, "learning_rate": 1.4390782467066378e-11, "loss": 0.6285, "step": 27653 }, { "epoch": 1.9978687665938195, "grad_norm": 7.025264379338999, "learning_rate": 1.3415427096152845e-11, "loss": 0.5357, "step": 27654 }, { "epoch": 1.9979410117940288, "grad_norm": 7.857743748068064, "learning_rate": 1.2474294600328674e-11, "loss": 0.5985, "step": 27655 }, { "epoch": 1.9980132569942386, "grad_norm": 7.759472871947442, "learning_rate": 1.1567384992638986e-11, "loss": 0.6049, "step": 27656 }, { "epoch": 1.998085502194448, "grad_norm": 7.7420241630254, "learning_rate": 1.069469828557379e-11, "loss": 0.5621, "step": 27657 }, { "epoch": 1.9981577473946575, "grad_norm": 7.912111303535929, "learning_rate": 9.856234491067984e-12, "loss": 0.6485, "step": 27658 }, { "epoch": 1.998229992594867, "grad_norm": 8.71628943284072, "learning_rate": 9.051993620223798e-12, "loss": 0.604, "step": 27659 }, { "epoch": 1.9983022377950765, "grad_norm": 10.509515719303952, "learning_rate": 8.281975684421017e-12, "loss": 0.6039, "step": 27660 }, { "epoch": 1.998374482995286, "grad_norm": 7.212876874309936, "learning_rate": 7.546180694206761e-12, "loss": 0.557, "step": 27661 }, { "epoch": 1.9984467281954954, "grad_norm": 7.211281880827159, "learning_rate": 6.8446086595730374e-12, "loss": 0.5816, "step": 27662 }, { "epoch": 1.9985189733957052, "grad_norm": 7.23308114990718, "learning_rate": 6.177259590234297e-12, "loss": 0.5956, "step": 27663 }, { "epoch": 1.9985912185959145, "grad_norm": 7.592814069927452, "learning_rate": 5.544133495072324e-12, "loss": 0.6853, "step": 27664 }, { "epoch": 1.998663463796124, "grad_norm": 6.948119687188758, "learning_rate": 4.945230382691346e-12, "loss": 0.5817, "step": 27665 }, { "epoch": 1.9987357089963336, "grad_norm": 6.760613186355035, "learning_rate": 4.380550261695593e-12, "loss": 0.5987, "step": 27666 }, { "epoch": 1.9988079541965431, "grad_norm": 6.6612054708735196, "learning_rate": 3.85009313957907e-12, "loss": 0.6224, "step": 27667 }, { "epoch": 1.9988801993967527, "grad_norm": 8.053498203450987, "learning_rate": 3.3538590238357815e-12, "loss": 0.6129, "step": 27668 }, { "epoch": 1.998952444596962, "grad_norm": 7.62718119457336, "learning_rate": 2.891847920849511e-12, "loss": 0.5844, "step": 27669 }, { "epoch": 1.9990246897971717, "grad_norm": 7.796769563806764, "learning_rate": 2.4640598370040404e-12, "loss": 0.5461, "step": 27670 }, { "epoch": 1.999096934997381, "grad_norm": 7.794256286573532, "learning_rate": 2.0704947784055962e-12, "loss": 0.6329, "step": 27671 }, { "epoch": 1.9991691801975906, "grad_norm": 8.532809942804278, "learning_rate": 1.7111527506052939e-12, "loss": 0.5999, "step": 27672 }, { "epoch": 1.9992414253978001, "grad_norm": 7.468246123742503, "learning_rate": 1.3860337580440253e-12, "loss": 0.5642, "step": 27673 }, { "epoch": 1.9993136705980097, "grad_norm": 8.047742008055593, "learning_rate": 1.0951378054402383e-12, "loss": 0.6071, "step": 27674 }, { "epoch": 1.9993859157982192, "grad_norm": 7.9230491475308575, "learning_rate": 8.38464896402158e-13, "loss": 0.572, "step": 27675 }, { "epoch": 1.9994581609984285, "grad_norm": 7.349601589485645, "learning_rate": 6.160150350931204e-13, "loss": 0.6082, "step": 27676 }, { "epoch": 1.9995304061986383, "grad_norm": 7.643453312478533, "learning_rate": 4.2778822428868326e-13, "loss": 0.6224, "step": 27677 }, { "epoch": 1.9996026513988476, "grad_norm": 8.19567323368971, "learning_rate": 2.7378446620929256e-13, "loss": 0.6153, "step": 27678 }, { "epoch": 1.9996748965990572, "grad_norm": 7.5635677876398155, "learning_rate": 1.5400376363050584e-13, "loss": 0.6119, "step": 27679 }, { "epoch": 1.9997471417992667, "grad_norm": 7.835210399419649, "learning_rate": 6.844611738499041e-14, "loss": 0.5718, "step": 27680 }, { "epoch": 1.9998193869994763, "grad_norm": 7.066121750940971, "learning_rate": 1.711152941563654e-14, "loss": 0.5958, "step": 27681 }, { "epoch": 1.9998916321996858, "grad_norm": 7.088110475975167, "learning_rate": 0.0, "loss": 0.6426, "step": 27682 }, { "epoch": 1.9998916321996858, "step": 27682, "total_flos": 6025310322507776.0, "train_loss": 0.7615339792853068, "train_runtime": 1037563.6546, "train_samples_per_second": 3.415, "train_steps_per_second": 0.027 } ], "logging_steps": 1.0, "max_steps": 27682, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6025310322507776.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }