diff --git "a/pft/trainer_state.json" "b/pft/trainer_state.json" deleted file mode 100644--- "a/pft/trainer_state.json" +++ /dev/null @@ -1,51674 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9999322171761675, - "eval_steps": 500, - "global_step": 7376, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.00013556564766488172, - "grad_norm": 18.125877412826984, - "learning_rate": 9.009009009009009e-09, - "loss": 2.2256, - "step": 1 - }, - { - "epoch": 0.00027113129532976344, - "grad_norm": 18.575106069665193, - "learning_rate": 1.8018018018018017e-08, - "loss": 2.2097, - "step": 2 - }, - { - "epoch": 0.00040669694299464516, - "grad_norm": 17.987579225304927, - "learning_rate": 2.7027027027027028e-08, - "loss": 2.1808, - "step": 3 - }, - { - "epoch": 0.0005422625906595269, - "grad_norm": 18.14957323842981, - "learning_rate": 3.6036036036036035e-08, - "loss": 2.2013, - "step": 4 - }, - { - "epoch": 0.0006778282383244086, - "grad_norm": 18.891833829939664, - "learning_rate": 4.504504504504504e-08, - "loss": 2.2111, - "step": 5 - }, - { - "epoch": 0.0008133938859892903, - "grad_norm": 18.03446624709322, - "learning_rate": 5.4054054054054056e-08, - "loss": 2.2461, - "step": 6 - }, - { - "epoch": 0.000948959533654172, - "grad_norm": 18.24050155332701, - "learning_rate": 6.306306306306305e-08, - "loss": 2.211, - "step": 7 - }, - { - "epoch": 0.0010845251813190538, - "grad_norm": 19.018440407380332, - "learning_rate": 7.207207207207207e-08, - "loss": 2.218, - "step": 8 - }, - { - "epoch": 0.0012200908289839354, - "grad_norm": 18.329567473829734, - "learning_rate": 8.108108108108108e-08, - "loss": 2.2347, - "step": 9 - }, - { - "epoch": 0.0013556564766488172, - "grad_norm": 17.932607346687153, - "learning_rate": 9.009009009009008e-08, - "loss": 2.1847, - "step": 10 - }, - { - "epoch": 0.0014912221243136988, - "grad_norm": 17.529153704317615, - "learning_rate": 9.909909909909909e-08, - "loss": 2.1931, - "step": 11 - }, - { - "epoch": 0.0016267877719785807, - "grad_norm": 18.69285777846952, - "learning_rate": 1.0810810810810811e-07, - "loss": 2.2519, - "step": 12 - }, - { - "epoch": 0.0017623534196434623, - "grad_norm": 19.077419217385636, - "learning_rate": 1.171171171171171e-07, - "loss": 2.1795, - "step": 13 - }, - { - "epoch": 0.001897919067308344, - "grad_norm": 18.06789605667497, - "learning_rate": 1.261261261261261e-07, - "loss": 2.2142, - "step": 14 - }, - { - "epoch": 0.002033484714973226, - "grad_norm": 18.75831609191383, - "learning_rate": 1.3513513513513515e-07, - "loss": 2.2478, - "step": 15 - }, - { - "epoch": 0.0021690503626381075, - "grad_norm": 18.39612340288509, - "learning_rate": 1.4414414414414414e-07, - "loss": 2.1925, - "step": 16 - }, - { - "epoch": 0.002304616010302989, - "grad_norm": 18.554913488079723, - "learning_rate": 1.5315315315315313e-07, - "loss": 2.2179, - "step": 17 - }, - { - "epoch": 0.0024401816579678708, - "grad_norm": 38.07597474747701, - "learning_rate": 1.6216216216216215e-07, - "loss": 2.2219, - "step": 18 - }, - { - "epoch": 0.002575747305632753, - "grad_norm": 17.43170832915659, - "learning_rate": 1.7117117117117117e-07, - "loss": 2.1919, - "step": 19 - }, - { - "epoch": 0.0027113129532976344, - "grad_norm": 17.86869497780154, - "learning_rate": 1.8018018018018017e-07, - "loss": 2.1832, - "step": 20 - }, - { - "epoch": 0.002846878600962516, - "grad_norm": 17.400942579077945, - "learning_rate": 1.891891891891892e-07, - "loss": 2.1731, - "step": 21 - }, - { - "epoch": 0.0029824442486273976, - "grad_norm": 17.794419976167035, - "learning_rate": 1.9819819819819818e-07, - "loss": 2.157, - "step": 22 - }, - { - "epoch": 0.0031180098962922797, - "grad_norm": 17.60989825541936, - "learning_rate": 2.072072072072072e-07, - "loss": 2.1693, - "step": 23 - }, - { - "epoch": 0.0032535755439571613, - "grad_norm": 17.877157930048536, - "learning_rate": 2.1621621621621622e-07, - "loss": 2.1597, - "step": 24 - }, - { - "epoch": 0.003389141191622043, - "grad_norm": 17.214382261373643, - "learning_rate": 2.2522522522522522e-07, - "loss": 2.1885, - "step": 25 - }, - { - "epoch": 0.0035247068392869245, - "grad_norm": 17.82215354363436, - "learning_rate": 2.342342342342342e-07, - "loss": 2.213, - "step": 26 - }, - { - "epoch": 0.0036602724869518066, - "grad_norm": 17.48605450775996, - "learning_rate": 2.4324324324324326e-07, - "loss": 2.1852, - "step": 27 - }, - { - "epoch": 0.003795838134616688, - "grad_norm": 17.194467765872567, - "learning_rate": 2.522522522522522e-07, - "loss": 2.1676, - "step": 28 - }, - { - "epoch": 0.00393140378228157, - "grad_norm": 17.461563514138195, - "learning_rate": 2.6126126126126124e-07, - "loss": 2.2146, - "step": 29 - }, - { - "epoch": 0.004066969429946452, - "grad_norm": 17.553802619475682, - "learning_rate": 2.702702702702703e-07, - "loss": 2.1804, - "step": 30 - }, - { - "epoch": 0.0042025350776113335, - "grad_norm": 15.727757919973058, - "learning_rate": 2.7927927927927923e-07, - "loss": 2.1141, - "step": 31 - }, - { - "epoch": 0.004338100725276215, - "grad_norm": 15.870045263004812, - "learning_rate": 2.882882882882883e-07, - "loss": 2.1322, - "step": 32 - }, - { - "epoch": 0.004473666372941097, - "grad_norm": 15.653808206129256, - "learning_rate": 2.972972972972973e-07, - "loss": 2.074, - "step": 33 - }, - { - "epoch": 0.004609232020605978, - "grad_norm": 16.202900988650217, - "learning_rate": 3.0630630630630627e-07, - "loss": 2.1224, - "step": 34 - }, - { - "epoch": 0.00474479766827086, - "grad_norm": 15.112372739808663, - "learning_rate": 3.153153153153153e-07, - "loss": 2.0799, - "step": 35 - }, - { - "epoch": 0.0048803633159357415, - "grad_norm": 16.273271545011845, - "learning_rate": 3.243243243243243e-07, - "loss": 2.0979, - "step": 36 - }, - { - "epoch": 0.005015928963600624, - "grad_norm": 15.492324542235627, - "learning_rate": 3.333333333333333e-07, - "loss": 2.1027, - "step": 37 - }, - { - "epoch": 0.005151494611265506, - "grad_norm": 15.658889991500574, - "learning_rate": 3.4234234234234235e-07, - "loss": 2.103, - "step": 38 - }, - { - "epoch": 0.005287060258930387, - "grad_norm": 15.686295524607946, - "learning_rate": 3.5135135135135134e-07, - "loss": 2.1413, - "step": 39 - }, - { - "epoch": 0.005422625906595269, - "grad_norm": 14.892668679393763, - "learning_rate": 3.6036036036036033e-07, - "loss": 2.0617, - "step": 40 - }, - { - "epoch": 0.0055581915542601504, - "grad_norm": 15.104561965162503, - "learning_rate": 3.6936936936936933e-07, - "loss": 2.1291, - "step": 41 - }, - { - "epoch": 0.005693757201925032, - "grad_norm": 50.84289315840259, - "learning_rate": 3.783783783783784e-07, - "loss": 2.0986, - "step": 42 - }, - { - "epoch": 0.005829322849589914, - "grad_norm": 14.365304452673064, - "learning_rate": 3.8738738738738737e-07, - "loss": 2.0043, - "step": 43 - }, - { - "epoch": 0.005964888497254795, - "grad_norm": 10.862225563349813, - "learning_rate": 3.9639639639639636e-07, - "loss": 1.9274, - "step": 44 - }, - { - "epoch": 0.006100454144919678, - "grad_norm": 10.281201109516811, - "learning_rate": 4.054054054054054e-07, - "loss": 1.914, - "step": 45 - }, - { - "epoch": 0.006236019792584559, - "grad_norm": 10.563454835939782, - "learning_rate": 4.144144144144144e-07, - "loss": 1.8657, - "step": 46 - }, - { - "epoch": 0.006371585440249441, - "grad_norm": 10.413475523735038, - "learning_rate": 4.234234234234234e-07, - "loss": 1.8987, - "step": 47 - }, - { - "epoch": 0.006507151087914323, - "grad_norm": 9.759551915050734, - "learning_rate": 4.3243243243243244e-07, - "loss": 1.8896, - "step": 48 - }, - { - "epoch": 0.006642716735579204, - "grad_norm": 10.897712729575717, - "learning_rate": 4.414414414414414e-07, - "loss": 1.8855, - "step": 49 - }, - { - "epoch": 0.006778282383244086, - "grad_norm": 9.505649038101781, - "learning_rate": 4.5045045045045043e-07, - "loss": 1.8994, - "step": 50 - }, - { - "epoch": 0.0069138480309089674, - "grad_norm": 9.578178265042242, - "learning_rate": 4.594594594594595e-07, - "loss": 1.8742, - "step": 51 - }, - { - "epoch": 0.007049413678573849, - "grad_norm": 9.307823018071025, - "learning_rate": 4.684684684684684e-07, - "loss": 1.8742, - "step": 52 - }, - { - "epoch": 0.0071849793262387315, - "grad_norm": 14.49170364918165, - "learning_rate": 4.774774774774775e-07, - "loss": 1.8863, - "step": 53 - }, - { - "epoch": 0.007320544973903613, - "grad_norm": 8.535634607737279, - "learning_rate": 4.864864864864865e-07, - "loss": 1.8812, - "step": 54 - }, - { - "epoch": 0.007456110621568495, - "grad_norm": 8.670029705651041, - "learning_rate": 4.954954954954955e-07, - "loss": 1.8653, - "step": 55 - }, - { - "epoch": 0.007591676269233376, - "grad_norm": 8.694807721816511, - "learning_rate": 5.045045045045044e-07, - "loss": 1.8576, - "step": 56 - }, - { - "epoch": 0.007727241916898258, - "grad_norm": 7.9799064372643675, - "learning_rate": 5.135135135135134e-07, - "loss": 1.8281, - "step": 57 - }, - { - "epoch": 0.00786280756456314, - "grad_norm": 8.090538394129242, - "learning_rate": 5.225225225225225e-07, - "loss": 1.8089, - "step": 58 - }, - { - "epoch": 0.007998373212228021, - "grad_norm": 7.293605455837586, - "learning_rate": 5.315315315315315e-07, - "loss": 1.822, - "step": 59 - }, - { - "epoch": 0.008133938859892904, - "grad_norm": 6.7733444296458165, - "learning_rate": 5.405405405405406e-07, - "loss": 1.8025, - "step": 60 - }, - { - "epoch": 0.008269504507557784, - "grad_norm": 6.526193824925845, - "learning_rate": 5.495495495495495e-07, - "loss": 1.7774, - "step": 61 - }, - { - "epoch": 0.008405070155222667, - "grad_norm": 6.169869450080902, - "learning_rate": 5.585585585585585e-07, - "loss": 1.759, - "step": 62 - }, - { - "epoch": 0.008540635802887548, - "grad_norm": 6.244580753381197, - "learning_rate": 5.675675675675675e-07, - "loss": 1.7203, - "step": 63 - }, - { - "epoch": 0.00867620145055243, - "grad_norm": 6.413451234785374, - "learning_rate": 5.765765765765766e-07, - "loss": 1.7197, - "step": 64 - }, - { - "epoch": 0.008811767098217311, - "grad_norm": 5.7985441197410825, - "learning_rate": 5.855855855855856e-07, - "loss": 1.7207, - "step": 65 - }, - { - "epoch": 0.008947332745882193, - "grad_norm": 5.172037609166836, - "learning_rate": 5.945945945945947e-07, - "loss": 1.6594, - "step": 66 - }, - { - "epoch": 0.009082898393547076, - "grad_norm": 4.789146941489647, - "learning_rate": 6.036036036036036e-07, - "loss": 1.6631, - "step": 67 - }, - { - "epoch": 0.009218464041211957, - "grad_norm": 4.888130228558099, - "learning_rate": 6.126126126126125e-07, - "loss": 1.6505, - "step": 68 - }, - { - "epoch": 0.009354029688876839, - "grad_norm": 4.72145895286354, - "learning_rate": 6.216216216216216e-07, - "loss": 1.6738, - "step": 69 - }, - { - "epoch": 0.00948959533654172, - "grad_norm": 4.688849568932902, - "learning_rate": 6.306306306306306e-07, - "loss": 1.5781, - "step": 70 - }, - { - "epoch": 0.009625160984206602, - "grad_norm": 4.472983807698425, - "learning_rate": 6.396396396396397e-07, - "loss": 1.6653, - "step": 71 - }, - { - "epoch": 0.009760726631871483, - "grad_norm": 6.282537547307923, - "learning_rate": 6.486486486486486e-07, - "loss": 1.6129, - "step": 72 - }, - { - "epoch": 0.009896292279536366, - "grad_norm": 4.4389597615669425, - "learning_rate": 6.576576576576577e-07, - "loss": 1.6464, - "step": 73 - }, - { - "epoch": 0.010031857927201248, - "grad_norm": 4.744669092593942, - "learning_rate": 6.666666666666666e-07, - "loss": 1.6015, - "step": 74 - }, - { - "epoch": 0.010167423574866129, - "grad_norm": 4.043033516989053, - "learning_rate": 6.756756756756756e-07, - "loss": 1.5977, - "step": 75 - }, - { - "epoch": 0.010302989222531011, - "grad_norm": 4.0901548745402065, - "learning_rate": 6.846846846846847e-07, - "loss": 1.6015, - "step": 76 - }, - { - "epoch": 0.010438554870195892, - "grad_norm": 3.5345655746592524, - "learning_rate": 6.936936936936936e-07, - "loss": 1.5632, - "step": 77 - }, - { - "epoch": 0.010574120517860774, - "grad_norm": 3.42165843123686, - "learning_rate": 7.027027027027027e-07, - "loss": 1.5857, - "step": 78 - }, - { - "epoch": 0.010709686165525655, - "grad_norm": 4.0097105004671185, - "learning_rate": 7.117117117117116e-07, - "loss": 1.5817, - "step": 79 - }, - { - "epoch": 0.010845251813190538, - "grad_norm": 3.62178830640266, - "learning_rate": 7.207207207207207e-07, - "loss": 1.5962, - "step": 80 - }, - { - "epoch": 0.010980817460855418, - "grad_norm": 2.9689517832347243, - "learning_rate": 7.297297297297297e-07, - "loss": 1.5797, - "step": 81 - }, - { - "epoch": 0.011116383108520301, - "grad_norm": 3.932773383636214, - "learning_rate": 7.387387387387387e-07, - "loss": 1.5595, - "step": 82 - }, - { - "epoch": 0.011251948756185183, - "grad_norm": 6.215147773809456, - "learning_rate": 7.477477477477477e-07, - "loss": 1.558, - "step": 83 - }, - { - "epoch": 0.011387514403850064, - "grad_norm": 2.5732430251458602, - "learning_rate": 7.567567567567568e-07, - "loss": 1.5785, - "step": 84 - }, - { - "epoch": 0.011523080051514947, - "grad_norm": 2.3541213515066692, - "learning_rate": 7.657657657657657e-07, - "loss": 1.5525, - "step": 85 - }, - { - "epoch": 0.011658645699179827, - "grad_norm": 2.511064144446062, - "learning_rate": 7.747747747747747e-07, - "loss": 1.5145, - "step": 86 - }, - { - "epoch": 0.01179421134684471, - "grad_norm": 2.5389146513400247, - "learning_rate": 7.837837837837838e-07, - "loss": 1.5299, - "step": 87 - }, - { - "epoch": 0.01192977699450959, - "grad_norm": 2.309950607482175, - "learning_rate": 7.927927927927927e-07, - "loss": 1.5318, - "step": 88 - }, - { - "epoch": 0.012065342642174473, - "grad_norm": 2.2782333246844497, - "learning_rate": 8.018018018018018e-07, - "loss": 1.4753, - "step": 89 - }, - { - "epoch": 0.012200908289839356, - "grad_norm": 2.2522351609780276, - "learning_rate": 8.108108108108108e-07, - "loss": 1.5165, - "step": 90 - }, - { - "epoch": 0.012336473937504236, - "grad_norm": 2.1489656127024834, - "learning_rate": 8.198198198198198e-07, - "loss": 1.5621, - "step": 91 - }, - { - "epoch": 0.012472039585169119, - "grad_norm": 2.0640732346322954, - "learning_rate": 8.288288288288288e-07, - "loss": 1.5032, - "step": 92 - }, - { - "epoch": 0.012607605232834, - "grad_norm": 2.1850771808883245, - "learning_rate": 8.378378378378377e-07, - "loss": 1.5248, - "step": 93 - }, - { - "epoch": 0.012743170880498882, - "grad_norm": 5.397629167169215, - "learning_rate": 8.468468468468468e-07, - "loss": 1.5472, - "step": 94 - }, - { - "epoch": 0.012878736528163763, - "grad_norm": 3.0343865733000936, - "learning_rate": 8.558558558558558e-07, - "loss": 1.5012, - "step": 95 - }, - { - "epoch": 0.013014302175828645, - "grad_norm": 2.0247553082514234, - "learning_rate": 8.648648648648649e-07, - "loss": 1.5016, - "step": 96 - }, - { - "epoch": 0.013149867823493526, - "grad_norm": 2.028684912356176, - "learning_rate": 8.738738738738738e-07, - "loss": 1.5198, - "step": 97 - }, - { - "epoch": 0.013285433471158408, - "grad_norm": 1.9936719747295053, - "learning_rate": 8.828828828828828e-07, - "loss": 1.4768, - "step": 98 - }, - { - "epoch": 0.013420999118823291, - "grad_norm": 2.394638054988374, - "learning_rate": 8.918918918918918e-07, - "loss": 1.4677, - "step": 99 - }, - { - "epoch": 0.013556564766488172, - "grad_norm": 1.8856183755568405, - "learning_rate": 9.009009009009009e-07, - "loss": 1.4968, - "step": 100 - }, - { - "epoch": 0.013692130414153054, - "grad_norm": 1.9825808581112305, - "learning_rate": 9.099099099099099e-07, - "loss": 1.5007, - "step": 101 - }, - { - "epoch": 0.013827696061817935, - "grad_norm": 1.7659364717549417, - "learning_rate": 9.18918918918919e-07, - "loss": 1.4824, - "step": 102 - }, - { - "epoch": 0.013963261709482817, - "grad_norm": 1.7762659690983733, - "learning_rate": 9.279279279279278e-07, - "loss": 1.5033, - "step": 103 - }, - { - "epoch": 0.014098827357147698, - "grad_norm": 2.1391833734896735, - "learning_rate": 9.369369369369368e-07, - "loss": 1.5005, - "step": 104 - }, - { - "epoch": 0.01423439300481258, - "grad_norm": 1.9011595037236182, - "learning_rate": 9.459459459459459e-07, - "loss": 1.5045, - "step": 105 - }, - { - "epoch": 0.014369958652477463, - "grad_norm": 2.2002247031894195, - "learning_rate": 9.54954954954955e-07, - "loss": 1.4792, - "step": 106 - }, - { - "epoch": 0.014505524300142344, - "grad_norm": 1.7448928450465162, - "learning_rate": 9.63963963963964e-07, - "loss": 1.4462, - "step": 107 - }, - { - "epoch": 0.014641089947807226, - "grad_norm": 1.7211374264964578, - "learning_rate": 9.72972972972973e-07, - "loss": 1.5107, - "step": 108 - }, - { - "epoch": 0.014776655595472107, - "grad_norm": 1.600752809444846, - "learning_rate": 9.819819819819819e-07, - "loss": 1.4589, - "step": 109 - }, - { - "epoch": 0.01491222124313699, - "grad_norm": 1.7932620551261558, - "learning_rate": 9.90990990990991e-07, - "loss": 1.456, - "step": 110 - }, - { - "epoch": 0.01504778689080187, - "grad_norm": 2.5563079053550974, - "learning_rate": 1e-06, - "loss": 1.5029, - "step": 111 - }, - { - "epoch": 0.015183352538466753, - "grad_norm": 1.651804197346688, - "learning_rate": 1.0090090090090088e-06, - "loss": 1.4614, - "step": 112 - }, - { - "epoch": 0.015318918186131633, - "grad_norm": 1.809665649726903, - "learning_rate": 1.018018018018018e-06, - "loss": 1.4983, - "step": 113 - }, - { - "epoch": 0.015454483833796516, - "grad_norm": 1.9704955423231216, - "learning_rate": 1.0270270270270269e-06, - "loss": 1.4995, - "step": 114 - }, - { - "epoch": 0.015590049481461398, - "grad_norm": 1.8564147008584495, - "learning_rate": 1.0360360360360361e-06, - "loss": 1.4982, - "step": 115 - }, - { - "epoch": 0.01572561512912628, - "grad_norm": 2.7752294926333874, - "learning_rate": 1.045045045045045e-06, - "loss": 1.5092, - "step": 116 - }, - { - "epoch": 0.01586118077679116, - "grad_norm": 1.9978296660606034, - "learning_rate": 1.0540540540540538e-06, - "loss": 1.5197, - "step": 117 - }, - { - "epoch": 0.015996746424456042, - "grad_norm": 2.292743255856348, - "learning_rate": 1.063063063063063e-06, - "loss": 1.4486, - "step": 118 - }, - { - "epoch": 0.016132312072120923, - "grad_norm": 1.6221312610569312, - "learning_rate": 1.072072072072072e-06, - "loss": 1.521, - "step": 119 - }, - { - "epoch": 0.016267877719785807, - "grad_norm": 1.5226308477955839, - "learning_rate": 1.0810810810810812e-06, - "loss": 1.4713, - "step": 120 - }, - { - "epoch": 0.016403443367450688, - "grad_norm": 1.4943840457168778, - "learning_rate": 1.09009009009009e-06, - "loss": 1.4652, - "step": 121 - }, - { - "epoch": 0.01653900901511557, - "grad_norm": 1.8456896776980867, - "learning_rate": 1.099099099099099e-06, - "loss": 1.4847, - "step": 122 - }, - { - "epoch": 0.016674574662780453, - "grad_norm": 2.2109612951023463, - "learning_rate": 1.108108108108108e-06, - "loss": 1.4555, - "step": 123 - }, - { - "epoch": 0.016810140310445334, - "grad_norm": 1.9060824289056564, - "learning_rate": 1.117117117117117e-06, - "loss": 1.4835, - "step": 124 - }, - { - "epoch": 0.016945705958110215, - "grad_norm": 2.17978891463711, - "learning_rate": 1.1261261261261262e-06, - "loss": 1.4566, - "step": 125 - }, - { - "epoch": 0.017081271605775095, - "grad_norm": 2.130487148499443, - "learning_rate": 1.135135135135135e-06, - "loss": 1.4639, - "step": 126 - }, - { - "epoch": 0.01721683725343998, - "grad_norm": 1.4411412214757613, - "learning_rate": 1.1441441441441443e-06, - "loss": 1.4661, - "step": 127 - }, - { - "epoch": 0.01735240290110486, - "grad_norm": 3.34110909004883, - "learning_rate": 1.1531531531531531e-06, - "loss": 1.4544, - "step": 128 - }, - { - "epoch": 0.01748796854876974, - "grad_norm": 1.6208206011256618, - "learning_rate": 1.162162162162162e-06, - "loss": 1.4568, - "step": 129 - }, - { - "epoch": 0.017623534196434622, - "grad_norm": 1.7308675609587647, - "learning_rate": 1.1711711711711712e-06, - "loss": 1.4409, - "step": 130 - }, - { - "epoch": 0.017759099844099506, - "grad_norm": 1.6152176317721023, - "learning_rate": 1.18018018018018e-06, - "loss": 1.4611, - "step": 131 - }, - { - "epoch": 0.017894665491764387, - "grad_norm": 2.293184739829164, - "learning_rate": 1.1891891891891893e-06, - "loss": 1.5031, - "step": 132 - }, - { - "epoch": 0.018030231139429267, - "grad_norm": 1.5892167186194808, - "learning_rate": 1.1981981981981981e-06, - "loss": 1.4471, - "step": 133 - }, - { - "epoch": 0.01816579678709415, - "grad_norm": 1.7844967448919111, - "learning_rate": 1.2072072072072072e-06, - "loss": 1.4468, - "step": 134 - }, - { - "epoch": 0.018301362434759032, - "grad_norm": 2.0389564578521457, - "learning_rate": 1.2162162162162162e-06, - "loss": 1.4848, - "step": 135 - }, - { - "epoch": 0.018436928082423913, - "grad_norm": 1.4783055906275804, - "learning_rate": 1.225225225225225e-06, - "loss": 1.4674, - "step": 136 - }, - { - "epoch": 0.018572493730088794, - "grad_norm": 2.4562318823134457, - "learning_rate": 1.2342342342342343e-06, - "loss": 1.4601, - "step": 137 - }, - { - "epoch": 0.018708059377753678, - "grad_norm": 1.6909573117382803, - "learning_rate": 1.2432432432432432e-06, - "loss": 1.4712, - "step": 138 - }, - { - "epoch": 0.01884362502541856, - "grad_norm": 2.939979936304758, - "learning_rate": 1.2522522522522522e-06, - "loss": 1.4358, - "step": 139 - }, - { - "epoch": 0.01897919067308344, - "grad_norm": 1.4102524226155284, - "learning_rate": 1.2612612612612613e-06, - "loss": 1.4583, - "step": 140 - }, - { - "epoch": 0.019114756320748324, - "grad_norm": 6.426404923995833, - "learning_rate": 1.27027027027027e-06, - "loss": 1.4173, - "step": 141 - }, - { - "epoch": 0.019250321968413205, - "grad_norm": 2.255748860786786, - "learning_rate": 1.2792792792792793e-06, - "loss": 1.4446, - "step": 142 - }, - { - "epoch": 0.019385887616078085, - "grad_norm": 1.699843312858549, - "learning_rate": 1.2882882882882882e-06, - "loss": 1.4728, - "step": 143 - }, - { - "epoch": 0.019521453263742966, - "grad_norm": 1.78156546308971, - "learning_rate": 1.2972972972972972e-06, - "loss": 1.4793, - "step": 144 - }, - { - "epoch": 0.01965701891140785, - "grad_norm": 1.5366396140321357, - "learning_rate": 1.3063063063063063e-06, - "loss": 1.4538, - "step": 145 - }, - { - "epoch": 0.01979258455907273, - "grad_norm": 1.547307226903954, - "learning_rate": 1.3153153153153153e-06, - "loss": 1.4461, - "step": 146 - }, - { - "epoch": 0.019928150206737612, - "grad_norm": 2.0633466637585194, - "learning_rate": 1.3243243243243244e-06, - "loss": 1.4722, - "step": 147 - }, - { - "epoch": 0.020063715854402496, - "grad_norm": 1.4619401177013418, - "learning_rate": 1.3333333333333332e-06, - "loss": 1.4309, - "step": 148 - }, - { - "epoch": 0.020199281502067377, - "grad_norm": 1.45083484339537, - "learning_rate": 1.3423423423423422e-06, - "loss": 1.4565, - "step": 149 - }, - { - "epoch": 0.020334847149732257, - "grad_norm": 2.5877409100427395, - "learning_rate": 1.3513513513513513e-06, - "loss": 1.4302, - "step": 150 - }, - { - "epoch": 0.020470412797397138, - "grad_norm": 1.508521639038679, - "learning_rate": 1.3603603603603603e-06, - "loss": 1.4859, - "step": 151 - }, - { - "epoch": 0.020605978445062022, - "grad_norm": 1.436472506897216, - "learning_rate": 1.3693693693693694e-06, - "loss": 1.4413, - "step": 152 - }, - { - "epoch": 0.020741544092726903, - "grad_norm": 2.4578930596905773, - "learning_rate": 1.3783783783783782e-06, - "loss": 1.4195, - "step": 153 - }, - { - "epoch": 0.020877109740391784, - "grad_norm": 2.0500962423136997, - "learning_rate": 1.3873873873873873e-06, - "loss": 1.4243, - "step": 154 - }, - { - "epoch": 0.021012675388056668, - "grad_norm": 1.5882460823207682, - "learning_rate": 1.3963963963963963e-06, - "loss": 1.4329, - "step": 155 - }, - { - "epoch": 0.02114824103572155, - "grad_norm": 3.1752762150764218, - "learning_rate": 1.4054054054054054e-06, - "loss": 1.4409, - "step": 156 - }, - { - "epoch": 0.02128380668338643, - "grad_norm": 1.6982173765105, - "learning_rate": 1.4144144144144144e-06, - "loss": 1.4204, - "step": 157 - }, - { - "epoch": 0.02141937233105131, - "grad_norm": 1.581035863249022, - "learning_rate": 1.4234234234234232e-06, - "loss": 1.4585, - "step": 158 - }, - { - "epoch": 0.021554937978716195, - "grad_norm": 1.93521312225197, - "learning_rate": 1.4324324324324323e-06, - "loss": 1.4704, - "step": 159 - }, - { - "epoch": 0.021690503626381075, - "grad_norm": 1.6249367150225102, - "learning_rate": 1.4414414414414413e-06, - "loss": 1.4134, - "step": 160 - }, - { - "epoch": 0.021826069274045956, - "grad_norm": 1.4811609234146867, - "learning_rate": 1.4504504504504504e-06, - "loss": 1.4293, - "step": 161 - }, - { - "epoch": 0.021961634921710837, - "grad_norm": 1.3878222282088852, - "learning_rate": 1.4594594594594594e-06, - "loss": 1.442, - "step": 162 - }, - { - "epoch": 0.02209720056937572, - "grad_norm": 1.5391308775646662, - "learning_rate": 1.4684684684684685e-06, - "loss": 1.4161, - "step": 163 - }, - { - "epoch": 0.022232766217040602, - "grad_norm": 1.9920836524815897, - "learning_rate": 1.4774774774774773e-06, - "loss": 1.4121, - "step": 164 - }, - { - "epoch": 0.022368331864705483, - "grad_norm": 1.4346200638714859, - "learning_rate": 1.4864864864864864e-06, - "loss": 1.4251, - "step": 165 - }, - { - "epoch": 0.022503897512370367, - "grad_norm": 2.3132809185165972, - "learning_rate": 1.4954954954954954e-06, - "loss": 1.4283, - "step": 166 - }, - { - "epoch": 0.022639463160035248, - "grad_norm": 1.41423836433183, - "learning_rate": 1.5045045045045045e-06, - "loss": 1.4433, - "step": 167 - }, - { - "epoch": 0.022775028807700128, - "grad_norm": 1.7796186206985323, - "learning_rate": 1.5135135135135135e-06, - "loss": 1.4494, - "step": 168 - }, - { - "epoch": 0.02291059445536501, - "grad_norm": 6.889500514542377, - "learning_rate": 1.5225225225225225e-06, - "loss": 1.4313, - "step": 169 - }, - { - "epoch": 0.023046160103029893, - "grad_norm": 4.9336646627198775, - "learning_rate": 1.5315315315315314e-06, - "loss": 1.468, - "step": 170 - }, - { - "epoch": 0.023181725750694774, - "grad_norm": 1.4030117249228922, - "learning_rate": 1.5405405405405404e-06, - "loss": 1.4206, - "step": 171 - }, - { - "epoch": 0.023317291398359655, - "grad_norm": 1.7007506196957312, - "learning_rate": 1.5495495495495495e-06, - "loss": 1.44, - "step": 172 - }, - { - "epoch": 0.02345285704602454, - "grad_norm": 1.8971210833189927, - "learning_rate": 1.5585585585585585e-06, - "loss": 1.4063, - "step": 173 - }, - { - "epoch": 0.02358842269368942, - "grad_norm": 1.302982687460494, - "learning_rate": 1.5675675675675676e-06, - "loss": 1.4093, - "step": 174 - }, - { - "epoch": 0.0237239883413543, - "grad_norm": 2.2037029089976627, - "learning_rate": 1.5765765765765766e-06, - "loss": 1.4092, - "step": 175 - }, - { - "epoch": 0.02385955398901918, - "grad_norm": 1.546495077213584, - "learning_rate": 1.5855855855855855e-06, - "loss": 1.4189, - "step": 176 - }, - { - "epoch": 0.023995119636684065, - "grad_norm": 1.3966886247323087, - "learning_rate": 1.5945945945945945e-06, - "loss": 1.4001, - "step": 177 - }, - { - "epoch": 0.024130685284348946, - "grad_norm": 1.8904525443350804, - "learning_rate": 1.6036036036036035e-06, - "loss": 1.4298, - "step": 178 - }, - { - "epoch": 0.024266250932013827, - "grad_norm": 1.9875049087964187, - "learning_rate": 1.6126126126126126e-06, - "loss": 1.4165, - "step": 179 - }, - { - "epoch": 0.02440181657967871, - "grad_norm": 1.8295107220300855, - "learning_rate": 1.6216216216216216e-06, - "loss": 1.413, - "step": 180 - }, - { - "epoch": 0.024537382227343592, - "grad_norm": 1.8721875972432258, - "learning_rate": 1.6306306306306305e-06, - "loss": 1.445, - "step": 181 - }, - { - "epoch": 0.024672947875008473, - "grad_norm": 1.6163221657653166, - "learning_rate": 1.6396396396396395e-06, - "loss": 1.4308, - "step": 182 - }, - { - "epoch": 0.024808513522673353, - "grad_norm": 1.682771990716447, - "learning_rate": 1.6486486486486486e-06, - "loss": 1.4708, - "step": 183 - }, - { - "epoch": 0.024944079170338238, - "grad_norm": 1.3278602918850941, - "learning_rate": 1.6576576576576576e-06, - "loss": 1.4075, - "step": 184 - }, - { - "epoch": 0.025079644818003118, - "grad_norm": 1.5370038639960675, - "learning_rate": 1.6666666666666667e-06, - "loss": 1.4227, - "step": 185 - }, - { - "epoch": 0.025215210465668, - "grad_norm": 1.3609780817540533, - "learning_rate": 1.6756756756756755e-06, - "loss": 1.396, - "step": 186 - }, - { - "epoch": 0.02535077611333288, - "grad_norm": 1.9616205364055233, - "learning_rate": 1.6846846846846845e-06, - "loss": 1.4416, - "step": 187 - }, - { - "epoch": 0.025486341760997764, - "grad_norm": 1.502754110534782, - "learning_rate": 1.6936936936936936e-06, - "loss": 1.4101, - "step": 188 - }, - { - "epoch": 0.025621907408662645, - "grad_norm": 1.4521395335603045, - "learning_rate": 1.7027027027027026e-06, - "loss": 1.4428, - "step": 189 - }, - { - "epoch": 0.025757473056327525, - "grad_norm": 3.7487179759648113, - "learning_rate": 1.7117117117117117e-06, - "loss": 1.3872, - "step": 190 - }, - { - "epoch": 0.02589303870399241, - "grad_norm": 1.4140542815630024, - "learning_rate": 1.7207207207207205e-06, - "loss": 1.3796, - "step": 191 - }, - { - "epoch": 0.02602860435165729, - "grad_norm": 2.8669528622986946, - "learning_rate": 1.7297297297297298e-06, - "loss": 1.4689, - "step": 192 - }, - { - "epoch": 0.02616416999932217, - "grad_norm": 1.4424153910055988, - "learning_rate": 1.7387387387387386e-06, - "loss": 1.4577, - "step": 193 - }, - { - "epoch": 0.026299735646987052, - "grad_norm": 1.9261284405368198, - "learning_rate": 1.7477477477477477e-06, - "loss": 1.438, - "step": 194 - }, - { - "epoch": 0.026435301294651936, - "grad_norm": 1.550197529171603, - "learning_rate": 1.7567567567567567e-06, - "loss": 1.4081, - "step": 195 - }, - { - "epoch": 0.026570866942316817, - "grad_norm": 1.5827461344328615, - "learning_rate": 1.7657657657657655e-06, - "loss": 1.3965, - "step": 196 - }, - { - "epoch": 0.026706432589981698, - "grad_norm": 1.6333341826873844, - "learning_rate": 1.7747747747747748e-06, - "loss": 1.3822, - "step": 197 - }, - { - "epoch": 0.026841998237646582, - "grad_norm": 1.47768039344046, - "learning_rate": 1.7837837837837836e-06, - "loss": 1.3954, - "step": 198 - }, - { - "epoch": 0.026977563885311463, - "grad_norm": 1.5898366360488652, - "learning_rate": 1.7927927927927927e-06, - "loss": 1.3794, - "step": 199 - }, - { - "epoch": 0.027113129532976343, - "grad_norm": 1.2938267773765495, - "learning_rate": 1.8018018018018017e-06, - "loss": 1.3826, - "step": 200 - }, - { - "epoch": 0.027248695180641224, - "grad_norm": 2.227679548552012, - "learning_rate": 1.8108108108108106e-06, - "loss": 1.4095, - "step": 201 - }, - { - "epoch": 0.02738426082830611, - "grad_norm": 2.4805075352385337, - "learning_rate": 1.8198198198198198e-06, - "loss": 1.4351, - "step": 202 - }, - { - "epoch": 0.02751982647597099, - "grad_norm": 1.7940482497185923, - "learning_rate": 1.8288288288288287e-06, - "loss": 1.4303, - "step": 203 - }, - { - "epoch": 0.02765539212363587, - "grad_norm": 1.457976892866785, - "learning_rate": 1.837837837837838e-06, - "loss": 1.4472, - "step": 204 - }, - { - "epoch": 0.027790957771300754, - "grad_norm": 2.53622589650347, - "learning_rate": 1.8468468468468467e-06, - "loss": 1.3951, - "step": 205 - }, - { - "epoch": 0.027926523418965635, - "grad_norm": 1.6195927538794186, - "learning_rate": 1.8558558558558556e-06, - "loss": 1.415, - "step": 206 - }, - { - "epoch": 0.028062089066630515, - "grad_norm": 2.1837454661422706, - "learning_rate": 1.8648648648648648e-06, - "loss": 1.4127, - "step": 207 - }, - { - "epoch": 0.028197654714295396, - "grad_norm": 1.4198349174104905, - "learning_rate": 1.8738738738738737e-06, - "loss": 1.3787, - "step": 208 - }, - { - "epoch": 0.02833322036196028, - "grad_norm": 1.4656777320207848, - "learning_rate": 1.882882882882883e-06, - "loss": 1.4205, - "step": 209 - }, - { - "epoch": 0.02846878600962516, - "grad_norm": 1.6785057466910236, - "learning_rate": 1.8918918918918918e-06, - "loss": 1.4213, - "step": 210 - }, - { - "epoch": 0.028604351657290042, - "grad_norm": 1.9029803580014049, - "learning_rate": 1.9009009009009008e-06, - "loss": 1.3933, - "step": 211 - }, - { - "epoch": 0.028739917304954926, - "grad_norm": 1.4514965193801417, - "learning_rate": 1.90990990990991e-06, - "loss": 1.398, - "step": 212 - }, - { - "epoch": 0.028875482952619807, - "grad_norm": 1.5073996378558847, - "learning_rate": 1.9189189189189187e-06, - "loss": 1.3896, - "step": 213 - }, - { - "epoch": 0.029011048600284688, - "grad_norm": 1.7748347060874479, - "learning_rate": 1.927927927927928e-06, - "loss": 1.4348, - "step": 214 - }, - { - "epoch": 0.02914661424794957, - "grad_norm": 1.8375574018886902, - "learning_rate": 1.936936936936937e-06, - "loss": 1.4113, - "step": 215 - }, - { - "epoch": 0.029282179895614453, - "grad_norm": 2.0217922617020148, - "learning_rate": 1.945945945945946e-06, - "loss": 1.4095, - "step": 216 - }, - { - "epoch": 0.029417745543279333, - "grad_norm": 2.5576946487773156, - "learning_rate": 1.954954954954955e-06, - "loss": 1.4181, - "step": 217 - }, - { - "epoch": 0.029553311190944214, - "grad_norm": 1.4934484346103987, - "learning_rate": 1.9639639639639637e-06, - "loss": 1.4385, - "step": 218 - }, - { - "epoch": 0.029688876838609095, - "grad_norm": 1.6591344813889835, - "learning_rate": 1.972972972972973e-06, - "loss": 1.3929, - "step": 219 - }, - { - "epoch": 0.02982444248627398, - "grad_norm": 2.267077270370139, - "learning_rate": 1.981981981981982e-06, - "loss": 1.3873, - "step": 220 - }, - { - "epoch": 0.02996000813393886, - "grad_norm": 1.6180749945734663, - "learning_rate": 1.990990990990991e-06, - "loss": 1.3392, - "step": 221 - }, - { - "epoch": 0.03009557378160374, - "grad_norm": 1.9129981942757963, - "learning_rate": 2e-06, - "loss": 1.401, - "step": 222 - }, - { - "epoch": 0.030231139429268625, - "grad_norm": 1.8423101243120148, - "learning_rate": 1.9999999035789467e-06, - "loss": 1.4093, - "step": 223 - }, - { - "epoch": 0.030366705076933505, - "grad_norm": 1.7923225098739226, - "learning_rate": 1.9999996143158056e-06, - "loss": 1.3785, - "step": 224 - }, - { - "epoch": 0.030502270724598386, - "grad_norm": 1.4152977108066773, - "learning_rate": 1.9999991322106323e-06, - "loss": 1.4059, - "step": 225 - }, - { - "epoch": 0.030637836372263267, - "grad_norm": 1.3968389316481733, - "learning_rate": 1.99999845726352e-06, - "loss": 1.3961, - "step": 226 - }, - { - "epoch": 0.03077340201992815, - "grad_norm": 3.053507055159351, - "learning_rate": 1.9999975894745984e-06, - "loss": 1.4086, - "step": 227 - }, - { - "epoch": 0.030908967667593032, - "grad_norm": 2.9751924850683524, - "learning_rate": 1.9999965288440357e-06, - "loss": 1.3839, - "step": 228 - }, - { - "epoch": 0.031044533315257913, - "grad_norm": 1.646364576594217, - "learning_rate": 1.9999952753720353e-06, - "loss": 1.3706, - "step": 229 - }, - { - "epoch": 0.031180098962922797, - "grad_norm": 3.230983478795083, - "learning_rate": 1.99999382905884e-06, - "loss": 1.3953, - "step": 230 - }, - { - "epoch": 0.03131566461058768, - "grad_norm": 2.26181431775139, - "learning_rate": 1.9999921899047284e-06, - "loss": 1.4046, - "step": 231 - }, - { - "epoch": 0.03145123025825256, - "grad_norm": 1.588638845962109, - "learning_rate": 1.999990357910016e-06, - "loss": 1.3482, - "step": 232 - }, - { - "epoch": 0.03158679590591744, - "grad_norm": 1.6597400418101047, - "learning_rate": 1.9999883330750567e-06, - "loss": 1.3582, - "step": 233 - }, - { - "epoch": 0.03172236155358232, - "grad_norm": 2.1747177060830114, - "learning_rate": 1.9999861154002405e-06, - "loss": 1.4298, - "step": 234 - }, - { - "epoch": 0.0318579272012472, - "grad_norm": 1.6836443961752021, - "learning_rate": 1.9999837048859957e-06, - "loss": 1.3691, - "step": 235 - }, - { - "epoch": 0.031993492848912085, - "grad_norm": 2.669920245046704, - "learning_rate": 1.999981101532787e-06, - "loss": 1.3863, - "step": 236 - }, - { - "epoch": 0.03212905849657697, - "grad_norm": 1.7847142852608644, - "learning_rate": 1.9999783053411157e-06, - "loss": 1.3718, - "step": 237 - }, - { - "epoch": 0.032264624144241846, - "grad_norm": 1.5100144572864929, - "learning_rate": 1.999975316311522e-06, - "loss": 1.3664, - "step": 238 - }, - { - "epoch": 0.03240018979190673, - "grad_norm": 2.045560083966139, - "learning_rate": 1.9999721344445816e-06, - "loss": 1.4312, - "step": 239 - }, - { - "epoch": 0.032535755439571615, - "grad_norm": 1.7578228212823335, - "learning_rate": 1.9999687597409084e-06, - "loss": 1.4197, - "step": 240 - }, - { - "epoch": 0.03267132108723649, - "grad_norm": 1.5150763427478837, - "learning_rate": 1.9999651922011532e-06, - "loss": 1.4094, - "step": 241 - }, - { - "epoch": 0.032806886734901376, - "grad_norm": 1.4228113562579867, - "learning_rate": 1.999961431826004e-06, - "loss": 1.3764, - "step": 242 - }, - { - "epoch": 0.03294245238256626, - "grad_norm": 1.9042803018030525, - "learning_rate": 1.999957478616186e-06, - "loss": 1.3931, - "step": 243 - }, - { - "epoch": 0.03307801803023114, - "grad_norm": 3.7439796611228138, - "learning_rate": 1.9999533325724613e-06, - "loss": 1.4239, - "step": 244 - }, - { - "epoch": 0.03321358367789602, - "grad_norm": 1.618571786703449, - "learning_rate": 1.9999489936956295e-06, - "loss": 1.3549, - "step": 245 - }, - { - "epoch": 0.033349149325560906, - "grad_norm": 2.1638352228442366, - "learning_rate": 1.9999444619865273e-06, - "loss": 1.382, - "step": 246 - }, - { - "epoch": 0.03348471497322578, - "grad_norm": 1.9762693184900288, - "learning_rate": 1.999939737446029e-06, - "loss": 1.3722, - "step": 247 - }, - { - "epoch": 0.03362028062089067, - "grad_norm": 1.5884978959141258, - "learning_rate": 1.999934820075045e-06, - "loss": 1.3963, - "step": 248 - }, - { - "epoch": 0.033755846268555545, - "grad_norm": 3.3597247998139865, - "learning_rate": 1.9999297098745245e-06, - "loss": 1.4029, - "step": 249 - }, - { - "epoch": 0.03389141191622043, - "grad_norm": 2.533795469663496, - "learning_rate": 1.999924406845452e-06, - "loss": 1.4026, - "step": 250 - }, - { - "epoch": 0.03402697756388531, - "grad_norm": 1.7138149535131035, - "learning_rate": 1.9999189109888503e-06, - "loss": 1.371, - "step": 251 - }, - { - "epoch": 0.03416254321155019, - "grad_norm": 3.5813752229584384, - "learning_rate": 1.9999132223057797e-06, - "loss": 1.4068, - "step": 252 - }, - { - "epoch": 0.034298108859215075, - "grad_norm": 3.025953083706524, - "learning_rate": 1.999907340797337e-06, - "loss": 1.3787, - "step": 253 - }, - { - "epoch": 0.03443367450687996, - "grad_norm": 1.786445380141776, - "learning_rate": 1.9999012664646567e-06, - "loss": 1.3976, - "step": 254 - }, - { - "epoch": 0.034569240154544836, - "grad_norm": 1.5656536352639006, - "learning_rate": 1.99989499930891e-06, - "loss": 1.3897, - "step": 255 - }, - { - "epoch": 0.03470480580220972, - "grad_norm": 1.3648540026649725, - "learning_rate": 1.999888539331305e-06, - "loss": 1.3393, - "step": 256 - }, - { - "epoch": 0.034840371449874605, - "grad_norm": 1.5203044333366489, - "learning_rate": 1.999881886533088e-06, - "loss": 1.4017, - "step": 257 - }, - { - "epoch": 0.03497593709753948, - "grad_norm": 1.6437939139098705, - "learning_rate": 1.9998750409155416e-06, - "loss": 1.3432, - "step": 258 - }, - { - "epoch": 0.035111502745204366, - "grad_norm": 1.3632232085799463, - "learning_rate": 1.999868002479986e-06, - "loss": 1.4045, - "step": 259 - }, - { - "epoch": 0.035247068392869244, - "grad_norm": 1.8013308999434519, - "learning_rate": 1.9998607712277792e-06, - "loss": 1.383, - "step": 260 - }, - { - "epoch": 0.03538263404053413, - "grad_norm": 2.1705263078904675, - "learning_rate": 1.9998533471603145e-06, - "loss": 1.3833, - "step": 261 - }, - { - "epoch": 0.03551819968819901, - "grad_norm": 3.5599028899477614, - "learning_rate": 1.9998457302790245e-06, - "loss": 1.3548, - "step": 262 - }, - { - "epoch": 0.03565376533586389, - "grad_norm": 1.476502682456324, - "learning_rate": 1.9998379205853775e-06, - "loss": 1.3657, - "step": 263 - }, - { - "epoch": 0.03578933098352877, - "grad_norm": 1.3611372130590276, - "learning_rate": 1.9998299180808796e-06, - "loss": 1.3753, - "step": 264 - }, - { - "epoch": 0.03592489663119366, - "grad_norm": 1.5840286110035533, - "learning_rate": 1.999821722767075e-06, - "loss": 1.3539, - "step": 265 - }, - { - "epoch": 0.036060462278858535, - "grad_norm": 4.3459998401920075, - "learning_rate": 1.9998133346455422e-06, - "loss": 1.3669, - "step": 266 - }, - { - "epoch": 0.03619602792652342, - "grad_norm": 2.765006940144401, - "learning_rate": 1.9998047537179007e-06, - "loss": 1.3655, - "step": 267 - }, - { - "epoch": 0.0363315935741883, - "grad_norm": 2.431275820592414, - "learning_rate": 1.999795979985804e-06, - "loss": 1.3602, - "step": 268 - }, - { - "epoch": 0.03646715922185318, - "grad_norm": 3.5342801795274776, - "learning_rate": 1.9997870134509444e-06, - "loss": 1.3687, - "step": 269 - }, - { - "epoch": 0.036602724869518065, - "grad_norm": 3.808465354088291, - "learning_rate": 1.9997778541150515e-06, - "loss": 1.3466, - "step": 270 - }, - { - "epoch": 0.03673829051718295, - "grad_norm": 1.4780015542853115, - "learning_rate": 1.9997685019798908e-06, - "loss": 1.349, - "step": 271 - }, - { - "epoch": 0.036873856164847826, - "grad_norm": 1.714870548940344, - "learning_rate": 1.999758957047266e-06, - "loss": 1.4021, - "step": 272 - }, - { - "epoch": 0.03700942181251271, - "grad_norm": 1.3230004777146438, - "learning_rate": 1.9997492193190185e-06, - "loss": 1.3568, - "step": 273 - }, - { - "epoch": 0.03714498746017759, - "grad_norm": 1.8110858636213139, - "learning_rate": 1.9997392887970253e-06, - "loss": 1.3444, - "step": 274 - }, - { - "epoch": 0.03728055310784247, - "grad_norm": 1.7348389529794637, - "learning_rate": 1.999729165483202e-06, - "loss": 1.3589, - "step": 275 - }, - { - "epoch": 0.037416118755507356, - "grad_norm": 4.58905979756494, - "learning_rate": 1.9997188493795e-06, - "loss": 1.3841, - "step": 276 - }, - { - "epoch": 0.037551684403172234, - "grad_norm": 1.6616078857912495, - "learning_rate": 1.99970834048791e-06, - "loss": 1.3707, - "step": 277 - }, - { - "epoch": 0.03768725005083712, - "grad_norm": 4.100661358329321, - "learning_rate": 1.999697638810457e-06, - "loss": 1.3559, - "step": 278 - }, - { - "epoch": 0.037822815698502, - "grad_norm": 1.6221211967660296, - "learning_rate": 1.9996867443492057e-06, - "loss": 1.4025, - "step": 279 - }, - { - "epoch": 0.03795838134616688, - "grad_norm": 1.8909396951712054, - "learning_rate": 1.999675657106257e-06, - "loss": 1.3776, - "step": 280 - }, - { - "epoch": 0.038093946993831763, - "grad_norm": 1.4818904244208466, - "learning_rate": 1.9996643770837486e-06, - "loss": 1.3619, - "step": 281 - }, - { - "epoch": 0.03822951264149665, - "grad_norm": 1.5812633425411216, - "learning_rate": 1.999652904283856e-06, - "loss": 1.382, - "step": 282 - }, - { - "epoch": 0.038365078289161525, - "grad_norm": 2.9052633029195594, - "learning_rate": 1.9996412387087914e-06, - "loss": 1.3617, - "step": 283 - }, - { - "epoch": 0.03850064393682641, - "grad_norm": 2.415691397097965, - "learning_rate": 1.9996293803608053e-06, - "loss": 1.3109, - "step": 284 - }, - { - "epoch": 0.038636209584491286, - "grad_norm": 4.4138928962698385, - "learning_rate": 1.9996173292421828e-06, - "loss": 1.362, - "step": 285 - }, - { - "epoch": 0.03877177523215617, - "grad_norm": 2.3766560639895897, - "learning_rate": 1.9996050853552494e-06, - "loss": 1.3952, - "step": 286 - }, - { - "epoch": 0.038907340879821055, - "grad_norm": 1.4418157325319962, - "learning_rate": 1.999592648702366e-06, - "loss": 1.3757, - "step": 287 - }, - { - "epoch": 0.03904290652748593, - "grad_norm": 4.055586109581618, - "learning_rate": 1.99958001928593e-06, - "loss": 1.4183, - "step": 288 - }, - { - "epoch": 0.039178472175150816, - "grad_norm": 1.5242713187387358, - "learning_rate": 1.9995671971083777e-06, - "loss": 1.361, - "step": 289 - }, - { - "epoch": 0.0393140378228157, - "grad_norm": 2.2334324103608356, - "learning_rate": 1.9995541821721814e-06, - "loss": 1.3576, - "step": 290 - }, - { - "epoch": 0.03944960347048058, - "grad_norm": 1.5343574253424124, - "learning_rate": 1.9995409744798512e-06, - "loss": 1.382, - "step": 291 - }, - { - "epoch": 0.03958516911814546, - "grad_norm": 1.583373180152813, - "learning_rate": 1.999527574033934e-06, - "loss": 1.3135, - "step": 292 - }, - { - "epoch": 0.039720734765810346, - "grad_norm": 2.352486658860294, - "learning_rate": 1.9995139808370142e-06, - "loss": 1.3639, - "step": 293 - }, - { - "epoch": 0.039856300413475224, - "grad_norm": 1.6484056620695784, - "learning_rate": 1.9995001948917124e-06, - "loss": 1.3332, - "step": 294 - }, - { - "epoch": 0.03999186606114011, - "grad_norm": 1.4791136008289367, - "learning_rate": 1.999486216200688e-06, - "loss": 1.4007, - "step": 295 - }, - { - "epoch": 0.04012743170880499, - "grad_norm": 2.9236723957249935, - "learning_rate": 1.999472044766636e-06, - "loss": 1.3845, - "step": 296 - }, - { - "epoch": 0.04026299735646987, - "grad_norm": 2.249130489607479, - "learning_rate": 1.9994576805922898e-06, - "loss": 1.3467, - "step": 297 - }, - { - "epoch": 0.040398563004134753, - "grad_norm": 2.1734040392364733, - "learning_rate": 1.9994431236804187e-06, - "loss": 1.3609, - "step": 298 - }, - { - "epoch": 0.04053412865179963, - "grad_norm": 1.6823636153893295, - "learning_rate": 1.9994283740338306e-06, - "loss": 1.3823, - "step": 299 - }, - { - "epoch": 0.040669694299464515, - "grad_norm": 1.5942223645940168, - "learning_rate": 1.9994134316553693e-06, - "loss": 1.3737, - "step": 300 - }, - { - "epoch": 0.0408052599471294, - "grad_norm": 3.934694898981306, - "learning_rate": 1.999398296547917e-06, - "loss": 1.343, - "step": 301 - }, - { - "epoch": 0.040940825594794276, - "grad_norm": 2.1365601314987357, - "learning_rate": 1.9993829687143913e-06, - "loss": 1.3778, - "step": 302 - }, - { - "epoch": 0.04107639124245916, - "grad_norm": 1.6688410529771212, - "learning_rate": 1.9993674481577497e-06, - "loss": 1.3612, - "step": 303 - }, - { - "epoch": 0.041211956890124045, - "grad_norm": 1.445156093247341, - "learning_rate": 1.9993517348809836e-06, - "loss": 1.3442, - "step": 304 - }, - { - "epoch": 0.04134752253778892, - "grad_norm": 1.6797246076382437, - "learning_rate": 1.999335828887124e-06, - "loss": 1.3551, - "step": 305 - }, - { - "epoch": 0.041483088185453806, - "grad_norm": 1.4807200739511652, - "learning_rate": 1.999319730179238e-06, - "loss": 1.3689, - "step": 306 - }, - { - "epoch": 0.04161865383311869, - "grad_norm": 1.5732891502193933, - "learning_rate": 1.9993034387604302e-06, - "loss": 1.3534, - "step": 307 - }, - { - "epoch": 0.04175421948078357, - "grad_norm": 1.7907093428905916, - "learning_rate": 1.9992869546338428e-06, - "loss": 1.3544, - "step": 308 - }, - { - "epoch": 0.04188978512844845, - "grad_norm": 1.4432705106405477, - "learning_rate": 1.9992702778026532e-06, - "loss": 1.3242, - "step": 309 - }, - { - "epoch": 0.042025350776113336, - "grad_norm": 1.507957929311929, - "learning_rate": 1.999253408270079e-06, - "loss": 1.3268, - "step": 310 - }, - { - "epoch": 0.042160916423778214, - "grad_norm": 2.021480862810751, - "learning_rate": 1.9992363460393724e-06, - "loss": 1.3381, - "step": 311 - }, - { - "epoch": 0.0422964820714431, - "grad_norm": 1.4655177625555775, - "learning_rate": 1.9992190911138236e-06, - "loss": 1.3423, - "step": 312 - }, - { - "epoch": 0.042432047719107975, - "grad_norm": 1.557338696652907, - "learning_rate": 1.999201643496761e-06, - "loss": 1.3508, - "step": 313 - }, - { - "epoch": 0.04256761336677286, - "grad_norm": 1.7631541953217642, - "learning_rate": 1.9991840031915484e-06, - "loss": 1.3467, - "step": 314 - }, - { - "epoch": 0.042703179014437743, - "grad_norm": 1.5028353366919411, - "learning_rate": 1.9991661702015877e-06, - "loss": 1.3539, - "step": 315 - }, - { - "epoch": 0.04283874466210262, - "grad_norm": 2.295092356146177, - "learning_rate": 1.9991481445303182e-06, - "loss": 1.3458, - "step": 316 - }, - { - "epoch": 0.042974310309767505, - "grad_norm": 2.790374286694081, - "learning_rate": 1.999129926181216e-06, - "loss": 1.335, - "step": 317 - }, - { - "epoch": 0.04310987595743239, - "grad_norm": 1.4576901644431888, - "learning_rate": 1.9991115151577938e-06, - "loss": 1.3345, - "step": 318 - }, - { - "epoch": 0.043245441605097266, - "grad_norm": 1.7461486806201925, - "learning_rate": 1.999092911463603e-06, - "loss": 1.3929, - "step": 319 - }, - { - "epoch": 0.04338100725276215, - "grad_norm": 2.069875655321941, - "learning_rate": 1.99907411510223e-06, - "loss": 1.3811, - "step": 320 - }, - { - "epoch": 0.043516572900427035, - "grad_norm": 1.527983376757812, - "learning_rate": 1.9990551260773003e-06, - "loss": 1.3192, - "step": 321 - }, - { - "epoch": 0.04365213854809191, - "grad_norm": 2.25858086886602, - "learning_rate": 1.9990359443924755e-06, - "loss": 1.3629, - "step": 322 - }, - { - "epoch": 0.043787704195756796, - "grad_norm": 1.4590870633625341, - "learning_rate": 1.999016570051455e-06, - "loss": 1.3548, - "step": 323 - }, - { - "epoch": 0.043923269843421674, - "grad_norm": 1.489840231507941, - "learning_rate": 1.9989970030579744e-06, - "loss": 1.342, - "step": 324 - }, - { - "epoch": 0.04405883549108656, - "grad_norm": 1.3602016998108024, - "learning_rate": 1.9989772434158076e-06, - "loss": 1.3622, - "step": 325 - }, - { - "epoch": 0.04419440113875144, - "grad_norm": 1.4431552442543385, - "learning_rate": 1.9989572911287647e-06, - "loss": 1.3421, - "step": 326 - }, - { - "epoch": 0.04432996678641632, - "grad_norm": 1.5287812055082524, - "learning_rate": 1.9989371462006938e-06, - "loss": 1.332, - "step": 327 - }, - { - "epoch": 0.044465532434081204, - "grad_norm": 1.548941937251561, - "learning_rate": 1.998916808635479e-06, - "loss": 1.3543, - "step": 328 - }, - { - "epoch": 0.04460109808174609, - "grad_norm": 1.4005227344051445, - "learning_rate": 1.998896278437043e-06, - "loss": 1.3504, - "step": 329 - }, - { - "epoch": 0.044736663729410965, - "grad_norm": 2.283482308203264, - "learning_rate": 1.998875555609344e-06, - "loss": 1.357, - "step": 330 - }, - { - "epoch": 0.04487222937707585, - "grad_norm": 1.4186019016749811, - "learning_rate": 1.998854640156379e-06, - "loss": 1.3469, - "step": 331 - }, - { - "epoch": 0.045007795024740734, - "grad_norm": 1.3390962680005782, - "learning_rate": 1.998833532082181e-06, - "loss": 1.2842, - "step": 332 - }, - { - "epoch": 0.04514336067240561, - "grad_norm": 5.860717030717646, - "learning_rate": 1.9988122313908212e-06, - "loss": 1.3753, - "step": 333 - }, - { - "epoch": 0.045278926320070495, - "grad_norm": 1.618106217979852, - "learning_rate": 1.998790738086406e-06, - "loss": 1.3889, - "step": 334 - }, - { - "epoch": 0.04541449196773538, - "grad_norm": 1.4616368136814442, - "learning_rate": 1.9987690521730817e-06, - "loss": 1.3626, - "step": 335 - }, - { - "epoch": 0.045550057615400256, - "grad_norm": 1.5439152338544893, - "learning_rate": 1.9987471736550287e-06, - "loss": 1.3815, - "step": 336 - }, - { - "epoch": 0.04568562326306514, - "grad_norm": 1.4904743508531704, - "learning_rate": 1.9987251025364677e-06, - "loss": 1.3926, - "step": 337 - }, - { - "epoch": 0.04582118891073002, - "grad_norm": 1.7104431384970558, - "learning_rate": 1.9987028388216532e-06, - "loss": 1.3498, - "step": 338 - }, - { - "epoch": 0.0459567545583949, - "grad_norm": 1.9871322000163192, - "learning_rate": 1.99868038251488e-06, - "loss": 1.3535, - "step": 339 - }, - { - "epoch": 0.046092320206059786, - "grad_norm": 2.2622135153707226, - "learning_rate": 1.9986577336204782e-06, - "loss": 1.3214, - "step": 340 - }, - { - "epoch": 0.046227885853724664, - "grad_norm": 1.8411111935251365, - "learning_rate": 1.9986348921428154e-06, - "loss": 1.3622, - "step": 341 - }, - { - "epoch": 0.04636345150138955, - "grad_norm": 5.494406066239442, - "learning_rate": 1.9986118580862964e-06, - "loss": 1.4114, - "step": 342 - }, - { - "epoch": 0.04649901714905443, - "grad_norm": 1.3489395394272685, - "learning_rate": 1.998588631455363e-06, - "loss": 1.3313, - "step": 343 - }, - { - "epoch": 0.04663458279671931, - "grad_norm": 1.9813144604074164, - "learning_rate": 1.9985652122544947e-06, - "loss": 1.309, - "step": 344 - }, - { - "epoch": 0.046770148444384194, - "grad_norm": 1.6903097159004108, - "learning_rate": 1.998541600488207e-06, - "loss": 1.311, - "step": 345 - }, - { - "epoch": 0.04690571409204908, - "grad_norm": 2.2276062021794227, - "learning_rate": 1.998517796161054e-06, - "loss": 1.3767, - "step": 346 - }, - { - "epoch": 0.047041279739713955, - "grad_norm": 1.471609708172449, - "learning_rate": 1.9984937992776257e-06, - "loss": 1.371, - "step": 347 - }, - { - "epoch": 0.04717684538737884, - "grad_norm": 1.5370347383457588, - "learning_rate": 1.99846960984255e-06, - "loss": 1.376, - "step": 348 - }, - { - "epoch": 0.04731241103504372, - "grad_norm": 1.5523030914392526, - "learning_rate": 1.9984452278604907e-06, - "loss": 1.3692, - "step": 349 - }, - { - "epoch": 0.0474479766827086, - "grad_norm": 1.4353164965009617, - "learning_rate": 1.998420653336151e-06, - "loss": 1.3321, - "step": 350 - }, - { - "epoch": 0.047583542330373485, - "grad_norm": 1.565217788762805, - "learning_rate": 1.99839588627427e-06, - "loss": 1.3287, - "step": 351 - }, - { - "epoch": 0.04771910797803836, - "grad_norm": 1.8615156984198393, - "learning_rate": 1.9983709266796224e-06, - "loss": 1.3005, - "step": 352 - }, - { - "epoch": 0.047854673625703247, - "grad_norm": 2.4369499137376955, - "learning_rate": 1.9983457745570222e-06, - "loss": 1.3404, - "step": 353 - }, - { - "epoch": 0.04799023927336813, - "grad_norm": 1.607346094921835, - "learning_rate": 1.99832042991132e-06, - "loss": 1.3405, - "step": 354 - }, - { - "epoch": 0.04812580492103301, - "grad_norm": 1.6184839158540065, - "learning_rate": 1.9982948927474033e-06, - "loss": 1.3596, - "step": 355 - }, - { - "epoch": 0.04826137056869789, - "grad_norm": 2.8590844541543716, - "learning_rate": 1.9982691630701966e-06, - "loss": 1.346, - "step": 356 - }, - { - "epoch": 0.048396936216362776, - "grad_norm": 1.776482412113547, - "learning_rate": 1.9982432408846615e-06, - "loss": 1.3614, - "step": 357 - }, - { - "epoch": 0.048532501864027654, - "grad_norm": 3.770129586164139, - "learning_rate": 1.998217126195797e-06, - "loss": 1.3537, - "step": 358 - }, - { - "epoch": 0.04866806751169254, - "grad_norm": 2.04428253071013, - "learning_rate": 1.9981908190086398e-06, - "loss": 1.3468, - "step": 359 - }, - { - "epoch": 0.04880363315935742, - "grad_norm": 1.5159915733939324, - "learning_rate": 1.9981643193282617e-06, - "loss": 1.3178, - "step": 360 - }, - { - "epoch": 0.0489391988070223, - "grad_norm": 1.7170315618627663, - "learning_rate": 1.9981376271597735e-06, - "loss": 1.3931, - "step": 361 - }, - { - "epoch": 0.049074764454687184, - "grad_norm": 1.6230027904156388, - "learning_rate": 1.9981107425083233e-06, - "loss": 1.3351, - "step": 362 - }, - { - "epoch": 0.04921033010235206, - "grad_norm": 11.869326817553834, - "learning_rate": 1.9980836653790946e-06, - "loss": 1.3617, - "step": 363 - }, - { - "epoch": 0.049345895750016945, - "grad_norm": 2.1147216152060295, - "learning_rate": 1.9980563957773097e-06, - "loss": 1.3339, - "step": 364 - }, - { - "epoch": 0.04948146139768183, - "grad_norm": 1.598455104492896, - "learning_rate": 1.998028933708227e-06, - "loss": 1.3199, - "step": 365 - }, - { - "epoch": 0.04961702704534671, - "grad_norm": 1.7633906596270812, - "learning_rate": 1.9980012791771424e-06, - "loss": 1.3732, - "step": 366 - }, - { - "epoch": 0.04975259269301159, - "grad_norm": 1.8207984269053112, - "learning_rate": 1.9979734321893885e-06, - "loss": 1.3153, - "step": 367 - }, - { - "epoch": 0.049888158340676475, - "grad_norm": 2.1495108479927683, - "learning_rate": 1.9979453927503364e-06, - "loss": 1.3655, - "step": 368 - }, - { - "epoch": 0.05002372398834135, - "grad_norm": 1.596046919186072, - "learning_rate": 1.9979171608653923e-06, - "loss": 1.3301, - "step": 369 - }, - { - "epoch": 0.050159289636006237, - "grad_norm": 1.795715776451096, - "learning_rate": 1.9978887365400006e-06, - "loss": 1.3384, - "step": 370 - }, - { - "epoch": 0.05029485528367112, - "grad_norm": 3.5816793761628496, - "learning_rate": 1.997860119779643e-06, - "loss": 1.331, - "step": 371 - }, - { - "epoch": 0.050430420931336, - "grad_norm": 1.734198087439023, - "learning_rate": 1.9978313105898378e-06, - "loss": 1.3396, - "step": 372 - }, - { - "epoch": 0.05056598657900088, - "grad_norm": 1.5274546220495695, - "learning_rate": 1.997802308976141e-06, - "loss": 1.3723, - "step": 373 - }, - { - "epoch": 0.05070155222666576, - "grad_norm": 2.188705127964835, - "learning_rate": 1.997773114944145e-06, - "loss": 1.3462, - "step": 374 - }, - { - "epoch": 0.050837117874330644, - "grad_norm": 2.0257058017226215, - "learning_rate": 1.99774372849948e-06, - "loss": 1.2909, - "step": 375 - }, - { - "epoch": 0.05097268352199553, - "grad_norm": 1.9760634049413812, - "learning_rate": 1.9977141496478124e-06, - "loss": 1.3347, - "step": 376 - }, - { - "epoch": 0.051108249169660405, - "grad_norm": 4.1368254722518945, - "learning_rate": 1.9976843783948463e-06, - "loss": 1.372, - "step": 377 - }, - { - "epoch": 0.05124381481732529, - "grad_norm": 1.8292719934168464, - "learning_rate": 1.9976544147463237e-06, - "loss": 1.327, - "step": 378 - }, - { - "epoch": 0.051379380464990174, - "grad_norm": 4.208575732877597, - "learning_rate": 1.9976242587080216e-06, - "loss": 1.366, - "step": 379 - }, - { - "epoch": 0.05151494611265505, - "grad_norm": 3.742350703435567, - "learning_rate": 1.997593910285756e-06, - "loss": 1.3197, - "step": 380 - }, - { - "epoch": 0.051650511760319935, - "grad_norm": 1.4592105287386117, - "learning_rate": 1.9975633694853797e-06, - "loss": 1.3536, - "step": 381 - }, - { - "epoch": 0.05178607740798482, - "grad_norm": 1.6300626078368101, - "learning_rate": 1.9975326363127815e-06, - "loss": 1.334, - "step": 382 - }, - { - "epoch": 0.0519216430556497, - "grad_norm": 1.9820916323856055, - "learning_rate": 1.9975017107738887e-06, - "loss": 1.3494, - "step": 383 - }, - { - "epoch": 0.05205720870331458, - "grad_norm": 4.4990301636312955, - "learning_rate": 1.997470592874665e-06, - "loss": 1.3449, - "step": 384 - }, - { - "epoch": 0.052192774350979465, - "grad_norm": 1.5630700204309569, - "learning_rate": 1.9974392826211107e-06, - "loss": 1.377, - "step": 385 - }, - { - "epoch": 0.05232833999864434, - "grad_norm": 1.6289114310746307, - "learning_rate": 1.997407780019264e-06, - "loss": 1.3061, - "step": 386 - }, - { - "epoch": 0.05246390564630923, - "grad_norm": 2.086155189139043, - "learning_rate": 1.9973760850752e-06, - "loss": 1.3142, - "step": 387 - }, - { - "epoch": 0.052599471293974104, - "grad_norm": 1.6497620612641346, - "learning_rate": 1.997344197795031e-06, - "loss": 1.3621, - "step": 388 - }, - { - "epoch": 0.05273503694163899, - "grad_norm": 4.02957297002193, - "learning_rate": 1.9973121181849056e-06, - "loss": 1.3509, - "step": 389 - }, - { - "epoch": 0.05287060258930387, - "grad_norm": 1.6178439486427196, - "learning_rate": 1.997279846251011e-06, - "loss": 1.3368, - "step": 390 - }, - { - "epoch": 0.05300616823696875, - "grad_norm": 1.4741117061716669, - "learning_rate": 1.99724738199957e-06, - "loss": 1.3484, - "step": 391 - }, - { - "epoch": 0.053141733884633634, - "grad_norm": 3.249748446659568, - "learning_rate": 1.997214725436843e-06, - "loss": 1.362, - "step": 392 - }, - { - "epoch": 0.05327729953229852, - "grad_norm": 3.666770151554077, - "learning_rate": 1.997181876569128e-06, - "loss": 1.3191, - "step": 393 - }, - { - "epoch": 0.053412865179963395, - "grad_norm": 2.0381016666494327, - "learning_rate": 1.9971488354027592e-06, - "loss": 1.3278, - "step": 394 - }, - { - "epoch": 0.05354843082762828, - "grad_norm": 4.632742415684971, - "learning_rate": 1.997115601944108e-06, - "loss": 1.2984, - "step": 395 - }, - { - "epoch": 0.053683996475293164, - "grad_norm": 1.6658717188127294, - "learning_rate": 1.9970821761995843e-06, - "loss": 1.3036, - "step": 396 - }, - { - "epoch": 0.05381956212295804, - "grad_norm": 3.0114119128877137, - "learning_rate": 1.9970485581756334e-06, - "loss": 1.3339, - "step": 397 - }, - { - "epoch": 0.053955127770622925, - "grad_norm": 1.3842415618330917, - "learning_rate": 1.997014747878738e-06, - "loss": 1.3188, - "step": 398 - }, - { - "epoch": 0.0540906934182878, - "grad_norm": 1.4315856550353896, - "learning_rate": 1.996980745315419e-06, - "loss": 1.3469, - "step": 399 - }, - { - "epoch": 0.05422625906595269, - "grad_norm": 1.6406857491847975, - "learning_rate": 1.9969465504922324e-06, - "loss": 1.3675, - "step": 400 - }, - { - "epoch": 0.05436182471361757, - "grad_norm": 2.016661666282183, - "learning_rate": 1.9969121634157734e-06, - "loss": 1.3483, - "step": 401 - }, - { - "epoch": 0.05449739036128245, - "grad_norm": 1.6753470392988217, - "learning_rate": 1.9968775840926725e-06, - "loss": 1.3324, - "step": 402 - }, - { - "epoch": 0.05463295600894733, - "grad_norm": 1.5111473604936305, - "learning_rate": 1.996842812529598e-06, - "loss": 1.2842, - "step": 403 - }, - { - "epoch": 0.05476852165661222, - "grad_norm": 1.6828378650794957, - "learning_rate": 1.9968078487332563e-06, - "loss": 1.3304, - "step": 404 - }, - { - "epoch": 0.054904087304277094, - "grad_norm": 2.168934223045547, - "learning_rate": 1.9967726927103893e-06, - "loss": 1.3165, - "step": 405 - }, - { - "epoch": 0.05503965295194198, - "grad_norm": 1.8997720956075808, - "learning_rate": 1.9967373444677763e-06, - "loss": 1.2941, - "step": 406 - }, - { - "epoch": 0.05517521859960686, - "grad_norm": 2.7517396334209123, - "learning_rate": 1.996701804012234e-06, - "loss": 1.3277, - "step": 407 - }, - { - "epoch": 0.05531078424727174, - "grad_norm": 2.2067764042230524, - "learning_rate": 1.9966660713506167e-06, - "loss": 1.3626, - "step": 408 - }, - { - "epoch": 0.055446349894936624, - "grad_norm": 1.7041715825335415, - "learning_rate": 1.996630146489815e-06, - "loss": 1.3254, - "step": 409 - }, - { - "epoch": 0.05558191554260151, - "grad_norm": 1.5771474992965888, - "learning_rate": 1.996594029436756e-06, - "loss": 1.3367, - "step": 410 - }, - { - "epoch": 0.055717481190266385, - "grad_norm": 5.784844170210368, - "learning_rate": 1.9965577201984048e-06, - "loss": 1.2865, - "step": 411 - }, - { - "epoch": 0.05585304683793127, - "grad_norm": 1.9801906393826596, - "learning_rate": 1.9965212187817644e-06, - "loss": 1.3325, - "step": 412 - }, - { - "epoch": 0.05598861248559615, - "grad_norm": 1.9624822261665378, - "learning_rate": 1.9964845251938722e-06, - "loss": 1.3566, - "step": 413 - }, - { - "epoch": 0.05612417813326103, - "grad_norm": 2.2097174616707704, - "learning_rate": 1.9964476394418054e-06, - "loss": 1.3153, - "step": 414 - }, - { - "epoch": 0.056259743780925915, - "grad_norm": 1.4470693231991125, - "learning_rate": 1.996410561532677e-06, - "loss": 1.3222, - "step": 415 - }, - { - "epoch": 0.05639530942859079, - "grad_norm": 1.5564458343382892, - "learning_rate": 1.996373291473637e-06, - "loss": 1.3184, - "step": 416 - }, - { - "epoch": 0.05653087507625568, - "grad_norm": 1.6882197797594178, - "learning_rate": 1.9963358292718723e-06, - "loss": 1.3763, - "step": 417 - }, - { - "epoch": 0.05666644072392056, - "grad_norm": 1.565012291020919, - "learning_rate": 1.996298174934608e-06, - "loss": 1.3176, - "step": 418 - }, - { - "epoch": 0.05680200637158544, - "grad_norm": 1.6013360211328695, - "learning_rate": 1.996260328469104e-06, - "loss": 1.3158, - "step": 419 - }, - { - "epoch": 0.05693757201925032, - "grad_norm": 2.513638370491214, - "learning_rate": 1.9962222898826608e-06, - "loss": 1.3361, - "step": 420 - }, - { - "epoch": 0.05707313766691521, - "grad_norm": 2.424746708148747, - "learning_rate": 1.996184059182612e-06, - "loss": 1.3288, - "step": 421 - }, - { - "epoch": 0.057208703314580084, - "grad_norm": 1.8247804104149357, - "learning_rate": 1.996145636376331e-06, - "loss": 1.349, - "step": 422 - }, - { - "epoch": 0.05734426896224497, - "grad_norm": 1.9809696762546272, - "learning_rate": 1.996107021471227e-06, - "loss": 1.3488, - "step": 423 - }, - { - "epoch": 0.05747983460990985, - "grad_norm": 1.5862141344091834, - "learning_rate": 1.996068214474747e-06, - "loss": 1.3386, - "step": 424 - }, - { - "epoch": 0.05761540025757473, - "grad_norm": 1.6883426978358795, - "learning_rate": 1.996029215394374e-06, - "loss": 1.3168, - "step": 425 - }, - { - "epoch": 0.057750965905239614, - "grad_norm": 1.5589826421852886, - "learning_rate": 1.9959900242376294e-06, - "loss": 1.2977, - "step": 426 - }, - { - "epoch": 0.05788653155290449, - "grad_norm": 1.711752196616678, - "learning_rate": 1.9959506410120702e-06, - "loss": 1.3076, - "step": 427 - }, - { - "epoch": 0.058022097200569375, - "grad_norm": 1.4669977289469502, - "learning_rate": 1.9959110657252915e-06, - "loss": 1.327, - "step": 428 - }, - { - "epoch": 0.05815766284823426, - "grad_norm": 1.44419022891671, - "learning_rate": 1.995871298384925e-06, - "loss": 1.3304, - "step": 429 - }, - { - "epoch": 0.05829322849589914, - "grad_norm": 4.917571214962596, - "learning_rate": 1.9958313389986395e-06, - "loss": 1.3045, - "step": 430 - }, - { - "epoch": 0.05842879414356402, - "grad_norm": 1.6250185548028586, - "learning_rate": 1.995791187574141e-06, - "loss": 1.3262, - "step": 431 - }, - { - "epoch": 0.058564359791228905, - "grad_norm": 1.433707422981661, - "learning_rate": 1.995750844119172e-06, - "loss": 1.3117, - "step": 432 - }, - { - "epoch": 0.05869992543889378, - "grad_norm": 1.5697212323629302, - "learning_rate": 1.995710308641513e-06, - "loss": 1.3278, - "step": 433 - }, - { - "epoch": 0.05883549108655867, - "grad_norm": 2.2646359788149573, - "learning_rate": 1.9956695811489803e-06, - "loss": 1.3089, - "step": 434 - }, - { - "epoch": 0.05897105673422355, - "grad_norm": 5.023230639955923, - "learning_rate": 1.9956286616494287e-06, - "loss": 1.2927, - "step": 435 - }, - { - "epoch": 0.05910662238188843, - "grad_norm": 1.5985371999441, - "learning_rate": 1.9955875501507485e-06, - "loss": 1.3176, - "step": 436 - }, - { - "epoch": 0.05924218802955331, - "grad_norm": 1.4716544494839214, - "learning_rate": 1.995546246660868e-06, - "loss": 1.288, - "step": 437 - }, - { - "epoch": 0.05937775367721819, - "grad_norm": 2.164608683204455, - "learning_rate": 1.995504751187752e-06, - "loss": 1.3516, - "step": 438 - }, - { - "epoch": 0.059513319324883074, - "grad_norm": 1.64331861990943, - "learning_rate": 1.9954630637394027e-06, - "loss": 1.2823, - "step": 439 - }, - { - "epoch": 0.05964888497254796, - "grad_norm": 1.4723217058941405, - "learning_rate": 1.9954211843238594e-06, - "loss": 1.3084, - "step": 440 - }, - { - "epoch": 0.059784450620212835, - "grad_norm": 1.6709222848785577, - "learning_rate": 1.9953791129491983e-06, - "loss": 1.3354, - "step": 441 - }, - { - "epoch": 0.05992001626787772, - "grad_norm": 1.556185035062882, - "learning_rate": 1.995336849623532e-06, - "loss": 1.347, - "step": 442 - }, - { - "epoch": 0.060055581915542604, - "grad_norm": 2.203247919926396, - "learning_rate": 1.995294394355011e-06, - "loss": 1.2845, - "step": 443 - }, - { - "epoch": 0.06019114756320748, - "grad_norm": 1.524745899137362, - "learning_rate": 1.9952517471518228e-06, - "loss": 1.3146, - "step": 444 - }, - { - "epoch": 0.060326713210872365, - "grad_norm": 1.8508110768572208, - "learning_rate": 1.9952089080221907e-06, - "loss": 1.2908, - "step": 445 - }, - { - "epoch": 0.06046227885853725, - "grad_norm": 1.7391566072419, - "learning_rate": 1.9951658769743766e-06, - "loss": 1.3391, - "step": 446 - }, - { - "epoch": 0.06059784450620213, - "grad_norm": 1.8279658169804338, - "learning_rate": 1.9951226540166785e-06, - "loss": 1.314, - "step": 447 - }, - { - "epoch": 0.06073341015386701, - "grad_norm": 1.4676813551434693, - "learning_rate": 1.9950792391574316e-06, - "loss": 1.3272, - "step": 448 - }, - { - "epoch": 0.060868975801531895, - "grad_norm": 1.6382691829959917, - "learning_rate": 1.995035632405008e-06, - "loss": 1.3357, - "step": 449 - }, - { - "epoch": 0.06100454144919677, - "grad_norm": 1.8445399743875985, - "learning_rate": 1.994991833767817e-06, - "loss": 1.3177, - "step": 450 - }, - { - "epoch": 0.06114010709686166, - "grad_norm": 1.9515047605021112, - "learning_rate": 1.994947843254305e-06, - "loss": 1.3173, - "step": 451 - }, - { - "epoch": 0.061275672744526534, - "grad_norm": 2.2603209811392904, - "learning_rate": 1.994903660872955e-06, - "loss": 1.3073, - "step": 452 - }, - { - "epoch": 0.06141123839219142, - "grad_norm": 1.419849389488627, - "learning_rate": 1.9948592866322873e-06, - "loss": 1.2722, - "step": 453 - }, - { - "epoch": 0.0615468040398563, - "grad_norm": 1.9003273639738958, - "learning_rate": 1.9948147205408593e-06, - "loss": 1.3486, - "step": 454 - }, - { - "epoch": 0.06168236968752118, - "grad_norm": 1.8132409892624903, - "learning_rate": 1.9947699626072646e-06, - "loss": 1.3251, - "step": 455 - }, - { - "epoch": 0.061817935335186064, - "grad_norm": 1.4649313077167263, - "learning_rate": 1.9947250128401354e-06, - "loss": 1.2827, - "step": 456 - }, - { - "epoch": 0.06195350098285095, - "grad_norm": 2.001177415425561, - "learning_rate": 1.994679871248139e-06, - "loss": 1.3168, - "step": 457 - }, - { - "epoch": 0.062089066630515825, - "grad_norm": 1.8607492276313142, - "learning_rate": 1.9946345378399807e-06, - "loss": 1.3489, - "step": 458 - }, - { - "epoch": 0.06222463227818071, - "grad_norm": 1.7635360590414757, - "learning_rate": 1.9945890126244038e-06, - "loss": 1.3577, - "step": 459 - }, - { - "epoch": 0.062360197925845594, - "grad_norm": 1.8454874815312912, - "learning_rate": 1.9945432956101858e-06, - "loss": 1.3115, - "step": 460 - }, - { - "epoch": 0.06249576357351047, - "grad_norm": 1.6026541863132866, - "learning_rate": 1.994497386806144e-06, - "loss": 1.3157, - "step": 461 - }, - { - "epoch": 0.06263132922117536, - "grad_norm": 2.6847942475959767, - "learning_rate": 1.9944512862211313e-06, - "loss": 1.2664, - "step": 462 - }, - { - "epoch": 0.06276689486884024, - "grad_norm": 1.9449916937171503, - "learning_rate": 1.9944049938640377e-06, - "loss": 1.2971, - "step": 463 - }, - { - "epoch": 0.06290246051650512, - "grad_norm": 1.4022842639250457, - "learning_rate": 1.9943585097437903e-06, - "loss": 1.3126, - "step": 464 - }, - { - "epoch": 0.06303802616417, - "grad_norm": 1.926694244568102, - "learning_rate": 1.9943118338693533e-06, - "loss": 1.2791, - "step": 465 - }, - { - "epoch": 0.06317359181183488, - "grad_norm": 1.9104559946057846, - "learning_rate": 1.994264966249728e-06, - "loss": 1.3286, - "step": 466 - }, - { - "epoch": 0.06330915745949976, - "grad_norm": 2.762839817865476, - "learning_rate": 1.9942179068939516e-06, - "loss": 1.3058, - "step": 467 - }, - { - "epoch": 0.06344472310716465, - "grad_norm": 1.4725354866187492, - "learning_rate": 1.9941706558111004e-06, - "loss": 1.3204, - "step": 468 - }, - { - "epoch": 0.06358028875482953, - "grad_norm": 1.7162197224725495, - "learning_rate": 1.9941232130102854e-06, - "loss": 1.3061, - "step": 469 - }, - { - "epoch": 0.0637158544024944, - "grad_norm": 1.6356961100078748, - "learning_rate": 1.9940755785006564e-06, - "loss": 1.3048, - "step": 470 - }, - { - "epoch": 0.06385142005015929, - "grad_norm": 2.883742681067345, - "learning_rate": 1.994027752291398e-06, - "loss": 1.3028, - "step": 471 - }, - { - "epoch": 0.06398698569782417, - "grad_norm": 2.8245688472067143, - "learning_rate": 1.9939797343917344e-06, - "loss": 1.3033, - "step": 472 - }, - { - "epoch": 0.06412255134548905, - "grad_norm": 1.530754263008746, - "learning_rate": 1.9939315248109253e-06, - "loss": 1.3265, - "step": 473 - }, - { - "epoch": 0.06425811699315394, - "grad_norm": 1.6941335404899922, - "learning_rate": 1.993883123558267e-06, - "loss": 1.3044, - "step": 474 - }, - { - "epoch": 0.06439368264081882, - "grad_norm": 1.9094991559219776, - "learning_rate": 1.9938345306430936e-06, - "loss": 1.2954, - "step": 475 - }, - { - "epoch": 0.06452924828848369, - "grad_norm": 2.070619884497355, - "learning_rate": 1.9937857460747757e-06, - "loss": 1.3231, - "step": 476 - }, - { - "epoch": 0.06466481393614858, - "grad_norm": 1.6996400118328405, - "learning_rate": 1.9937367698627208e-06, - "loss": 1.3233, - "step": 477 - }, - { - "epoch": 0.06480037958381346, - "grad_norm": 1.5968529957784185, - "learning_rate": 1.9936876020163746e-06, - "loss": 1.3158, - "step": 478 - }, - { - "epoch": 0.06493594523147835, - "grad_norm": 1.5585931057617113, - "learning_rate": 1.9936382425452176e-06, - "loss": 1.3498, - "step": 479 - }, - { - "epoch": 0.06507151087914323, - "grad_norm": 1.6816609897079249, - "learning_rate": 1.993588691458769e-06, - "loss": 1.3026, - "step": 480 - }, - { - "epoch": 0.06520707652680811, - "grad_norm": 1.6979704883039763, - "learning_rate": 1.993538948766584e-06, - "loss": 1.323, - "step": 481 - }, - { - "epoch": 0.06534264217447298, - "grad_norm": 1.4992519989584911, - "learning_rate": 1.9934890144782558e-06, - "loss": 1.3057, - "step": 482 - }, - { - "epoch": 0.06547820782213787, - "grad_norm": 1.8104416207849634, - "learning_rate": 1.9934388886034126e-06, - "loss": 1.2636, - "step": 483 - }, - { - "epoch": 0.06561377346980275, - "grad_norm": 1.7547422058059843, - "learning_rate": 1.993388571151722e-06, - "loss": 1.3164, - "step": 484 - }, - { - "epoch": 0.06574933911746764, - "grad_norm": 2.0678389452371, - "learning_rate": 1.993338062132886e-06, - "loss": 1.3669, - "step": 485 - }, - { - "epoch": 0.06588490476513252, - "grad_norm": 1.9701122273144858, - "learning_rate": 1.993287361556646e-06, - "loss": 1.3276, - "step": 486 - }, - { - "epoch": 0.06602047041279739, - "grad_norm": 1.9412510091233752, - "learning_rate": 1.9932364694327795e-06, - "loss": 1.297, - "step": 487 - }, - { - "epoch": 0.06615603606046228, - "grad_norm": 3.310422642067009, - "learning_rate": 1.9931853857710995e-06, - "loss": 1.3203, - "step": 488 - }, - { - "epoch": 0.06629160170812716, - "grad_norm": 1.7928286895455505, - "learning_rate": 1.9931341105814575e-06, - "loss": 1.3354, - "step": 489 - }, - { - "epoch": 0.06642716735579204, - "grad_norm": 1.519300892551403, - "learning_rate": 1.993082643873742e-06, - "loss": 1.349, - "step": 490 - }, - { - "epoch": 0.06656273300345693, - "grad_norm": 5.584329426558805, - "learning_rate": 1.9930309856578772e-06, - "loss": 1.2887, - "step": 491 - }, - { - "epoch": 0.06669829865112181, - "grad_norm": 1.62419084173422, - "learning_rate": 1.992979135943825e-06, - "loss": 1.3043, - "step": 492 - }, - { - "epoch": 0.06683386429878668, - "grad_norm": 1.6850969082886642, - "learning_rate": 1.9929270947415852e-06, - "loss": 1.296, - "step": 493 - }, - { - "epoch": 0.06696942994645157, - "grad_norm": 5.404630141479217, - "learning_rate": 1.9928748620611927e-06, - "loss": 1.3213, - "step": 494 - }, - { - "epoch": 0.06710499559411645, - "grad_norm": 1.7852175932919099, - "learning_rate": 1.99282243791272e-06, - "loss": 1.3772, - "step": 495 - }, - { - "epoch": 0.06724056124178134, - "grad_norm": 2.4986628623650278, - "learning_rate": 1.992769822306277e-06, - "loss": 1.3403, - "step": 496 - }, - { - "epoch": 0.06737612688944622, - "grad_norm": 1.4346852549025286, - "learning_rate": 1.992717015252011e-06, - "loss": 1.3058, - "step": 497 - }, - { - "epoch": 0.06751169253711109, - "grad_norm": 2.488055620019409, - "learning_rate": 1.992664016760104e-06, - "loss": 1.3185, - "step": 498 - }, - { - "epoch": 0.06764725818477597, - "grad_norm": 2.8565567213505023, - "learning_rate": 1.992610826840777e-06, - "loss": 1.2932, - "step": 499 - }, - { - "epoch": 0.06778282383244086, - "grad_norm": 1.6128376424556348, - "learning_rate": 1.9925574455042873e-06, - "loss": 1.3464, - "step": 500 - }, - { - "epoch": 0.06791838948010574, - "grad_norm": 1.5802302125869778, - "learning_rate": 1.9925038727609287e-06, - "loss": 1.2843, - "step": 501 - }, - { - "epoch": 0.06805395512777063, - "grad_norm": 1.933618973642034, - "learning_rate": 1.9924501086210334e-06, - "loss": 1.341, - "step": 502 - }, - { - "epoch": 0.06818952077543551, - "grad_norm": 1.4995581323401785, - "learning_rate": 1.9923961530949677e-06, - "loss": 1.2896, - "step": 503 - }, - { - "epoch": 0.06832508642310038, - "grad_norm": 1.7532118700751427, - "learning_rate": 1.9923420061931376e-06, - "loss": 1.2881, - "step": 504 - }, - { - "epoch": 0.06846065207076527, - "grad_norm": 1.9874518329772233, - "learning_rate": 1.992287667925985e-06, - "loss": 1.2627, - "step": 505 - }, - { - "epoch": 0.06859621771843015, - "grad_norm": 1.416892297483906, - "learning_rate": 1.992233138303988e-06, - "loss": 1.2797, - "step": 506 - }, - { - "epoch": 0.06873178336609503, - "grad_norm": 1.6236043899055141, - "learning_rate": 1.9921784173376626e-06, - "loss": 1.2901, - "step": 507 - }, - { - "epoch": 0.06886734901375992, - "grad_norm": 1.4614626414428689, - "learning_rate": 1.9921235050375612e-06, - "loss": 1.2923, - "step": 508 - }, - { - "epoch": 0.06900291466142479, - "grad_norm": 1.4960417070658094, - "learning_rate": 1.9920684014142736e-06, - "loss": 1.3025, - "step": 509 - }, - { - "epoch": 0.06913848030908967, - "grad_norm": 1.8175874161478727, - "learning_rate": 1.992013106478425e-06, - "loss": 1.3134, - "step": 510 - }, - { - "epoch": 0.06927404595675456, - "grad_norm": 4.675520892339957, - "learning_rate": 1.9919576202406795e-06, - "loss": 1.265, - "step": 511 - }, - { - "epoch": 0.06940961160441944, - "grad_norm": 3.2588933028334264, - "learning_rate": 1.9919019427117372e-06, - "loss": 1.3017, - "step": 512 - }, - { - "epoch": 0.06954517725208433, - "grad_norm": 1.7187836335470021, - "learning_rate": 1.9918460739023348e-06, - "loss": 1.3391, - "step": 513 - }, - { - "epoch": 0.06968074289974921, - "grad_norm": 1.6891302130948875, - "learning_rate": 1.991790013823246e-06, - "loss": 1.3281, - "step": 514 - }, - { - "epoch": 0.06981630854741408, - "grad_norm": 1.5992363755937586, - "learning_rate": 1.991733762485282e-06, - "loss": 1.3046, - "step": 515 - }, - { - "epoch": 0.06995187419507896, - "grad_norm": 1.6975915094643845, - "learning_rate": 1.9916773198992897e-06, - "loss": 1.3027, - "step": 516 - }, - { - "epoch": 0.07008743984274385, - "grad_norm": 1.7819207707701166, - "learning_rate": 1.9916206860761546e-06, - "loss": 1.2921, - "step": 517 - }, - { - "epoch": 0.07022300549040873, - "grad_norm": 1.9452075033276783, - "learning_rate": 1.9915638610267974e-06, - "loss": 1.3292, - "step": 518 - }, - { - "epoch": 0.07035857113807362, - "grad_norm": 1.5916496332558832, - "learning_rate": 1.9915068447621765e-06, - "loss": 1.3243, - "step": 519 - }, - { - "epoch": 0.07049413678573849, - "grad_norm": 1.4423492102187532, - "learning_rate": 1.9914496372932873e-06, - "loss": 1.3075, - "step": 520 - }, - { - "epoch": 0.07062970243340337, - "grad_norm": 2.4555355242774586, - "learning_rate": 1.9913922386311612e-06, - "loss": 1.3174, - "step": 521 - }, - { - "epoch": 0.07076526808106826, - "grad_norm": 1.3462200257572452, - "learning_rate": 1.9913346487868676e-06, - "loss": 1.3203, - "step": 522 - }, - { - "epoch": 0.07090083372873314, - "grad_norm": 1.9351246112699338, - "learning_rate": 1.9912768677715123e-06, - "loss": 1.3217, - "step": 523 - }, - { - "epoch": 0.07103639937639802, - "grad_norm": 1.9896765270656418, - "learning_rate": 1.9912188955962376e-06, - "loss": 1.3278, - "step": 524 - }, - { - "epoch": 0.07117196502406291, - "grad_norm": 1.6145284674745477, - "learning_rate": 1.991160732272223e-06, - "loss": 1.3164, - "step": 525 - }, - { - "epoch": 0.07130753067172778, - "grad_norm": 1.5942759380528135, - "learning_rate": 1.9911023778106846e-06, - "loss": 1.3077, - "step": 526 - }, - { - "epoch": 0.07144309631939266, - "grad_norm": 2.047575289827827, - "learning_rate": 1.9910438322228762e-06, - "loss": 1.3126, - "step": 527 - }, - { - "epoch": 0.07157866196705755, - "grad_norm": 1.7041947373741728, - "learning_rate": 1.990985095520088e-06, - "loss": 1.2908, - "step": 528 - }, - { - "epoch": 0.07171422761472243, - "grad_norm": 1.4762904263915089, - "learning_rate": 1.990926167713646e-06, - "loss": 1.2483, - "step": 529 - }, - { - "epoch": 0.07184979326238732, - "grad_norm": 1.489943388069022, - "learning_rate": 1.9908670488149145e-06, - "loss": 1.288, - "step": 530 - }, - { - "epoch": 0.0719853589100522, - "grad_norm": 1.8990393053973695, - "learning_rate": 1.9908077388352943e-06, - "loss": 1.2713, - "step": 531 - }, - { - "epoch": 0.07212092455771707, - "grad_norm": 2.0358807226289755, - "learning_rate": 1.9907482377862226e-06, - "loss": 1.3144, - "step": 532 - }, - { - "epoch": 0.07225649020538195, - "grad_norm": 1.5567197672750046, - "learning_rate": 1.990688545679173e-06, - "loss": 1.2997, - "step": 533 - }, - { - "epoch": 0.07239205585304684, - "grad_norm": 2.126356687117648, - "learning_rate": 1.990628662525658e-06, - "loss": 1.292, - "step": 534 - }, - { - "epoch": 0.07252762150071172, - "grad_norm": 1.4903445990419877, - "learning_rate": 1.9905685883372254e-06, - "loss": 1.295, - "step": 535 - }, - { - "epoch": 0.0726631871483766, - "grad_norm": 1.481390527472389, - "learning_rate": 1.990508323125459e-06, - "loss": 1.2703, - "step": 536 - }, - { - "epoch": 0.07279875279604148, - "grad_norm": 1.5704060471992556, - "learning_rate": 1.9904478669019815e-06, - "loss": 1.2386, - "step": 537 - }, - { - "epoch": 0.07293431844370636, - "grad_norm": 1.5689773296724414, - "learning_rate": 1.990387219678451e-06, - "loss": 1.3121, - "step": 538 - }, - { - "epoch": 0.07306988409137125, - "grad_norm": 1.4870583683999612, - "learning_rate": 1.9903263814665624e-06, - "loss": 1.2591, - "step": 539 - }, - { - "epoch": 0.07320544973903613, - "grad_norm": 1.6471986131768939, - "learning_rate": 1.9902653522780482e-06, - "loss": 1.2907, - "step": 540 - }, - { - "epoch": 0.07334101538670101, - "grad_norm": 1.7367319575823017, - "learning_rate": 1.990204132124678e-06, - "loss": 1.2741, - "step": 541 - }, - { - "epoch": 0.0734765810343659, - "grad_norm": 1.4761462763749382, - "learning_rate": 1.990142721018257e-06, - "loss": 1.2685, - "step": 542 - }, - { - "epoch": 0.07361214668203077, - "grad_norm": 1.5581884993448196, - "learning_rate": 1.990081118970628e-06, - "loss": 1.2779, - "step": 543 - }, - { - "epoch": 0.07374771232969565, - "grad_norm": 1.4879160991291551, - "learning_rate": 1.99001932599367e-06, - "loss": 1.2951, - "step": 544 - }, - { - "epoch": 0.07388327797736054, - "grad_norm": 4.185222222717664, - "learning_rate": 1.9899573420993003e-06, - "loss": 1.3256, - "step": 545 - }, - { - "epoch": 0.07401884362502542, - "grad_norm": 1.55144865719735, - "learning_rate": 1.9898951672994708e-06, - "loss": 1.303, - "step": 546 - }, - { - "epoch": 0.0741544092726903, - "grad_norm": 1.4335482947892946, - "learning_rate": 1.9898328016061726e-06, - "loss": 1.2841, - "step": 547 - }, - { - "epoch": 0.07428997492035518, - "grad_norm": 1.5598381220287991, - "learning_rate": 1.9897702450314316e-06, - "loss": 1.2985, - "step": 548 - }, - { - "epoch": 0.07442554056802006, - "grad_norm": 1.890981458795422, - "learning_rate": 1.9897074975873116e-06, - "loss": 1.3111, - "step": 549 - }, - { - "epoch": 0.07456110621568494, - "grad_norm": 1.6154079356462079, - "learning_rate": 1.9896445592859134e-06, - "loss": 1.2919, - "step": 550 - }, - { - "epoch": 0.07469667186334983, - "grad_norm": 1.6989202802312897, - "learning_rate": 1.989581430139373e-06, - "loss": 1.309, - "step": 551 - }, - { - "epoch": 0.07483223751101471, - "grad_norm": 1.5538909561532737, - "learning_rate": 1.9895181101598656e-06, - "loss": 1.2899, - "step": 552 - }, - { - "epoch": 0.0749678031586796, - "grad_norm": 2.5168039316526745, - "learning_rate": 1.9894545993596014e-06, - "loss": 1.2638, - "step": 553 - }, - { - "epoch": 0.07510336880634447, - "grad_norm": 1.5630990434876166, - "learning_rate": 1.9893908977508277e-06, - "loss": 1.2884, - "step": 554 - }, - { - "epoch": 0.07523893445400935, - "grad_norm": 1.836554456480694, - "learning_rate": 1.9893270053458293e-06, - "loss": 1.3104, - "step": 555 - }, - { - "epoch": 0.07537450010167424, - "grad_norm": 1.4659186365549168, - "learning_rate": 1.9892629221569274e-06, - "loss": 1.3231, - "step": 556 - }, - { - "epoch": 0.07551006574933912, - "grad_norm": 1.5972956343711222, - "learning_rate": 1.989198648196479e-06, - "loss": 1.3139, - "step": 557 - }, - { - "epoch": 0.075645631397004, - "grad_norm": 1.7672485452533742, - "learning_rate": 1.9891341834768806e-06, - "loss": 1.3099, - "step": 558 - }, - { - "epoch": 0.07578119704466887, - "grad_norm": 1.5838838767637349, - "learning_rate": 1.9890695280105622e-06, - "loss": 1.2889, - "step": 559 - }, - { - "epoch": 0.07591676269233376, - "grad_norm": 1.5364930705973818, - "learning_rate": 1.9890046818099925e-06, - "loss": 1.2781, - "step": 560 - }, - { - "epoch": 0.07605232833999864, - "grad_norm": 1.5693138789188021, - "learning_rate": 1.9889396448876765e-06, - "loss": 1.3543, - "step": 561 - }, - { - "epoch": 0.07618789398766353, - "grad_norm": 1.438154199064127, - "learning_rate": 1.9888744172561563e-06, - "loss": 1.3266, - "step": 562 - }, - { - "epoch": 0.07632345963532841, - "grad_norm": 1.494128019686765, - "learning_rate": 1.9888089989280107e-06, - "loss": 1.295, - "step": 563 - }, - { - "epoch": 0.0764590252829933, - "grad_norm": 1.4710901347307717, - "learning_rate": 1.9887433899158547e-06, - "loss": 1.3267, - "step": 564 - }, - { - "epoch": 0.07659459093065817, - "grad_norm": 1.7565577152342584, - "learning_rate": 1.9886775902323402e-06, - "loss": 1.3213, - "step": 565 - }, - { - "epoch": 0.07673015657832305, - "grad_norm": 1.5562145381014498, - "learning_rate": 1.9886115998901572e-06, - "loss": 1.3249, - "step": 566 - }, - { - "epoch": 0.07686572222598793, - "grad_norm": 1.6294774303994506, - "learning_rate": 1.9885454189020303e-06, - "loss": 1.3127, - "step": 567 - }, - { - "epoch": 0.07700128787365282, - "grad_norm": 1.6613780821092223, - "learning_rate": 1.988479047280723e-06, - "loss": 1.3302, - "step": 568 - }, - { - "epoch": 0.0771368535213177, - "grad_norm": 1.8873318123532934, - "learning_rate": 1.9884124850390336e-06, - "loss": 1.2922, - "step": 569 - }, - { - "epoch": 0.07727241916898257, - "grad_norm": 1.8687203650642756, - "learning_rate": 1.9883457321897984e-06, - "loss": 1.3248, - "step": 570 - }, - { - "epoch": 0.07740798481664746, - "grad_norm": 3.9050825380692187, - "learning_rate": 1.9882787887458907e-06, - "loss": 1.2933, - "step": 571 - }, - { - "epoch": 0.07754355046431234, - "grad_norm": 1.589537659341866, - "learning_rate": 1.988211654720219e-06, - "loss": 1.3532, - "step": 572 - }, - { - "epoch": 0.07767911611197723, - "grad_norm": 1.542611601841325, - "learning_rate": 1.9881443301257308e-06, - "loss": 1.2784, - "step": 573 - }, - { - "epoch": 0.07781468175964211, - "grad_norm": 2.208028798484998, - "learning_rate": 1.988076814975408e-06, - "loss": 1.2977, - "step": 574 - }, - { - "epoch": 0.077950247407307, - "grad_norm": 1.5380495056127808, - "learning_rate": 1.988009109282271e-06, - "loss": 1.3173, - "step": 575 - }, - { - "epoch": 0.07808581305497186, - "grad_norm": 1.9697441115530157, - "learning_rate": 1.9879412130593765e-06, - "loss": 1.3302, - "step": 576 - }, - { - "epoch": 0.07822137870263675, - "grad_norm": 1.6340040526633612, - "learning_rate": 1.9878731263198165e-06, - "loss": 1.2785, - "step": 577 - }, - { - "epoch": 0.07835694435030163, - "grad_norm": 1.703497370768127, - "learning_rate": 1.987804849076723e-06, - "loss": 1.2677, - "step": 578 - }, - { - "epoch": 0.07849250999796652, - "grad_norm": 1.4224659947371023, - "learning_rate": 1.9877363813432607e-06, - "loss": 1.3357, - "step": 579 - }, - { - "epoch": 0.0786280756456314, - "grad_norm": 1.7189135770354766, - "learning_rate": 1.9876677231326347e-06, - "loss": 1.293, - "step": 580 - }, - { - "epoch": 0.07876364129329629, - "grad_norm": 1.5125706713928668, - "learning_rate": 1.9875988744580837e-06, - "loss": 1.2872, - "step": 581 - }, - { - "epoch": 0.07889920694096116, - "grad_norm": 1.368428556534405, - "learning_rate": 1.987529835332886e-06, - "loss": 1.2611, - "step": 582 - }, - { - "epoch": 0.07903477258862604, - "grad_norm": 1.7246034795356069, - "learning_rate": 1.9874606057703546e-06, - "loss": 1.3097, - "step": 583 - }, - { - "epoch": 0.07917033823629092, - "grad_norm": 1.871146294229989, - "learning_rate": 1.9873911857838395e-06, - "loss": 1.2831, - "step": 584 - }, - { - "epoch": 0.07930590388395581, - "grad_norm": 1.496208218264772, - "learning_rate": 1.9873215753867286e-06, - "loss": 1.3226, - "step": 585 - }, - { - "epoch": 0.07944146953162069, - "grad_norm": 1.5727741277520515, - "learning_rate": 1.987251774592445e-06, - "loss": 1.313, - "step": 586 - }, - { - "epoch": 0.07957703517928556, - "grad_norm": 1.7678799112843224, - "learning_rate": 1.98718178341445e-06, - "loss": 1.3125, - "step": 587 - }, - { - "epoch": 0.07971260082695045, - "grad_norm": 3.5595752817692676, - "learning_rate": 1.9871116018662403e-06, - "loss": 1.2978, - "step": 588 - }, - { - "epoch": 0.07984816647461533, - "grad_norm": 7.279514237780942, - "learning_rate": 1.98704122996135e-06, - "loss": 1.2978, - "step": 589 - }, - { - "epoch": 0.07998373212228022, - "grad_norm": 1.6383011755600667, - "learning_rate": 1.9869706677133493e-06, - "loss": 1.2944, - "step": 590 - }, - { - "epoch": 0.0801192977699451, - "grad_norm": 1.5608035960675017, - "learning_rate": 1.9868999151358465e-06, - "loss": 1.329, - "step": 591 - }, - { - "epoch": 0.08025486341760998, - "grad_norm": 1.8924487716303573, - "learning_rate": 1.9868289722424846e-06, - "loss": 1.3027, - "step": 592 - }, - { - "epoch": 0.08039042906527485, - "grad_norm": 1.565913059053905, - "learning_rate": 1.9867578390469454e-06, - "loss": 1.3055, - "step": 593 - }, - { - "epoch": 0.08052599471293974, - "grad_norm": 1.6289662979337494, - "learning_rate": 1.986686515562946e-06, - "loss": 1.2839, - "step": 594 - }, - { - "epoch": 0.08066156036060462, - "grad_norm": 1.4494767063201306, - "learning_rate": 1.9866150018042403e-06, - "loss": 1.3068, - "step": 595 - }, - { - "epoch": 0.08079712600826951, - "grad_norm": 1.6060449184920051, - "learning_rate": 1.986543297784619e-06, - "loss": 1.3181, - "step": 596 - }, - { - "epoch": 0.08093269165593439, - "grad_norm": 3.3757983347850717, - "learning_rate": 1.9864714035179108e-06, - "loss": 1.2989, - "step": 597 - }, - { - "epoch": 0.08106825730359926, - "grad_norm": 1.5558330191708303, - "learning_rate": 1.986399319017979e-06, - "loss": 1.2712, - "step": 598 - }, - { - "epoch": 0.08120382295126415, - "grad_norm": 1.5500131424810792, - "learning_rate": 1.986327044298724e-06, - "loss": 1.3021, - "step": 599 - }, - { - "epoch": 0.08133938859892903, - "grad_norm": 2.4455017466103537, - "learning_rate": 1.986254579374085e-06, - "loss": 1.2548, - "step": 600 - }, - { - "epoch": 0.08147495424659391, - "grad_norm": 1.5970706452197627, - "learning_rate": 1.9861819242580353e-06, - "loss": 1.2783, - "step": 601 - }, - { - "epoch": 0.0816105198942588, - "grad_norm": 1.860376714419881, - "learning_rate": 1.9861090789645855e-06, - "loss": 1.2716, - "step": 602 - }, - { - "epoch": 0.08174608554192368, - "grad_norm": 2.4738764071870185, - "learning_rate": 1.9860360435077837e-06, - "loss": 1.2924, - "step": 603 - }, - { - "epoch": 0.08188165118958855, - "grad_norm": 1.6775167006194183, - "learning_rate": 1.9859628179017142e-06, - "loss": 1.273, - "step": 604 - }, - { - "epoch": 0.08201721683725344, - "grad_norm": 1.3743539604551787, - "learning_rate": 1.985889402160498e-06, - "loss": 1.2924, - "step": 605 - }, - { - "epoch": 0.08215278248491832, - "grad_norm": 11.46999079888356, - "learning_rate": 1.985815796298293e-06, - "loss": 1.2951, - "step": 606 - }, - { - "epoch": 0.0822883481325832, - "grad_norm": 1.6177478902833045, - "learning_rate": 1.985742000329293e-06, - "loss": 1.3026, - "step": 607 - }, - { - "epoch": 0.08242391378024809, - "grad_norm": 1.935217073295268, - "learning_rate": 1.9856680142677294e-06, - "loss": 1.2596, - "step": 608 - }, - { - "epoch": 0.08255947942791296, - "grad_norm": 3.4876172471554123, - "learning_rate": 1.9855938381278698e-06, - "loss": 1.2901, - "step": 609 - }, - { - "epoch": 0.08269504507557784, - "grad_norm": 1.4612246110604123, - "learning_rate": 1.985519471924018e-06, - "loss": 1.2744, - "step": 610 - }, - { - "epoch": 0.08283061072324273, - "grad_norm": 2.9041846662895208, - "learning_rate": 1.985444915670515e-06, - "loss": 1.2911, - "step": 611 - }, - { - "epoch": 0.08296617637090761, - "grad_norm": 1.8387015285821628, - "learning_rate": 1.9853701693817393e-06, - "loss": 1.2485, - "step": 612 - }, - { - "epoch": 0.0831017420185725, - "grad_norm": 2.0760928548157147, - "learning_rate": 1.985295233072104e-06, - "loss": 1.2823, - "step": 613 - }, - { - "epoch": 0.08323730766623738, - "grad_norm": 3.0119348906280803, - "learning_rate": 1.9852201067560607e-06, - "loss": 1.311, - "step": 614 - }, - { - "epoch": 0.08337287331390225, - "grad_norm": 1.6481286499751628, - "learning_rate": 1.9851447904480964e-06, - "loss": 1.2941, - "step": 615 - }, - { - "epoch": 0.08350843896156714, - "grad_norm": 1.4797109489588276, - "learning_rate": 1.9850692841627356e-06, - "loss": 1.3031, - "step": 616 - }, - { - "epoch": 0.08364400460923202, - "grad_norm": 1.4279429291671268, - "learning_rate": 1.984993587914539e-06, - "loss": 1.2538, - "step": 617 - }, - { - "epoch": 0.0837795702568969, - "grad_norm": 1.9789331247025645, - "learning_rate": 1.9849177017181044e-06, - "loss": 1.268, - "step": 618 - }, - { - "epoch": 0.08391513590456179, - "grad_norm": 1.9271980328183445, - "learning_rate": 1.984841625588065e-06, - "loss": 1.2746, - "step": 619 - }, - { - "epoch": 0.08405070155222667, - "grad_norm": 1.806767483167396, - "learning_rate": 1.9847653595390923e-06, - "loss": 1.2829, - "step": 620 - }, - { - "epoch": 0.08418626719989154, - "grad_norm": 2.0638256433097677, - "learning_rate": 1.984688903585893e-06, - "loss": 1.2844, - "step": 621 - }, - { - "epoch": 0.08432183284755643, - "grad_norm": 1.5518598035443159, - "learning_rate": 1.9846122577432116e-06, - "loss": 1.2663, - "step": 622 - }, - { - "epoch": 0.08445739849522131, - "grad_norm": 1.5909301133353464, - "learning_rate": 1.9845354220258283e-06, - "loss": 1.2503, - "step": 623 - }, - { - "epoch": 0.0845929641428862, - "grad_norm": 1.5921965847653332, - "learning_rate": 1.9844583964485604e-06, - "loss": 1.2843, - "step": 624 - }, - { - "epoch": 0.08472852979055108, - "grad_norm": 1.5415413150445911, - "learning_rate": 1.9843811810262612e-06, - "loss": 1.2924, - "step": 625 - }, - { - "epoch": 0.08486409543821595, - "grad_norm": 1.744085814825703, - "learning_rate": 1.984303775773822e-06, - "loss": 1.2931, - "step": 626 - }, - { - "epoch": 0.08499966108588083, - "grad_norm": 1.6386385339250844, - "learning_rate": 1.9842261807061685e-06, - "loss": 1.2939, - "step": 627 - }, - { - "epoch": 0.08513522673354572, - "grad_norm": 1.9602338823924021, - "learning_rate": 1.984148395838266e-06, - "loss": 1.2914, - "step": 628 - }, - { - "epoch": 0.0852707923812106, - "grad_norm": 3.034970277790808, - "learning_rate": 1.984070421185113e-06, - "loss": 1.2855, - "step": 629 - }, - { - "epoch": 0.08540635802887549, - "grad_norm": 2.7067929006597238, - "learning_rate": 1.983992256761747e-06, - "loss": 1.2665, - "step": 630 - }, - { - "epoch": 0.08554192367654037, - "grad_norm": 1.5579224610277251, - "learning_rate": 1.983913902583242e-06, - "loss": 1.2647, - "step": 631 - }, - { - "epoch": 0.08567748932420524, - "grad_norm": 1.8017031984047098, - "learning_rate": 1.983835358664707e-06, - "loss": 1.3258, - "step": 632 - }, - { - "epoch": 0.08581305497187013, - "grad_norm": 1.7528031181788906, - "learning_rate": 1.9837566250212894e-06, - "loss": 1.298, - "step": 633 - }, - { - "epoch": 0.08594862061953501, - "grad_norm": 1.832451846287892, - "learning_rate": 1.9836777016681723e-06, - "loss": 1.2302, - "step": 634 - }, - { - "epoch": 0.0860841862671999, - "grad_norm": 1.5162800859613486, - "learning_rate": 1.9835985886205744e-06, - "loss": 1.2836, - "step": 635 - }, - { - "epoch": 0.08621975191486478, - "grad_norm": 1.605871001008823, - "learning_rate": 1.983519285893753e-06, - "loss": 1.3177, - "step": 636 - }, - { - "epoch": 0.08635531756252965, - "grad_norm": 1.7684459789954774, - "learning_rate": 1.983439793503e-06, - "loss": 1.272, - "step": 637 - }, - { - "epoch": 0.08649088321019453, - "grad_norm": 1.945871398967508, - "learning_rate": 1.9833601114636465e-06, - "loss": 1.2668, - "step": 638 - }, - { - "epoch": 0.08662644885785942, - "grad_norm": 1.5121979103328829, - "learning_rate": 1.9832802397910578e-06, - "loss": 1.286, - "step": 639 - }, - { - "epoch": 0.0867620145055243, - "grad_norm": 1.615637411914517, - "learning_rate": 1.983200178500636e-06, - "loss": 1.3077, - "step": 640 - }, - { - "epoch": 0.08689758015318919, - "grad_norm": 3.9163056751226297, - "learning_rate": 1.9831199276078208e-06, - "loss": 1.3055, - "step": 641 - }, - { - "epoch": 0.08703314580085407, - "grad_norm": 1.6355687740136384, - "learning_rate": 1.9830394871280876e-06, - "loss": 1.2425, - "step": 642 - }, - { - "epoch": 0.08716871144851894, - "grad_norm": 2.1948653079827203, - "learning_rate": 1.982958857076949e-06, - "loss": 1.2974, - "step": 643 - }, - { - "epoch": 0.08730427709618382, - "grad_norm": 1.473366323435812, - "learning_rate": 1.982878037469954e-06, - "loss": 1.2924, - "step": 644 - }, - { - "epoch": 0.08743984274384871, - "grad_norm": 1.4961064009714087, - "learning_rate": 1.9827970283226883e-06, - "loss": 1.3147, - "step": 645 - }, - { - "epoch": 0.08757540839151359, - "grad_norm": 2.0229297439654714, - "learning_rate": 1.9827158296507727e-06, - "loss": 1.2883, - "step": 646 - }, - { - "epoch": 0.08771097403917848, - "grad_norm": 1.566352372823629, - "learning_rate": 1.9826344414698667e-06, - "loss": 1.2684, - "step": 647 - }, - { - "epoch": 0.08784653968684335, - "grad_norm": 1.9824141973275196, - "learning_rate": 1.982552863795665e-06, - "loss": 1.2628, - "step": 648 - }, - { - "epoch": 0.08798210533450823, - "grad_norm": 2.191669969229068, - "learning_rate": 1.9824710966438995e-06, - "loss": 1.2856, - "step": 649 - }, - { - "epoch": 0.08811767098217312, - "grad_norm": 1.4306657579367612, - "learning_rate": 1.982389140030338e-06, - "loss": 1.2354, - "step": 650 - }, - { - "epoch": 0.088253236629838, - "grad_norm": 3.97632168514313, - "learning_rate": 1.9823069939707856e-06, - "loss": 1.28, - "step": 651 - }, - { - "epoch": 0.08838880227750288, - "grad_norm": 1.6510888374742998, - "learning_rate": 1.982224658481083e-06, - "loss": 1.2711, - "step": 652 - }, - { - "epoch": 0.08852436792516777, - "grad_norm": 1.4052834146854785, - "learning_rate": 1.9821421335771084e-06, - "loss": 1.2867, - "step": 653 - }, - { - "epoch": 0.08865993357283264, - "grad_norm": 1.9967319857182486, - "learning_rate": 1.9820594192747757e-06, - "loss": 1.3004, - "step": 654 - }, - { - "epoch": 0.08879549922049752, - "grad_norm": 1.7202008962386708, - "learning_rate": 1.981976515590036e-06, - "loss": 1.3077, - "step": 655 - }, - { - "epoch": 0.08893106486816241, - "grad_norm": 1.9391541858495354, - "learning_rate": 1.9818934225388765e-06, - "loss": 1.3162, - "step": 656 - }, - { - "epoch": 0.08906663051582729, - "grad_norm": 2.128898698074163, - "learning_rate": 1.981810140137321e-06, - "loss": 1.2822, - "step": 657 - }, - { - "epoch": 0.08920219616349218, - "grad_norm": 1.604711215743902, - "learning_rate": 1.9817266684014303e-06, - "loss": 1.2546, - "step": 658 - }, - { - "epoch": 0.08933776181115705, - "grad_norm": 1.5999653178498394, - "learning_rate": 1.9816430073473005e-06, - "loss": 1.2746, - "step": 659 - }, - { - "epoch": 0.08947332745882193, - "grad_norm": 31.988137306423816, - "learning_rate": 1.9815591569910653e-06, - "loss": 1.2958, - "step": 660 - }, - { - "epoch": 0.08960889310648681, - "grad_norm": 21.29864133661415, - "learning_rate": 1.9814751173488944e-06, - "loss": 1.3031, - "step": 661 - }, - { - "epoch": 0.0897444587541517, - "grad_norm": 1.5469991411494257, - "learning_rate": 1.981390888436995e-06, - "loss": 1.3298, - "step": 662 - }, - { - "epoch": 0.08988002440181658, - "grad_norm": 1.3577465240185425, - "learning_rate": 1.981306470271609e-06, - "loss": 1.2852, - "step": 663 - }, - { - "epoch": 0.09001559004948147, - "grad_norm": 2.1317730038060314, - "learning_rate": 1.9812218628690165e-06, - "loss": 1.2787, - "step": 664 - }, - { - "epoch": 0.09015115569714634, - "grad_norm": 1.7924445701889, - "learning_rate": 1.981137066245533e-06, - "loss": 1.2581, - "step": 665 - }, - { - "epoch": 0.09028672134481122, - "grad_norm": 1.410246700531771, - "learning_rate": 1.981052080417511e-06, - "loss": 1.2896, - "step": 666 - }, - { - "epoch": 0.0904222869924761, - "grad_norm": 1.711366035994104, - "learning_rate": 1.980966905401339e-06, - "loss": 1.2506, - "step": 667 - }, - { - "epoch": 0.09055785264014099, - "grad_norm": 2.1698919251586957, - "learning_rate": 1.9808815412134424e-06, - "loss": 1.2688, - "step": 668 - }, - { - "epoch": 0.09069341828780587, - "grad_norm": 2.116424341389474, - "learning_rate": 1.9807959878702833e-06, - "loss": 1.2509, - "step": 669 - }, - { - "epoch": 0.09082898393547076, - "grad_norm": 2.3480725387597463, - "learning_rate": 1.98071024538836e-06, - "loss": 1.3034, - "step": 670 - }, - { - "epoch": 0.09096454958313563, - "grad_norm": 1.6315038060072091, - "learning_rate": 1.980624313784207e-06, - "loss": 1.2753, - "step": 671 - }, - { - "epoch": 0.09110011523080051, - "grad_norm": 2.1210837512882947, - "learning_rate": 1.980538193074396e-06, - "loss": 1.2584, - "step": 672 - }, - { - "epoch": 0.0912356808784654, - "grad_norm": 1.4806465269317812, - "learning_rate": 1.980451883275534e-06, - "loss": 1.2661, - "step": 673 - }, - { - "epoch": 0.09137124652613028, - "grad_norm": 1.6625372093772222, - "learning_rate": 1.9803653844042655e-06, - "loss": 1.2812, - "step": 674 - }, - { - "epoch": 0.09150681217379517, - "grad_norm": 1.7453684209775335, - "learning_rate": 1.9802786964772714e-06, - "loss": 1.2793, - "step": 675 - }, - { - "epoch": 0.09164237782146004, - "grad_norm": 2.372444755215296, - "learning_rate": 1.9801918195112684e-06, - "loss": 1.2797, - "step": 676 - }, - { - "epoch": 0.09177794346912492, - "grad_norm": 1.4763829577649146, - "learning_rate": 1.9801047535230103e-06, - "loss": 1.2957, - "step": 677 - }, - { - "epoch": 0.0919135091167898, - "grad_norm": 1.7086929623242846, - "learning_rate": 1.9800174985292866e-06, - "loss": 1.2947, - "step": 678 - }, - { - "epoch": 0.09204907476445469, - "grad_norm": 1.8121606895128257, - "learning_rate": 1.9799300545469248e-06, - "loss": 1.2879, - "step": 679 - }, - { - "epoch": 0.09218464041211957, - "grad_norm": 1.663852066692697, - "learning_rate": 1.9798424215927864e-06, - "loss": 1.2335, - "step": 680 - }, - { - "epoch": 0.09232020605978446, - "grad_norm": 1.7059759383233075, - "learning_rate": 1.979754599683772e-06, - "loss": 1.3039, - "step": 681 - }, - { - "epoch": 0.09245577170744933, - "grad_norm": 2.0413233065089136, - "learning_rate": 1.979666588836816e-06, - "loss": 1.2673, - "step": 682 - }, - { - "epoch": 0.09259133735511421, - "grad_norm": 1.5974223087749404, - "learning_rate": 1.9795783890688917e-06, - "loss": 1.2979, - "step": 683 - }, - { - "epoch": 0.0927269030027791, - "grad_norm": 1.7254315449324926, - "learning_rate": 1.9794900003970073e-06, - "loss": 1.2782, - "step": 684 - }, - { - "epoch": 0.09286246865044398, - "grad_norm": 1.9582526062781564, - "learning_rate": 1.9794014228382085e-06, - "loss": 1.26, - "step": 685 - }, - { - "epoch": 0.09299803429810886, - "grad_norm": 1.6855648064107889, - "learning_rate": 1.9793126564095756e-06, - "loss": 1.2911, - "step": 686 - }, - { - "epoch": 0.09313359994577373, - "grad_norm": 1.4815049983234934, - "learning_rate": 1.979223701128227e-06, - "loss": 1.3178, - "step": 687 - }, - { - "epoch": 0.09326916559343862, - "grad_norm": 1.628627802900006, - "learning_rate": 1.979134557011318e-06, - "loss": 1.2783, - "step": 688 - }, - { - "epoch": 0.0934047312411035, - "grad_norm": 2.427822361817264, - "learning_rate": 1.979045224076038e-06, - "loss": 1.3106, - "step": 689 - }, - { - "epoch": 0.09354029688876839, - "grad_norm": 2.169833160909322, - "learning_rate": 1.9789557023396145e-06, - "loss": 1.2688, - "step": 690 - }, - { - "epoch": 0.09367586253643327, - "grad_norm": 1.8474082868980088, - "learning_rate": 1.9788659918193115e-06, - "loss": 1.3, - "step": 691 - }, - { - "epoch": 0.09381142818409816, - "grad_norm": 1.6028672641968185, - "learning_rate": 1.9787760925324285e-06, - "loss": 1.2876, - "step": 692 - }, - { - "epoch": 0.09394699383176303, - "grad_norm": 2.113549766034792, - "learning_rate": 1.9786860044963023e-06, - "loss": 1.2763, - "step": 693 - }, - { - "epoch": 0.09408255947942791, - "grad_norm": 2.119107258607182, - "learning_rate": 1.978595727728305e-06, - "loss": 1.2953, - "step": 694 - }, - { - "epoch": 0.0942181251270928, - "grad_norm": 1.3580683384851495, - "learning_rate": 1.9785052622458467e-06, - "loss": 1.2754, - "step": 695 - }, - { - "epoch": 0.09435369077475768, - "grad_norm": 2.0347326317362673, - "learning_rate": 1.978414608066372e-06, - "loss": 1.2978, - "step": 696 - }, - { - "epoch": 0.09448925642242256, - "grad_norm": 1.9530036036028577, - "learning_rate": 1.9783237652073633e-06, - "loss": 1.2492, - "step": 697 - }, - { - "epoch": 0.09462482207008743, - "grad_norm": 1.6265189119043595, - "learning_rate": 1.978232733686339e-06, - "loss": 1.2465, - "step": 698 - }, - { - "epoch": 0.09476038771775232, - "grad_norm": 1.5374981086648056, - "learning_rate": 1.9781415135208536e-06, - "loss": 1.2769, - "step": 699 - }, - { - "epoch": 0.0948959533654172, - "grad_norm": 1.581981420974454, - "learning_rate": 1.9780501047284983e-06, - "loss": 1.2713, - "step": 700 - }, - { - "epoch": 0.09503151901308209, - "grad_norm": 1.5877442380354205, - "learning_rate": 1.977958507326901e-06, - "loss": 1.2608, - "step": 701 - }, - { - "epoch": 0.09516708466074697, - "grad_norm": 1.6784522141134899, - "learning_rate": 1.9778667213337242e-06, - "loss": 1.2807, - "step": 702 - }, - { - "epoch": 0.09530265030841185, - "grad_norm": 2.6132137248801355, - "learning_rate": 1.97777474676667e-06, - "loss": 1.2672, - "step": 703 - }, - { - "epoch": 0.09543821595607672, - "grad_norm": 1.9561132148325933, - "learning_rate": 1.9776825836434733e-06, - "loss": 1.2653, - "step": 704 - }, - { - "epoch": 0.09557378160374161, - "grad_norm": 1.4291774300514024, - "learning_rate": 1.977590231981908e-06, - "loss": 1.2746, - "step": 705 - }, - { - "epoch": 0.09570934725140649, - "grad_norm": 1.451404608189407, - "learning_rate": 1.977497691799783e-06, - "loss": 1.267, - "step": 706 - }, - { - "epoch": 0.09584491289907138, - "grad_norm": 1.8276056261059224, - "learning_rate": 1.9774049631149443e-06, - "loss": 1.2785, - "step": 707 - }, - { - "epoch": 0.09598047854673626, - "grad_norm": 1.6032764817152547, - "learning_rate": 1.977312045945273e-06, - "loss": 1.2802, - "step": 708 - }, - { - "epoch": 0.09611604419440115, - "grad_norm": 1.7309554508393425, - "learning_rate": 1.9772189403086884e-06, - "loss": 1.2625, - "step": 709 - }, - { - "epoch": 0.09625160984206602, - "grad_norm": 1.5614635745975578, - "learning_rate": 1.977125646223145e-06, - "loss": 1.268, - "step": 710 - }, - { - "epoch": 0.0963871754897309, - "grad_norm": 1.7466695641535293, - "learning_rate": 1.977032163706633e-06, - "loss": 1.3235, - "step": 711 - }, - { - "epoch": 0.09652274113739578, - "grad_norm": 2.0245549024682687, - "learning_rate": 1.976938492777182e-06, - "loss": 1.2496, - "step": 712 - }, - { - "epoch": 0.09665830678506067, - "grad_norm": 1.4833217778779997, - "learning_rate": 1.976844633452853e-06, - "loss": 1.2721, - "step": 713 - }, - { - "epoch": 0.09679387243272555, - "grad_norm": 2.485928686175528, - "learning_rate": 1.976750585751747e-06, - "loss": 1.2542, - "step": 714 - }, - { - "epoch": 0.09692943808039042, - "grad_norm": 1.5198694354628275, - "learning_rate": 1.9766563496920014e-06, - "loss": 1.2904, - "step": 715 - }, - { - "epoch": 0.09706500372805531, - "grad_norm": 1.633720577847526, - "learning_rate": 1.9765619252917873e-06, - "loss": 1.2773, - "step": 716 - }, - { - "epoch": 0.09720056937572019, - "grad_norm": 2.510407800678708, - "learning_rate": 1.9764673125693146e-06, - "loss": 1.2943, - "step": 717 - }, - { - "epoch": 0.09733613502338508, - "grad_norm": 3.021141519839254, - "learning_rate": 1.9763725115428284e-06, - "loss": 1.2542, - "step": 718 - }, - { - "epoch": 0.09747170067104996, - "grad_norm": 1.7126479713661096, - "learning_rate": 1.9762775222306107e-06, - "loss": 1.2736, - "step": 719 - }, - { - "epoch": 0.09760726631871484, - "grad_norm": 1.6262182782458037, - "learning_rate": 1.976182344650979e-06, - "loss": 1.2648, - "step": 720 - }, - { - "epoch": 0.09774283196637971, - "grad_norm": 1.7336122607713624, - "learning_rate": 1.9760869788222873e-06, - "loss": 1.2591, - "step": 721 - }, - { - "epoch": 0.0978783976140446, - "grad_norm": 2.665015743333432, - "learning_rate": 1.9759914247629264e-06, - "loss": 1.2929, - "step": 722 - }, - { - "epoch": 0.09801396326170948, - "grad_norm": 1.5452797836536165, - "learning_rate": 1.975895682491324e-06, - "loss": 1.2462, - "step": 723 - }, - { - "epoch": 0.09814952890937437, - "grad_norm": 1.7389593036870483, - "learning_rate": 1.975799752025942e-06, - "loss": 1.3001, - "step": 724 - }, - { - "epoch": 0.09828509455703925, - "grad_norm": 1.5629965610880847, - "learning_rate": 1.97570363338528e-06, - "loss": 1.2664, - "step": 725 - }, - { - "epoch": 0.09842066020470412, - "grad_norm": 2.2302046147953467, - "learning_rate": 1.9756073265878746e-06, - "loss": 1.2757, - "step": 726 - }, - { - "epoch": 0.098556225852369, - "grad_norm": 1.7159532034122267, - "learning_rate": 1.9755108316522967e-06, - "loss": 1.2799, - "step": 727 - }, - { - "epoch": 0.09869179150003389, - "grad_norm": 1.447176459956692, - "learning_rate": 1.9754141485971555e-06, - "loss": 1.2756, - "step": 728 - }, - { - "epoch": 0.09882735714769877, - "grad_norm": 1.5665690954691323, - "learning_rate": 1.9753172774410952e-06, - "loss": 1.2874, - "step": 729 - }, - { - "epoch": 0.09896292279536366, - "grad_norm": 1.574369159208473, - "learning_rate": 1.9752202182027967e-06, - "loss": 1.2722, - "step": 730 - }, - { - "epoch": 0.09909848844302854, - "grad_norm": 15.412214893481114, - "learning_rate": 1.9751229709009767e-06, - "loss": 1.2504, - "step": 731 - }, - { - "epoch": 0.09923405409069341, - "grad_norm": 1.4367962899242634, - "learning_rate": 1.975025535554389e-06, - "loss": 1.3092, - "step": 732 - }, - { - "epoch": 0.0993696197383583, - "grad_norm": 1.599804043516488, - "learning_rate": 1.9749279121818236e-06, - "loss": 1.2439, - "step": 733 - }, - { - "epoch": 0.09950518538602318, - "grad_norm": 1.5663906812806803, - "learning_rate": 1.9748301008021055e-06, - "loss": 1.2838, - "step": 734 - }, - { - "epoch": 0.09964075103368807, - "grad_norm": 1.6403049877053975, - "learning_rate": 1.9747321014340974e-06, - "loss": 1.2334, - "step": 735 - }, - { - "epoch": 0.09977631668135295, - "grad_norm": 1.5204378637994362, - "learning_rate": 1.974633914096698e-06, - "loss": 1.2487, - "step": 736 - }, - { - "epoch": 0.09991188232901782, - "grad_norm": 1.6438341392102949, - "learning_rate": 1.974535538808841e-06, - "loss": 1.2689, - "step": 737 - }, - { - "epoch": 0.1000474479766827, - "grad_norm": 1.40066131309602, - "learning_rate": 1.9744369755894977e-06, - "loss": 1.3051, - "step": 738 - }, - { - "epoch": 0.10018301362434759, - "grad_norm": 1.5697385949561993, - "learning_rate": 1.974338224457676e-06, - "loss": 1.2584, - "step": 739 - }, - { - "epoch": 0.10031857927201247, - "grad_norm": 1.4839171349542781, - "learning_rate": 1.9742392854324186e-06, - "loss": 1.2915, - "step": 740 - }, - { - "epoch": 0.10045414491967736, - "grad_norm": 2.314523955568001, - "learning_rate": 1.974140158532805e-06, - "loss": 1.2947, - "step": 741 - }, - { - "epoch": 0.10058971056734224, - "grad_norm": 1.9879510536035845, - "learning_rate": 1.974040843777951e-06, - "loss": 1.2274, - "step": 742 - }, - { - "epoch": 0.10072527621500711, - "grad_norm": 8.461030352584999, - "learning_rate": 1.973941341187009e-06, - "loss": 1.2831, - "step": 743 - }, - { - "epoch": 0.100860841862672, - "grad_norm": 1.510644350033131, - "learning_rate": 1.9738416507791676e-06, - "loss": 1.2887, - "step": 744 - }, - { - "epoch": 0.10099640751033688, - "grad_norm": 1.758178071911335, - "learning_rate": 1.9737417725736507e-06, - "loss": 1.2325, - "step": 745 - }, - { - "epoch": 0.10113197315800176, - "grad_norm": 1.436185039703425, - "learning_rate": 1.9736417065897187e-06, - "loss": 1.3195, - "step": 746 - }, - { - "epoch": 0.10126753880566665, - "grad_norm": 2.0841992510118224, - "learning_rate": 1.9735414528466694e-06, - "loss": 1.292, - "step": 747 - }, - { - "epoch": 0.10140310445333152, - "grad_norm": 1.504809454982794, - "learning_rate": 1.9734410113638356e-06, - "loss": 1.2721, - "step": 748 - }, - { - "epoch": 0.1015386701009964, - "grad_norm": 1.7374768813281256, - "learning_rate": 1.973340382160587e-06, - "loss": 1.2808, - "step": 749 - }, - { - "epoch": 0.10167423574866129, - "grad_norm": 1.6362596739393156, - "learning_rate": 1.973239565256328e-06, - "loss": 1.3203, - "step": 750 - }, - { - "epoch": 0.10180980139632617, - "grad_norm": 1.6898965219227324, - "learning_rate": 1.973138560670502e-06, - "loss": 1.3024, - "step": 751 - }, - { - "epoch": 0.10194536704399106, - "grad_norm": 1.66743253980346, - "learning_rate": 1.973037368422585e-06, - "loss": 1.2398, - "step": 752 - }, - { - "epoch": 0.10208093269165594, - "grad_norm": 1.935099864389448, - "learning_rate": 1.9729359885320933e-06, - "loss": 1.2805, - "step": 753 - }, - { - "epoch": 0.10221649833932081, - "grad_norm": 1.5528957201794682, - "learning_rate": 1.9728344210185757e-06, - "loss": 1.2511, - "step": 754 - }, - { - "epoch": 0.1023520639869857, - "grad_norm": 1.6256867277587643, - "learning_rate": 1.9727326659016187e-06, - "loss": 1.2845, - "step": 755 - }, - { - "epoch": 0.10248762963465058, - "grad_norm": 1.4314543653225704, - "learning_rate": 1.972630723200846e-06, - "loss": 1.2543, - "step": 756 - }, - { - "epoch": 0.10262319528231546, - "grad_norm": 1.4374501481988509, - "learning_rate": 1.9725285929359156e-06, - "loss": 1.2625, - "step": 757 - }, - { - "epoch": 0.10275876092998035, - "grad_norm": 2.147596532864122, - "learning_rate": 1.9724262751265222e-06, - "loss": 1.2891, - "step": 758 - }, - { - "epoch": 0.10289432657764523, - "grad_norm": 2.0275417697695466, - "learning_rate": 1.972323769792398e-06, - "loss": 1.2992, - "step": 759 - }, - { - "epoch": 0.1030298922253101, - "grad_norm": 1.7541285057707512, - "learning_rate": 1.97222107695331e-06, - "loss": 1.2954, - "step": 760 - }, - { - "epoch": 0.10316545787297499, - "grad_norm": 1.9552489776499686, - "learning_rate": 1.9721181966290614e-06, - "loss": 1.2581, - "step": 761 - }, - { - "epoch": 0.10330102352063987, - "grad_norm": 1.6272177470116274, - "learning_rate": 1.9720151288394916e-06, - "loss": 1.2797, - "step": 762 - }, - { - "epoch": 0.10343658916830475, - "grad_norm": 2.0469354878465325, - "learning_rate": 1.9719118736044773e-06, - "loss": 1.2483, - "step": 763 - }, - { - "epoch": 0.10357215481596964, - "grad_norm": 7.172800527407506, - "learning_rate": 1.97180843094393e-06, - "loss": 1.2517, - "step": 764 - }, - { - "epoch": 0.10370772046363451, - "grad_norm": 1.6862036695136389, - "learning_rate": 1.9717048008777978e-06, - "loss": 1.2885, - "step": 765 - }, - { - "epoch": 0.1038432861112994, - "grad_norm": 1.9719088387570782, - "learning_rate": 1.9716009834260645e-06, - "loss": 1.2922, - "step": 766 - }, - { - "epoch": 0.10397885175896428, - "grad_norm": 1.4861523448379703, - "learning_rate": 1.971496978608751e-06, - "loss": 1.2729, - "step": 767 - }, - { - "epoch": 0.10411441740662916, - "grad_norm": 1.6197310960897238, - "learning_rate": 1.971392786445914e-06, - "loss": 1.2735, - "step": 768 - }, - { - "epoch": 0.10424998305429405, - "grad_norm": 1.589928799974308, - "learning_rate": 1.9712884069576455e-06, - "loss": 1.2458, - "step": 769 - }, - { - "epoch": 0.10438554870195893, - "grad_norm": 2.3989492689158567, - "learning_rate": 1.971183840164075e-06, - "loss": 1.2482, - "step": 770 - }, - { - "epoch": 0.1045211143496238, - "grad_norm": 1.4517086319681582, - "learning_rate": 1.9710790860853667e-06, - "loss": 1.2516, - "step": 771 - }, - { - "epoch": 0.10465667999728868, - "grad_norm": 1.7076974939104437, - "learning_rate": 1.9709741447417223e-06, - "loss": 1.2577, - "step": 772 - }, - { - "epoch": 0.10479224564495357, - "grad_norm": 1.922689967618789, - "learning_rate": 1.970869016153378e-06, - "loss": 1.2943, - "step": 773 - }, - { - "epoch": 0.10492781129261845, - "grad_norm": 1.5125418793431575, - "learning_rate": 1.9707637003406075e-06, - "loss": 1.2622, - "step": 774 - }, - { - "epoch": 0.10506337694028334, - "grad_norm": 1.4636607439305682, - "learning_rate": 1.9706581973237202e-06, - "loss": 1.2527, - "step": 775 - }, - { - "epoch": 0.10519894258794821, - "grad_norm": 1.3812441509516777, - "learning_rate": 1.9705525071230616e-06, - "loss": 1.27, - "step": 776 - }, - { - "epoch": 0.10533450823561309, - "grad_norm": 2.865237880813151, - "learning_rate": 1.9704466297590134e-06, - "loss": 1.2911, - "step": 777 - }, - { - "epoch": 0.10547007388327798, - "grad_norm": 1.6553431100127027, - "learning_rate": 1.9703405652519924e-06, - "loss": 1.3086, - "step": 778 - }, - { - "epoch": 0.10560563953094286, - "grad_norm": 2.1164501762478816, - "learning_rate": 1.970234313622453e-06, - "loss": 1.285, - "step": 779 - }, - { - "epoch": 0.10574120517860774, - "grad_norm": 1.609086384716465, - "learning_rate": 1.9701278748908844e-06, - "loss": 1.2739, - "step": 780 - }, - { - "epoch": 0.10587677082627263, - "grad_norm": 1.7841574504373858, - "learning_rate": 1.9700212490778136e-06, - "loss": 1.2728, - "step": 781 - }, - { - "epoch": 0.1060123364739375, - "grad_norm": 1.523955430834669, - "learning_rate": 1.969914436203801e-06, - "loss": 1.2828, - "step": 782 - }, - { - "epoch": 0.10614790212160238, - "grad_norm": 2.806397425455087, - "learning_rate": 1.9698074362894456e-06, - "loss": 1.3043, - "step": 783 - }, - { - "epoch": 0.10628346776926727, - "grad_norm": 1.9566490640604284, - "learning_rate": 1.9697002493553815e-06, - "loss": 1.2696, - "step": 784 - }, - { - "epoch": 0.10641903341693215, - "grad_norm": 1.6209967146272017, - "learning_rate": 1.969592875422279e-06, - "loss": 1.292, - "step": 785 - }, - { - "epoch": 0.10655459906459704, - "grad_norm": 3.36843914374822, - "learning_rate": 1.9694853145108433e-06, - "loss": 1.2622, - "step": 786 - }, - { - "epoch": 0.1066901647122619, - "grad_norm": 2.13696258033473, - "learning_rate": 1.969377566641818e-06, - "loss": 1.2687, - "step": 787 - }, - { - "epoch": 0.10682573035992679, - "grad_norm": 1.8649322043139447, - "learning_rate": 1.96926963183598e-06, - "loss": 1.2863, - "step": 788 - }, - { - "epoch": 0.10696129600759167, - "grad_norm": 1.7278471887021751, - "learning_rate": 1.9691615101141454e-06, - "loss": 1.2946, - "step": 789 - }, - { - "epoch": 0.10709686165525656, - "grad_norm": 4.8500837955772065, - "learning_rate": 1.969053201497163e-06, - "loss": 1.2829, - "step": 790 - }, - { - "epoch": 0.10723242730292144, - "grad_norm": 1.5680663245981747, - "learning_rate": 1.96894470600592e-06, - "loss": 1.2842, - "step": 791 - }, - { - "epoch": 0.10736799295058633, - "grad_norm": 1.741450741261888, - "learning_rate": 1.9688360236613388e-06, - "loss": 1.2313, - "step": 792 - }, - { - "epoch": 0.1075035585982512, - "grad_norm": 1.8561874330629216, - "learning_rate": 1.968727154484378e-06, - "loss": 1.3176, - "step": 793 - }, - { - "epoch": 0.10763912424591608, - "grad_norm": 1.8914746300131384, - "learning_rate": 1.968618098496032e-06, - "loss": 1.2903, - "step": 794 - }, - { - "epoch": 0.10777468989358097, - "grad_norm": 1.7908453917409826, - "learning_rate": 1.9685088557173318e-06, - "loss": 1.2777, - "step": 795 - }, - { - "epoch": 0.10791025554124585, - "grad_norm": 1.5520296848145565, - "learning_rate": 1.968399426169344e-06, - "loss": 1.267, - "step": 796 - }, - { - "epoch": 0.10804582118891073, - "grad_norm": 1.4858809256622454, - "learning_rate": 1.9682898098731707e-06, - "loss": 1.2412, - "step": 797 - }, - { - "epoch": 0.1081813868365756, - "grad_norm": 2.643632999590669, - "learning_rate": 1.9681800068499507e-06, - "loss": 1.2746, - "step": 798 - }, - { - "epoch": 0.10831695248424049, - "grad_norm": 1.7226708374056627, - "learning_rate": 1.9680700171208583e-06, - "loss": 1.331, - "step": 799 - }, - { - "epoch": 0.10845251813190537, - "grad_norm": 1.6143992489412122, - "learning_rate": 1.9679598407071053e-06, - "loss": 1.2848, - "step": 800 - }, - { - "epoch": 0.10858808377957026, - "grad_norm": 1.7507716186101254, - "learning_rate": 1.967849477629937e-06, - "loss": 1.2781, - "step": 801 - }, - { - "epoch": 0.10872364942723514, - "grad_norm": 1.521036152500972, - "learning_rate": 1.9677389279106367e-06, - "loss": 1.2729, - "step": 802 - }, - { - "epoch": 0.10885921507490003, - "grad_norm": 1.5719555105983984, - "learning_rate": 1.9676281915705236e-06, - "loss": 1.2481, - "step": 803 - }, - { - "epoch": 0.1089947807225649, - "grad_norm": 2.051349756770951, - "learning_rate": 1.9675172686309516e-06, - "loss": 1.2776, - "step": 804 - }, - { - "epoch": 0.10913034637022978, - "grad_norm": 2.3211077189387646, - "learning_rate": 1.9674061591133114e-06, - "loss": 1.2728, - "step": 805 - }, - { - "epoch": 0.10926591201789466, - "grad_norm": 3.674067968202453, - "learning_rate": 1.9672948630390295e-06, - "loss": 1.2412, - "step": 806 - }, - { - "epoch": 0.10940147766555955, - "grad_norm": 4.688287815384254, - "learning_rate": 1.9671833804295684e-06, - "loss": 1.2928, - "step": 807 - }, - { - "epoch": 0.10953704331322443, - "grad_norm": 1.7692705197883027, - "learning_rate": 1.967071711306427e-06, - "loss": 1.2818, - "step": 808 - }, - { - "epoch": 0.10967260896088932, - "grad_norm": 1.6640975327189038, - "learning_rate": 1.96695985569114e-06, - "loss": 1.2749, - "step": 809 - }, - { - "epoch": 0.10980817460855419, - "grad_norm": 2.1424330672471092, - "learning_rate": 1.966847813605277e-06, - "loss": 1.268, - "step": 810 - }, - { - "epoch": 0.10994374025621907, - "grad_norm": 1.6713294698217311, - "learning_rate": 1.9667355850704456e-06, - "loss": 1.2908, - "step": 811 - }, - { - "epoch": 0.11007930590388396, - "grad_norm": 1.505286279798439, - "learning_rate": 1.9666231701082876e-06, - "loss": 1.2187, - "step": 812 - }, - { - "epoch": 0.11021487155154884, - "grad_norm": 3.1227536262024036, - "learning_rate": 1.966510568740481e-06, - "loss": 1.2636, - "step": 813 - }, - { - "epoch": 0.11035043719921372, - "grad_norm": 1.943919832350122, - "learning_rate": 1.9663977809887406e-06, - "loss": 1.2398, - "step": 814 - }, - { - "epoch": 0.1104860028468786, - "grad_norm": 1.955432040392769, - "learning_rate": 1.966284806874816e-06, - "loss": 1.2322, - "step": 815 - }, - { - "epoch": 0.11062156849454348, - "grad_norm": 1.5113860136999075, - "learning_rate": 1.966171646420494e-06, - "loss": 1.2533, - "step": 816 - }, - { - "epoch": 0.11075713414220836, - "grad_norm": 3.1272395709036194, - "learning_rate": 1.9660582996475962e-06, - "loss": 1.3028, - "step": 817 - }, - { - "epoch": 0.11089269978987325, - "grad_norm": 1.6515597811575264, - "learning_rate": 1.9659447665779815e-06, - "loss": 1.2249, - "step": 818 - }, - { - "epoch": 0.11102826543753813, - "grad_norm": 2.052907730950357, - "learning_rate": 1.965831047233543e-06, - "loss": 1.2509, - "step": 819 - }, - { - "epoch": 0.11116383108520302, - "grad_norm": 1.7395146985490642, - "learning_rate": 1.965717141636211e-06, - "loss": 1.2844, - "step": 820 - }, - { - "epoch": 0.11129939673286789, - "grad_norm": 1.52026905932968, - "learning_rate": 1.9656030498079507e-06, - "loss": 1.2737, - "step": 821 - }, - { - "epoch": 0.11143496238053277, - "grad_norm": 1.464574043811576, - "learning_rate": 1.9654887717707645e-06, - "loss": 1.2561, - "step": 822 - }, - { - "epoch": 0.11157052802819765, - "grad_norm": 1.453437200165557, - "learning_rate": 1.96537430754669e-06, - "loss": 1.3, - "step": 823 - }, - { - "epoch": 0.11170609367586254, - "grad_norm": 1.8553514592430425, - "learning_rate": 1.9652596571578003e-06, - "loss": 1.2916, - "step": 824 - }, - { - "epoch": 0.11184165932352742, - "grad_norm": 1.8809571857512666, - "learning_rate": 1.9651448206262047e-06, - "loss": 1.2487, - "step": 825 - }, - { - "epoch": 0.1119772249711923, - "grad_norm": 28.029758845478085, - "learning_rate": 1.965029797974049e-06, - "loss": 1.2655, - "step": 826 - }, - { - "epoch": 0.11211279061885718, - "grad_norm": 1.8027089268293792, - "learning_rate": 1.9649145892235145e-06, - "loss": 1.2479, - "step": 827 - }, - { - "epoch": 0.11224835626652206, - "grad_norm": 1.647419514738696, - "learning_rate": 1.964799194396818e-06, - "loss": 1.2611, - "step": 828 - }, - { - "epoch": 0.11238392191418695, - "grad_norm": 1.8593231742725629, - "learning_rate": 1.9646836135162125e-06, - "loss": 1.2625, - "step": 829 - }, - { - "epoch": 0.11251948756185183, - "grad_norm": 6.613446707953263, - "learning_rate": 1.9645678466039864e-06, - "loss": 1.2351, - "step": 830 - }, - { - "epoch": 0.11265505320951671, - "grad_norm": 1.5591967723197564, - "learning_rate": 1.9644518936824658e-06, - "loss": 1.2822, - "step": 831 - }, - { - "epoch": 0.11279061885718158, - "grad_norm": 2.0745683815045965, - "learning_rate": 1.9643357547740097e-06, - "loss": 1.2418, - "step": 832 - }, - { - "epoch": 0.11292618450484647, - "grad_norm": 3.8872711137150486, - "learning_rate": 1.9642194299010155e-06, - "loss": 1.2102, - "step": 833 - }, - { - "epoch": 0.11306175015251135, - "grad_norm": 1.8209809749464891, - "learning_rate": 1.9641029190859155e-06, - "loss": 1.2567, - "step": 834 - }, - { - "epoch": 0.11319731580017624, - "grad_norm": 1.5381605282961741, - "learning_rate": 1.9639862223511777e-06, - "loss": 1.2434, - "step": 835 - }, - { - "epoch": 0.11333288144784112, - "grad_norm": 1.5928669147722587, - "learning_rate": 1.9638693397193057e-06, - "loss": 1.2346, - "step": 836 - }, - { - "epoch": 0.11346844709550599, - "grad_norm": 2.092950986212143, - "learning_rate": 1.9637522712128407e-06, - "loss": 1.243, - "step": 837 - }, - { - "epoch": 0.11360401274317088, - "grad_norm": 1.898738301824002, - "learning_rate": 1.963635016854357e-06, - "loss": 1.2708, - "step": 838 - }, - { - "epoch": 0.11373957839083576, - "grad_norm": 1.3826524158905262, - "learning_rate": 1.963517576666467e-06, - "loss": 1.2562, - "step": 839 - }, - { - "epoch": 0.11387514403850064, - "grad_norm": 1.5628965029900301, - "learning_rate": 1.9633999506718176e-06, - "loss": 1.2732, - "step": 840 - }, - { - "epoch": 0.11401070968616553, - "grad_norm": 1.6022526906669057, - "learning_rate": 1.9632821388930926e-06, - "loss": 1.2423, - "step": 841 - }, - { - "epoch": 0.11414627533383041, - "grad_norm": 2.192606508762241, - "learning_rate": 1.9631641413530102e-06, - "loss": 1.2242, - "step": 842 - }, - { - "epoch": 0.11428184098149528, - "grad_norm": 1.5667686858178926, - "learning_rate": 1.9630459580743264e-06, - "loss": 1.2926, - "step": 843 - }, - { - "epoch": 0.11441740662916017, - "grad_norm": 1.4633853208155367, - "learning_rate": 1.9629275890798315e-06, - "loss": 1.2607, - "step": 844 - }, - { - "epoch": 0.11455297227682505, - "grad_norm": 1.8028736484332653, - "learning_rate": 1.962809034392352e-06, - "loss": 1.2674, - "step": 845 - }, - { - "epoch": 0.11468853792448994, - "grad_norm": 3.9443603648925145, - "learning_rate": 1.96269029403475e-06, - "loss": 1.2771, - "step": 846 - }, - { - "epoch": 0.11482410357215482, - "grad_norm": 1.4212543363678605, - "learning_rate": 1.962571368029924e-06, - "loss": 1.27, - "step": 847 - }, - { - "epoch": 0.1149596692198197, - "grad_norm": 1.6921129641813615, - "learning_rate": 1.9624522564008074e-06, - "loss": 1.2582, - "step": 848 - }, - { - "epoch": 0.11509523486748457, - "grad_norm": 1.577105224860581, - "learning_rate": 1.9623329591703706e-06, - "loss": 1.2588, - "step": 849 - }, - { - "epoch": 0.11523080051514946, - "grad_norm": 1.7494780956972387, - "learning_rate": 1.962213476361619e-06, - "loss": 1.2758, - "step": 850 - }, - { - "epoch": 0.11536636616281434, - "grad_norm": 1.574599791434754, - "learning_rate": 1.962093807997593e-06, - "loss": 1.2577, - "step": 851 - }, - { - "epoch": 0.11550193181047923, - "grad_norm": 1.7132079566088423, - "learning_rate": 1.961973954101371e-06, - "loss": 1.2835, - "step": 852 - }, - { - "epoch": 0.11563749745814411, - "grad_norm": 1.8375047510298568, - "learning_rate": 1.961853914696065e-06, - "loss": 1.2781, - "step": 853 - }, - { - "epoch": 0.11577306310580898, - "grad_norm": 1.470682876200792, - "learning_rate": 1.961733689804824e-06, - "loss": 1.2357, - "step": 854 - }, - { - "epoch": 0.11590862875347387, - "grad_norm": 2.8470346219756038, - "learning_rate": 1.961613279450833e-06, - "loss": 1.284, - "step": 855 - }, - { - "epoch": 0.11604419440113875, - "grad_norm": 1.6955449224934596, - "learning_rate": 1.9614926836573107e-06, - "loss": 1.2863, - "step": 856 - }, - { - "epoch": 0.11617976004880363, - "grad_norm": 1.3978470557756484, - "learning_rate": 1.9613719024475145e-06, - "loss": 1.2617, - "step": 857 - }, - { - "epoch": 0.11631532569646852, - "grad_norm": 1.5304688625654441, - "learning_rate": 1.961250935844735e-06, - "loss": 1.3009, - "step": 858 - }, - { - "epoch": 0.1164508913441334, - "grad_norm": 1.5143135757191295, - "learning_rate": 1.9611297838723007e-06, - "loss": 1.2718, - "step": 859 - }, - { - "epoch": 0.11658645699179827, - "grad_norm": 2.2230684604389186, - "learning_rate": 1.961008446553574e-06, - "loss": 1.3124, - "step": 860 - }, - { - "epoch": 0.11672202263946316, - "grad_norm": 1.7129151105577303, - "learning_rate": 1.9608869239119545e-06, - "loss": 1.2847, - "step": 861 - }, - { - "epoch": 0.11685758828712804, - "grad_norm": 1.59888880782774, - "learning_rate": 1.960765215970876e-06, - "loss": 1.2335, - "step": 862 - }, - { - "epoch": 0.11699315393479293, - "grad_norm": 2.3816511970848517, - "learning_rate": 1.9606433227538095e-06, - "loss": 1.2871, - "step": 863 - }, - { - "epoch": 0.11712871958245781, - "grad_norm": 1.7804844509307958, - "learning_rate": 1.960521244284261e-06, - "loss": 1.2938, - "step": 864 - }, - { - "epoch": 0.11726428523012268, - "grad_norm": 1.4907439497119248, - "learning_rate": 1.960398980585773e-06, - "loss": 1.2542, - "step": 865 - }, - { - "epoch": 0.11739985087778756, - "grad_norm": 1.6721954678806472, - "learning_rate": 1.960276531681922e-06, - "loss": 1.2449, - "step": 866 - }, - { - "epoch": 0.11753541652545245, - "grad_norm": 1.5468347731108112, - "learning_rate": 1.960153897596322e-06, - "loss": 1.2477, - "step": 867 - }, - { - "epoch": 0.11767098217311733, - "grad_norm": 1.770939032990603, - "learning_rate": 1.960031078352622e-06, - "loss": 1.2556, - "step": 868 - }, - { - "epoch": 0.11780654782078222, - "grad_norm": 2.2391009352404954, - "learning_rate": 1.9599080739745064e-06, - "loss": 1.2497, - "step": 869 - }, - { - "epoch": 0.1179421134684471, - "grad_norm": 1.8081896855269712, - "learning_rate": 1.9597848844856955e-06, - "loss": 1.285, - "step": 870 - }, - { - "epoch": 0.11807767911611197, - "grad_norm": 2.483366602345101, - "learning_rate": 1.959661509909946e-06, - "loss": 1.2504, - "step": 871 - }, - { - "epoch": 0.11821324476377686, - "grad_norm": 1.6509566775363147, - "learning_rate": 1.9595379502710495e-06, - "loss": 1.2656, - "step": 872 - }, - { - "epoch": 0.11834881041144174, - "grad_norm": 1.6096692846908967, - "learning_rate": 1.9594142055928333e-06, - "loss": 1.2341, - "step": 873 - }, - { - "epoch": 0.11848437605910662, - "grad_norm": 1.5067061419783216, - "learning_rate": 1.9592902758991606e-06, - "loss": 1.266, - "step": 874 - }, - { - "epoch": 0.11861994170677151, - "grad_norm": 2.6258702287766993, - "learning_rate": 1.9591661612139306e-06, - "loss": 1.2768, - "step": 875 - }, - { - "epoch": 0.11875550735443638, - "grad_norm": 1.6810064756713663, - "learning_rate": 1.9590418615610775e-06, - "loss": 1.2808, - "step": 876 - }, - { - "epoch": 0.11889107300210126, - "grad_norm": 1.579070452797444, - "learning_rate": 1.9589173769645714e-06, - "loss": 1.2732, - "step": 877 - }, - { - "epoch": 0.11902663864976615, - "grad_norm": 1.5661392494707786, - "learning_rate": 1.958792707448419e-06, - "loss": 1.2651, - "step": 878 - }, - { - "epoch": 0.11916220429743103, - "grad_norm": 1.6304416687657992, - "learning_rate": 1.9586678530366606e-06, - "loss": 1.2424, - "step": 879 - }, - { - "epoch": 0.11929776994509592, - "grad_norm": 2.4025959783358357, - "learning_rate": 1.958542813753374e-06, - "loss": 1.3082, - "step": 880 - }, - { - "epoch": 0.1194333355927608, - "grad_norm": 1.564548294064639, - "learning_rate": 1.9584175896226725e-06, - "loss": 1.2769, - "step": 881 - }, - { - "epoch": 0.11956890124042567, - "grad_norm": 1.5298088942977284, - "learning_rate": 1.9582921806687037e-06, - "loss": 1.2767, - "step": 882 - }, - { - "epoch": 0.11970446688809055, - "grad_norm": 1.6865875894779256, - "learning_rate": 1.9581665869156526e-06, - "loss": 1.2384, - "step": 883 - }, - { - "epoch": 0.11984003253575544, - "grad_norm": 1.4491629330031184, - "learning_rate": 1.958040808387738e-06, - "loss": 1.2436, - "step": 884 - }, - { - "epoch": 0.11997559818342032, - "grad_norm": 1.8647869300767543, - "learning_rate": 1.9579148451092163e-06, - "loss": 1.2513, - "step": 885 - }, - { - "epoch": 0.12011116383108521, - "grad_norm": 2.4608023949001243, - "learning_rate": 1.957788697104378e-06, - "loss": 1.3089, - "step": 886 - }, - { - "epoch": 0.12024672947875008, - "grad_norm": 1.8284479007455114, - "learning_rate": 1.9576623643975496e-06, - "loss": 1.2626, - "step": 887 - }, - { - "epoch": 0.12038229512641496, - "grad_norm": 2.124596345281153, - "learning_rate": 1.9575358470130934e-06, - "loss": 1.2415, - "step": 888 - }, - { - "epoch": 0.12051786077407985, - "grad_norm": 2.041288693029506, - "learning_rate": 1.9574091449754074e-06, - "loss": 1.2621, - "step": 889 - }, - { - "epoch": 0.12065342642174473, - "grad_norm": 1.5473955140496722, - "learning_rate": 1.9572822583089253e-06, - "loss": 1.2783, - "step": 890 - }, - { - "epoch": 0.12078899206940961, - "grad_norm": 2.3053571096674985, - "learning_rate": 1.9571551870381163e-06, - "loss": 1.2513, - "step": 891 - }, - { - "epoch": 0.1209245577170745, - "grad_norm": 1.6755594865886592, - "learning_rate": 1.9570279311874842e-06, - "loss": 1.2547, - "step": 892 - }, - { - "epoch": 0.12106012336473937, - "grad_norm": 5.483868877736675, - "learning_rate": 1.9569004907815706e-06, - "loss": 1.2611, - "step": 893 - }, - { - "epoch": 0.12119568901240425, - "grad_norm": 1.750377807540651, - "learning_rate": 1.9567728658449503e-06, - "loss": 1.2454, - "step": 894 - }, - { - "epoch": 0.12133125466006914, - "grad_norm": 1.573484089789557, - "learning_rate": 1.956645056402235e-06, - "loss": 1.2313, - "step": 895 - }, - { - "epoch": 0.12146682030773402, - "grad_norm": 1.5941705475565895, - "learning_rate": 1.956517062478072e-06, - "loss": 1.2603, - "step": 896 - }, - { - "epoch": 0.1216023859553989, - "grad_norm": 1.8638804973797685, - "learning_rate": 1.956388884097144e-06, - "loss": 1.2771, - "step": 897 - }, - { - "epoch": 0.12173795160306379, - "grad_norm": 1.7147061697928825, - "learning_rate": 1.9562605212841686e-06, - "loss": 1.2595, - "step": 898 - }, - { - "epoch": 0.12187351725072866, - "grad_norm": 1.6116627891911504, - "learning_rate": 1.9561319740639e-06, - "loss": 1.2728, - "step": 899 - }, - { - "epoch": 0.12200908289839354, - "grad_norm": 1.7251546211093454, - "learning_rate": 1.9560032424611274e-06, - "loss": 1.2491, - "step": 900 - }, - { - "epoch": 0.12214464854605843, - "grad_norm": 1.9040553631325392, - "learning_rate": 1.955874326500676e-06, - "loss": 1.2709, - "step": 901 - }, - { - "epoch": 0.12228021419372331, - "grad_norm": 1.9650259584293959, - "learning_rate": 1.955745226207406e-06, - "loss": 1.2401, - "step": 902 - }, - { - "epoch": 0.1224157798413882, - "grad_norm": 2.259370800687737, - "learning_rate": 1.9556159416062127e-06, - "loss": 1.2279, - "step": 903 - }, - { - "epoch": 0.12255134548905307, - "grad_norm": 1.711526510230783, - "learning_rate": 1.955486472722029e-06, - "loss": 1.2946, - "step": 904 - }, - { - "epoch": 0.12268691113671795, - "grad_norm": 1.713021412522385, - "learning_rate": 1.955356819579821e-06, - "loss": 1.2416, - "step": 905 - }, - { - "epoch": 0.12282247678438284, - "grad_norm": 2.7568415075320063, - "learning_rate": 1.955226982204591e-06, - "loss": 1.2621, - "step": 906 - }, - { - "epoch": 0.12295804243204772, - "grad_norm": 1.8293832950823714, - "learning_rate": 1.955096960621378e-06, - "loss": 1.2687, - "step": 907 - }, - { - "epoch": 0.1230936080797126, - "grad_norm": 1.5054633707831675, - "learning_rate": 1.9549667548552553e-06, - "loss": 1.2433, - "step": 908 - }, - { - "epoch": 0.12322917372737749, - "grad_norm": 1.7093653082722424, - "learning_rate": 1.9548363649313315e-06, - "loss": 1.2549, - "step": 909 - }, - { - "epoch": 0.12336473937504236, - "grad_norm": 2.0778024326846856, - "learning_rate": 1.9547057908747522e-06, - "loss": 1.2695, - "step": 910 - }, - { - "epoch": 0.12350030502270724, - "grad_norm": 1.9262297794942371, - "learning_rate": 1.954575032710697e-06, - "loss": 1.2514, - "step": 911 - }, - { - "epoch": 0.12363587067037213, - "grad_norm": 1.8291105906396925, - "learning_rate": 1.954444090464382e-06, - "loss": 1.2685, - "step": 912 - }, - { - "epoch": 0.12377143631803701, - "grad_norm": 1.5773506851358237, - "learning_rate": 1.9543129641610575e-06, - "loss": 1.2502, - "step": 913 - }, - { - "epoch": 0.1239070019657019, - "grad_norm": 1.5919369569765018, - "learning_rate": 1.9541816538260105e-06, - "loss": 1.2751, - "step": 914 - }, - { - "epoch": 0.12404256761336677, - "grad_norm": 1.399763568474772, - "learning_rate": 1.954050159484564e-06, - "loss": 1.2669, - "step": 915 - }, - { - "epoch": 0.12417813326103165, - "grad_norm": 2.16546190402148, - "learning_rate": 1.953918481162075e-06, - "loss": 1.2362, - "step": 916 - }, - { - "epoch": 0.12431369890869653, - "grad_norm": 1.73251220484067, - "learning_rate": 1.953786618883937e-06, - "loss": 1.2402, - "step": 917 - }, - { - "epoch": 0.12444926455636142, - "grad_norm": 1.7625479248174385, - "learning_rate": 1.953654572675578e-06, - "loss": 1.3062, - "step": 918 - }, - { - "epoch": 0.1245848302040263, - "grad_norm": 1.8203387771668529, - "learning_rate": 1.953522342562462e-06, - "loss": 1.2366, - "step": 919 - }, - { - "epoch": 0.12472039585169119, - "grad_norm": 4.1837069463971766, - "learning_rate": 1.9533899285700893e-06, - "loss": 1.2094, - "step": 920 - }, - { - "epoch": 0.12485596149935606, - "grad_norm": 2.1386376931682487, - "learning_rate": 1.9532573307239942e-06, - "loss": 1.2134, - "step": 921 - }, - { - "epoch": 0.12499152714702094, - "grad_norm": 1.633000635039251, - "learning_rate": 1.9531245490497475e-06, - "loss": 1.269, - "step": 922 - }, - { - "epoch": 0.12512709279468584, - "grad_norm": 1.6443585593265144, - "learning_rate": 1.952991583572955e-06, - "loss": 1.261, - "step": 923 - }, - { - "epoch": 0.1252626584423507, - "grad_norm": 1.9309861202426057, - "learning_rate": 1.9528584343192583e-06, - "loss": 1.2779, - "step": 924 - }, - { - "epoch": 0.12539822409001558, - "grad_norm": 1.5251835346521132, - "learning_rate": 1.9527251013143338e-06, - "loss": 1.2408, - "step": 925 - }, - { - "epoch": 0.12553378973768048, - "grad_norm": 2.3326594121496087, - "learning_rate": 1.9525915845838942e-06, - "loss": 1.2583, - "step": 926 - }, - { - "epoch": 0.12566935538534535, - "grad_norm": 1.6055791984579688, - "learning_rate": 1.952457884153686e-06, - "loss": 1.2362, - "step": 927 - }, - { - "epoch": 0.12580492103301025, - "grad_norm": 1.5491879525430206, - "learning_rate": 1.952324000049494e-06, - "loss": 1.2691, - "step": 928 - }, - { - "epoch": 0.12594048668067512, - "grad_norm": 1.5590892503345732, - "learning_rate": 1.952189932297135e-06, - "loss": 1.2489, - "step": 929 - }, - { - "epoch": 0.12607605232834, - "grad_norm": 1.8469697510756622, - "learning_rate": 1.9520556809224643e-06, - "loss": 1.2739, - "step": 930 - }, - { - "epoch": 0.1262116179760049, - "grad_norm": 1.5485436947356637, - "learning_rate": 1.9519212459513702e-06, - "loss": 1.3113, - "step": 931 - }, - { - "epoch": 0.12634718362366976, - "grad_norm": 1.6380746384276927, - "learning_rate": 1.951786627409778e-06, - "loss": 1.2209, - "step": 932 - }, - { - "epoch": 0.12648274927133465, - "grad_norm": 1.8540978125985466, - "learning_rate": 1.9516518253236474e-06, - "loss": 1.2641, - "step": 933 - }, - { - "epoch": 0.12661831491899952, - "grad_norm": 1.9975992261991338, - "learning_rate": 1.9515168397189743e-06, - "loss": 1.2238, - "step": 934 - }, - { - "epoch": 0.1267538805666644, - "grad_norm": 1.5048399844714937, - "learning_rate": 1.95138167062179e-06, - "loss": 1.2511, - "step": 935 - }, - { - "epoch": 0.1268894462143293, - "grad_norm": 1.5726204659702656, - "learning_rate": 1.9512463180581595e-06, - "loss": 1.2512, - "step": 936 - }, - { - "epoch": 0.12702501186199416, - "grad_norm": 1.4102582231240288, - "learning_rate": 1.9511107820541857e-06, - "loss": 1.2321, - "step": 937 - }, - { - "epoch": 0.12716057750965906, - "grad_norm": 1.6845560577582603, - "learning_rate": 1.9509750626360053e-06, - "loss": 1.2357, - "step": 938 - }, - { - "epoch": 0.12729614315732393, - "grad_norm": 2.4101320418003174, - "learning_rate": 1.95083915982979e-06, - "loss": 1.2503, - "step": 939 - }, - { - "epoch": 0.1274317088049888, - "grad_norm": 1.7748304908683372, - "learning_rate": 1.950703073661749e-06, - "loss": 1.2797, - "step": 940 - }, - { - "epoch": 0.1275672744526537, - "grad_norm": 1.6825985708168958, - "learning_rate": 1.950566804158124e-06, - "loss": 1.2588, - "step": 941 - }, - { - "epoch": 0.12770284010031857, - "grad_norm": 2.318233717026581, - "learning_rate": 1.9504303513451944e-06, - "loss": 1.2325, - "step": 942 - }, - { - "epoch": 0.12783840574798347, - "grad_norm": 1.9955951570388242, - "learning_rate": 1.9502937152492737e-06, - "loss": 1.241, - "step": 943 - }, - { - "epoch": 0.12797397139564834, - "grad_norm": 3.811293611060677, - "learning_rate": 1.950156895896711e-06, - "loss": 1.2621, - "step": 944 - }, - { - "epoch": 0.12810953704331324, - "grad_norm": 4.146487348999009, - "learning_rate": 1.9500198933138914e-06, - "loss": 1.2391, - "step": 945 - }, - { - "epoch": 0.1282451026909781, - "grad_norm": 2.062762742642024, - "learning_rate": 1.949882707527234e-06, - "loss": 1.2337, - "step": 946 - }, - { - "epoch": 0.12838066833864298, - "grad_norm": 2.340283441362446, - "learning_rate": 1.949745338563195e-06, - "loss": 1.2417, - "step": 947 - }, - { - "epoch": 0.12851623398630788, - "grad_norm": 1.5092933902873382, - "learning_rate": 1.949607786448264e-06, - "loss": 1.2407, - "step": 948 - }, - { - "epoch": 0.12865179963397275, - "grad_norm": 1.7006953803196607, - "learning_rate": 1.9494700512089664e-06, - "loss": 1.2827, - "step": 949 - }, - { - "epoch": 0.12878736528163764, - "grad_norm": 1.6632783516972538, - "learning_rate": 1.949332132871865e-06, - "loss": 1.2231, - "step": 950 - }, - { - "epoch": 0.12892293092930251, - "grad_norm": 1.7847625417318536, - "learning_rate": 1.9491940314635553e-06, - "loss": 1.2286, - "step": 951 - }, - { - "epoch": 0.12905849657696739, - "grad_norm": 1.9323350468769132, - "learning_rate": 1.9490557470106686e-06, - "loss": 1.2835, - "step": 952 - }, - { - "epoch": 0.12919406222463228, - "grad_norm": 2.7392971333017626, - "learning_rate": 1.9489172795398727e-06, - "loss": 1.247, - "step": 953 - }, - { - "epoch": 0.12932962787229715, - "grad_norm": 2.133722951619094, - "learning_rate": 1.9487786290778696e-06, - "loss": 1.2522, - "step": 954 - }, - { - "epoch": 0.12946519351996205, - "grad_norm": 1.5538398639379472, - "learning_rate": 1.9486397956513975e-06, - "loss": 1.2428, - "step": 955 - }, - { - "epoch": 0.12960075916762692, - "grad_norm": 1.4564176615658888, - "learning_rate": 1.9485007792872285e-06, - "loss": 1.2831, - "step": 956 - }, - { - "epoch": 0.1297363248152918, - "grad_norm": 1.7431866935350133, - "learning_rate": 1.9483615800121713e-06, - "loss": 1.2384, - "step": 957 - }, - { - "epoch": 0.1298718904629567, - "grad_norm": 4.199055910669216, - "learning_rate": 1.9482221978530695e-06, - "loss": 1.2368, - "step": 958 - }, - { - "epoch": 0.13000745611062156, - "grad_norm": 1.9180469132147882, - "learning_rate": 1.9480826328368018e-06, - "loss": 1.2497, - "step": 959 - }, - { - "epoch": 0.13014302175828646, - "grad_norm": 2.1644490275887778, - "learning_rate": 1.9479428849902816e-06, - "loss": 1.2328, - "step": 960 - }, - { - "epoch": 0.13027858740595133, - "grad_norm": 3.6484802505987433, - "learning_rate": 1.9478029543404587e-06, - "loss": 1.2695, - "step": 961 - }, - { - "epoch": 0.13041415305361623, - "grad_norm": 1.704523904307557, - "learning_rate": 1.9476628409143177e-06, - "loss": 1.2703, - "step": 962 - }, - { - "epoch": 0.1305497187012811, - "grad_norm": 1.558489971669944, - "learning_rate": 1.9475225447388787e-06, - "loss": 1.2696, - "step": 963 - }, - { - "epoch": 0.13068528434894597, - "grad_norm": 1.6901874918740454, - "learning_rate": 1.9473820658411954e-06, - "loss": 1.2486, - "step": 964 - }, - { - "epoch": 0.13082084999661087, - "grad_norm": 1.9222331896112053, - "learning_rate": 1.9472414042483594e-06, - "loss": 1.238, - "step": 965 - }, - { - "epoch": 0.13095641564427574, - "grad_norm": 1.7530321550174606, - "learning_rate": 1.9471005599874955e-06, - "loss": 1.2607, - "step": 966 - }, - { - "epoch": 0.13109198129194063, - "grad_norm": 1.4503963193403409, - "learning_rate": 1.9469595330857644e-06, - "loss": 1.2408, - "step": 967 - }, - { - "epoch": 0.1312275469396055, - "grad_norm": 1.5980469437026408, - "learning_rate": 1.946818323570362e-06, - "loss": 1.2289, - "step": 968 - }, - { - "epoch": 0.13136311258727038, - "grad_norm": 1.4017840449695484, - "learning_rate": 1.9466769314685204e-06, - "loss": 1.2325, - "step": 969 - }, - { - "epoch": 0.13149867823493527, - "grad_norm": 1.9304585352014205, - "learning_rate": 1.9465353568075047e-06, - "loss": 1.2476, - "step": 970 - }, - { - "epoch": 0.13163424388260014, - "grad_norm": 2.01696243886089, - "learning_rate": 1.946393599614617e-06, - "loss": 1.269, - "step": 971 - }, - { - "epoch": 0.13176980953026504, - "grad_norm": 1.733295074817846, - "learning_rate": 1.9462516599171944e-06, - "loss": 1.2892, - "step": 972 - }, - { - "epoch": 0.1319053751779299, - "grad_norm": 1.4628268309194339, - "learning_rate": 1.946109537742608e-06, - "loss": 1.2021, - "step": 973 - }, - { - "epoch": 0.13204094082559478, - "grad_norm": 2.7535239576876442, - "learning_rate": 1.945967233118265e-06, - "loss": 1.2291, - "step": 974 - }, - { - "epoch": 0.13217650647325968, - "grad_norm": 2.2078161543026664, - "learning_rate": 1.945824746071609e-06, - "loss": 1.2288, - "step": 975 - }, - { - "epoch": 0.13231207212092455, - "grad_norm": 1.8435092818112777, - "learning_rate": 1.945682076630116e-06, - "loss": 1.2595, - "step": 976 - }, - { - "epoch": 0.13244763776858945, - "grad_norm": 3.6071763864409605, - "learning_rate": 1.9455392248212995e-06, - "loss": 1.2195, - "step": 977 - }, - { - "epoch": 0.13258320341625432, - "grad_norm": 2.0235196455044924, - "learning_rate": 1.945396190672707e-06, - "loss": 1.2358, - "step": 978 - }, - { - "epoch": 0.1327187690639192, - "grad_norm": 1.4486029875879691, - "learning_rate": 1.9452529742119214e-06, - "loss": 1.2534, - "step": 979 - }, - { - "epoch": 0.1328543347115841, - "grad_norm": 2.5352602551756123, - "learning_rate": 1.9451095754665613e-06, - "loss": 1.2354, - "step": 980 - }, - { - "epoch": 0.13298990035924896, - "grad_norm": 1.934007336805061, - "learning_rate": 1.94496599446428e-06, - "loss": 1.2105, - "step": 981 - }, - { - "epoch": 0.13312546600691386, - "grad_norm": 3.189438140346245, - "learning_rate": 1.9448222312327654e-06, - "loss": 1.2421, - "step": 982 - }, - { - "epoch": 0.13326103165457873, - "grad_norm": 3.5107164287035686, - "learning_rate": 1.944678285799742e-06, - "loss": 1.2391, - "step": 983 - }, - { - "epoch": 0.13339659730224362, - "grad_norm": 1.5396452199778563, - "learning_rate": 1.944534158192968e-06, - "loss": 1.217, - "step": 984 - }, - { - "epoch": 0.1335321629499085, - "grad_norm": 1.5770756667325352, - "learning_rate": 1.944389848440237e-06, - "loss": 1.2503, - "step": 985 - }, - { - "epoch": 0.13366772859757337, - "grad_norm": 1.5516526555796026, - "learning_rate": 1.9442453565693782e-06, - "loss": 1.2193, - "step": 986 - }, - { - "epoch": 0.13380329424523826, - "grad_norm": 2.21353888232779, - "learning_rate": 1.944100682608256e-06, - "loss": 1.2163, - "step": 987 - }, - { - "epoch": 0.13393885989290313, - "grad_norm": 1.5906559194041874, - "learning_rate": 1.943955826584769e-06, - "loss": 1.2093, - "step": 988 - }, - { - "epoch": 0.13407442554056803, - "grad_norm": 1.8270371307828708, - "learning_rate": 1.9438107885268525e-06, - "loss": 1.2416, - "step": 989 - }, - { - "epoch": 0.1342099911882329, - "grad_norm": 1.8214332420952695, - "learning_rate": 1.9436655684624755e-06, - "loss": 1.288, - "step": 990 - }, - { - "epoch": 0.13434555683589777, - "grad_norm": 1.7807802522939966, - "learning_rate": 1.9435201664196424e-06, - "loss": 1.2267, - "step": 991 - }, - { - "epoch": 0.13448112248356267, - "grad_norm": 1.4421310512404386, - "learning_rate": 1.9433745824263924e-06, - "loss": 1.2049, - "step": 992 - }, - { - "epoch": 0.13461668813122754, - "grad_norm": 3.0535123223330585, - "learning_rate": 1.943228816510801e-06, - "loss": 1.2697, - "step": 993 - }, - { - "epoch": 0.13475225377889244, - "grad_norm": 1.618002938654719, - "learning_rate": 1.943082868700978e-06, - "loss": 1.2454, - "step": 994 - }, - { - "epoch": 0.1348878194265573, - "grad_norm": 2.063114468061899, - "learning_rate": 1.9429367390250676e-06, - "loss": 1.2384, - "step": 995 - }, - { - "epoch": 0.13502338507422218, - "grad_norm": 1.6476688126295096, - "learning_rate": 1.942790427511251e-06, - "loss": 1.2167, - "step": 996 - }, - { - "epoch": 0.13515895072188708, - "grad_norm": 1.6522621324689257, - "learning_rate": 1.9426439341877412e-06, - "loss": 1.245, - "step": 997 - }, - { - "epoch": 0.13529451636955195, - "grad_norm": 4.629193745275285, - "learning_rate": 1.94249725908279e-06, - "loss": 1.2277, - "step": 998 - }, - { - "epoch": 0.13543008201721685, - "grad_norm": 1.5550512782376582, - "learning_rate": 1.942350402224682e-06, - "loss": 1.2688, - "step": 999 - }, - { - "epoch": 0.13556564766488172, - "grad_norm": 2.5833196831304255, - "learning_rate": 1.942203363641738e-06, - "loss": 1.2176, - "step": 1000 - }, - { - "epoch": 0.1357012133125466, - "grad_norm": 1.5065549043132132, - "learning_rate": 1.942056143362312e-06, - "loss": 1.2276, - "step": 1001 - }, - { - "epoch": 0.13583677896021149, - "grad_norm": 1.7019198883921853, - "learning_rate": 1.941908741414795e-06, - "loss": 1.2407, - "step": 1002 - }, - { - "epoch": 0.13597234460787636, - "grad_norm": 1.574869340432279, - "learning_rate": 1.941761157827612e-06, - "loss": 1.2219, - "step": 1003 - }, - { - "epoch": 0.13610791025554125, - "grad_norm": 1.410944734758737, - "learning_rate": 1.9416133926292236e-06, - "loss": 1.2221, - "step": 1004 - }, - { - "epoch": 0.13624347590320612, - "grad_norm": 2.788638790929034, - "learning_rate": 1.941465445848125e-06, - "loss": 1.2613, - "step": 1005 - }, - { - "epoch": 0.13637904155087102, - "grad_norm": 1.7568412809975187, - "learning_rate": 1.941317317512847e-06, - "loss": 1.2221, - "step": 1006 - }, - { - "epoch": 0.1365146071985359, - "grad_norm": 2.299969308708164, - "learning_rate": 1.9411690076519545e-06, - "loss": 1.1949, - "step": 1007 - }, - { - "epoch": 0.13665017284620076, - "grad_norm": 1.6568367021554524, - "learning_rate": 1.941020516294048e-06, - "loss": 1.2502, - "step": 1008 - }, - { - "epoch": 0.13678573849386566, - "grad_norm": 3.3068268915109935, - "learning_rate": 1.9408718434677625e-06, - "loss": 1.2696, - "step": 1009 - }, - { - "epoch": 0.13692130414153053, - "grad_norm": 3.684918638552213, - "learning_rate": 1.9407229892017694e-06, - "loss": 1.2494, - "step": 1010 - }, - { - "epoch": 0.13705686978919543, - "grad_norm": 2.1588618398920563, - "learning_rate": 1.940573953524773e-06, - "loss": 1.2418, - "step": 1011 - }, - { - "epoch": 0.1371924354368603, - "grad_norm": 1.721442174371149, - "learning_rate": 1.9404247364655145e-06, - "loss": 1.2246, - "step": 1012 - }, - { - "epoch": 0.13732800108452517, - "grad_norm": 2.222148292280711, - "learning_rate": 1.9402753380527684e-06, - "loss": 1.228, - "step": 1013 - }, - { - "epoch": 0.13746356673219007, - "grad_norm": 2.2430435221199696, - "learning_rate": 1.9401257583153456e-06, - "loss": 1.2168, - "step": 1014 - }, - { - "epoch": 0.13759913237985494, - "grad_norm": 2.3466447232708174, - "learning_rate": 1.9399759972820913e-06, - "loss": 1.2454, - "step": 1015 - }, - { - "epoch": 0.13773469802751984, - "grad_norm": 2.1238090837329273, - "learning_rate": 1.9398260549818856e-06, - "loss": 1.2413, - "step": 1016 - }, - { - "epoch": 0.1378702636751847, - "grad_norm": 2.1028257799796544, - "learning_rate": 1.9396759314436435e-06, - "loss": 1.2315, - "step": 1017 - }, - { - "epoch": 0.13800582932284958, - "grad_norm": 2.080874171713308, - "learning_rate": 1.939525626696316e-06, - "loss": 1.2405, - "step": 1018 - }, - { - "epoch": 0.13814139497051448, - "grad_norm": 1.5411331889190538, - "learning_rate": 1.9393751407688866e-06, - "loss": 1.2149, - "step": 1019 - }, - { - "epoch": 0.13827696061817935, - "grad_norm": 2.3341300867884187, - "learning_rate": 1.9392244736903773e-06, - "loss": 1.2877, - "step": 1020 - }, - { - "epoch": 0.13841252626584424, - "grad_norm": 1.5131619221438795, - "learning_rate": 1.9390736254898414e-06, - "loss": 1.2436, - "step": 1021 - }, - { - "epoch": 0.1385480919135091, - "grad_norm": 1.9827983977686559, - "learning_rate": 1.9389225961963698e-06, - "loss": 1.2891, - "step": 1022 - }, - { - "epoch": 0.138683657561174, - "grad_norm": 1.7180846359585797, - "learning_rate": 1.9387713858390863e-06, - "loss": 1.2498, - "step": 1023 - }, - { - "epoch": 0.13881922320883888, - "grad_norm": 1.796373587435938, - "learning_rate": 1.938619994447152e-06, - "loss": 1.2135, - "step": 1024 - }, - { - "epoch": 0.13895478885650375, - "grad_norm": 2.8750565975614086, - "learning_rate": 1.9384684220497604e-06, - "loss": 1.248, - "step": 1025 - }, - { - "epoch": 0.13909035450416865, - "grad_norm": 1.8841064704704917, - "learning_rate": 1.9383166686761416e-06, - "loss": 1.2845, - "step": 1026 - }, - { - "epoch": 0.13922592015183352, - "grad_norm": 1.8522660131010587, - "learning_rate": 1.9381647343555596e-06, - "loss": 1.2784, - "step": 1027 - }, - { - "epoch": 0.13936148579949842, - "grad_norm": 2.2344481844928277, - "learning_rate": 1.938012619117314e-06, - "loss": 1.237, - "step": 1028 - }, - { - "epoch": 0.1394970514471633, - "grad_norm": 1.6358962620147233, - "learning_rate": 1.9378603229907393e-06, - "loss": 1.2152, - "step": 1029 - }, - { - "epoch": 0.13963261709482816, - "grad_norm": 2.1623826403018582, - "learning_rate": 1.937707846005204e-06, - "loss": 1.2126, - "step": 1030 - }, - { - "epoch": 0.13976818274249306, - "grad_norm": 1.5030566097659113, - "learning_rate": 1.9375551881901127e-06, - "loss": 1.2304, - "step": 1031 - }, - { - "epoch": 0.13990374839015793, - "grad_norm": 1.442164329931959, - "learning_rate": 1.937402349574904e-06, - "loss": 1.2061, - "step": 1032 - }, - { - "epoch": 0.14003931403782283, - "grad_norm": 2.1453054942473244, - "learning_rate": 1.9372493301890517e-06, - "loss": 1.2333, - "step": 1033 - }, - { - "epoch": 0.1401748796854877, - "grad_norm": 1.4595766186205736, - "learning_rate": 1.9370961300620636e-06, - "loss": 1.2615, - "step": 1034 - }, - { - "epoch": 0.14031044533315257, - "grad_norm": 1.7408383274526922, - "learning_rate": 1.9369427492234846e-06, - "loss": 1.1919, - "step": 1035 - }, - { - "epoch": 0.14044601098081747, - "grad_norm": 1.6629685247415986, - "learning_rate": 1.9367891877028917e-06, - "loss": 1.2413, - "step": 1036 - }, - { - "epoch": 0.14058157662848234, - "grad_norm": 1.9195332910945415, - "learning_rate": 1.9366354455298987e-06, - "loss": 1.2698, - "step": 1037 - }, - { - "epoch": 0.14071714227614723, - "grad_norm": 1.9719464111009932, - "learning_rate": 1.936481522734153e-06, - "loss": 1.2528, - "step": 1038 - }, - { - "epoch": 0.1408527079238121, - "grad_norm": 1.5905040641842048, - "learning_rate": 1.9363274193453383e-06, - "loss": 1.2351, - "step": 1039 - }, - { - "epoch": 0.14098827357147697, - "grad_norm": 1.9179926180865963, - "learning_rate": 1.9361731353931714e-06, - "loss": 1.2337, - "step": 1040 - }, - { - "epoch": 0.14112383921914187, - "grad_norm": 1.4822847307782494, - "learning_rate": 1.936018670907405e-06, - "loss": 1.245, - "step": 1041 - }, - { - "epoch": 0.14125940486680674, - "grad_norm": 1.4730599140408605, - "learning_rate": 1.935864025917827e-06, - "loss": 1.2531, - "step": 1042 - }, - { - "epoch": 0.14139497051447164, - "grad_norm": 1.4692166809858849, - "learning_rate": 1.935709200454258e-06, - "loss": 1.222, - "step": 1043 - }, - { - "epoch": 0.1415305361621365, - "grad_norm": 1.7454797775346012, - "learning_rate": 1.9355541945465563e-06, - "loss": 1.298, - "step": 1044 - }, - { - "epoch": 0.1416661018098014, - "grad_norm": 5.8620166782359275, - "learning_rate": 1.9353990082246127e-06, - "loss": 1.3151, - "step": 1045 - }, - { - "epoch": 0.14180166745746628, - "grad_norm": 1.7086252052887503, - "learning_rate": 1.935243641518354e-06, - "loss": 1.2816, - "step": 1046 - }, - { - "epoch": 0.14193723310513115, - "grad_norm": 1.566179966940149, - "learning_rate": 1.935088094457742e-06, - "loss": 1.2654, - "step": 1047 - }, - { - "epoch": 0.14207279875279605, - "grad_norm": 1.7835897301884485, - "learning_rate": 1.9349323670727717e-06, - "loss": 1.2154, - "step": 1048 - }, - { - "epoch": 0.14220836440046092, - "grad_norm": 1.9926627062192526, - "learning_rate": 1.9347764593934743e-06, - "loss": 1.2928, - "step": 1049 - }, - { - "epoch": 0.14234393004812582, - "grad_norm": 1.5588812230665556, - "learning_rate": 1.934620371449915e-06, - "loss": 1.274, - "step": 1050 - }, - { - "epoch": 0.1424794956957907, - "grad_norm": 1.9580567973424576, - "learning_rate": 1.934464103272195e-06, - "loss": 1.2608, - "step": 1051 - }, - { - "epoch": 0.14261506134345556, - "grad_norm": 1.717027785698081, - "learning_rate": 1.9343076548904483e-06, - "loss": 1.2553, - "step": 1052 - }, - { - "epoch": 0.14275062699112046, - "grad_norm": 1.7913816875644528, - "learning_rate": 1.9341510263348457e-06, - "loss": 1.2208, - "step": 1053 - }, - { - "epoch": 0.14288619263878533, - "grad_norm": 1.5720918716416261, - "learning_rate": 1.9339942176355916e-06, - "loss": 1.282, - "step": 1054 - }, - { - "epoch": 0.14302175828645022, - "grad_norm": 1.5339229858413248, - "learning_rate": 1.933837228822925e-06, - "loss": 1.2646, - "step": 1055 - }, - { - "epoch": 0.1431573239341151, - "grad_norm": 1.761629118117543, - "learning_rate": 1.9336800599271203e-06, - "loss": 1.2709, - "step": 1056 - }, - { - "epoch": 0.14329288958177996, - "grad_norm": 1.5855380432133905, - "learning_rate": 1.933522710978486e-06, - "loss": 1.245, - "step": 1057 - }, - { - "epoch": 0.14342845522944486, - "grad_norm": 1.8979733295549495, - "learning_rate": 1.9333651820073655e-06, - "loss": 1.2583, - "step": 1058 - }, - { - "epoch": 0.14356402087710973, - "grad_norm": 2.479709621772689, - "learning_rate": 1.933207473044137e-06, - "loss": 1.2134, - "step": 1059 - }, - { - "epoch": 0.14369958652477463, - "grad_norm": 1.576859906610863, - "learning_rate": 1.9330495841192138e-06, - "loss": 1.2718, - "step": 1060 - }, - { - "epoch": 0.1438351521724395, - "grad_norm": 3.540699839668719, - "learning_rate": 1.9328915152630435e-06, - "loss": 1.2267, - "step": 1061 - }, - { - "epoch": 0.1439707178201044, - "grad_norm": 1.5705515201510214, - "learning_rate": 1.932733266506108e-06, - "loss": 1.2354, - "step": 1062 - }, - { - "epoch": 0.14410628346776927, - "grad_norm": 2.16139489254235, - "learning_rate": 1.9325748378789246e-06, - "loss": 1.2447, - "step": 1063 - }, - { - "epoch": 0.14424184911543414, - "grad_norm": 1.465821364618215, - "learning_rate": 1.9324162294120453e-06, - "loss": 1.2747, - "step": 1064 - }, - { - "epoch": 0.14437741476309904, - "grad_norm": 2.504830942337657, - "learning_rate": 1.9322574411360557e-06, - "loss": 1.2474, - "step": 1065 - }, - { - "epoch": 0.1445129804107639, - "grad_norm": 2.1908325490122413, - "learning_rate": 1.932098473081578e-06, - "loss": 1.2621, - "step": 1066 - }, - { - "epoch": 0.1446485460584288, - "grad_norm": 1.6381375697961302, - "learning_rate": 1.931939325279267e-06, - "loss": 1.2613, - "step": 1067 - }, - { - "epoch": 0.14478411170609368, - "grad_norm": 1.5216608961750206, - "learning_rate": 1.9317799977598136e-06, - "loss": 1.2224, - "step": 1068 - }, - { - "epoch": 0.14491967735375855, - "grad_norm": 1.5274998652345184, - "learning_rate": 1.9316204905539425e-06, - "loss": 1.2635, - "step": 1069 - }, - { - "epoch": 0.14505524300142345, - "grad_norm": 1.6531542168770128, - "learning_rate": 1.9314608036924133e-06, - "loss": 1.2686, - "step": 1070 - }, - { - "epoch": 0.14519080864908832, - "grad_norm": 1.6320942262711018, - "learning_rate": 1.931300937206021e-06, - "loss": 1.2688, - "step": 1071 - }, - { - "epoch": 0.1453263742967532, - "grad_norm": 1.7134817181697977, - "learning_rate": 1.931140891125594e-06, - "loss": 1.2592, - "step": 1072 - }, - { - "epoch": 0.14546193994441808, - "grad_norm": 1.8990782502506465, - "learning_rate": 1.9309806654819963e-06, - "loss": 1.3013, - "step": 1073 - }, - { - "epoch": 0.14559750559208295, - "grad_norm": 2.8992076134083056, - "learning_rate": 1.9308202603061258e-06, - "loss": 1.2322, - "step": 1074 - }, - { - "epoch": 0.14573307123974785, - "grad_norm": 1.7924359275620443, - "learning_rate": 1.9306596756289155e-06, - "loss": 1.2646, - "step": 1075 - }, - { - "epoch": 0.14586863688741272, - "grad_norm": 2.589357184122086, - "learning_rate": 1.930498911481333e-06, - "loss": 1.2202, - "step": 1076 - }, - { - "epoch": 0.14600420253507762, - "grad_norm": 1.514507062572406, - "learning_rate": 1.9303379678943805e-06, - "loss": 1.2618, - "step": 1077 - }, - { - "epoch": 0.1461397681827425, - "grad_norm": 2.21857224424984, - "learning_rate": 1.9301768448990946e-06, - "loss": 1.2187, - "step": 1078 - }, - { - "epoch": 0.14627533383040736, - "grad_norm": 1.684930269417547, - "learning_rate": 1.930015542526546e-06, - "loss": 1.2324, - "step": 1079 - }, - { - "epoch": 0.14641089947807226, - "grad_norm": 1.6980941124964881, - "learning_rate": 1.9298540608078417e-06, - "loss": 1.2578, - "step": 1080 - }, - { - "epoch": 0.14654646512573713, - "grad_norm": 1.9283474314568836, - "learning_rate": 1.9296923997741216e-06, - "loss": 1.2642, - "step": 1081 - }, - { - "epoch": 0.14668203077340203, - "grad_norm": 1.9871472888123223, - "learning_rate": 1.9295305594565604e-06, - "loss": 1.2381, - "step": 1082 - }, - { - "epoch": 0.1468175964210669, - "grad_norm": 1.6381357187314456, - "learning_rate": 1.9293685398863683e-06, - "loss": 1.2286, - "step": 1083 - }, - { - "epoch": 0.1469531620687318, - "grad_norm": 2.0381799474763467, - "learning_rate": 1.929206341094789e-06, - "loss": 1.2484, - "step": 1084 - }, - { - "epoch": 0.14708872771639667, - "grad_norm": 1.5780553269938595, - "learning_rate": 1.9290439631131018e-06, - "loss": 1.2626, - "step": 1085 - }, - { - "epoch": 0.14722429336406154, - "grad_norm": 1.5213523064816792, - "learning_rate": 1.9288814059726196e-06, - "loss": 1.2394, - "step": 1086 - }, - { - "epoch": 0.14735985901172644, - "grad_norm": 1.4910641375543334, - "learning_rate": 1.92871866970469e-06, - "loss": 1.2201, - "step": 1087 - }, - { - "epoch": 0.1474954246593913, - "grad_norm": 1.7718289793966837, - "learning_rate": 1.9285557543406964e-06, - "loss": 1.2921, - "step": 1088 - }, - { - "epoch": 0.1476309903070562, - "grad_norm": 2.234223464086942, - "learning_rate": 1.928392659912055e-06, - "loss": 1.2693, - "step": 1089 - }, - { - "epoch": 0.14776655595472107, - "grad_norm": 1.5484761311109212, - "learning_rate": 1.9282293864502176e-06, - "loss": 1.2324, - "step": 1090 - }, - { - "epoch": 0.14790212160238594, - "grad_norm": 1.623377859674405, - "learning_rate": 1.92806593398667e-06, - "loss": 1.2116, - "step": 1091 - }, - { - "epoch": 0.14803768725005084, - "grad_norm": 1.7609292215800592, - "learning_rate": 1.9279023025529324e-06, - "loss": 1.2509, - "step": 1092 - }, - { - "epoch": 0.1481732528977157, - "grad_norm": 1.5743545669382824, - "learning_rate": 1.9277384921805604e-06, - "loss": 1.2395, - "step": 1093 - }, - { - "epoch": 0.1483088185453806, - "grad_norm": 1.4889518523415273, - "learning_rate": 1.927574502901143e-06, - "loss": 1.2201, - "step": 1094 - }, - { - "epoch": 0.14844438419304548, - "grad_norm": 1.547272045781177, - "learning_rate": 1.927410334746305e-06, - "loss": 1.2674, - "step": 1095 - }, - { - "epoch": 0.14857994984071035, - "grad_norm": 1.6071091633340264, - "learning_rate": 1.927245987747704e-06, - "loss": 1.254, - "step": 1096 - }, - { - "epoch": 0.14871551548837525, - "grad_norm": 1.4398518326977447, - "learning_rate": 1.9270814619370337e-06, - "loss": 1.2594, - "step": 1097 - }, - { - "epoch": 0.14885108113604012, - "grad_norm": 1.3711175378709786, - "learning_rate": 1.9269167573460217e-06, - "loss": 1.2635, - "step": 1098 - }, - { - "epoch": 0.14898664678370502, - "grad_norm": 1.6961472382439038, - "learning_rate": 1.9267518740064294e-06, - "loss": 1.2466, - "step": 1099 - }, - { - "epoch": 0.1491222124313699, - "grad_norm": 1.4329585743434698, - "learning_rate": 1.9265868119500538e-06, - "loss": 1.2132, - "step": 1100 - }, - { - "epoch": 0.1492577780790348, - "grad_norm": 1.4695568548420856, - "learning_rate": 1.926421571208725e-06, - "loss": 1.209, - "step": 1101 - }, - { - "epoch": 0.14939334372669966, - "grad_norm": 1.7605132260240421, - "learning_rate": 1.9262561518143095e-06, - "loss": 1.2825, - "step": 1102 - }, - { - "epoch": 0.14952890937436453, - "grad_norm": 1.3301928667894327, - "learning_rate": 1.9260905537987063e-06, - "loss": 1.2508, - "step": 1103 - }, - { - "epoch": 0.14966447502202943, - "grad_norm": 6.929964487614914, - "learning_rate": 1.92592477719385e-06, - "loss": 1.201, - "step": 1104 - }, - { - "epoch": 0.1498000406696943, - "grad_norm": 2.5866661583793262, - "learning_rate": 1.925758822031709e-06, - "loss": 1.2395, - "step": 1105 - }, - { - "epoch": 0.1499356063173592, - "grad_norm": 19.370744269261632, - "learning_rate": 1.9255926883442867e-06, - "loss": 1.2643, - "step": 1106 - }, - { - "epoch": 0.15007117196502406, - "grad_norm": 2.0639397159644135, - "learning_rate": 1.9254263761636207e-06, - "loss": 1.2371, - "step": 1107 - }, - { - "epoch": 0.15020673761268893, - "grad_norm": 1.5614205405619115, - "learning_rate": 1.925259885521783e-06, - "loss": 1.2334, - "step": 1108 - }, - { - "epoch": 0.15034230326035383, - "grad_norm": 1.8479651083743123, - "learning_rate": 1.92509321645088e-06, - "loss": 1.2765, - "step": 1109 - }, - { - "epoch": 0.1504778689080187, - "grad_norm": 1.616766122076782, - "learning_rate": 1.924926368983052e-06, - "loss": 1.2343, - "step": 1110 - }, - { - "epoch": 0.1506134345556836, - "grad_norm": 1.382961203809606, - "learning_rate": 1.9247593431504756e-06, - "loss": 1.2137, - "step": 1111 - }, - { - "epoch": 0.15074900020334847, - "grad_norm": 1.76848505449108, - "learning_rate": 1.9245921389853588e-06, - "loss": 1.2257, - "step": 1112 - }, - { - "epoch": 0.15088456585101334, - "grad_norm": 1.6339262343198608, - "learning_rate": 1.9244247565199463e-06, - "loss": 1.2185, - "step": 1113 - }, - { - "epoch": 0.15102013149867824, - "grad_norm": 1.4410188654277871, - "learning_rate": 1.9242571957865165e-06, - "loss": 1.2851, - "step": 1114 - }, - { - "epoch": 0.1511556971463431, - "grad_norm": 3.6892103596362236, - "learning_rate": 1.924089456817382e-06, - "loss": 1.23, - "step": 1115 - }, - { - "epoch": 0.151291262794008, - "grad_norm": 1.717427087138512, - "learning_rate": 1.92392153964489e-06, - "loss": 1.2438, - "step": 1116 - }, - { - "epoch": 0.15142682844167288, - "grad_norm": 1.5144565294721724, - "learning_rate": 1.923753444301423e-06, - "loss": 1.2566, - "step": 1117 - }, - { - "epoch": 0.15156239408933775, - "grad_norm": 2.1200251198755597, - "learning_rate": 1.923585170819395e-06, - "loss": 1.2408, - "step": 1118 - }, - { - "epoch": 0.15169795973700265, - "grad_norm": 1.5959484013162997, - "learning_rate": 1.923416719231257e-06, - "loss": 1.2386, - "step": 1119 - }, - { - "epoch": 0.15183352538466752, - "grad_norm": 1.6121421127344069, - "learning_rate": 1.9232480895694945e-06, - "loss": 1.2093, - "step": 1120 - }, - { - "epoch": 0.15196909103233242, - "grad_norm": 2.2639654161351976, - "learning_rate": 1.9230792818666252e-06, - "loss": 1.2231, - "step": 1121 - }, - { - "epoch": 0.15210465667999729, - "grad_norm": 1.8090956013049044, - "learning_rate": 1.9229102961552026e-06, - "loss": 1.2751, - "step": 1122 - }, - { - "epoch": 0.15224022232766218, - "grad_norm": 1.7833830803799655, - "learning_rate": 1.9227411324678146e-06, - "loss": 1.2246, - "step": 1123 - }, - { - "epoch": 0.15237578797532705, - "grad_norm": 1.990772546781389, - "learning_rate": 1.922571790837083e-06, - "loss": 1.2535, - "step": 1124 - }, - { - "epoch": 0.15251135362299192, - "grad_norm": 1.6577418607565044, - "learning_rate": 1.9224022712956635e-06, - "loss": 1.2145, - "step": 1125 - }, - { - "epoch": 0.15264691927065682, - "grad_norm": 1.694170600213874, - "learning_rate": 1.922232573876247e-06, - "loss": 1.2227, - "step": 1126 - }, - { - "epoch": 0.1527824849183217, - "grad_norm": 1.781975288379602, - "learning_rate": 1.922062698611559e-06, - "loss": 1.249, - "step": 1127 - }, - { - "epoch": 0.1529180505659866, - "grad_norm": 1.9529326466284553, - "learning_rate": 1.921892645534357e-06, - "loss": 1.2876, - "step": 1128 - }, - { - "epoch": 0.15305361621365146, - "grad_norm": 3.1682936404836433, - "learning_rate": 1.9217224146774357e-06, - "loss": 1.2357, - "step": 1129 - }, - { - "epoch": 0.15318918186131633, - "grad_norm": 1.87652476818638, - "learning_rate": 1.921552006073622e-06, - "loss": 1.2294, - "step": 1130 - }, - { - "epoch": 0.15332474750898123, - "grad_norm": 1.5612227816075344, - "learning_rate": 1.9213814197557787e-06, - "loss": 1.2478, - "step": 1131 - }, - { - "epoch": 0.1534603131566461, - "grad_norm": 1.798022402090002, - "learning_rate": 1.9212106557568016e-06, - "loss": 1.2221, - "step": 1132 - }, - { - "epoch": 0.153595878804311, - "grad_norm": 1.5537937290506243, - "learning_rate": 1.9210397141096206e-06, - "loss": 1.2212, - "step": 1133 - }, - { - "epoch": 0.15373144445197587, - "grad_norm": 2.6531505752624893, - "learning_rate": 1.9208685948472014e-06, - "loss": 1.2528, - "step": 1134 - }, - { - "epoch": 0.15386701009964074, - "grad_norm": 2.0741859638859834, - "learning_rate": 1.9206972980025426e-06, - "loss": 1.2135, - "step": 1135 - }, - { - "epoch": 0.15400257574730564, - "grad_norm": 1.9547774873773638, - "learning_rate": 1.9205258236086773e-06, - "loss": 1.2487, - "step": 1136 - }, - { - "epoch": 0.1541381413949705, - "grad_norm": 2.490340225160739, - "learning_rate": 1.920354171698673e-06, - "loss": 1.2241, - "step": 1137 - }, - { - "epoch": 0.1542737070426354, - "grad_norm": 1.6602771040225255, - "learning_rate": 1.9201823423056315e-06, - "loss": 1.2469, - "step": 1138 - }, - { - "epoch": 0.15440927269030028, - "grad_norm": 1.5894970894257991, - "learning_rate": 1.920010335462689e-06, - "loss": 1.2474, - "step": 1139 - }, - { - "epoch": 0.15454483833796515, - "grad_norm": 1.4475018218797087, - "learning_rate": 1.9198381512030154e-06, - "loss": 1.2674, - "step": 1140 - }, - { - "epoch": 0.15468040398563004, - "grad_norm": 2.2686122524967702, - "learning_rate": 1.919665789559815e-06, - "loss": 1.2041, - "step": 1141 - }, - { - "epoch": 0.15481596963329491, - "grad_norm": 4.755632879202707, - "learning_rate": 1.9194932505663265e-06, - "loss": 1.2261, - "step": 1142 - }, - { - "epoch": 0.1549515352809598, - "grad_norm": 1.5154836531519884, - "learning_rate": 1.9193205342558227e-06, - "loss": 1.2387, - "step": 1143 - }, - { - "epoch": 0.15508710092862468, - "grad_norm": 2.1036614308386383, - "learning_rate": 1.9191476406616107e-06, - "loss": 1.2435, - "step": 1144 - }, - { - "epoch": 0.15522266657628958, - "grad_norm": 1.5064555355427527, - "learning_rate": 1.918974569817031e-06, - "loss": 1.2028, - "step": 1145 - }, - { - "epoch": 0.15535823222395445, - "grad_norm": 1.5956690421817357, - "learning_rate": 1.9188013217554596e-06, - "loss": 1.2146, - "step": 1146 - }, - { - "epoch": 0.15549379787161932, - "grad_norm": 1.509386506319828, - "learning_rate": 1.918627896510306e-06, - "loss": 1.259, - "step": 1147 - }, - { - "epoch": 0.15562936351928422, - "grad_norm": 1.672563204754197, - "learning_rate": 1.9184542941150143e-06, - "loss": 1.2486, - "step": 1148 - }, - { - "epoch": 0.1557649291669491, - "grad_norm": 1.7490006335791861, - "learning_rate": 1.9182805146030614e-06, - "loss": 1.2295, - "step": 1149 - }, - { - "epoch": 0.155900494814614, - "grad_norm": 3.763672108816059, - "learning_rate": 1.9181065580079593e-06, - "loss": 1.2474, - "step": 1150 - }, - { - "epoch": 0.15603606046227886, - "grad_norm": 1.5024007021267705, - "learning_rate": 1.917932424363255e-06, - "loss": 1.2269, - "step": 1151 - }, - { - "epoch": 0.15617162610994373, - "grad_norm": 2.3766770980247456, - "learning_rate": 1.9177581137025284e-06, - "loss": 1.2431, - "step": 1152 - }, - { - "epoch": 0.15630719175760863, - "grad_norm": 1.5447336328246781, - "learning_rate": 1.9175836260593937e-06, - "loss": 1.2087, - "step": 1153 - }, - { - "epoch": 0.1564427574052735, - "grad_norm": 2.7889290329424754, - "learning_rate": 1.9174089614674998e-06, - "loss": 1.2441, - "step": 1154 - }, - { - "epoch": 0.1565783230529384, - "grad_norm": 1.7774401376333266, - "learning_rate": 1.9172341199605293e-06, - "loss": 1.2176, - "step": 1155 - }, - { - "epoch": 0.15671388870060327, - "grad_norm": 1.4469050520054572, - "learning_rate": 1.9170591015721987e-06, - "loss": 1.2107, - "step": 1156 - }, - { - "epoch": 0.15684945434826814, - "grad_norm": 1.6442936470501763, - "learning_rate": 1.9168839063362595e-06, - "loss": 1.226, - "step": 1157 - }, - { - "epoch": 0.15698501999593303, - "grad_norm": 1.7311151737691326, - "learning_rate": 1.9167085342864962e-06, - "loss": 1.1854, - "step": 1158 - }, - { - "epoch": 0.1571205856435979, - "grad_norm": 1.7632834396197599, - "learning_rate": 1.9165329854567285e-06, - "loss": 1.2083, - "step": 1159 - }, - { - "epoch": 0.1572561512912628, - "grad_norm": 1.5629431299353793, - "learning_rate": 1.916357259880809e-06, - "loss": 1.2286, - "step": 1160 - }, - { - "epoch": 0.15739171693892767, - "grad_norm": 1.780901606881394, - "learning_rate": 1.916181357592625e-06, - "loss": 1.2614, - "step": 1161 - }, - { - "epoch": 0.15752728258659257, - "grad_norm": 1.6487292480399713, - "learning_rate": 1.916005278626098e-06, - "loss": 1.2467, - "step": 1162 - }, - { - "epoch": 0.15766284823425744, - "grad_norm": 4.940013726523356, - "learning_rate": 1.915829023015184e-06, - "loss": 1.2655, - "step": 1163 - }, - { - "epoch": 0.1577984138819223, - "grad_norm": 1.7259005643464387, - "learning_rate": 1.915652590793872e-06, - "loss": 1.2395, - "step": 1164 - }, - { - "epoch": 0.1579339795295872, - "grad_norm": 1.525014882541036, - "learning_rate": 1.9154759819961854e-06, - "loss": 1.2373, - "step": 1165 - }, - { - "epoch": 0.15806954517725208, - "grad_norm": 1.6222758131718455, - "learning_rate": 1.915299196656182e-06, - "loss": 1.2503, - "step": 1166 - }, - { - "epoch": 0.15820511082491698, - "grad_norm": 1.6840964910118656, - "learning_rate": 1.9151222348079535e-06, - "loss": 1.2671, - "step": 1167 - }, - { - "epoch": 0.15834067647258185, - "grad_norm": 1.5257174692329623, - "learning_rate": 1.9149450964856254e-06, - "loss": 1.2846, - "step": 1168 - }, - { - "epoch": 0.15847624212024672, - "grad_norm": 1.706683071505236, - "learning_rate": 1.914767781723358e-06, - "loss": 1.2292, - "step": 1169 - }, - { - "epoch": 0.15861180776791162, - "grad_norm": 1.6023260113111397, - "learning_rate": 1.914590290555344e-06, - "loss": 1.2554, - "step": 1170 - }, - { - "epoch": 0.1587473734155765, - "grad_norm": 1.5406375554359437, - "learning_rate": 1.9144126230158124e-06, - "loss": 1.2337, - "step": 1171 - }, - { - "epoch": 0.15888293906324139, - "grad_norm": 1.5614017910805822, - "learning_rate": 1.9142347791390242e-06, - "loss": 1.3002, - "step": 1172 - }, - { - "epoch": 0.15901850471090626, - "grad_norm": 1.4215292028143196, - "learning_rate": 1.9140567589592755e-06, - "loss": 1.1813, - "step": 1173 - }, - { - "epoch": 0.15915407035857113, - "grad_norm": 1.810761761200612, - "learning_rate": 1.9138785625108955e-06, - "loss": 1.2798, - "step": 1174 - }, - { - "epoch": 0.15928963600623602, - "grad_norm": 1.4550924451753835, - "learning_rate": 1.9137001898282484e-06, - "loss": 1.211, - "step": 1175 - }, - { - "epoch": 0.1594252016539009, - "grad_norm": 1.5473973021249587, - "learning_rate": 1.9135216409457327e-06, - "loss": 1.2634, - "step": 1176 - }, - { - "epoch": 0.1595607673015658, - "grad_norm": 1.6378708174674714, - "learning_rate": 1.913342915897779e-06, - "loss": 1.239, - "step": 1177 - }, - { - "epoch": 0.15969633294923066, - "grad_norm": 1.7654457986608207, - "learning_rate": 1.9131640147188534e-06, - "loss": 1.2225, - "step": 1178 - }, - { - "epoch": 0.15983189859689553, - "grad_norm": 1.6029160562752705, - "learning_rate": 1.912984937443456e-06, - "loss": 1.2444, - "step": 1179 - }, - { - "epoch": 0.15996746424456043, - "grad_norm": 1.4333996312722654, - "learning_rate": 1.9128056841061197e-06, - "loss": 1.2737, - "step": 1180 - }, - { - "epoch": 0.1601030298922253, - "grad_norm": 2.069719950627627, - "learning_rate": 1.912626254741413e-06, - "loss": 1.2314, - "step": 1181 - }, - { - "epoch": 0.1602385955398902, - "grad_norm": 1.8788936096644884, - "learning_rate": 1.912446649383936e-06, - "loss": 1.236, - "step": 1182 - }, - { - "epoch": 0.16037416118755507, - "grad_norm": 1.5950967914701581, - "learning_rate": 1.9122668680683255e-06, - "loss": 1.2442, - "step": 1183 - }, - { - "epoch": 0.16050972683521997, - "grad_norm": 2.6976221081388667, - "learning_rate": 1.9120869108292504e-06, - "loss": 1.2089, - "step": 1184 - }, - { - "epoch": 0.16064529248288484, - "grad_norm": 1.6255358878455806, - "learning_rate": 1.9119067777014146e-06, - "loss": 1.2258, - "step": 1185 - }, - { - "epoch": 0.1607808581305497, - "grad_norm": 12.487634551788583, - "learning_rate": 1.9117264687195546e-06, - "loss": 1.2753, - "step": 1186 - }, - { - "epoch": 0.1609164237782146, - "grad_norm": 15.564762236857428, - "learning_rate": 1.911545983918442e-06, - "loss": 1.2464, - "step": 1187 - }, - { - "epoch": 0.16105198942587948, - "grad_norm": 1.6242780976490165, - "learning_rate": 1.911365323332881e-06, - "loss": 1.2278, - "step": 1188 - }, - { - "epoch": 0.16118755507354438, - "grad_norm": 1.8438710182797384, - "learning_rate": 1.9111844869977123e-06, - "loss": 1.2856, - "step": 1189 - }, - { - "epoch": 0.16132312072120925, - "grad_norm": 2.0417358295332217, - "learning_rate": 1.911003474947807e-06, - "loss": 1.2283, - "step": 1190 - }, - { - "epoch": 0.16145868636887412, - "grad_norm": 2.473317266962249, - "learning_rate": 1.910822287218073e-06, - "loss": 1.1957, - "step": 1191 - }, - { - "epoch": 0.16159425201653901, - "grad_norm": 2.4852478328023624, - "learning_rate": 1.9106409238434503e-06, - "loss": 1.2027, - "step": 1192 - }, - { - "epoch": 0.16172981766420388, - "grad_norm": 1.5432661128407745, - "learning_rate": 1.9104593848589137e-06, - "loss": 1.2377, - "step": 1193 - }, - { - "epoch": 0.16186538331186878, - "grad_norm": 2.0349679231482205, - "learning_rate": 1.9102776702994713e-06, - "loss": 1.257, - "step": 1194 - }, - { - "epoch": 0.16200094895953365, - "grad_norm": 1.684171345537619, - "learning_rate": 1.9100957802001654e-06, - "loss": 1.2242, - "step": 1195 - }, - { - "epoch": 0.16213651460719852, - "grad_norm": 1.70763519729722, - "learning_rate": 1.9099137145960724e-06, - "loss": 1.2475, - "step": 1196 - }, - { - "epoch": 0.16227208025486342, - "grad_norm": 4.0869004632376615, - "learning_rate": 1.909731473522302e-06, - "loss": 1.2311, - "step": 1197 - }, - { - "epoch": 0.1624076459025283, - "grad_norm": 2.5110663095637156, - "learning_rate": 1.9095490570139977e-06, - "loss": 1.2088, - "step": 1198 - }, - { - "epoch": 0.1625432115501932, - "grad_norm": 1.7364312627774643, - "learning_rate": 1.9093664651063375e-06, - "loss": 1.238, - "step": 1199 - }, - { - "epoch": 0.16267877719785806, - "grad_norm": 2.420896023366959, - "learning_rate": 1.9091836978345323e-06, - "loss": 1.2116, - "step": 1200 - }, - { - "epoch": 0.16281434284552296, - "grad_norm": 1.5908414163359035, - "learning_rate": 1.909000755233828e-06, - "loss": 1.2444, - "step": 1201 - }, - { - "epoch": 0.16294990849318783, - "grad_norm": 1.6358950548825064, - "learning_rate": 1.908817637339503e-06, - "loss": 1.2023, - "step": 1202 - }, - { - "epoch": 0.1630854741408527, - "grad_norm": 1.9549365335637314, - "learning_rate": 1.9086343441868706e-06, - "loss": 1.2205, - "step": 1203 - }, - { - "epoch": 0.1632210397885176, - "grad_norm": 1.881001839422245, - "learning_rate": 1.908450875811277e-06, - "loss": 1.2732, - "step": 1204 - }, - { - "epoch": 0.16335660543618247, - "grad_norm": 1.3809549840052702, - "learning_rate": 1.908267232248103e-06, - "loss": 1.2525, - "step": 1205 - }, - { - "epoch": 0.16349217108384737, - "grad_norm": 1.8758952208296058, - "learning_rate": 1.9080834135327624e-06, - "loss": 1.2481, - "step": 1206 - }, - { - "epoch": 0.16362773673151224, - "grad_norm": 1.7296358964948957, - "learning_rate": 1.907899419700704e-06, - "loss": 1.2614, - "step": 1207 - }, - { - "epoch": 0.1637633023791771, - "grad_norm": 2.110213983996092, - "learning_rate": 1.9077152507874086e-06, - "loss": 1.1906, - "step": 1208 - }, - { - "epoch": 0.163898868026842, - "grad_norm": 1.5370691582123748, - "learning_rate": 1.9075309068283928e-06, - "loss": 1.2345, - "step": 1209 - }, - { - "epoch": 0.16403443367450687, - "grad_norm": 1.4505410365828166, - "learning_rate": 1.9073463878592046e-06, - "loss": 1.2003, - "step": 1210 - }, - { - "epoch": 0.16416999932217177, - "grad_norm": 2.692676497575644, - "learning_rate": 1.9071616939154279e-06, - "loss": 1.2444, - "step": 1211 - }, - { - "epoch": 0.16430556496983664, - "grad_norm": 1.6517613509834048, - "learning_rate": 1.9069768250326792e-06, - "loss": 1.2737, - "step": 1212 - }, - { - "epoch": 0.1644411306175015, - "grad_norm": 1.9260493509077707, - "learning_rate": 1.9067917812466088e-06, - "loss": 1.2173, - "step": 1213 - }, - { - "epoch": 0.1645766962651664, - "grad_norm": 1.94710200650408, - "learning_rate": 1.9066065625929014e-06, - "loss": 1.2303, - "step": 1214 - }, - { - "epoch": 0.16471226191283128, - "grad_norm": 2.2388468924762304, - "learning_rate": 1.9064211691072747e-06, - "loss": 1.2253, - "step": 1215 - }, - { - "epoch": 0.16484782756049618, - "grad_norm": 1.7769142661547939, - "learning_rate": 1.9062356008254804e-06, - "loss": 1.246, - "step": 1216 - }, - { - "epoch": 0.16498339320816105, - "grad_norm": 1.6544897023854173, - "learning_rate": 1.906049857783304e-06, - "loss": 1.2254, - "step": 1217 - }, - { - "epoch": 0.16511895885582592, - "grad_norm": 1.6194001785332748, - "learning_rate": 1.905863940016564e-06, - "loss": 1.2371, - "step": 1218 - }, - { - "epoch": 0.16525452450349082, - "grad_norm": 2.781212935737495, - "learning_rate": 1.9056778475611143e-06, - "loss": 1.2224, - "step": 1219 - }, - { - "epoch": 0.1653900901511557, - "grad_norm": 1.6325972068946706, - "learning_rate": 1.9054915804528403e-06, - "loss": 1.2327, - "step": 1220 - }, - { - "epoch": 0.1655256557988206, - "grad_norm": 1.6477275619708773, - "learning_rate": 1.9053051387276625e-06, - "loss": 1.2619, - "step": 1221 - }, - { - "epoch": 0.16566122144648546, - "grad_norm": 1.9504701627505017, - "learning_rate": 1.9051185224215347e-06, - "loss": 1.218, - "step": 1222 - }, - { - "epoch": 0.16579678709415036, - "grad_norm": 1.549018003834068, - "learning_rate": 1.9049317315704445e-06, - "loss": 1.2065, - "step": 1223 - }, - { - "epoch": 0.16593235274181523, - "grad_norm": 1.6276232020915615, - "learning_rate": 1.904744766210413e-06, - "loss": 1.2225, - "step": 1224 - }, - { - "epoch": 0.1660679183894801, - "grad_norm": 1.4139028864868886, - "learning_rate": 1.904557626377495e-06, - "loss": 1.2441, - "step": 1225 - }, - { - "epoch": 0.166203484037145, - "grad_norm": 2.899999838819042, - "learning_rate": 1.9043703121077788e-06, - "loss": 1.2195, - "step": 1226 - }, - { - "epoch": 0.16633904968480986, - "grad_norm": 1.6808682270855506, - "learning_rate": 1.9041828234373866e-06, - "loss": 1.2488, - "step": 1227 - }, - { - "epoch": 0.16647461533247476, - "grad_norm": 2.1294958919748472, - "learning_rate": 1.903995160402474e-06, - "loss": 1.1919, - "step": 1228 - }, - { - "epoch": 0.16661018098013963, - "grad_norm": 2.2390371296199727, - "learning_rate": 1.9038073230392306e-06, - "loss": 1.2346, - "step": 1229 - }, - { - "epoch": 0.1667457466278045, - "grad_norm": 1.4030177778805542, - "learning_rate": 1.903619311383879e-06, - "loss": 1.2556, - "step": 1230 - }, - { - "epoch": 0.1668813122754694, - "grad_norm": 2.8284758571552615, - "learning_rate": 1.903431125472676e-06, - "loss": 1.2438, - "step": 1231 - }, - { - "epoch": 0.16701687792313427, - "grad_norm": 2.8926412654743943, - "learning_rate": 1.903242765341912e-06, - "loss": 1.2607, - "step": 1232 - }, - { - "epoch": 0.16715244357079917, - "grad_norm": 2.9119395512227704, - "learning_rate": 1.90305423102791e-06, - "loss": 1.2115, - "step": 1233 - }, - { - "epoch": 0.16728800921846404, - "grad_norm": 1.9369059504769695, - "learning_rate": 1.902865522567028e-06, - "loss": 1.223, - "step": 1234 - }, - { - "epoch": 0.1674235748661289, - "grad_norm": 1.6368639667097638, - "learning_rate": 1.9026766399956568e-06, - "loss": 1.2371, - "step": 1235 - }, - { - "epoch": 0.1675591405137938, - "grad_norm": 1.5421275831292478, - "learning_rate": 1.9024875833502208e-06, - "loss": 1.2217, - "step": 1236 - }, - { - "epoch": 0.16769470616145868, - "grad_norm": 1.995335951544947, - "learning_rate": 1.9022983526671784e-06, - "loss": 1.1748, - "step": 1237 - }, - { - "epoch": 0.16783027180912358, - "grad_norm": 1.4814132833880838, - "learning_rate": 1.9021089479830206e-06, - "loss": 1.2395, - "step": 1238 - }, - { - "epoch": 0.16796583745678845, - "grad_norm": 1.5876523981299182, - "learning_rate": 1.9019193693342733e-06, - "loss": 1.2299, - "step": 1239 - }, - { - "epoch": 0.16810140310445335, - "grad_norm": 1.4715063588536506, - "learning_rate": 1.9017296167574948e-06, - "loss": 1.2034, - "step": 1240 - }, - { - "epoch": 0.16823696875211822, - "grad_norm": 1.7527293572058136, - "learning_rate": 1.9015396902892775e-06, - "loss": 1.2346, - "step": 1241 - }, - { - "epoch": 0.16837253439978309, - "grad_norm": 2.177961238330356, - "learning_rate": 1.9013495899662474e-06, - "loss": 1.2821, - "step": 1242 - }, - { - "epoch": 0.16850810004744798, - "grad_norm": 2.1873694149236678, - "learning_rate": 1.9011593158250637e-06, - "loss": 1.2377, - "step": 1243 - }, - { - "epoch": 0.16864366569511285, - "grad_norm": 1.4210379898299248, - "learning_rate": 1.9009688679024189e-06, - "loss": 1.198, - "step": 1244 - }, - { - "epoch": 0.16877923134277775, - "grad_norm": 3.5550944373368436, - "learning_rate": 1.9007782462350401e-06, - "loss": 1.2429, - "step": 1245 - }, - { - "epoch": 0.16891479699044262, - "grad_norm": 1.64143143668381, - "learning_rate": 1.9005874508596868e-06, - "loss": 1.2313, - "step": 1246 - }, - { - "epoch": 0.1690503626381075, - "grad_norm": 1.481486942956639, - "learning_rate": 1.9003964818131524e-06, - "loss": 1.2676, - "step": 1247 - }, - { - "epoch": 0.1691859282857724, - "grad_norm": 1.5562905245091379, - "learning_rate": 1.9002053391322636e-06, - "loss": 1.207, - "step": 1248 - }, - { - "epoch": 0.16932149393343726, - "grad_norm": 1.7416632070939642, - "learning_rate": 1.900014022853881e-06, - "loss": 1.2367, - "step": 1249 - }, - { - "epoch": 0.16945705958110216, - "grad_norm": 1.594186395973025, - "learning_rate": 1.8998225330148988e-06, - "loss": 1.209, - "step": 1250 - }, - { - "epoch": 0.16959262522876703, - "grad_norm": 1.6449336822475855, - "learning_rate": 1.8996308696522432e-06, - "loss": 1.2315, - "step": 1251 - }, - { - "epoch": 0.1697281908764319, - "grad_norm": 2.5663776721328806, - "learning_rate": 1.899439032802876e-06, - "loss": 1.2385, - "step": 1252 - }, - { - "epoch": 0.1698637565240968, - "grad_norm": 2.1140748259454702, - "learning_rate": 1.8992470225037911e-06, - "loss": 1.2893, - "step": 1253 - }, - { - "epoch": 0.16999932217176167, - "grad_norm": 6.085412541415054, - "learning_rate": 1.899054838792016e-06, - "loss": 1.2212, - "step": 1254 - }, - { - "epoch": 0.17013488781942657, - "grad_norm": 1.4520217755509597, - "learning_rate": 1.8988624817046119e-06, - "loss": 1.1964, - "step": 1255 - }, - { - "epoch": 0.17027045346709144, - "grad_norm": 1.546899649762348, - "learning_rate": 1.8986699512786735e-06, - "loss": 1.2381, - "step": 1256 - }, - { - "epoch": 0.1704060191147563, - "grad_norm": 2.1238574846122815, - "learning_rate": 1.898477247551329e-06, - "loss": 1.2077, - "step": 1257 - }, - { - "epoch": 0.1705415847624212, - "grad_norm": 1.6135432756906116, - "learning_rate": 1.8982843705597388e-06, - "loss": 1.2369, - "step": 1258 - }, - { - "epoch": 0.17067715041008608, - "grad_norm": 2.737044276758795, - "learning_rate": 1.8980913203410988e-06, - "loss": 1.2425, - "step": 1259 - }, - { - "epoch": 0.17081271605775097, - "grad_norm": 1.9297603166611985, - "learning_rate": 1.8978980969326366e-06, - "loss": 1.246, - "step": 1260 - }, - { - "epoch": 0.17094828170541584, - "grad_norm": 1.8124515151795635, - "learning_rate": 1.897704700371614e-06, - "loss": 1.2198, - "step": 1261 - }, - { - "epoch": 0.17108384735308074, - "grad_norm": 1.6942674342538102, - "learning_rate": 1.8975111306953261e-06, - "loss": 1.2775, - "step": 1262 - }, - { - "epoch": 0.1712194130007456, - "grad_norm": 1.6128875945812085, - "learning_rate": 1.8973173879411011e-06, - "loss": 1.2579, - "step": 1263 - }, - { - "epoch": 0.17135497864841048, - "grad_norm": 1.4569367179601904, - "learning_rate": 1.8971234721463008e-06, - "loss": 1.2598, - "step": 1264 - }, - { - "epoch": 0.17149054429607538, - "grad_norm": 1.954573564663893, - "learning_rate": 1.8969293833483202e-06, - "loss": 1.201, - "step": 1265 - }, - { - "epoch": 0.17162610994374025, - "grad_norm": 1.381549668336407, - "learning_rate": 1.896735121584588e-06, - "loss": 1.1705, - "step": 1266 - }, - { - "epoch": 0.17176167559140515, - "grad_norm": 1.4541714981855904, - "learning_rate": 1.8965406868925664e-06, - "loss": 1.2044, - "step": 1267 - }, - { - "epoch": 0.17189724123907002, - "grad_norm": 1.5748815834073446, - "learning_rate": 1.89634607930975e-06, - "loss": 1.2377, - "step": 1268 - }, - { - "epoch": 0.1720328068867349, - "grad_norm": 1.6459742780296809, - "learning_rate": 1.8961512988736671e-06, - "loss": 1.2378, - "step": 1269 - }, - { - "epoch": 0.1721683725343998, - "grad_norm": 1.5444869272892492, - "learning_rate": 1.8959563456218807e-06, - "loss": 1.2289, - "step": 1270 - }, - { - "epoch": 0.17230393818206466, - "grad_norm": 1.6880338697947572, - "learning_rate": 1.8957612195919847e-06, - "loss": 1.2314, - "step": 1271 - }, - { - "epoch": 0.17243950382972956, - "grad_norm": 2.6579808758625747, - "learning_rate": 1.8955659208216086e-06, - "loss": 1.2367, - "step": 1272 - }, - { - "epoch": 0.17257506947739443, - "grad_norm": 1.6248204963880861, - "learning_rate": 1.8953704493484138e-06, - "loss": 1.2281, - "step": 1273 - }, - { - "epoch": 0.1727106351250593, - "grad_norm": 2.857034709332075, - "learning_rate": 1.8951748052100954e-06, - "loss": 1.2959, - "step": 1274 - }, - { - "epoch": 0.1728462007727242, - "grad_norm": 1.5590003975053879, - "learning_rate": 1.894978988444382e-06, - "loss": 1.2029, - "step": 1275 - }, - { - "epoch": 0.17298176642038907, - "grad_norm": 2.8395255824637937, - "learning_rate": 1.8947829990890347e-06, - "loss": 1.2658, - "step": 1276 - }, - { - "epoch": 0.17311733206805396, - "grad_norm": 1.6717236496037429, - "learning_rate": 1.8945868371818493e-06, - "loss": 1.2303, - "step": 1277 - }, - { - "epoch": 0.17325289771571883, - "grad_norm": 1.847374491782956, - "learning_rate": 1.8943905027606539e-06, - "loss": 1.2273, - "step": 1278 - }, - { - "epoch": 0.17338846336338373, - "grad_norm": 1.4158919288090577, - "learning_rate": 1.8941939958633099e-06, - "loss": 1.257, - "step": 1279 - }, - { - "epoch": 0.1735240290110486, - "grad_norm": 1.4785555608788699, - "learning_rate": 1.8939973165277123e-06, - "loss": 1.237, - "step": 1280 - }, - { - "epoch": 0.17365959465871347, - "grad_norm": 1.9495125029137732, - "learning_rate": 1.8938004647917886e-06, - "loss": 1.2172, - "step": 1281 - }, - { - "epoch": 0.17379516030637837, - "grad_norm": 1.5991833864144316, - "learning_rate": 1.8936034406935008e-06, - "loss": 1.1927, - "step": 1282 - }, - { - "epoch": 0.17393072595404324, - "grad_norm": 1.7374298732450029, - "learning_rate": 1.8934062442708432e-06, - "loss": 1.1908, - "step": 1283 - }, - { - "epoch": 0.17406629160170814, - "grad_norm": 1.703671329384703, - "learning_rate": 1.8932088755618434e-06, - "loss": 1.2465, - "step": 1284 - }, - { - "epoch": 0.174201857249373, - "grad_norm": 1.861851451173301, - "learning_rate": 1.8930113346045627e-06, - "loss": 1.2178, - "step": 1285 - }, - { - "epoch": 0.17433742289703788, - "grad_norm": 1.675580066393857, - "learning_rate": 1.892813621437095e-06, - "loss": 1.2194, - "step": 1286 - }, - { - "epoch": 0.17447298854470278, - "grad_norm": 1.8281451948733107, - "learning_rate": 1.8926157360975674e-06, - "loss": 1.2209, - "step": 1287 - }, - { - "epoch": 0.17460855419236765, - "grad_norm": 1.8423592869332466, - "learning_rate": 1.8924176786241416e-06, - "loss": 1.2207, - "step": 1288 - }, - { - "epoch": 0.17474411984003255, - "grad_norm": 14.63064942656738, - "learning_rate": 1.8922194490550103e-06, - "loss": 1.2224, - "step": 1289 - }, - { - "epoch": 0.17487968548769742, - "grad_norm": 1.6884032259046406, - "learning_rate": 1.8920210474284014e-06, - "loss": 1.2614, - "step": 1290 - }, - { - "epoch": 0.1750152511353623, - "grad_norm": 1.5821086911107092, - "learning_rate": 1.8918224737825743e-06, - "loss": 1.2051, - "step": 1291 - }, - { - "epoch": 0.17515081678302719, - "grad_norm": 1.8467802568178933, - "learning_rate": 1.891623728155823e-06, - "loss": 1.2144, - "step": 1292 - }, - { - "epoch": 0.17528638243069206, - "grad_norm": 1.6312057343935724, - "learning_rate": 1.8914248105864738e-06, - "loss": 1.2218, - "step": 1293 - }, - { - "epoch": 0.17542194807835695, - "grad_norm": 1.9575475176522887, - "learning_rate": 1.8912257211128864e-06, - "loss": 1.2457, - "step": 1294 - }, - { - "epoch": 0.17555751372602182, - "grad_norm": 1.478477660404088, - "learning_rate": 1.8910264597734535e-06, - "loss": 1.2041, - "step": 1295 - }, - { - "epoch": 0.1756930793736867, - "grad_norm": 2.1459025528346225, - "learning_rate": 1.8908270266066011e-06, - "loss": 1.2449, - "step": 1296 - }, - { - "epoch": 0.1758286450213516, - "grad_norm": 1.6990274792728293, - "learning_rate": 1.8906274216507885e-06, - "loss": 1.2408, - "step": 1297 - }, - { - "epoch": 0.17596421066901646, - "grad_norm": 1.4856002633854293, - "learning_rate": 1.8904276449445079e-06, - "loss": 1.2152, - "step": 1298 - }, - { - "epoch": 0.17609977631668136, - "grad_norm": 2.247785644635343, - "learning_rate": 1.8902276965262845e-06, - "loss": 1.2075, - "step": 1299 - }, - { - "epoch": 0.17623534196434623, - "grad_norm": 1.4032737641863449, - "learning_rate": 1.8900275764346768e-06, - "loss": 1.2134, - "step": 1300 - }, - { - "epoch": 0.17637090761201113, - "grad_norm": 2.165152703994322, - "learning_rate": 1.8898272847082764e-06, - "loss": 1.2292, - "step": 1301 - }, - { - "epoch": 0.176506473259676, - "grad_norm": 1.4857191988094804, - "learning_rate": 1.8896268213857078e-06, - "loss": 1.2579, - "step": 1302 - }, - { - "epoch": 0.17664203890734087, - "grad_norm": 1.4626631045837586, - "learning_rate": 1.8894261865056293e-06, - "loss": 1.2156, - "step": 1303 - }, - { - "epoch": 0.17677760455500577, - "grad_norm": 2.04013157789782, - "learning_rate": 1.8892253801067315e-06, - "loss": 1.2303, - "step": 1304 - }, - { - "epoch": 0.17691317020267064, - "grad_norm": 1.7668663080322815, - "learning_rate": 1.889024402227738e-06, - "loss": 1.2018, - "step": 1305 - }, - { - "epoch": 0.17704873585033554, - "grad_norm": 2.266898818391618, - "learning_rate": 1.8888232529074062e-06, - "loss": 1.2023, - "step": 1306 - }, - { - "epoch": 0.1771843014980004, - "grad_norm": 1.9946332144073193, - "learning_rate": 1.888621932184526e-06, - "loss": 1.2844, - "step": 1307 - }, - { - "epoch": 0.17731986714566528, - "grad_norm": 1.5185393924210941, - "learning_rate": 1.8884204400979206e-06, - "loss": 1.2484, - "step": 1308 - }, - { - "epoch": 0.17745543279333018, - "grad_norm": 2.3782533565887047, - "learning_rate": 1.888218776686446e-06, - "loss": 1.2054, - "step": 1309 - }, - { - "epoch": 0.17759099844099505, - "grad_norm": 1.7387908136071994, - "learning_rate": 1.8880169419889915e-06, - "loss": 1.2072, - "step": 1310 - }, - { - "epoch": 0.17772656408865994, - "grad_norm": 1.7420104866217532, - "learning_rate": 1.8878149360444793e-06, - "loss": 1.1975, - "step": 1311 - }, - { - "epoch": 0.17786212973632481, - "grad_norm": 2.120785017835378, - "learning_rate": 1.8876127588918648e-06, - "loss": 1.2445, - "step": 1312 - }, - { - "epoch": 0.17799769538398968, - "grad_norm": 2.52340646920416, - "learning_rate": 1.887410410570136e-06, - "loss": 1.2234, - "step": 1313 - }, - { - "epoch": 0.17813326103165458, - "grad_norm": 1.4588727672582837, - "learning_rate": 1.8872078911183145e-06, - "loss": 1.2447, - "step": 1314 - }, - { - "epoch": 0.17826882667931945, - "grad_norm": 3.025330976926478, - "learning_rate": 1.8870052005754542e-06, - "loss": 1.1925, - "step": 1315 - }, - { - "epoch": 0.17840439232698435, - "grad_norm": 1.5295150877744053, - "learning_rate": 1.8868023389806428e-06, - "loss": 1.2012, - "step": 1316 - }, - { - "epoch": 0.17853995797464922, - "grad_norm": 1.7736095923822441, - "learning_rate": 1.8865993063730002e-06, - "loss": 1.2358, - "step": 1317 - }, - { - "epoch": 0.1786755236223141, - "grad_norm": 1.5129914960932915, - "learning_rate": 1.8863961027916794e-06, - "loss": 1.2537, - "step": 1318 - }, - { - "epoch": 0.178811089269979, - "grad_norm": 1.7327605754442004, - "learning_rate": 1.8861927282758673e-06, - "loss": 1.2428, - "step": 1319 - }, - { - "epoch": 0.17894665491764386, - "grad_norm": 1.6054407287986312, - "learning_rate": 1.8859891828647827e-06, - "loss": 1.1973, - "step": 1320 - }, - { - "epoch": 0.17908222056530876, - "grad_norm": 1.8767285860216136, - "learning_rate": 1.8857854665976777e-06, - "loss": 1.2334, - "step": 1321 - }, - { - "epoch": 0.17921778621297363, - "grad_norm": 1.5813608476492758, - "learning_rate": 1.8855815795138375e-06, - "loss": 1.2471, - "step": 1322 - }, - { - "epoch": 0.17935335186063853, - "grad_norm": 1.5743123148880938, - "learning_rate": 1.8853775216525803e-06, - "loss": 1.2552, - "step": 1323 - }, - { - "epoch": 0.1794889175083034, - "grad_norm": 1.6145579983179494, - "learning_rate": 1.8851732930532563e-06, - "loss": 1.2085, - "step": 1324 - }, - { - "epoch": 0.17962448315596827, - "grad_norm": 2.428013942504317, - "learning_rate": 1.8849688937552502e-06, - "loss": 1.2252, - "step": 1325 - }, - { - "epoch": 0.17976004880363317, - "grad_norm": 1.6134097929973992, - "learning_rate": 1.8847643237979783e-06, - "loss": 1.2156, - "step": 1326 - }, - { - "epoch": 0.17989561445129804, - "grad_norm": 1.951922039760306, - "learning_rate": 1.8845595832208905e-06, - "loss": 1.1987, - "step": 1327 - }, - { - "epoch": 0.18003118009896293, - "grad_norm": 1.73916113485436, - "learning_rate": 1.8843546720634693e-06, - "loss": 1.2558, - "step": 1328 - }, - { - "epoch": 0.1801667457466278, - "grad_norm": 2.0435564943498634, - "learning_rate": 1.8841495903652302e-06, - "loss": 1.2035, - "step": 1329 - }, - { - "epoch": 0.18030231139429267, - "grad_norm": 1.5093139401357645, - "learning_rate": 1.883944338165722e-06, - "loss": 1.2434, - "step": 1330 - }, - { - "epoch": 0.18043787704195757, - "grad_norm": 1.972707663184111, - "learning_rate": 1.8837389155045253e-06, - "loss": 1.2112, - "step": 1331 - }, - { - "epoch": 0.18057344268962244, - "grad_norm": 1.4462918077330376, - "learning_rate": 1.883533322421255e-06, - "loss": 1.2217, - "step": 1332 - }, - { - "epoch": 0.18070900833728734, - "grad_norm": 1.6914485955939509, - "learning_rate": 1.883327558955557e-06, - "loss": 1.2428, - "step": 1333 - }, - { - "epoch": 0.1808445739849522, - "grad_norm": 3.0198531318910944, - "learning_rate": 1.8831216251471123e-06, - "loss": 1.2474, - "step": 1334 - }, - { - "epoch": 0.18098013963261708, - "grad_norm": 1.8020316494539865, - "learning_rate": 1.8829155210356329e-06, - "loss": 1.2021, - "step": 1335 - }, - { - "epoch": 0.18111570528028198, - "grad_norm": 1.9348041114957417, - "learning_rate": 1.8827092466608647e-06, - "loss": 1.2059, - "step": 1336 - }, - { - "epoch": 0.18125127092794685, - "grad_norm": 1.6731421412991756, - "learning_rate": 1.8825028020625858e-06, - "loss": 1.1826, - "step": 1337 - }, - { - "epoch": 0.18138683657561175, - "grad_norm": 2.147033880395358, - "learning_rate": 1.8822961872806076e-06, - "loss": 1.2231, - "step": 1338 - }, - { - "epoch": 0.18152240222327662, - "grad_norm": 1.4569115526616976, - "learning_rate": 1.8820894023547745e-06, - "loss": 1.2028, - "step": 1339 - }, - { - "epoch": 0.18165796787094152, - "grad_norm": 1.262445466261922, - "learning_rate": 1.8818824473249624e-06, - "loss": 1.2079, - "step": 1340 - }, - { - "epoch": 0.1817935335186064, - "grad_norm": 1.542281370237099, - "learning_rate": 1.8816753222310818e-06, - "loss": 1.2439, - "step": 1341 - }, - { - "epoch": 0.18192909916627126, - "grad_norm": 1.7217766648322952, - "learning_rate": 1.8814680271130747e-06, - "loss": 1.2101, - "step": 1342 - }, - { - "epoch": 0.18206466481393616, - "grad_norm": 2.086004561649804, - "learning_rate": 1.8812605620109165e-06, - "loss": 1.1935, - "step": 1343 - }, - { - "epoch": 0.18220023046160103, - "grad_norm": 1.7744362365568493, - "learning_rate": 1.881052926964615e-06, - "loss": 1.2461, - "step": 1344 - }, - { - "epoch": 0.18233579610926592, - "grad_norm": 2.2582454011363913, - "learning_rate": 1.8808451220142114e-06, - "loss": 1.2422, - "step": 1345 - }, - { - "epoch": 0.1824713617569308, - "grad_norm": 2.3300203013256375, - "learning_rate": 1.880637147199779e-06, - "loss": 1.2448, - "step": 1346 - }, - { - "epoch": 0.18260692740459566, - "grad_norm": 1.799948915139363, - "learning_rate": 1.8804290025614242e-06, - "loss": 1.2555, - "step": 1347 - }, - { - "epoch": 0.18274249305226056, - "grad_norm": 1.7521780434742782, - "learning_rate": 1.8802206881392858e-06, - "loss": 1.2257, - "step": 1348 - }, - { - "epoch": 0.18287805869992543, - "grad_norm": 1.5365784827504991, - "learning_rate": 1.8800122039735355e-06, - "loss": 1.2496, - "step": 1349 - }, - { - "epoch": 0.18301362434759033, - "grad_norm": 1.6062616176570053, - "learning_rate": 1.8798035501043783e-06, - "loss": 1.1928, - "step": 1350 - }, - { - "epoch": 0.1831491899952552, - "grad_norm": 1.4771253931149013, - "learning_rate": 1.879594726572051e-06, - "loss": 1.201, - "step": 1351 - }, - { - "epoch": 0.18328475564292007, - "grad_norm": 1.5046859599316353, - "learning_rate": 1.8793857334168243e-06, - "loss": 1.2611, - "step": 1352 - }, - { - "epoch": 0.18342032129058497, - "grad_norm": 1.958647834984457, - "learning_rate": 1.8791765706789997e-06, - "loss": 1.1959, - "step": 1353 - }, - { - "epoch": 0.18355588693824984, - "grad_norm": 1.3977943083897169, - "learning_rate": 1.8789672383989134e-06, - "loss": 1.206, - "step": 1354 - }, - { - "epoch": 0.18369145258591474, - "grad_norm": 1.4698720079510912, - "learning_rate": 1.8787577366169336e-06, - "loss": 1.2473, - "step": 1355 - }, - { - "epoch": 0.1838270182335796, - "grad_norm": 5.155243754010471, - "learning_rate": 1.8785480653734607e-06, - "loss": 1.2369, - "step": 1356 - }, - { - "epoch": 0.18396258388124448, - "grad_norm": 2.566332179175006, - "learning_rate": 1.878338224708928e-06, - "loss": 1.2336, - "step": 1357 - }, - { - "epoch": 0.18409814952890938, - "grad_norm": 1.5462264028884325, - "learning_rate": 1.878128214663802e-06, - "loss": 1.2067, - "step": 1358 - }, - { - "epoch": 0.18423371517657425, - "grad_norm": 1.9323371172192303, - "learning_rate": 1.8779180352785814e-06, - "loss": 1.2251, - "step": 1359 - }, - { - "epoch": 0.18436928082423915, - "grad_norm": 1.5844227812979286, - "learning_rate": 1.8777076865937976e-06, - "loss": 1.2228, - "step": 1360 - }, - { - "epoch": 0.18450484647190402, - "grad_norm": 1.9436030831516313, - "learning_rate": 1.8774971686500143e-06, - "loss": 1.215, - "step": 1361 - }, - { - "epoch": 0.18464041211956891, - "grad_norm": 1.4151339029696255, - "learning_rate": 1.877286481487829e-06, - "loss": 1.1975, - "step": 1362 - }, - { - "epoch": 0.18477597776723378, - "grad_norm": 1.7516739680541114, - "learning_rate": 1.8770756251478703e-06, - "loss": 1.2466, - "step": 1363 - }, - { - "epoch": 0.18491154341489865, - "grad_norm": 1.535957023227142, - "learning_rate": 1.8768645996708007e-06, - "loss": 1.2252, - "step": 1364 - }, - { - "epoch": 0.18504710906256355, - "grad_norm": 1.7369372446456999, - "learning_rate": 1.8766534050973144e-06, - "loss": 1.1846, - "step": 1365 - }, - { - "epoch": 0.18518267471022842, - "grad_norm": 1.886149450244158, - "learning_rate": 1.876442041468139e-06, - "loss": 1.2119, - "step": 1366 - }, - { - "epoch": 0.18531824035789332, - "grad_norm": 1.722451823386913, - "learning_rate": 1.876230508824034e-06, - "loss": 1.2215, - "step": 1367 - }, - { - "epoch": 0.1854538060055582, - "grad_norm": 2.5464862810796145, - "learning_rate": 1.876018807205792e-06, - "loss": 1.1975, - "step": 1368 - }, - { - "epoch": 0.18558937165322306, - "grad_norm": 1.4767566694489909, - "learning_rate": 1.875806936654238e-06, - "loss": 1.2029, - "step": 1369 - }, - { - "epoch": 0.18572493730088796, - "grad_norm": 2.5762626565571707, - "learning_rate": 1.8755948972102292e-06, - "loss": 1.2244, - "step": 1370 - }, - { - "epoch": 0.18586050294855283, - "grad_norm": 1.5158147214514315, - "learning_rate": 1.8753826889146562e-06, - "loss": 1.2325, - "step": 1371 - }, - { - "epoch": 0.18599606859621773, - "grad_norm": 1.8491504947796384, - "learning_rate": 1.8751703118084413e-06, - "loss": 1.2132, - "step": 1372 - }, - { - "epoch": 0.1861316342438826, - "grad_norm": 1.418267841191721, - "learning_rate": 1.8749577659325401e-06, - "loss": 1.193, - "step": 1373 - }, - { - "epoch": 0.18626719989154747, - "grad_norm": 1.5579546203705545, - "learning_rate": 1.8747450513279403e-06, - "loss": 1.1999, - "step": 1374 - }, - { - "epoch": 0.18640276553921237, - "grad_norm": 1.5655437170466329, - "learning_rate": 1.874532168035662e-06, - "loss": 1.2669, - "step": 1375 - }, - { - "epoch": 0.18653833118687724, - "grad_norm": 1.364652416288234, - "learning_rate": 1.8743191160967584e-06, - "loss": 1.2369, - "step": 1376 - }, - { - "epoch": 0.18667389683454214, - "grad_norm": 1.4618003112972837, - "learning_rate": 1.8741058955523145e-06, - "loss": 1.2488, - "step": 1377 - }, - { - "epoch": 0.186809462482207, - "grad_norm": 1.6635047030516643, - "learning_rate": 1.8738925064434485e-06, - "loss": 1.2275, - "step": 1378 - }, - { - "epoch": 0.1869450281298719, - "grad_norm": 1.982085294965747, - "learning_rate": 1.8736789488113108e-06, - "loss": 1.2161, - "step": 1379 - }, - { - "epoch": 0.18708059377753677, - "grad_norm": 1.8064094172369536, - "learning_rate": 1.8734652226970844e-06, - "loss": 1.2104, - "step": 1380 - }, - { - "epoch": 0.18721615942520164, - "grad_norm": 1.7220920245287827, - "learning_rate": 1.8732513281419843e-06, - "loss": 1.2282, - "step": 1381 - }, - { - "epoch": 0.18735172507286654, - "grad_norm": 1.7478242397035015, - "learning_rate": 1.8730372651872585e-06, - "loss": 1.2209, - "step": 1382 - }, - { - "epoch": 0.1874872907205314, - "grad_norm": 1.6033971846602753, - "learning_rate": 1.8728230338741877e-06, - "loss": 1.1873, - "step": 1383 - }, - { - "epoch": 0.1876228563681963, - "grad_norm": 1.5356201008053036, - "learning_rate": 1.8726086342440842e-06, - "loss": 1.2096, - "step": 1384 - }, - { - "epoch": 0.18775842201586118, - "grad_norm": 1.4041536948625855, - "learning_rate": 1.8723940663382939e-06, - "loss": 1.2091, - "step": 1385 - }, - { - "epoch": 0.18789398766352605, - "grad_norm": 1.4362847145742166, - "learning_rate": 1.8721793301981937e-06, - "loss": 1.1929, - "step": 1386 - }, - { - "epoch": 0.18802955331119095, - "grad_norm": 2.984363726860373, - "learning_rate": 1.8719644258651942e-06, - "loss": 1.2356, - "step": 1387 - }, - { - "epoch": 0.18816511895885582, - "grad_norm": 1.5221416449088556, - "learning_rate": 1.8717493533807386e-06, - "loss": 1.2016, - "step": 1388 - }, - { - "epoch": 0.18830068460652072, - "grad_norm": 1.610399382730175, - "learning_rate": 1.871534112786301e-06, - "loss": 1.2047, - "step": 1389 - }, - { - "epoch": 0.1884362502541856, - "grad_norm": 2.0042908418669247, - "learning_rate": 1.8713187041233893e-06, - "loss": 1.2012, - "step": 1390 - }, - { - "epoch": 0.18857181590185046, - "grad_norm": 1.5663836862419407, - "learning_rate": 1.8711031274335434e-06, - "loss": 1.1585, - "step": 1391 - }, - { - "epoch": 0.18870738154951536, - "grad_norm": 1.8402522509483414, - "learning_rate": 1.8708873827583352e-06, - "loss": 1.2463, - "step": 1392 - }, - { - "epoch": 0.18884294719718023, - "grad_norm": 2.0786600819659435, - "learning_rate": 1.8706714701393697e-06, - "loss": 1.2435, - "step": 1393 - }, - { - "epoch": 0.18897851284484513, - "grad_norm": 1.854288466513282, - "learning_rate": 1.8704553896182838e-06, - "loss": 1.2101, - "step": 1394 - }, - { - "epoch": 0.18911407849251, - "grad_norm": 1.4671808543724592, - "learning_rate": 1.870239141236747e-06, - "loss": 1.2555, - "step": 1395 - }, - { - "epoch": 0.18924964414017487, - "grad_norm": 1.5266128351661312, - "learning_rate": 1.870022725036461e-06, - "loss": 1.1862, - "step": 1396 - }, - { - "epoch": 0.18938520978783976, - "grad_norm": 1.8555742750651882, - "learning_rate": 1.8698061410591604e-06, - "loss": 1.2171, - "step": 1397 - }, - { - "epoch": 0.18952077543550463, - "grad_norm": 2.2848609680464906, - "learning_rate": 1.8695893893466108e-06, - "loss": 1.1999, - "step": 1398 - }, - { - "epoch": 0.18965634108316953, - "grad_norm": 1.810559669922742, - "learning_rate": 1.869372469940612e-06, - "loss": 1.2411, - "step": 1399 - }, - { - "epoch": 0.1897919067308344, - "grad_norm": 1.632557466654525, - "learning_rate": 1.8691553828829948e-06, - "loss": 1.2286, - "step": 1400 - }, - { - "epoch": 0.1899274723784993, - "grad_norm": 1.5970728773050953, - "learning_rate": 1.8689381282156222e-06, - "loss": 1.1898, - "step": 1401 - }, - { - "epoch": 0.19006303802616417, - "grad_norm": 2.0485310915296755, - "learning_rate": 1.868720705980391e-06, - "loss": 1.2338, - "step": 1402 - }, - { - "epoch": 0.19019860367382904, - "grad_norm": 2.2894986115728404, - "learning_rate": 1.8685031162192287e-06, - "loss": 1.2296, - "step": 1403 - }, - { - "epoch": 0.19033416932149394, - "grad_norm": 1.733041333670982, - "learning_rate": 1.8682853589740962e-06, - "loss": 1.2067, - "step": 1404 - }, - { - "epoch": 0.1904697349691588, - "grad_norm": 1.6022640755790163, - "learning_rate": 1.8680674342869858e-06, - "loss": 1.2103, - "step": 1405 - }, - { - "epoch": 0.1906053006168237, - "grad_norm": 2.2647397947293184, - "learning_rate": 1.867849342199923e-06, - "loss": 1.1987, - "step": 1406 - }, - { - "epoch": 0.19074086626448858, - "grad_norm": 1.7544470852859406, - "learning_rate": 1.867631082754965e-06, - "loss": 1.2209, - "step": 1407 - }, - { - "epoch": 0.19087643191215345, - "grad_norm": 1.7207728872358996, - "learning_rate": 1.8674126559942009e-06, - "loss": 1.2153, - "step": 1408 - }, - { - "epoch": 0.19101199755981835, - "grad_norm": 3.9648662622809288, - "learning_rate": 1.8671940619597532e-06, - "loss": 1.1899, - "step": 1409 - }, - { - "epoch": 0.19114756320748322, - "grad_norm": 1.7689130305060925, - "learning_rate": 1.8669753006937762e-06, - "loss": 1.2441, - "step": 1410 - }, - { - "epoch": 0.19128312885514812, - "grad_norm": 1.7660883348477603, - "learning_rate": 1.8667563722384559e-06, - "loss": 1.1786, - "step": 1411 - }, - { - "epoch": 0.19141869450281299, - "grad_norm": 3.6368626541420968, - "learning_rate": 1.8665372766360107e-06, - "loss": 1.2526, - "step": 1412 - }, - { - "epoch": 0.19155426015047786, - "grad_norm": 1.633891041403092, - "learning_rate": 1.866318013928692e-06, - "loss": 1.1852, - "step": 1413 - }, - { - "epoch": 0.19168982579814275, - "grad_norm": 1.6601210619601163, - "learning_rate": 1.8660985841587824e-06, - "loss": 1.1753, - "step": 1414 - }, - { - "epoch": 0.19182539144580762, - "grad_norm": 1.9037365646764177, - "learning_rate": 1.8658789873685973e-06, - "loss": 1.2393, - "step": 1415 - }, - { - "epoch": 0.19196095709347252, - "grad_norm": 1.5533688635612095, - "learning_rate": 1.8656592236004847e-06, - "loss": 1.2156, - "step": 1416 - }, - { - "epoch": 0.1920965227411374, - "grad_norm": 1.8846462744658266, - "learning_rate": 1.8654392928968239e-06, - "loss": 1.2129, - "step": 1417 - }, - { - "epoch": 0.1922320883888023, - "grad_norm": 2.815923467876235, - "learning_rate": 1.8652191953000265e-06, - "loss": 1.1925, - "step": 1418 - }, - { - "epoch": 0.19236765403646716, - "grad_norm": 2.374362237648059, - "learning_rate": 1.864998930852537e-06, - "loss": 1.1997, - "step": 1419 - }, - { - "epoch": 0.19250321968413203, - "grad_norm": 2.0791296963082786, - "learning_rate": 1.8647784995968317e-06, - "loss": 1.1727, - "step": 1420 - }, - { - "epoch": 0.19263878533179693, - "grad_norm": 1.6634745806632492, - "learning_rate": 1.8645579015754189e-06, - "loss": 1.1864, - "step": 1421 - }, - { - "epoch": 0.1927743509794618, - "grad_norm": 1.8055219301629448, - "learning_rate": 1.8643371368308389e-06, - "loss": 1.2485, - "step": 1422 - }, - { - "epoch": 0.1929099166271267, - "grad_norm": 1.4879911570768993, - "learning_rate": 1.8641162054056651e-06, - "loss": 1.2316, - "step": 1423 - }, - { - "epoch": 0.19304548227479157, - "grad_norm": 2.194260441396623, - "learning_rate": 1.8638951073425018e-06, - "loss": 1.1989, - "step": 1424 - }, - { - "epoch": 0.19318104792245644, - "grad_norm": 1.6035308419587084, - "learning_rate": 1.8636738426839863e-06, - "loss": 1.205, - "step": 1425 - }, - { - "epoch": 0.19331661357012134, - "grad_norm": 2.5114083014614135, - "learning_rate": 1.8634524114727878e-06, - "loss": 1.2029, - "step": 1426 - }, - { - "epoch": 0.1934521792177862, - "grad_norm": 1.3771221547958599, - "learning_rate": 1.8632308137516071e-06, - "loss": 1.223, - "step": 1427 - }, - { - "epoch": 0.1935877448654511, - "grad_norm": 2.0230105341588462, - "learning_rate": 1.8630090495631783e-06, - "loss": 1.2507, - "step": 1428 - }, - { - "epoch": 0.19372331051311598, - "grad_norm": 1.8266623147508114, - "learning_rate": 1.8627871189502662e-06, - "loss": 1.2105, - "step": 1429 - }, - { - "epoch": 0.19385887616078085, - "grad_norm": 2.470269112695365, - "learning_rate": 1.8625650219556688e-06, - "loss": 1.2227, - "step": 1430 - }, - { - "epoch": 0.19399444180844574, - "grad_norm": 1.4199452970446456, - "learning_rate": 1.8623427586222154e-06, - "loss": 1.2152, - "step": 1431 - }, - { - "epoch": 0.19413000745611061, - "grad_norm": 1.748907051897559, - "learning_rate": 1.8621203289927681e-06, - "loss": 1.2686, - "step": 1432 - }, - { - "epoch": 0.1942655731037755, - "grad_norm": 2.8378595694679043, - "learning_rate": 1.8618977331102204e-06, - "loss": 1.2067, - "step": 1433 - }, - { - "epoch": 0.19440113875144038, - "grad_norm": 1.4890481890509066, - "learning_rate": 1.861674971017498e-06, - "loss": 1.206, - "step": 1434 - }, - { - "epoch": 0.19453670439910525, - "grad_norm": 1.7184265477984764, - "learning_rate": 1.8614520427575596e-06, - "loss": 1.2033, - "step": 1435 - }, - { - "epoch": 0.19467227004677015, - "grad_norm": 1.5231135016006527, - "learning_rate": 1.8612289483733942e-06, - "loss": 1.2011, - "step": 1436 - }, - { - "epoch": 0.19480783569443502, - "grad_norm": 1.6302778308509214, - "learning_rate": 1.8610056879080247e-06, - "loss": 1.1826, - "step": 1437 - }, - { - "epoch": 0.19494340134209992, - "grad_norm": 1.6373423635766284, - "learning_rate": 1.8607822614045041e-06, - "loss": 1.2717, - "step": 1438 - }, - { - "epoch": 0.1950789669897648, - "grad_norm": 1.6108192145484106, - "learning_rate": 1.8605586689059195e-06, - "loss": 1.1767, - "step": 1439 - }, - { - "epoch": 0.1952145326374297, - "grad_norm": 1.9309400916976882, - "learning_rate": 1.8603349104553882e-06, - "loss": 1.1586, - "step": 1440 - }, - { - "epoch": 0.19535009828509456, - "grad_norm": 1.8883982587020072, - "learning_rate": 1.8601109860960603e-06, - "loss": 1.162, - "step": 1441 - }, - { - "epoch": 0.19548566393275943, - "grad_norm": 1.6488161141166962, - "learning_rate": 1.8598868958711185e-06, - "loss": 1.195, - "step": 1442 - }, - { - "epoch": 0.19562122958042433, - "grad_norm": 1.9957640301394148, - "learning_rate": 1.8596626398237762e-06, - "loss": 1.2069, - "step": 1443 - }, - { - "epoch": 0.1957567952280892, - "grad_norm": 2.7526610923526262, - "learning_rate": 1.8594382179972794e-06, - "loss": 1.2074, - "step": 1444 - }, - { - "epoch": 0.1958923608757541, - "grad_norm": 1.8344438524131959, - "learning_rate": 1.8592136304349063e-06, - "loss": 1.2417, - "step": 1445 - }, - { - "epoch": 0.19602792652341897, - "grad_norm": 1.6221374170844824, - "learning_rate": 1.8589888771799669e-06, - "loss": 1.2091, - "step": 1446 - }, - { - "epoch": 0.19616349217108384, - "grad_norm": 1.8134246908259164, - "learning_rate": 1.858763958275803e-06, - "loss": 1.2024, - "step": 1447 - }, - { - "epoch": 0.19629905781874873, - "grad_norm": 1.897391310741579, - "learning_rate": 1.8585388737657883e-06, - "loss": 1.2772, - "step": 1448 - }, - { - "epoch": 0.1964346234664136, - "grad_norm": 1.5838776862653434, - "learning_rate": 1.8583136236933287e-06, - "loss": 1.2478, - "step": 1449 - }, - { - "epoch": 0.1965701891140785, - "grad_norm": 2.6233584354252844, - "learning_rate": 1.858088208101862e-06, - "loss": 1.2074, - "step": 1450 - }, - { - "epoch": 0.19670575476174337, - "grad_norm": 2.0325473656311215, - "learning_rate": 1.8578626270348576e-06, - "loss": 1.1699, - "step": 1451 - }, - { - "epoch": 0.19684132040940824, - "grad_norm": 1.5249489793256887, - "learning_rate": 1.8576368805358171e-06, - "loss": 1.2583, - "step": 1452 - }, - { - "epoch": 0.19697688605707314, - "grad_norm": 2.5336619214188287, - "learning_rate": 1.857410968648274e-06, - "loss": 1.2088, - "step": 1453 - }, - { - "epoch": 0.197112451704738, - "grad_norm": 5.526362990903123, - "learning_rate": 1.8571848914157938e-06, - "loss": 1.2041, - "step": 1454 - }, - { - "epoch": 0.1972480173524029, - "grad_norm": 1.4463317403521831, - "learning_rate": 1.8569586488819732e-06, - "loss": 1.2125, - "step": 1455 - }, - { - "epoch": 0.19738358300006778, - "grad_norm": 1.5884230624069613, - "learning_rate": 1.8567322410904416e-06, - "loss": 1.2026, - "step": 1456 - }, - { - "epoch": 0.19751914864773265, - "grad_norm": 1.5394975552416765, - "learning_rate": 1.8565056680848602e-06, - "loss": 1.2395, - "step": 1457 - }, - { - "epoch": 0.19765471429539755, - "grad_norm": 1.643902045820333, - "learning_rate": 1.8562789299089212e-06, - "loss": 1.1994, - "step": 1458 - }, - { - "epoch": 0.19779027994306242, - "grad_norm": 2.1249366081654313, - "learning_rate": 1.8560520266063497e-06, - "loss": 1.2033, - "step": 1459 - }, - { - "epoch": 0.19792584559072732, - "grad_norm": 1.8275232453307295, - "learning_rate": 1.8558249582209022e-06, - "loss": 1.1973, - "step": 1460 - }, - { - "epoch": 0.1980614112383922, - "grad_norm": 5.720250820967446, - "learning_rate": 1.8555977247963673e-06, - "loss": 1.1959, - "step": 1461 - }, - { - "epoch": 0.19819697688605709, - "grad_norm": 1.609614926661516, - "learning_rate": 1.8553703263765646e-06, - "loss": 1.2067, - "step": 1462 - }, - { - "epoch": 0.19833254253372196, - "grad_norm": 1.7616481678400506, - "learning_rate": 1.8551427630053463e-06, - "loss": 1.1987, - "step": 1463 - }, - { - "epoch": 0.19846810818138683, - "grad_norm": 1.5572661113564807, - "learning_rate": 1.854915034726596e-06, - "loss": 1.1826, - "step": 1464 - }, - { - "epoch": 0.19860367382905172, - "grad_norm": 2.0013263125281875, - "learning_rate": 1.8546871415842298e-06, - "loss": 1.2597, - "step": 1465 - }, - { - "epoch": 0.1987392394767166, - "grad_norm": 1.5865046479419145, - "learning_rate": 1.8544590836221947e-06, - "loss": 1.1598, - "step": 1466 - }, - { - "epoch": 0.1988748051243815, - "grad_norm": 1.595637595452128, - "learning_rate": 1.8542308608844704e-06, - "loss": 1.2171, - "step": 1467 - }, - { - "epoch": 0.19901037077204636, - "grad_norm": 1.5870980760306073, - "learning_rate": 1.854002473415067e-06, - "loss": 1.225, - "step": 1468 - }, - { - "epoch": 0.19914593641971123, - "grad_norm": 1.69386209377305, - "learning_rate": 1.853773921258028e-06, - "loss": 1.2268, - "step": 1469 - }, - { - "epoch": 0.19928150206737613, - "grad_norm": 1.6524942510555918, - "learning_rate": 1.8535452044574274e-06, - "loss": 1.2332, - "step": 1470 - }, - { - "epoch": 0.199417067715041, - "grad_norm": 1.4409531546934857, - "learning_rate": 1.8533163230573716e-06, - "loss": 1.2337, - "step": 1471 - }, - { - "epoch": 0.1995526333627059, - "grad_norm": 1.7663722281156902, - "learning_rate": 1.8530872771019984e-06, - "loss": 1.2206, - "step": 1472 - }, - { - "epoch": 0.19968819901037077, - "grad_norm": 1.6505727713358247, - "learning_rate": 1.8528580666354782e-06, - "loss": 1.2214, - "step": 1473 - }, - { - "epoch": 0.19982376465803564, - "grad_norm": 1.607780640518207, - "learning_rate": 1.8526286917020114e-06, - "loss": 1.2247, - "step": 1474 - }, - { - "epoch": 0.19995933030570054, - "grad_norm": 1.8686278620028887, - "learning_rate": 1.852399152345832e-06, - "loss": 1.1859, - "step": 1475 - }, - { - "epoch": 0.2000948959533654, - "grad_norm": 1.6971478087710758, - "learning_rate": 1.8521694486112045e-06, - "loss": 1.2025, - "step": 1476 - }, - { - "epoch": 0.2002304616010303, - "grad_norm": 8.345244654216257, - "learning_rate": 1.851939580542425e-06, - "loss": 1.1966, - "step": 1477 - }, - { - "epoch": 0.20036602724869518, - "grad_norm": 1.6647515177232668, - "learning_rate": 1.8517095481838228e-06, - "loss": 1.216, - "step": 1478 - }, - { - "epoch": 0.20050159289636008, - "grad_norm": 1.499680963000177, - "learning_rate": 1.8514793515797567e-06, - "loss": 1.2472, - "step": 1479 - }, - { - "epoch": 0.20063715854402495, - "grad_norm": 1.7540804653460536, - "learning_rate": 1.8512489907746193e-06, - "loss": 1.2421, - "step": 1480 - }, - { - "epoch": 0.20077272419168982, - "grad_norm": 1.721438621975217, - "learning_rate": 1.851018465812833e-06, - "loss": 1.1973, - "step": 1481 - }, - { - "epoch": 0.20090828983935471, - "grad_norm": 1.6258642392718168, - "learning_rate": 1.8507877767388531e-06, - "loss": 1.2275, - "step": 1482 - }, - { - "epoch": 0.20104385548701958, - "grad_norm": 1.8715188206680318, - "learning_rate": 1.8505569235971663e-06, - "loss": 1.213, - "step": 1483 - }, - { - "epoch": 0.20117942113468448, - "grad_norm": 2.466888309048399, - "learning_rate": 1.8503259064322907e-06, - "loss": 1.2428, - "step": 1484 - }, - { - "epoch": 0.20131498678234935, - "grad_norm": 2.1719231785945476, - "learning_rate": 1.8500947252887759e-06, - "loss": 1.223, - "step": 1485 - }, - { - "epoch": 0.20145055243001422, - "grad_norm": 1.6233843671621229, - "learning_rate": 1.8498633802112039e-06, - "loss": 1.2284, - "step": 1486 - }, - { - "epoch": 0.20158611807767912, - "grad_norm": 1.6635614136871975, - "learning_rate": 1.849631871244187e-06, - "loss": 1.2237, - "step": 1487 - }, - { - "epoch": 0.201721683725344, - "grad_norm": 2.3411804594157104, - "learning_rate": 1.8494001984323706e-06, - "loss": 1.2035, - "step": 1488 - }, - { - "epoch": 0.2018572493730089, - "grad_norm": 1.5795904821003797, - "learning_rate": 1.8491683618204307e-06, - "loss": 1.2218, - "step": 1489 - }, - { - "epoch": 0.20199281502067376, - "grad_norm": 1.6758742692242126, - "learning_rate": 1.848936361453075e-06, - "loss": 1.2017, - "step": 1490 - }, - { - "epoch": 0.20212838066833863, - "grad_norm": 2.8778155508246543, - "learning_rate": 1.8487041973750434e-06, - "loss": 1.2196, - "step": 1491 - }, - { - "epoch": 0.20226394631600353, - "grad_norm": 1.8233660087137162, - "learning_rate": 1.8484718696311063e-06, - "loss": 1.2507, - "step": 1492 - }, - { - "epoch": 0.2023995119636684, - "grad_norm": 1.9087829808650099, - "learning_rate": 1.8482393782660669e-06, - "loss": 1.2239, - "step": 1493 - }, - { - "epoch": 0.2025350776113333, - "grad_norm": 1.8402037864055822, - "learning_rate": 1.8480067233247584e-06, - "loss": 1.1705, - "step": 1494 - }, - { - "epoch": 0.20267064325899817, - "grad_norm": 1.637831807597322, - "learning_rate": 1.8477739048520475e-06, - "loss": 1.186, - "step": 1495 - }, - { - "epoch": 0.20280620890666304, - "grad_norm": 1.4963793505310783, - "learning_rate": 1.847540922892831e-06, - "loss": 1.2343, - "step": 1496 - }, - { - "epoch": 0.20294177455432794, - "grad_norm": 1.5651642181554637, - "learning_rate": 1.8473077774920377e-06, - "loss": 1.2131, - "step": 1497 - }, - { - "epoch": 0.2030773402019928, - "grad_norm": 1.8665762617411188, - "learning_rate": 1.8470744686946276e-06, - "loss": 1.1985, - "step": 1498 - }, - { - "epoch": 0.2032129058496577, - "grad_norm": 1.9371182550227313, - "learning_rate": 1.8468409965455924e-06, - "loss": 1.1995, - "step": 1499 - }, - { - "epoch": 0.20334847149732257, - "grad_norm": 1.5610907609843288, - "learning_rate": 1.8466073610899557e-06, - "loss": 1.1966, - "step": 1500 - }, - { - "epoch": 0.20348403714498747, - "grad_norm": 1.9716712695215797, - "learning_rate": 1.846373562372772e-06, - "loss": 1.1871, - "step": 1501 - }, - { - "epoch": 0.20361960279265234, - "grad_norm": 1.645614424587316, - "learning_rate": 1.846139600439128e-06, - "loss": 1.2632, - "step": 1502 - }, - { - "epoch": 0.2037551684403172, - "grad_norm": 1.6089787174082277, - "learning_rate": 1.845905475334141e-06, - "loss": 1.2345, - "step": 1503 - }, - { - "epoch": 0.2038907340879821, - "grad_norm": 2.0170454313410504, - "learning_rate": 1.84567118710296e-06, - "loss": 1.2238, - "step": 1504 - }, - { - "epoch": 0.20402629973564698, - "grad_norm": 2.1431420252537374, - "learning_rate": 1.8454367357907663e-06, - "loss": 1.2681, - "step": 1505 - }, - { - "epoch": 0.20416186538331188, - "grad_norm": 1.5245170012338731, - "learning_rate": 1.8452021214427713e-06, - "loss": 1.2217, - "step": 1506 - }, - { - "epoch": 0.20429743103097675, - "grad_norm": 1.840721614634791, - "learning_rate": 1.8449673441042188e-06, - "loss": 1.2326, - "step": 1507 - }, - { - "epoch": 0.20443299667864162, - "grad_norm": 2.2335259309065485, - "learning_rate": 1.8447324038203838e-06, - "loss": 1.2407, - "step": 1508 - }, - { - "epoch": 0.20456856232630652, - "grad_norm": 1.5869144023672115, - "learning_rate": 1.8444973006365724e-06, - "loss": 1.2057, - "step": 1509 - }, - { - "epoch": 0.2047041279739714, - "grad_norm": 2.0710730151773276, - "learning_rate": 1.844262034598123e-06, - "loss": 1.2384, - "step": 1510 - }, - { - "epoch": 0.2048396936216363, - "grad_norm": 2.0889404607634625, - "learning_rate": 1.8440266057504044e-06, - "loss": 1.2182, - "step": 1511 - }, - { - "epoch": 0.20497525926930116, - "grad_norm": 1.7642131891462585, - "learning_rate": 1.843791014138817e-06, - "loss": 1.1591, - "step": 1512 - }, - { - "epoch": 0.20511082491696603, - "grad_norm": 1.9746253772660132, - "learning_rate": 1.843555259808793e-06, - "loss": 1.2345, - "step": 1513 - }, - { - "epoch": 0.20524639056463093, - "grad_norm": 2.05351498901923, - "learning_rate": 1.8433193428057958e-06, - "loss": 1.2182, - "step": 1514 - }, - { - "epoch": 0.2053819562122958, - "grad_norm": 1.7040651734399646, - "learning_rate": 1.84308326317532e-06, - "loss": 1.2187, - "step": 1515 - }, - { - "epoch": 0.2055175218599607, - "grad_norm": 6.555490550965792, - "learning_rate": 1.842847020962892e-06, - "loss": 1.2135, - "step": 1516 - }, - { - "epoch": 0.20565308750762556, - "grad_norm": 4.530004862173186, - "learning_rate": 1.842610616214069e-06, - "loss": 1.2326, - "step": 1517 - }, - { - "epoch": 0.20578865315529046, - "grad_norm": 2.1179255363959735, - "learning_rate": 1.8423740489744399e-06, - "loss": 1.2216, - "step": 1518 - }, - { - "epoch": 0.20592421880295533, - "grad_norm": 1.71067410018918, - "learning_rate": 1.8421373192896248e-06, - "loss": 1.2169, - "step": 1519 - }, - { - "epoch": 0.2060597844506202, - "grad_norm": 3.7765904232240173, - "learning_rate": 1.841900427205275e-06, - "loss": 1.2289, - "step": 1520 - }, - { - "epoch": 0.2061953500982851, - "grad_norm": 1.6153259671425968, - "learning_rate": 1.8416633727670732e-06, - "loss": 1.2044, - "step": 1521 - }, - { - "epoch": 0.20633091574594997, - "grad_norm": 2.253939066988127, - "learning_rate": 1.8414261560207337e-06, - "loss": 1.2289, - "step": 1522 - }, - { - "epoch": 0.20646648139361487, - "grad_norm": 1.4792919881276108, - "learning_rate": 1.8411887770120021e-06, - "loss": 1.2055, - "step": 1523 - }, - { - "epoch": 0.20660204704127974, - "grad_norm": 9.172037241917007, - "learning_rate": 1.8409512357866546e-06, - "loss": 1.2303, - "step": 1524 - }, - { - "epoch": 0.2067376126889446, - "grad_norm": 2.6246347218183392, - "learning_rate": 1.8407135323904995e-06, - "loss": 1.1691, - "step": 1525 - }, - { - "epoch": 0.2068731783366095, - "grad_norm": 2.0098110816669217, - "learning_rate": 1.8404756668693758e-06, - "loss": 1.1696, - "step": 1526 - }, - { - "epoch": 0.20700874398427438, - "grad_norm": 2.0997443842574093, - "learning_rate": 1.8402376392691539e-06, - "loss": 1.216, - "step": 1527 - }, - { - "epoch": 0.20714430963193928, - "grad_norm": 1.5396270424978564, - "learning_rate": 1.8399994496357359e-06, - "loss": 1.223, - "step": 1528 - }, - { - "epoch": 0.20727987527960415, - "grad_norm": 11.132097027972966, - "learning_rate": 1.8397610980150544e-06, - "loss": 1.2237, - "step": 1529 - }, - { - "epoch": 0.20741544092726902, - "grad_norm": 2.2335897422201607, - "learning_rate": 1.8395225844530738e-06, - "loss": 1.2037, - "step": 1530 - }, - { - "epoch": 0.20755100657493392, - "grad_norm": 1.7503758665638165, - "learning_rate": 1.8392839089957897e-06, - "loss": 1.2171, - "step": 1531 - }, - { - "epoch": 0.2076865722225988, - "grad_norm": 1.7990343359003957, - "learning_rate": 1.8390450716892288e-06, - "loss": 1.2164, - "step": 1532 - }, - { - "epoch": 0.20782213787026368, - "grad_norm": 2.1807638500099538, - "learning_rate": 1.8388060725794485e-06, - "loss": 1.254, - "step": 1533 - }, - { - "epoch": 0.20795770351792855, - "grad_norm": 1.8761947647881143, - "learning_rate": 1.8385669117125385e-06, - "loss": 1.2012, - "step": 1534 - }, - { - "epoch": 0.20809326916559343, - "grad_norm": 1.3991005515032644, - "learning_rate": 1.8383275891346186e-06, - "loss": 1.2408, - "step": 1535 - }, - { - "epoch": 0.20822883481325832, - "grad_norm": 1.6401167163868693, - "learning_rate": 1.8380881048918404e-06, - "loss": 1.2022, - "step": 1536 - }, - { - "epoch": 0.2083644004609232, - "grad_norm": 2.0765176109563193, - "learning_rate": 1.837848459030387e-06, - "loss": 1.1872, - "step": 1537 - }, - { - "epoch": 0.2084999661085881, - "grad_norm": 1.562235070914228, - "learning_rate": 1.8376086515964716e-06, - "loss": 1.2129, - "step": 1538 - }, - { - "epoch": 0.20863553175625296, - "grad_norm": 1.7230220888420706, - "learning_rate": 1.8373686826363397e-06, - "loss": 1.2033, - "step": 1539 - }, - { - "epoch": 0.20877109740391786, - "grad_norm": 1.7555941069437508, - "learning_rate": 1.837128552196267e-06, - "loss": 1.1789, - "step": 1540 - }, - { - "epoch": 0.20890666305158273, - "grad_norm": 1.6150811884025469, - "learning_rate": 1.8368882603225609e-06, - "loss": 1.2518, - "step": 1541 - }, - { - "epoch": 0.2090422286992476, - "grad_norm": 1.7409068812620287, - "learning_rate": 1.8366478070615596e-06, - "loss": 1.2151, - "step": 1542 - }, - { - "epoch": 0.2091777943469125, - "grad_norm": 2.124381554882674, - "learning_rate": 1.8364071924596328e-06, - "loss": 1.224, - "step": 1543 - }, - { - "epoch": 0.20931335999457737, - "grad_norm": 1.540122056310556, - "learning_rate": 1.8361664165631817e-06, - "loss": 1.1896, - "step": 1544 - }, - { - "epoch": 0.20944892564224227, - "grad_norm": 2.156473230009947, - "learning_rate": 1.8359254794186368e-06, - "loss": 1.2153, - "step": 1545 - }, - { - "epoch": 0.20958449128990714, - "grad_norm": 1.5759770479318151, - "learning_rate": 1.835684381072462e-06, - "loss": 1.2311, - "step": 1546 - }, - { - "epoch": 0.209720056937572, - "grad_norm": 1.911148110436354, - "learning_rate": 1.8354431215711506e-06, - "loss": 1.1562, - "step": 1547 - }, - { - "epoch": 0.2098556225852369, - "grad_norm": 1.4412852715211495, - "learning_rate": 1.8352017009612276e-06, - "loss": 1.1735, - "step": 1548 - }, - { - "epoch": 0.20999118823290178, - "grad_norm": 1.7036443382857072, - "learning_rate": 1.8349601192892498e-06, - "loss": 1.2029, - "step": 1549 - }, - { - "epoch": 0.21012675388056667, - "grad_norm": 1.8248371454403671, - "learning_rate": 1.8347183766018033e-06, - "loss": 1.218, - "step": 1550 - }, - { - "epoch": 0.21026231952823154, - "grad_norm": 2.9805756530113974, - "learning_rate": 1.8344764729455066e-06, - "loss": 1.2506, - "step": 1551 - }, - { - "epoch": 0.21039788517589642, - "grad_norm": 1.6314382238558447, - "learning_rate": 1.8342344083670097e-06, - "loss": 1.2109, - "step": 1552 - }, - { - "epoch": 0.2105334508235613, - "grad_norm": 1.7643587659367264, - "learning_rate": 1.8339921829129916e-06, - "loss": 1.2087, - "step": 1553 - }, - { - "epoch": 0.21066901647122618, - "grad_norm": 2.221104940849202, - "learning_rate": 1.8337497966301645e-06, - "loss": 1.2461, - "step": 1554 - }, - { - "epoch": 0.21080458211889108, - "grad_norm": 2.2168916868475157, - "learning_rate": 1.8335072495652702e-06, - "loss": 1.1404, - "step": 1555 - }, - { - "epoch": 0.21094014776655595, - "grad_norm": 1.4438428207923866, - "learning_rate": 1.8332645417650822e-06, - "loss": 1.2275, - "step": 1556 - }, - { - "epoch": 0.21107571341422085, - "grad_norm": 1.9486399292582846, - "learning_rate": 1.8330216732764049e-06, - "loss": 1.2189, - "step": 1557 - }, - { - "epoch": 0.21121127906188572, - "grad_norm": 1.651497944948543, - "learning_rate": 1.832778644146073e-06, - "loss": 1.1959, - "step": 1558 - }, - { - "epoch": 0.2113468447095506, - "grad_norm": 1.739882139116549, - "learning_rate": 1.8325354544209532e-06, - "loss": 1.1642, - "step": 1559 - }, - { - "epoch": 0.2114824103572155, - "grad_norm": 1.9284959407745148, - "learning_rate": 1.832292104147943e-06, - "loss": 1.1948, - "step": 1560 - }, - { - "epoch": 0.21161797600488036, - "grad_norm": 1.8462793059878042, - "learning_rate": 1.8320485933739697e-06, - "loss": 1.2339, - "step": 1561 - }, - { - "epoch": 0.21175354165254526, - "grad_norm": 3.597084975474117, - "learning_rate": 1.8318049221459932e-06, - "loss": 1.2045, - "step": 1562 - }, - { - "epoch": 0.21188910730021013, - "grad_norm": 3.1520769588179247, - "learning_rate": 1.8315610905110032e-06, - "loss": 1.2243, - "step": 1563 - }, - { - "epoch": 0.212024672947875, - "grad_norm": 2.1083811358216784, - "learning_rate": 1.8313170985160213e-06, - "loss": 1.2008, - "step": 1564 - }, - { - "epoch": 0.2121602385955399, - "grad_norm": 2.1283563381761152, - "learning_rate": 1.8310729462080987e-06, - "loss": 1.2401, - "step": 1565 - }, - { - "epoch": 0.21229580424320477, - "grad_norm": 2.260862047413429, - "learning_rate": 1.8308286336343183e-06, - "loss": 1.2222, - "step": 1566 - }, - { - "epoch": 0.21243136989086966, - "grad_norm": 2.012657634351372, - "learning_rate": 1.8305841608417945e-06, - "loss": 1.1943, - "step": 1567 - }, - { - "epoch": 0.21256693553853453, - "grad_norm": 1.65164426225241, - "learning_rate": 1.8303395278776712e-06, - "loss": 1.2314, - "step": 1568 - }, - { - "epoch": 0.2127025011861994, - "grad_norm": 1.866727372579322, - "learning_rate": 1.830094734789124e-06, - "loss": 1.2328, - "step": 1569 - }, - { - "epoch": 0.2128380668338643, - "grad_norm": 1.5850788107753944, - "learning_rate": 1.82984978162336e-06, - "loss": 1.2272, - "step": 1570 - }, - { - "epoch": 0.21297363248152917, - "grad_norm": 2.0893640554753548, - "learning_rate": 1.8296046684276161e-06, - "loss": 1.247, - "step": 1571 - }, - { - "epoch": 0.21310919812919407, - "grad_norm": 2.6350475485991365, - "learning_rate": 1.8293593952491602e-06, - "loss": 1.2004, - "step": 1572 - }, - { - "epoch": 0.21324476377685894, - "grad_norm": 1.7362131277934978, - "learning_rate": 1.8291139621352913e-06, - "loss": 1.1897, - "step": 1573 - }, - { - "epoch": 0.2133803294245238, - "grad_norm": 1.4074132445337593, - "learning_rate": 1.8288683691333398e-06, - "loss": 1.2026, - "step": 1574 - }, - { - "epoch": 0.2135158950721887, - "grad_norm": 1.622099390491543, - "learning_rate": 1.8286226162906657e-06, - "loss": 1.2099, - "step": 1575 - }, - { - "epoch": 0.21365146071985358, - "grad_norm": 2.144061572448268, - "learning_rate": 1.8283767036546612e-06, - "loss": 1.2586, - "step": 1576 - }, - { - "epoch": 0.21378702636751848, - "grad_norm": 1.610406921811909, - "learning_rate": 1.8281306312727477e-06, - "loss": 1.2425, - "step": 1577 - }, - { - "epoch": 0.21392259201518335, - "grad_norm": 1.6768408922161657, - "learning_rate": 1.8278843991923791e-06, - "loss": 1.2419, - "step": 1578 - }, - { - "epoch": 0.21405815766284825, - "grad_norm": 1.5506307469239886, - "learning_rate": 1.8276380074610392e-06, - "loss": 1.1702, - "step": 1579 - }, - { - "epoch": 0.21419372331051312, - "grad_norm": 1.5472384508060257, - "learning_rate": 1.8273914561262422e-06, - "loss": 1.2223, - "step": 1580 - }, - { - "epoch": 0.214329288958178, - "grad_norm": 1.567723144777334, - "learning_rate": 1.8271447452355343e-06, - "loss": 1.2131, - "step": 1581 - }, - { - "epoch": 0.2144648546058429, - "grad_norm": 1.8999643404208981, - "learning_rate": 1.826897874836491e-06, - "loss": 1.2424, - "step": 1582 - }, - { - "epoch": 0.21460042025350776, - "grad_norm": 1.9854732745463948, - "learning_rate": 1.8266508449767196e-06, - "loss": 1.1983, - "step": 1583 - }, - { - "epoch": 0.21473598590117265, - "grad_norm": 1.7939774213202193, - "learning_rate": 1.8264036557038581e-06, - "loss": 1.2164, - "step": 1584 - }, - { - "epoch": 0.21487155154883752, - "grad_norm": 1.7386587016301276, - "learning_rate": 1.826156307065575e-06, - "loss": 1.2077, - "step": 1585 - }, - { - "epoch": 0.2150071171965024, - "grad_norm": 1.559098206226636, - "learning_rate": 1.8259087991095692e-06, - "loss": 1.2114, - "step": 1586 - }, - { - "epoch": 0.2151426828441673, - "grad_norm": 2.283958481665517, - "learning_rate": 1.8256611318835709e-06, - "loss": 1.1835, - "step": 1587 - }, - { - "epoch": 0.21527824849183216, - "grad_norm": 1.6044377530388516, - "learning_rate": 1.8254133054353406e-06, - "loss": 1.1789, - "step": 1588 - }, - { - "epoch": 0.21541381413949706, - "grad_norm": 2.281700217724224, - "learning_rate": 1.8251653198126697e-06, - "loss": 1.1928, - "step": 1589 - }, - { - "epoch": 0.21554937978716193, - "grad_norm": 1.4197043205795181, - "learning_rate": 1.8249171750633808e-06, - "loss": 1.154, - "step": 1590 - }, - { - "epoch": 0.2156849454348268, - "grad_norm": 1.6472509856827486, - "learning_rate": 1.8246688712353256e-06, - "loss": 1.2325, - "step": 1591 - }, - { - "epoch": 0.2158205110824917, - "grad_norm": 1.9151424997141822, - "learning_rate": 1.8244204083763886e-06, - "loss": 1.2151, - "step": 1592 - }, - { - "epoch": 0.21595607673015657, - "grad_norm": 1.6872629847644691, - "learning_rate": 1.824171786534483e-06, - "loss": 1.1989, - "step": 1593 - }, - { - "epoch": 0.21609164237782147, - "grad_norm": 1.7203183619501972, - "learning_rate": 1.823923005757554e-06, - "loss": 1.2195, - "step": 1594 - }, - { - "epoch": 0.21622720802548634, - "grad_norm": 1.7380644898352045, - "learning_rate": 1.8236740660935772e-06, - "loss": 1.1688, - "step": 1595 - }, - { - "epoch": 0.2163627736731512, - "grad_norm": 1.8182675923533507, - "learning_rate": 1.8234249675905584e-06, - "loss": 1.2465, - "step": 1596 - }, - { - "epoch": 0.2164983393208161, - "grad_norm": 1.4906130532205106, - "learning_rate": 1.8231757102965343e-06, - "loss": 1.1878, - "step": 1597 - }, - { - "epoch": 0.21663390496848098, - "grad_norm": 2.2010266864627246, - "learning_rate": 1.8229262942595724e-06, - "loss": 1.2065, - "step": 1598 - }, - { - "epoch": 0.21676947061614588, - "grad_norm": 6.04184868651582, - "learning_rate": 1.8226767195277702e-06, - "loss": 1.2099, - "step": 1599 - }, - { - "epoch": 0.21690503626381075, - "grad_norm": 1.6964479712475071, - "learning_rate": 1.8224269861492565e-06, - "loss": 1.2017, - "step": 1600 - }, - { - "epoch": 0.21704060191147564, - "grad_norm": 2.9689891706680815, - "learning_rate": 1.8221770941721904e-06, - "loss": 1.1988, - "step": 1601 - }, - { - "epoch": 0.21717616755914051, - "grad_norm": 1.6809818801973095, - "learning_rate": 1.8219270436447615e-06, - "loss": 1.2066, - "step": 1602 - }, - { - "epoch": 0.21731173320680539, - "grad_norm": 1.509991434974565, - "learning_rate": 1.8216768346151904e-06, - "loss": 1.2056, - "step": 1603 - }, - { - "epoch": 0.21744729885447028, - "grad_norm": 1.6976515942138783, - "learning_rate": 1.8214264671317272e-06, - "loss": 1.1842, - "step": 1604 - }, - { - "epoch": 0.21758286450213515, - "grad_norm": 1.8811310387719844, - "learning_rate": 1.821175941242654e-06, - "loss": 1.2177, - "step": 1605 - }, - { - "epoch": 0.21771843014980005, - "grad_norm": 2.159582871370095, - "learning_rate": 1.8209252569962828e-06, - "loss": 1.2417, - "step": 1606 - }, - { - "epoch": 0.21785399579746492, - "grad_norm": 1.8431441214637967, - "learning_rate": 1.8206744144409553e-06, - "loss": 1.2168, - "step": 1607 - }, - { - "epoch": 0.2179895614451298, - "grad_norm": 1.5470372411002222, - "learning_rate": 1.8204234136250452e-06, - "loss": 1.2267, - "step": 1608 - }, - { - "epoch": 0.2181251270927947, - "grad_norm": 1.5221054613018028, - "learning_rate": 1.8201722545969557e-06, - "loss": 1.1908, - "step": 1609 - }, - { - "epoch": 0.21826069274045956, - "grad_norm": 1.9339386726401724, - "learning_rate": 1.8199209374051212e-06, - "loss": 1.1958, - "step": 1610 - }, - { - "epoch": 0.21839625838812446, - "grad_norm": 1.692555567617147, - "learning_rate": 1.8196694620980058e-06, - "loss": 1.2115, - "step": 1611 - }, - { - "epoch": 0.21853182403578933, - "grad_norm": 1.6665501765281199, - "learning_rate": 1.8194178287241047e-06, - "loss": 1.2333, - "step": 1612 - }, - { - "epoch": 0.2186673896834542, - "grad_norm": 1.8455282451532584, - "learning_rate": 1.8191660373319433e-06, - "loss": 1.2046, - "step": 1613 - }, - { - "epoch": 0.2188029553311191, - "grad_norm": 1.5941480342456726, - "learning_rate": 1.8189140879700779e-06, - "loss": 1.1813, - "step": 1614 - }, - { - "epoch": 0.21893852097878397, - "grad_norm": 1.5293579906398593, - "learning_rate": 1.818661980687095e-06, - "loss": 1.1959, - "step": 1615 - }, - { - "epoch": 0.21907408662644887, - "grad_norm": 1.664067029089426, - "learning_rate": 1.8184097155316108e-06, - "loss": 1.2396, - "step": 1616 - }, - { - "epoch": 0.21920965227411374, - "grad_norm": 2.7542588268157995, - "learning_rate": 1.8181572925522732e-06, - "loss": 1.2153, - "step": 1617 - }, - { - "epoch": 0.21934521792177863, - "grad_norm": 2.1578123154700135, - "learning_rate": 1.81790471179776e-06, - "loss": 1.1845, - "step": 1618 - }, - { - "epoch": 0.2194807835694435, - "grad_norm": 1.5011819589909037, - "learning_rate": 1.8176519733167792e-06, - "loss": 1.1819, - "step": 1619 - }, - { - "epoch": 0.21961634921710838, - "grad_norm": 2.4242399691644483, - "learning_rate": 1.8173990771580694e-06, - "loss": 1.2058, - "step": 1620 - }, - { - "epoch": 0.21975191486477327, - "grad_norm": 1.9305810386033582, - "learning_rate": 1.8171460233704e-06, - "loss": 1.2263, - "step": 1621 - }, - { - "epoch": 0.21988748051243814, - "grad_norm": 1.979075536096811, - "learning_rate": 1.8168928120025698e-06, - "loss": 1.219, - "step": 1622 - }, - { - "epoch": 0.22002304616010304, - "grad_norm": 1.74170704713699, - "learning_rate": 1.816639443103409e-06, - "loss": 1.2265, - "step": 1623 - }, - { - "epoch": 0.2201586118077679, - "grad_norm": 4.677919739776005, - "learning_rate": 1.8163859167217778e-06, - "loss": 1.2273, - "step": 1624 - }, - { - "epoch": 0.22029417745543278, - "grad_norm": 3.3972992968313083, - "learning_rate": 1.816132232906567e-06, - "loss": 1.1934, - "step": 1625 - }, - { - "epoch": 0.22042974310309768, - "grad_norm": 1.7621936312130335, - "learning_rate": 1.815878391706697e-06, - "loss": 1.1917, - "step": 1626 - }, - { - "epoch": 0.22056530875076255, - "grad_norm": 2.323534281212155, - "learning_rate": 1.8156243931711194e-06, - "loss": 1.1932, - "step": 1627 - }, - { - "epoch": 0.22070087439842745, - "grad_norm": 2.00024925154576, - "learning_rate": 1.8153702373488157e-06, - "loss": 1.1897, - "step": 1628 - }, - { - "epoch": 0.22083644004609232, - "grad_norm": 1.6218703561558876, - "learning_rate": 1.815115924288798e-06, - "loss": 1.2068, - "step": 1629 - }, - { - "epoch": 0.2209720056937572, - "grad_norm": 1.7038042153241142, - "learning_rate": 1.8148614540401082e-06, - "loss": 1.1875, - "step": 1630 - }, - { - "epoch": 0.2211075713414221, - "grad_norm": 1.4999321828155547, - "learning_rate": 1.8146068266518193e-06, - "loss": 1.184, - "step": 1631 - }, - { - "epoch": 0.22124313698908696, - "grad_norm": 1.5002165086510584, - "learning_rate": 1.8143520421730338e-06, - "loss": 1.2146, - "step": 1632 - }, - { - "epoch": 0.22137870263675186, - "grad_norm": 2.024084773070431, - "learning_rate": 1.8140971006528854e-06, - "loss": 1.2147, - "step": 1633 - }, - { - "epoch": 0.22151426828441673, - "grad_norm": 1.577097388717524, - "learning_rate": 1.8138420021405367e-06, - "loss": 1.1781, - "step": 1634 - }, - { - "epoch": 0.2216498339320816, - "grad_norm": 1.6981537445208257, - "learning_rate": 1.8135867466851824e-06, - "loss": 1.2313, - "step": 1635 - }, - { - "epoch": 0.2217853995797465, - "grad_norm": 1.7633386064242647, - "learning_rate": 1.813331334336046e-06, - "loss": 1.1821, - "step": 1636 - }, - { - "epoch": 0.22192096522741137, - "grad_norm": 1.6132980774946981, - "learning_rate": 1.8130757651423817e-06, - "loss": 1.1947, - "step": 1637 - }, - { - "epoch": 0.22205653087507626, - "grad_norm": 3.459745828829997, - "learning_rate": 1.812820039153474e-06, - "loss": 1.1601, - "step": 1638 - }, - { - "epoch": 0.22219209652274113, - "grad_norm": 1.5651026472328764, - "learning_rate": 1.812564156418638e-06, - "loss": 1.2034, - "step": 1639 - }, - { - "epoch": 0.22232766217040603, - "grad_norm": 3.82426832267302, - "learning_rate": 1.8123081169872184e-06, - "loss": 1.2411, - "step": 1640 - }, - { - "epoch": 0.2224632278180709, - "grad_norm": 2.3754300597167433, - "learning_rate": 1.8120519209085905e-06, - "loss": 1.1913, - "step": 1641 - }, - { - "epoch": 0.22259879346573577, - "grad_norm": 1.4326386968510665, - "learning_rate": 1.8117955682321594e-06, - "loss": 1.2158, - "step": 1642 - }, - { - "epoch": 0.22273435911340067, - "grad_norm": 2.785327652972002, - "learning_rate": 1.811539059007361e-06, - "loss": 1.1938, - "step": 1643 - }, - { - "epoch": 0.22286992476106554, - "grad_norm": 1.7381143392316023, - "learning_rate": 1.8112823932836609e-06, - "loss": 1.1944, - "step": 1644 - }, - { - "epoch": 0.22300549040873044, - "grad_norm": 1.991953347758075, - "learning_rate": 1.8110255711105552e-06, - "loss": 1.2093, - "step": 1645 - }, - { - "epoch": 0.2231410560563953, - "grad_norm": 1.7441380485397322, - "learning_rate": 1.81076859253757e-06, - "loss": 1.1746, - "step": 1646 - }, - { - "epoch": 0.22327662170406018, - "grad_norm": 1.6096621385423762, - "learning_rate": 1.8105114576142615e-06, - "loss": 1.2429, - "step": 1647 - }, - { - "epoch": 0.22341218735172508, - "grad_norm": 1.4444640205791388, - "learning_rate": 1.810254166390216e-06, - "loss": 1.2142, - "step": 1648 - }, - { - "epoch": 0.22354775299938995, - "grad_norm": 2.441873773578424, - "learning_rate": 1.8099967189150505e-06, - "loss": 1.2055, - "step": 1649 - }, - { - "epoch": 0.22368331864705485, - "grad_norm": 1.8839335680738951, - "learning_rate": 1.8097391152384113e-06, - "loss": 1.2116, - "step": 1650 - }, - { - "epoch": 0.22381888429471972, - "grad_norm": 1.447198983726194, - "learning_rate": 1.8094813554099754e-06, - "loss": 1.2151, - "step": 1651 - }, - { - "epoch": 0.2239544499423846, - "grad_norm": 1.6709832235031776, - "learning_rate": 1.80922343947945e-06, - "loss": 1.1931, - "step": 1652 - }, - { - "epoch": 0.22409001559004949, - "grad_norm": 1.5188877017885898, - "learning_rate": 1.808965367496572e-06, - "loss": 1.1907, - "step": 1653 - }, - { - "epoch": 0.22422558123771436, - "grad_norm": 1.748022234056033, - "learning_rate": 1.808707139511108e-06, - "loss": 1.242, - "step": 1654 - }, - { - "epoch": 0.22436114688537925, - "grad_norm": 2.004557560228434, - "learning_rate": 1.808448755572856e-06, - "loss": 1.2201, - "step": 1655 - }, - { - "epoch": 0.22449671253304412, - "grad_norm": 2.791930348607405, - "learning_rate": 1.808190215731643e-06, - "loss": 1.2064, - "step": 1656 - }, - { - "epoch": 0.22463227818070902, - "grad_norm": 1.5331999877580431, - "learning_rate": 1.8079315200373265e-06, - "loss": 1.2305, - "step": 1657 - }, - { - "epoch": 0.2247678438283739, - "grad_norm": 1.6011126697086775, - "learning_rate": 1.8076726685397934e-06, - "loss": 1.2598, - "step": 1658 - }, - { - "epoch": 0.22490340947603876, - "grad_norm": 1.607604012665984, - "learning_rate": 1.8074136612889619e-06, - "loss": 1.2083, - "step": 1659 - }, - { - "epoch": 0.22503897512370366, - "grad_norm": 1.9054429482288957, - "learning_rate": 1.8071544983347791e-06, - "loss": 1.2293, - "step": 1660 - }, - { - "epoch": 0.22517454077136853, - "grad_norm": 1.5984487019449007, - "learning_rate": 1.8068951797272222e-06, - "loss": 1.2025, - "step": 1661 - }, - { - "epoch": 0.22531010641903343, - "grad_norm": 2.129379605367283, - "learning_rate": 1.8066357055162994e-06, - "loss": 1.2065, - "step": 1662 - }, - { - "epoch": 0.2254456720666983, - "grad_norm": 1.900584341832968, - "learning_rate": 1.8063760757520483e-06, - "loss": 1.202, - "step": 1663 - }, - { - "epoch": 0.22558123771436317, - "grad_norm": 2.9919570546051437, - "learning_rate": 1.8061162904845356e-06, - "loss": 1.1652, - "step": 1664 - }, - { - "epoch": 0.22571680336202807, - "grad_norm": 1.5793478282678424, - "learning_rate": 1.80585634976386e-06, - "loss": 1.2039, - "step": 1665 - }, - { - "epoch": 0.22585236900969294, - "grad_norm": 1.7971091848688938, - "learning_rate": 1.8055962536401479e-06, - "loss": 1.2263, - "step": 1666 - }, - { - "epoch": 0.22598793465735784, - "grad_norm": 1.6265570920417962, - "learning_rate": 1.8053360021635572e-06, - "loss": 1.2306, - "step": 1667 - }, - { - "epoch": 0.2261235003050227, - "grad_norm": 2.3622065514647974, - "learning_rate": 1.8050755953842757e-06, - "loss": 1.234, - "step": 1668 - }, - { - "epoch": 0.22625906595268758, - "grad_norm": 1.8138887005133384, - "learning_rate": 1.8048150333525206e-06, - "loss": 1.2004, - "step": 1669 - }, - { - "epoch": 0.22639463160035248, - "grad_norm": 2.1598857715331308, - "learning_rate": 1.8045543161185388e-06, - "loss": 1.2197, - "step": 1670 - }, - { - "epoch": 0.22653019724801735, - "grad_norm": 2.354101209300282, - "learning_rate": 1.8042934437326082e-06, - "loss": 1.2096, - "step": 1671 - }, - { - "epoch": 0.22666576289568224, - "grad_norm": 1.5437047653428955, - "learning_rate": 1.8040324162450355e-06, - "loss": 1.2114, - "step": 1672 - }, - { - "epoch": 0.2268013285433471, - "grad_norm": 2.625082248184305, - "learning_rate": 1.8037712337061582e-06, - "loss": 1.231, - "step": 1673 - }, - { - "epoch": 0.22693689419101198, - "grad_norm": 1.9642766299564542, - "learning_rate": 1.803509896166343e-06, - "loss": 1.2347, - "step": 1674 - }, - { - "epoch": 0.22707245983867688, - "grad_norm": 1.5206744748662435, - "learning_rate": 1.8032484036759866e-06, - "loss": 1.1841, - "step": 1675 - }, - { - "epoch": 0.22720802548634175, - "grad_norm": 1.6165088353155703, - "learning_rate": 1.8029867562855161e-06, - "loss": 1.1874, - "step": 1676 - }, - { - "epoch": 0.22734359113400665, - "grad_norm": 1.681966331734533, - "learning_rate": 1.8027249540453878e-06, - "loss": 1.206, - "step": 1677 - }, - { - "epoch": 0.22747915678167152, - "grad_norm": 1.6984957205583373, - "learning_rate": 1.802462997006089e-06, - "loss": 1.2496, - "step": 1678 - }, - { - "epoch": 0.22761472242933642, - "grad_norm": 1.593306209709851, - "learning_rate": 1.8022008852181351e-06, - "loss": 1.2189, - "step": 1679 - }, - { - "epoch": 0.2277502880770013, - "grad_norm": 1.7226803132480977, - "learning_rate": 1.801938618732073e-06, - "loss": 1.2002, - "step": 1680 - }, - { - "epoch": 0.22788585372466616, - "grad_norm": 1.7366445784058124, - "learning_rate": 1.801676197598478e-06, - "loss": 1.2147, - "step": 1681 - }, - { - "epoch": 0.22802141937233106, - "grad_norm": 1.5158680578171195, - "learning_rate": 1.8014136218679566e-06, - "loss": 1.2397, - "step": 1682 - }, - { - "epoch": 0.22815698501999593, - "grad_norm": 1.7485095621788613, - "learning_rate": 1.8011508915911441e-06, - "loss": 1.1553, - "step": 1683 - }, - { - "epoch": 0.22829255066766083, - "grad_norm": 1.5778678046223609, - "learning_rate": 1.800888006818706e-06, - "loss": 1.2074, - "step": 1684 - }, - { - "epoch": 0.2284281163153257, - "grad_norm": 3.3089925440327947, - "learning_rate": 1.8006249676013377e-06, - "loss": 1.2148, - "step": 1685 - }, - { - "epoch": 0.22856368196299057, - "grad_norm": 1.5306692223401936, - "learning_rate": 1.8003617739897642e-06, - "loss": 1.161, - "step": 1686 - }, - { - "epoch": 0.22869924761065547, - "grad_norm": 2.633873383338012, - "learning_rate": 1.8000984260347401e-06, - "loss": 1.2041, - "step": 1687 - }, - { - "epoch": 0.22883481325832034, - "grad_norm": 3.069460347978959, - "learning_rate": 1.7998349237870506e-06, - "loss": 1.1847, - "step": 1688 - }, - { - "epoch": 0.22897037890598523, - "grad_norm": 2.4260859375786987, - "learning_rate": 1.7995712672975088e-06, - "loss": 1.2024, - "step": 1689 - }, - { - "epoch": 0.2291059445536501, - "grad_norm": 4.529317923278312, - "learning_rate": 1.79930745661696e-06, - "loss": 1.2111, - "step": 1690 - }, - { - "epoch": 0.22924151020131497, - "grad_norm": 1.8021158291553345, - "learning_rate": 1.7990434917962776e-06, - "loss": 1.2091, - "step": 1691 - }, - { - "epoch": 0.22937707584897987, - "grad_norm": 2.767083589345307, - "learning_rate": 1.7987793728863649e-06, - "loss": 1.1838, - "step": 1692 - }, - { - "epoch": 0.22951264149664474, - "grad_norm": 2.084973231171956, - "learning_rate": 1.7985150999381553e-06, - "loss": 1.1868, - "step": 1693 - }, - { - "epoch": 0.22964820714430964, - "grad_norm": 3.862602475650728, - "learning_rate": 1.798250673002612e-06, - "loss": 1.1911, - "step": 1694 - }, - { - "epoch": 0.2297837727919745, - "grad_norm": 1.612358713168583, - "learning_rate": 1.797986092130727e-06, - "loss": 1.2086, - "step": 1695 - }, - { - "epoch": 0.2299193384396394, - "grad_norm": 1.77913259783897, - "learning_rate": 1.7977213573735234e-06, - "loss": 1.1593, - "step": 1696 - }, - { - "epoch": 0.23005490408730428, - "grad_norm": 1.9137547708032894, - "learning_rate": 1.7974564687820526e-06, - "loss": 1.2393, - "step": 1697 - }, - { - "epoch": 0.23019046973496915, - "grad_norm": 2.7372111318217223, - "learning_rate": 1.7971914264073967e-06, - "loss": 1.2138, - "step": 1698 - }, - { - "epoch": 0.23032603538263405, - "grad_norm": 1.9082125181838738, - "learning_rate": 1.796926230300667e-06, - "loss": 1.2221, - "step": 1699 - }, - { - "epoch": 0.23046160103029892, - "grad_norm": 1.3448794426563628, - "learning_rate": 1.7966608805130043e-06, - "loss": 1.1884, - "step": 1700 - }, - { - "epoch": 0.23059716667796382, - "grad_norm": 1.887980307745491, - "learning_rate": 1.7963953770955791e-06, - "loss": 1.1772, - "step": 1701 - }, - { - "epoch": 0.2307327323256287, - "grad_norm": 1.8523347765913138, - "learning_rate": 1.7961297200995917e-06, - "loss": 1.1775, - "step": 1702 - }, - { - "epoch": 0.23086829797329356, - "grad_norm": 1.5608201085120448, - "learning_rate": 1.7958639095762722e-06, - "loss": 1.1793, - "step": 1703 - }, - { - "epoch": 0.23100386362095846, - "grad_norm": 1.5941973850053575, - "learning_rate": 1.79559794557688e-06, - "loss": 1.2048, - "step": 1704 - }, - { - "epoch": 0.23113942926862333, - "grad_norm": 1.5359152745265474, - "learning_rate": 1.795331828152704e-06, - "loss": 1.1675, - "step": 1705 - }, - { - "epoch": 0.23127499491628822, - "grad_norm": 2.1062349941265603, - "learning_rate": 1.7950655573550627e-06, - "loss": 1.2207, - "step": 1706 - }, - { - "epoch": 0.2314105605639531, - "grad_norm": 1.5769604708602056, - "learning_rate": 1.7947991332353048e-06, - "loss": 1.2185, - "step": 1707 - }, - { - "epoch": 0.23154612621161796, - "grad_norm": 1.5213613248341018, - "learning_rate": 1.7945325558448078e-06, - "loss": 1.1756, - "step": 1708 - }, - { - "epoch": 0.23168169185928286, - "grad_norm": 1.9716780437529604, - "learning_rate": 1.7942658252349787e-06, - "loss": 1.1959, - "step": 1709 - }, - { - "epoch": 0.23181725750694773, - "grad_norm": 1.91008605650578, - "learning_rate": 1.7939989414572552e-06, - "loss": 1.196, - "step": 1710 - }, - { - "epoch": 0.23195282315461263, - "grad_norm": 1.9129720633085083, - "learning_rate": 1.7937319045631032e-06, - "loss": 1.1916, - "step": 1711 - }, - { - "epoch": 0.2320883888022775, - "grad_norm": 2.8607253245953155, - "learning_rate": 1.7934647146040185e-06, - "loss": 1.1805, - "step": 1712 - }, - { - "epoch": 0.23222395444994237, - "grad_norm": 2.1118796130412862, - "learning_rate": 1.793197371631527e-06, - "loss": 1.1755, - "step": 1713 - }, - { - "epoch": 0.23235952009760727, - "grad_norm": 1.5980823408314888, - "learning_rate": 1.7929298756971836e-06, - "loss": 1.2285, - "step": 1714 - }, - { - "epoch": 0.23249508574527214, - "grad_norm": 1.6635323161805202, - "learning_rate": 1.7926622268525725e-06, - "loss": 1.1972, - "step": 1715 - }, - { - "epoch": 0.23263065139293704, - "grad_norm": 1.6137798032606994, - "learning_rate": 1.792394425149308e-06, - "loss": 1.1726, - "step": 1716 - }, - { - "epoch": 0.2327662170406019, - "grad_norm": 2.1488517089444867, - "learning_rate": 1.792126470639033e-06, - "loss": 1.1891, - "step": 1717 - }, - { - "epoch": 0.2329017826882668, - "grad_norm": 1.6810790490647396, - "learning_rate": 1.7918583633734212e-06, - "loss": 1.1736, - "step": 1718 - }, - { - "epoch": 0.23303734833593168, - "grad_norm": 1.571631876275798, - "learning_rate": 1.7915901034041744e-06, - "loss": 1.2056, - "step": 1719 - }, - { - "epoch": 0.23317291398359655, - "grad_norm": 1.932655677976464, - "learning_rate": 1.7913216907830248e-06, - "loss": 1.2124, - "step": 1720 - }, - { - "epoch": 0.23330847963126145, - "grad_norm": 1.9050204153521608, - "learning_rate": 1.7910531255617332e-06, - "loss": 1.1958, - "step": 1721 - }, - { - "epoch": 0.23344404527892632, - "grad_norm": 2.252603124524, - "learning_rate": 1.7907844077920905e-06, - "loss": 1.2455, - "step": 1722 - }, - { - "epoch": 0.2335796109265912, - "grad_norm": 2.0949886425250437, - "learning_rate": 1.790515537525917e-06, - "loss": 1.1982, - "step": 1723 - }, - { - "epoch": 0.23371517657425608, - "grad_norm": 1.9383415455256918, - "learning_rate": 1.7902465148150623e-06, - "loss": 1.1766, - "step": 1724 - }, - { - "epoch": 0.23385074222192095, - "grad_norm": 1.6127418578636872, - "learning_rate": 1.7899773397114046e-06, - "loss": 1.1991, - "step": 1725 - }, - { - "epoch": 0.23398630786958585, - "grad_norm": 1.5109002611956859, - "learning_rate": 1.789708012266853e-06, - "loss": 1.2273, - "step": 1726 - }, - { - "epoch": 0.23412187351725072, - "grad_norm": 2.0823966834311505, - "learning_rate": 1.7894385325333444e-06, - "loss": 1.1681, - "step": 1727 - }, - { - "epoch": 0.23425743916491562, - "grad_norm": 1.5794320696903419, - "learning_rate": 1.7891689005628466e-06, - "loss": 1.2318, - "step": 1728 - }, - { - "epoch": 0.2343930048125805, - "grad_norm": 1.8782760432250274, - "learning_rate": 1.7888991164073554e-06, - "loss": 1.2182, - "step": 1729 - }, - { - "epoch": 0.23452857046024536, - "grad_norm": 1.5345817923067055, - "learning_rate": 1.7886291801188968e-06, - "loss": 1.2277, - "step": 1730 - }, - { - "epoch": 0.23466413610791026, - "grad_norm": 1.6130538576422029, - "learning_rate": 1.788359091749526e-06, - "loss": 1.2579, - "step": 1731 - }, - { - "epoch": 0.23479970175557513, - "grad_norm": 1.8952059093929858, - "learning_rate": 1.7880888513513272e-06, - "loss": 1.2334, - "step": 1732 - }, - { - "epoch": 0.23493526740324003, - "grad_norm": 1.9969935681567166, - "learning_rate": 1.7878184589764142e-06, - "loss": 1.241, - "step": 1733 - }, - { - "epoch": 0.2350708330509049, - "grad_norm": 1.669399572405858, - "learning_rate": 1.7875479146769303e-06, - "loss": 1.1892, - "step": 1734 - }, - { - "epoch": 0.23520639869856977, - "grad_norm": 2.961538890073287, - "learning_rate": 1.7872772185050474e-06, - "loss": 1.2032, - "step": 1735 - }, - { - "epoch": 0.23534196434623467, - "grad_norm": 2.502055703493878, - "learning_rate": 1.7870063705129672e-06, - "loss": 1.2078, - "step": 1736 - }, - { - "epoch": 0.23547752999389954, - "grad_norm": 1.5653153080453501, - "learning_rate": 1.786735370752921e-06, - "loss": 1.2293, - "step": 1737 - }, - { - "epoch": 0.23561309564156444, - "grad_norm": 1.5403161878728662, - "learning_rate": 1.7864642192771683e-06, - "loss": 1.2173, - "step": 1738 - }, - { - "epoch": 0.2357486612892293, - "grad_norm": 1.7150678812519777, - "learning_rate": 1.786192916137999e-06, - "loss": 1.2056, - "step": 1739 - }, - { - "epoch": 0.2358842269368942, - "grad_norm": 1.5368865527623417, - "learning_rate": 1.7859214613877316e-06, - "loss": 1.1691, - "step": 1740 - }, - { - "epoch": 0.23601979258455907, - "grad_norm": 1.5488523705781576, - "learning_rate": 1.7856498550787141e-06, - "loss": 1.1953, - "step": 1741 - }, - { - "epoch": 0.23615535823222394, - "grad_norm": 1.4922721206679952, - "learning_rate": 1.7853780972633239e-06, - "loss": 1.179, - "step": 1742 - }, - { - "epoch": 0.23629092387988884, - "grad_norm": 2.1613134248061656, - "learning_rate": 1.7851061879939669e-06, - "loss": 1.2082, - "step": 1743 - }, - { - "epoch": 0.2364264895275537, - "grad_norm": 2.1828051850654546, - "learning_rate": 1.7848341273230786e-06, - "loss": 1.2335, - "step": 1744 - }, - { - "epoch": 0.2365620551752186, - "grad_norm": 2.0082302386401407, - "learning_rate": 1.784561915303124e-06, - "loss": 1.1818, - "step": 1745 - }, - { - "epoch": 0.23669762082288348, - "grad_norm": 1.5342248812923145, - "learning_rate": 1.784289551986597e-06, - "loss": 1.2012, - "step": 1746 - }, - { - "epoch": 0.23683318647054835, - "grad_norm": 2.6614065442501142, - "learning_rate": 1.7840170374260206e-06, - "loss": 1.2011, - "step": 1747 - }, - { - "epoch": 0.23696875211821325, - "grad_norm": 1.6621065729122688, - "learning_rate": 1.7837443716739474e-06, - "loss": 1.189, - "step": 1748 - }, - { - "epoch": 0.23710431776587812, - "grad_norm": 2.251791095587067, - "learning_rate": 1.7834715547829584e-06, - "loss": 1.2043, - "step": 1749 - }, - { - "epoch": 0.23723988341354302, - "grad_norm": 3.1919404392213484, - "learning_rate": 1.7831985868056646e-06, - "loss": 1.1682, - "step": 1750 - }, - { - "epoch": 0.2373754490612079, - "grad_norm": 1.4308438946852087, - "learning_rate": 1.7829254677947054e-06, - "loss": 1.2074, - "step": 1751 - }, - { - "epoch": 0.23751101470887276, - "grad_norm": 2.0664863780825793, - "learning_rate": 1.7826521978027499e-06, - "loss": 1.1878, - "step": 1752 - }, - { - "epoch": 0.23764658035653766, - "grad_norm": 1.588363488616962, - "learning_rate": 1.7823787768824958e-06, - "loss": 1.2233, - "step": 1753 - }, - { - "epoch": 0.23778214600420253, - "grad_norm": 1.9712780021028689, - "learning_rate": 1.7821052050866703e-06, - "loss": 1.2213, - "step": 1754 - }, - { - "epoch": 0.23791771165186743, - "grad_norm": 1.713164839646067, - "learning_rate": 1.7818314824680298e-06, - "loss": 1.2271, - "step": 1755 - }, - { - "epoch": 0.2380532772995323, - "grad_norm": 2.0582344321464836, - "learning_rate": 1.7815576090793592e-06, - "loss": 1.18, - "step": 1756 - }, - { - "epoch": 0.2381888429471972, - "grad_norm": 1.709783999282695, - "learning_rate": 1.781283584973473e-06, - "loss": 1.2608, - "step": 1757 - }, - { - "epoch": 0.23832440859486206, - "grad_norm": 3.3576143130481215, - "learning_rate": 1.781009410203214e-06, - "loss": 1.1656, - "step": 1758 - }, - { - "epoch": 0.23845997424252693, - "grad_norm": 1.5737733190919279, - "learning_rate": 1.7807350848214557e-06, - "loss": 1.2036, - "step": 1759 - }, - { - "epoch": 0.23859553989019183, - "grad_norm": 1.6641856568527742, - "learning_rate": 1.780460608881099e-06, - "loss": 1.2188, - "step": 1760 - }, - { - "epoch": 0.2387311055378567, - "grad_norm": 1.5867628931576359, - "learning_rate": 1.7801859824350743e-06, - "loss": 1.2094, - "step": 1761 - }, - { - "epoch": 0.2388666711855216, - "grad_norm": 1.5714295066589938, - "learning_rate": 1.7799112055363415e-06, - "loss": 1.166, - "step": 1762 - }, - { - "epoch": 0.23900223683318647, - "grad_norm": 1.436096097500095, - "learning_rate": 1.7796362782378887e-06, - "loss": 1.1802, - "step": 1763 - }, - { - "epoch": 0.23913780248085134, - "grad_norm": 1.4277780313203803, - "learning_rate": 1.7793612005927337e-06, - "loss": 1.2173, - "step": 1764 - }, - { - "epoch": 0.23927336812851624, - "grad_norm": 1.6646991629245973, - "learning_rate": 1.7790859726539232e-06, - "loss": 1.1835, - "step": 1765 - }, - { - "epoch": 0.2394089337761811, - "grad_norm": 2.2543976930553735, - "learning_rate": 1.7788105944745325e-06, - "loss": 1.1929, - "step": 1766 - }, - { - "epoch": 0.239544499423846, - "grad_norm": 6.387713840112075, - "learning_rate": 1.7785350661076663e-06, - "loss": 1.1871, - "step": 1767 - }, - { - "epoch": 0.23968006507151088, - "grad_norm": 2.4139263835036613, - "learning_rate": 1.778259387606458e-06, - "loss": 1.2121, - "step": 1768 - }, - { - "epoch": 0.23981563071917575, - "grad_norm": 20.87516555750094, - "learning_rate": 1.7779835590240699e-06, - "loss": 1.1818, - "step": 1769 - }, - { - "epoch": 0.23995119636684065, - "grad_norm": 1.9206608843571586, - "learning_rate": 1.7777075804136938e-06, - "loss": 1.1768, - "step": 1770 - }, - { - "epoch": 0.24008676201450552, - "grad_norm": 2.045805922901558, - "learning_rate": 1.7774314518285492e-06, - "loss": 1.2018, - "step": 1771 - }, - { - "epoch": 0.24022232766217042, - "grad_norm": 2.5713845367875248, - "learning_rate": 1.777155173321886e-06, - "loss": 1.2031, - "step": 1772 - }, - { - "epoch": 0.24035789330983529, - "grad_norm": 1.723989865826696, - "learning_rate": 1.7768787449469823e-06, - "loss": 1.1655, - "step": 1773 - }, - { - "epoch": 0.24049345895750016, - "grad_norm": 1.5934280140319634, - "learning_rate": 1.7766021667571448e-06, - "loss": 1.2068, - "step": 1774 - }, - { - "epoch": 0.24062902460516505, - "grad_norm": 1.807203659933249, - "learning_rate": 1.7763254388057094e-06, - "loss": 1.2257, - "step": 1775 - }, - { - "epoch": 0.24076459025282992, - "grad_norm": 1.47426042796791, - "learning_rate": 1.7760485611460415e-06, - "loss": 1.1919, - "step": 1776 - }, - { - "epoch": 0.24090015590049482, - "grad_norm": 1.500017597255888, - "learning_rate": 1.7757715338315337e-06, - "loss": 1.2326, - "step": 1777 - }, - { - "epoch": 0.2410357215481597, - "grad_norm": 1.8745677117075954, - "learning_rate": 1.7754943569156096e-06, - "loss": 1.2207, - "step": 1778 - }, - { - "epoch": 0.2411712871958246, - "grad_norm": 1.8839605247005642, - "learning_rate": 1.7752170304517202e-06, - "loss": 1.2451, - "step": 1779 - }, - { - "epoch": 0.24130685284348946, - "grad_norm": 1.5916181777925038, - "learning_rate": 1.7749395544933455e-06, - "loss": 1.2075, - "step": 1780 - }, - { - "epoch": 0.24144241849115433, - "grad_norm": 1.8478201458957617, - "learning_rate": 1.7746619290939946e-06, - "loss": 1.1896, - "step": 1781 - }, - { - "epoch": 0.24157798413881923, - "grad_norm": 1.6319989047383878, - "learning_rate": 1.7743841543072055e-06, - "loss": 1.1797, - "step": 1782 - }, - { - "epoch": 0.2417135497864841, - "grad_norm": 1.9817309450200067, - "learning_rate": 1.7741062301865453e-06, - "loss": 1.1604, - "step": 1783 - }, - { - "epoch": 0.241849115434149, - "grad_norm": 2.1406734479760225, - "learning_rate": 1.7738281567856088e-06, - "loss": 1.2062, - "step": 1784 - }, - { - "epoch": 0.24198468108181387, - "grad_norm": 1.4184247001152535, - "learning_rate": 1.7735499341580203e-06, - "loss": 1.2202, - "step": 1785 - }, - { - "epoch": 0.24212024672947874, - "grad_norm": 2.290925986780862, - "learning_rate": 1.7732715623574333e-06, - "loss": 1.1747, - "step": 1786 - }, - { - "epoch": 0.24225581237714364, - "grad_norm": 1.9059915894155868, - "learning_rate": 1.772993041437529e-06, - "loss": 1.1741, - "step": 1787 - }, - { - "epoch": 0.2423913780248085, - "grad_norm": 2.0579543036916523, - "learning_rate": 1.7727143714520184e-06, - "loss": 1.2066, - "step": 1788 - }, - { - "epoch": 0.2425269436724734, - "grad_norm": 1.6285903985022356, - "learning_rate": 1.7724355524546409e-06, - "loss": 1.1801, - "step": 1789 - }, - { - "epoch": 0.24266250932013828, - "grad_norm": 1.682379930427517, - "learning_rate": 1.7721565844991641e-06, - "loss": 1.2031, - "step": 1790 - }, - { - "epoch": 0.24279807496780315, - "grad_norm": 2.09349970654746, - "learning_rate": 1.7718774676393852e-06, - "loss": 1.2128, - "step": 1791 - }, - { - "epoch": 0.24293364061546804, - "grad_norm": 2.1484355957257972, - "learning_rate": 1.7715982019291293e-06, - "loss": 1.1931, - "step": 1792 - }, - { - "epoch": 0.24306920626313291, - "grad_norm": 1.9753447983236694, - "learning_rate": 1.771318787422251e-06, - "loss": 1.2101, - "step": 1793 - }, - { - "epoch": 0.2432047719107978, - "grad_norm": 1.6962573271436958, - "learning_rate": 1.7710392241726328e-06, - "loss": 1.1998, - "step": 1794 - }, - { - "epoch": 0.24334033755846268, - "grad_norm": 1.7980419704907058, - "learning_rate": 1.7707595122341865e-06, - "loss": 1.2386, - "step": 1795 - }, - { - "epoch": 0.24347590320612758, - "grad_norm": 8.613993252694431, - "learning_rate": 1.7704796516608524e-06, - "loss": 1.2127, - "step": 1796 - }, - { - "epoch": 0.24361146885379245, - "grad_norm": 3.5718276657984545, - "learning_rate": 1.7701996425065992e-06, - "loss": 1.1714, - "step": 1797 - }, - { - "epoch": 0.24374703450145732, - "grad_norm": 1.870368356694085, - "learning_rate": 1.7699194848254244e-06, - "loss": 1.202, - "step": 1798 - }, - { - "epoch": 0.24388260014912222, - "grad_norm": 1.5707149587876872, - "learning_rate": 1.7696391786713545e-06, - "loss": 1.2042, - "step": 1799 - }, - { - "epoch": 0.2440181657967871, - "grad_norm": 1.566050580511284, - "learning_rate": 1.769358724098444e-06, - "loss": 1.1919, - "step": 1800 - }, - { - "epoch": 0.244153731444452, - "grad_norm": 1.6689480753183854, - "learning_rate": 1.7690781211607767e-06, - "loss": 1.1982, - "step": 1801 - }, - { - "epoch": 0.24428929709211686, - "grad_norm": 3.0455734368122402, - "learning_rate": 1.7687973699124643e-06, - "loss": 1.185, - "step": 1802 - }, - { - "epoch": 0.24442486273978173, - "grad_norm": 1.7733693962421544, - "learning_rate": 1.7685164704076476e-06, - "loss": 1.1707, - "step": 1803 - }, - { - "epoch": 0.24456042838744663, - "grad_norm": 3.85358938352622, - "learning_rate": 1.768235422700496e-06, - "loss": 1.2599, - "step": 1804 - }, - { - "epoch": 0.2446959940351115, - "grad_norm": 2.9734544688163878, - "learning_rate": 1.767954226845207e-06, - "loss": 1.209, - "step": 1805 - }, - { - "epoch": 0.2448315596827764, - "grad_norm": 1.6322176203724004, - "learning_rate": 1.7676728828960075e-06, - "loss": 1.1638, - "step": 1806 - }, - { - "epoch": 0.24496712533044127, - "grad_norm": 2.8151168345249187, - "learning_rate": 1.7673913909071523e-06, - "loss": 1.2148, - "step": 1807 - }, - { - "epoch": 0.24510269097810614, - "grad_norm": 1.8911683052883512, - "learning_rate": 1.7671097509329242e-06, - "loss": 1.1674, - "step": 1808 - }, - { - "epoch": 0.24523825662577103, - "grad_norm": 1.8360651026447021, - "learning_rate": 1.7668279630276364e-06, - "loss": 1.207, - "step": 1809 - }, - { - "epoch": 0.2453738222734359, - "grad_norm": 1.7404166480993435, - "learning_rate": 1.7665460272456287e-06, - "loss": 1.2138, - "step": 1810 - }, - { - "epoch": 0.2455093879211008, - "grad_norm": 1.731248513004992, - "learning_rate": 1.7662639436412703e-06, - "loss": 1.1933, - "step": 1811 - }, - { - "epoch": 0.24564495356876567, - "grad_norm": 1.7846574779238498, - "learning_rate": 1.7659817122689589e-06, - "loss": 1.1658, - "step": 1812 - }, - { - "epoch": 0.24578051921643054, - "grad_norm": 1.5990289844867998, - "learning_rate": 1.7656993331831208e-06, - "loss": 1.2048, - "step": 1813 - }, - { - "epoch": 0.24591608486409544, - "grad_norm": 2.7871185551958355, - "learning_rate": 1.76541680643821e-06, - "loss": 1.2181, - "step": 1814 - }, - { - "epoch": 0.2460516505117603, - "grad_norm": 1.669064959222726, - "learning_rate": 1.7651341320887102e-06, - "loss": 1.1683, - "step": 1815 - }, - { - "epoch": 0.2461872161594252, - "grad_norm": 1.632013968878007, - "learning_rate": 1.7648513101891325e-06, - "loss": 1.203, - "step": 1816 - }, - { - "epoch": 0.24632278180709008, - "grad_norm": 1.7004497817847526, - "learning_rate": 1.764568340794017e-06, - "loss": 1.2506, - "step": 1817 - }, - { - "epoch": 0.24645834745475498, - "grad_norm": 1.5278590534539576, - "learning_rate": 1.7642852239579323e-06, - "loss": 1.19, - "step": 1818 - }, - { - "epoch": 0.24659391310241985, - "grad_norm": 1.8935000858509914, - "learning_rate": 1.7640019597354747e-06, - "loss": 1.1937, - "step": 1819 - }, - { - "epoch": 0.24672947875008472, - "grad_norm": 1.5194441560760525, - "learning_rate": 1.76371854818127e-06, - "loss": 1.1924, - "step": 1820 - }, - { - "epoch": 0.24686504439774962, - "grad_norm": 1.8005219934015375, - "learning_rate": 1.7634349893499719e-06, - "loss": 1.2207, - "step": 1821 - }, - { - "epoch": 0.2470006100454145, - "grad_norm": 1.990527349272436, - "learning_rate": 1.7631512832962622e-06, - "loss": 1.196, - "step": 1822 - }, - { - "epoch": 0.24713617569307939, - "grad_norm": 2.060578909722346, - "learning_rate": 1.7628674300748511e-06, - "loss": 1.2021, - "step": 1823 - }, - { - "epoch": 0.24727174134074426, - "grad_norm": 1.5460374330086066, - "learning_rate": 1.7625834297404783e-06, - "loss": 1.2007, - "step": 1824 - }, - { - "epoch": 0.24740730698840913, - "grad_norm": 1.6275969963310644, - "learning_rate": 1.7622992823479103e-06, - "loss": 1.1983, - "step": 1825 - }, - { - "epoch": 0.24754287263607402, - "grad_norm": 1.6969775810897163, - "learning_rate": 1.7620149879519431e-06, - "loss": 1.1597, - "step": 1826 - }, - { - "epoch": 0.2476784382837389, - "grad_norm": 1.5664039432194394, - "learning_rate": 1.7617305466074002e-06, - "loss": 1.1873, - "step": 1827 - }, - { - "epoch": 0.2478140039314038, - "grad_norm": 1.7428389619900455, - "learning_rate": 1.7614459583691342e-06, - "loss": 1.2317, - "step": 1828 - }, - { - "epoch": 0.24794956957906866, - "grad_norm": 2.1372815312711837, - "learning_rate": 1.7611612232920258e-06, - "loss": 1.1918, - "step": 1829 - }, - { - "epoch": 0.24808513522673353, - "grad_norm": 1.7520161558403065, - "learning_rate": 1.7608763414309835e-06, - "loss": 1.2, - "step": 1830 - }, - { - "epoch": 0.24822070087439843, - "grad_norm": 2.3755529095277113, - "learning_rate": 1.7605913128409449e-06, - "loss": 1.1475, - "step": 1831 - }, - { - "epoch": 0.2483562665220633, - "grad_norm": 1.5309263233771964, - "learning_rate": 1.7603061375768754e-06, - "loss": 1.1743, - "step": 1832 - }, - { - "epoch": 0.2484918321697282, - "grad_norm": 1.7269799839440687, - "learning_rate": 1.7600208156937688e-06, - "loss": 1.1935, - "step": 1833 - }, - { - "epoch": 0.24862739781739307, - "grad_norm": 1.6732120570165234, - "learning_rate": 1.759735347246647e-06, - "loss": 1.1617, - "step": 1834 - }, - { - "epoch": 0.24876296346505797, - "grad_norm": 1.4891959458162647, - "learning_rate": 1.7594497322905603e-06, - "loss": 1.1989, - "step": 1835 - }, - { - "epoch": 0.24889852911272284, - "grad_norm": 1.6975475816490415, - "learning_rate": 1.759163970880588e-06, - "loss": 1.167, - "step": 1836 - }, - { - "epoch": 0.2490340947603877, - "grad_norm": 2.237393199567373, - "learning_rate": 1.7588780630718358e-06, - "loss": 1.2006, - "step": 1837 - }, - { - "epoch": 0.2491696604080526, - "grad_norm": 1.6064712565819388, - "learning_rate": 1.7585920089194394e-06, - "loss": 1.1902, - "step": 1838 - }, - { - "epoch": 0.24930522605571748, - "grad_norm": 1.9959067513247652, - "learning_rate": 1.7583058084785625e-06, - "loss": 1.2142, - "step": 1839 - }, - { - "epoch": 0.24944079170338238, - "grad_norm": 1.7500628474871012, - "learning_rate": 1.758019461804396e-06, - "loss": 1.1787, - "step": 1840 - }, - { - "epoch": 0.24957635735104725, - "grad_norm": 1.5591458931669742, - "learning_rate": 1.7577329689521596e-06, - "loss": 1.1815, - "step": 1841 - }, - { - "epoch": 0.24971192299871212, - "grad_norm": 1.9415918528662648, - "learning_rate": 1.7574463299771011e-06, - "loss": 1.1723, - "step": 1842 - }, - { - "epoch": 0.24984748864637701, - "grad_norm": 2.3490481854399845, - "learning_rate": 1.7571595449344972e-06, - "loss": 1.1979, - "step": 1843 - }, - { - "epoch": 0.24998305429404188, - "grad_norm": 2.086094533353555, - "learning_rate": 1.7568726138796515e-06, - "loss": 1.2109, - "step": 1844 - }, - { - "epoch": 0.25011861994170675, - "grad_norm": 1.7169559678402486, - "learning_rate": 1.7565855368678965e-06, - "loss": 1.2032, - "step": 1845 - }, - { - "epoch": 0.2502541855893717, - "grad_norm": 2.3590007267406334, - "learning_rate": 1.756298313954593e-06, - "loss": 1.2356, - "step": 1846 - }, - { - "epoch": 0.25038975123703655, - "grad_norm": 1.6563728680264345, - "learning_rate": 1.7560109451951295e-06, - "loss": 1.1719, - "step": 1847 - }, - { - "epoch": 0.2505253168847014, - "grad_norm": 2.076519506943116, - "learning_rate": 1.7557234306449227e-06, - "loss": 1.1914, - "step": 1848 - }, - { - "epoch": 0.2506608825323663, - "grad_norm": 1.7143262198778149, - "learning_rate": 1.7554357703594178e-06, - "loss": 1.2197, - "step": 1849 - }, - { - "epoch": 0.25079644818003116, - "grad_norm": 1.865420801327151, - "learning_rate": 1.7551479643940874e-06, - "loss": 1.1895, - "step": 1850 - }, - { - "epoch": 0.2509320138276961, - "grad_norm": 1.489585723070958, - "learning_rate": 1.7548600128044328e-06, - "loss": 1.1906, - "step": 1851 - }, - { - "epoch": 0.25106757947536096, - "grad_norm": 3.4311812015809053, - "learning_rate": 1.7545719156459835e-06, - "loss": 1.1868, - "step": 1852 - }, - { - "epoch": 0.25120314512302583, - "grad_norm": 1.6129139443062457, - "learning_rate": 1.7542836729742964e-06, - "loss": 1.1882, - "step": 1853 - }, - { - "epoch": 0.2513387107706907, - "grad_norm": 1.5706514257913424, - "learning_rate": 1.753995284844957e-06, - "loss": 1.21, - "step": 1854 - }, - { - "epoch": 0.25147427641835557, - "grad_norm": 4.569138060378896, - "learning_rate": 1.7537067513135787e-06, - "loss": 1.2375, - "step": 1855 - }, - { - "epoch": 0.2516098420660205, - "grad_norm": 1.7913134045219632, - "learning_rate": 1.7534180724358026e-06, - "loss": 1.1788, - "step": 1856 - }, - { - "epoch": 0.25174540771368537, - "grad_norm": 3.2890760601152285, - "learning_rate": 1.7531292482672982e-06, - "loss": 1.2394, - "step": 1857 - }, - { - "epoch": 0.25188097336135024, - "grad_norm": 1.71206368837618, - "learning_rate": 1.7528402788637633e-06, - "loss": 1.1845, - "step": 1858 - }, - { - "epoch": 0.2520165390090151, - "grad_norm": 1.528301103250045, - "learning_rate": 1.7525511642809232e-06, - "loss": 1.184, - "step": 1859 - }, - { - "epoch": 0.25215210465668, - "grad_norm": 1.806588701858939, - "learning_rate": 1.7522619045745312e-06, - "loss": 1.1938, - "step": 1860 - }, - { - "epoch": 0.2522876703043449, - "grad_norm": 1.559793363496674, - "learning_rate": 1.751972499800369e-06, - "loss": 1.1562, - "step": 1861 - }, - { - "epoch": 0.2524232359520098, - "grad_norm": 1.605726718859392, - "learning_rate": 1.7516829500142461e-06, - "loss": 1.1703, - "step": 1862 - }, - { - "epoch": 0.25255880159967464, - "grad_norm": 1.608485927710475, - "learning_rate": 1.7513932552719995e-06, - "loss": 1.1765, - "step": 1863 - }, - { - "epoch": 0.2526943672473395, - "grad_norm": 5.694720899321809, - "learning_rate": 1.7511034156294948e-06, - "loss": 1.1917, - "step": 1864 - }, - { - "epoch": 0.2528299328950044, - "grad_norm": 1.826878690726368, - "learning_rate": 1.7508134311426253e-06, - "loss": 1.1734, - "step": 1865 - }, - { - "epoch": 0.2529654985426693, - "grad_norm": 2.206755246750216, - "learning_rate": 1.750523301867312e-06, - "loss": 1.1864, - "step": 1866 - }, - { - "epoch": 0.2531010641903342, - "grad_norm": 1.8519559895031703, - "learning_rate": 1.7502330278595043e-06, - "loss": 1.2315, - "step": 1867 - }, - { - "epoch": 0.25323662983799905, - "grad_norm": 2.329847785735694, - "learning_rate": 1.7499426091751792e-06, - "loss": 1.191, - "step": 1868 - }, - { - "epoch": 0.2533721954856639, - "grad_norm": 1.8100596771426842, - "learning_rate": 1.7496520458703416e-06, - "loss": 1.2478, - "step": 1869 - }, - { - "epoch": 0.2535077611333288, - "grad_norm": 1.7301729282557006, - "learning_rate": 1.7493613380010244e-06, - "loss": 1.1818, - "step": 1870 - }, - { - "epoch": 0.2536433267809937, - "grad_norm": 1.6595545667590552, - "learning_rate": 1.7490704856232882e-06, - "loss": 1.1962, - "step": 1871 - }, - { - "epoch": 0.2537788924286586, - "grad_norm": 1.77899817473183, - "learning_rate": 1.7487794887932216e-06, - "loss": 1.1976, - "step": 1872 - }, - { - "epoch": 0.25391445807632346, - "grad_norm": 1.5608200297789305, - "learning_rate": 1.7484883475669412e-06, - "loss": 1.2147, - "step": 1873 - }, - { - "epoch": 0.2540500237239883, - "grad_norm": 1.9176783642045296, - "learning_rate": 1.748197062000591e-06, - "loss": 1.1977, - "step": 1874 - }, - { - "epoch": 0.2541855893716532, - "grad_norm": 2.4696813874017667, - "learning_rate": 1.7479056321503436e-06, - "loss": 1.1484, - "step": 1875 - }, - { - "epoch": 0.2543211550193181, - "grad_norm": 1.563125237635046, - "learning_rate": 1.7476140580723984e-06, - "loss": 1.157, - "step": 1876 - }, - { - "epoch": 0.254456720666983, - "grad_norm": 1.8531539890142796, - "learning_rate": 1.7473223398229836e-06, - "loss": 1.2018, - "step": 1877 - }, - { - "epoch": 0.25459228631464786, - "grad_norm": 1.6228733728381488, - "learning_rate": 1.7470304774583542e-06, - "loss": 1.2057, - "step": 1878 - }, - { - "epoch": 0.25472785196231273, - "grad_norm": 1.4814496947019944, - "learning_rate": 1.7467384710347943e-06, - "loss": 1.1746, - "step": 1879 - }, - { - "epoch": 0.2548634176099776, - "grad_norm": 2.2326865337126773, - "learning_rate": 1.7464463206086144e-06, - "loss": 1.2008, - "step": 1880 - }, - { - "epoch": 0.25499898325764253, - "grad_norm": 2.034774868453193, - "learning_rate": 1.7461540262361538e-06, - "loss": 1.203, - "step": 1881 - }, - { - "epoch": 0.2551345489053074, - "grad_norm": 1.4405408078617963, - "learning_rate": 1.7458615879737791e-06, - "loss": 1.2423, - "step": 1882 - }, - { - "epoch": 0.25527011455297227, - "grad_norm": 3.156298832770999, - "learning_rate": 1.7455690058778844e-06, - "loss": 1.1979, - "step": 1883 - }, - { - "epoch": 0.25540568020063714, - "grad_norm": 2.3055070326134324, - "learning_rate": 1.7452762800048924e-06, - "loss": 1.1804, - "step": 1884 - }, - { - "epoch": 0.25554124584830207, - "grad_norm": 2.229454277381315, - "learning_rate": 1.7449834104112525e-06, - "loss": 1.2092, - "step": 1885 - }, - { - "epoch": 0.25567681149596694, - "grad_norm": 1.5652240956628032, - "learning_rate": 1.7446903971534423e-06, - "loss": 1.1865, - "step": 1886 - }, - { - "epoch": 0.2558123771436318, - "grad_norm": 1.6660615301329724, - "learning_rate": 1.7443972402879674e-06, - "loss": 1.1908, - "step": 1887 - }, - { - "epoch": 0.2559479427912967, - "grad_norm": 1.6808585576327417, - "learning_rate": 1.7441039398713605e-06, - "loss": 1.1963, - "step": 1888 - }, - { - "epoch": 0.25608350843896155, - "grad_norm": 1.5467596849211303, - "learning_rate": 1.7438104959601826e-06, - "loss": 1.1519, - "step": 1889 - }, - { - "epoch": 0.2562190740866265, - "grad_norm": 3.69015853324842, - "learning_rate": 1.7435169086110217e-06, - "loss": 1.1797, - "step": 1890 - }, - { - "epoch": 0.25635463973429135, - "grad_norm": 1.7706617480142048, - "learning_rate": 1.743223177880494e-06, - "loss": 1.1908, - "step": 1891 - }, - { - "epoch": 0.2564902053819562, - "grad_norm": 1.6231691497162457, - "learning_rate": 1.742929303825243e-06, - "loss": 1.2308, - "step": 1892 - }, - { - "epoch": 0.2566257710296211, - "grad_norm": 1.9855269091945533, - "learning_rate": 1.7426352865019402e-06, - "loss": 1.1719, - "step": 1893 - }, - { - "epoch": 0.25676133667728596, - "grad_norm": 1.799176782751917, - "learning_rate": 1.7423411259672841e-06, - "loss": 1.1918, - "step": 1894 - }, - { - "epoch": 0.2568969023249509, - "grad_norm": 2.0465651162837526, - "learning_rate": 1.7420468222780017e-06, - "loss": 1.1817, - "step": 1895 - }, - { - "epoch": 0.25703246797261575, - "grad_norm": 1.4893487485090278, - "learning_rate": 1.7417523754908473e-06, - "loss": 1.2212, - "step": 1896 - }, - { - "epoch": 0.2571680336202806, - "grad_norm": 1.890896093423901, - "learning_rate": 1.741457785662602e-06, - "loss": 1.1908, - "step": 1897 - }, - { - "epoch": 0.2573035992679455, - "grad_norm": 1.6175566263019743, - "learning_rate": 1.7411630528500757e-06, - "loss": 1.184, - "step": 1898 - }, - { - "epoch": 0.25743916491561036, - "grad_norm": 1.6520556262686763, - "learning_rate": 1.7408681771101048e-06, - "loss": 1.2404, - "step": 1899 - }, - { - "epoch": 0.2575747305632753, - "grad_norm": 2.1122157217879396, - "learning_rate": 1.740573158499554e-06, - "loss": 1.236, - "step": 1900 - }, - { - "epoch": 0.25771029621094016, - "grad_norm": 1.5213514849602392, - "learning_rate": 1.7402779970753154e-06, - "loss": 1.2032, - "step": 1901 - }, - { - "epoch": 0.25784586185860503, - "grad_norm": 1.9520475976161087, - "learning_rate": 1.7399826928943084e-06, - "loss": 1.1938, - "step": 1902 - }, - { - "epoch": 0.2579814275062699, - "grad_norm": 1.3801054875397702, - "learning_rate": 1.7396872460134805e-06, - "loss": 1.2022, - "step": 1903 - }, - { - "epoch": 0.25811699315393477, - "grad_norm": 1.9008585718455056, - "learning_rate": 1.7393916564898055e-06, - "loss": 1.2163, - "step": 1904 - }, - { - "epoch": 0.2582525588015997, - "grad_norm": 1.5983095060650903, - "learning_rate": 1.739095924380286e-06, - "loss": 1.1785, - "step": 1905 - }, - { - "epoch": 0.25838812444926457, - "grad_norm": 2.228462564286304, - "learning_rate": 1.7388000497419518e-06, - "loss": 1.2412, - "step": 1906 - }, - { - "epoch": 0.25852369009692944, - "grad_norm": 1.6483064746175413, - "learning_rate": 1.7385040326318597e-06, - "loss": 1.1703, - "step": 1907 - }, - { - "epoch": 0.2586592557445943, - "grad_norm": 1.5791599364332414, - "learning_rate": 1.738207873107094e-06, - "loss": 1.1739, - "step": 1908 - }, - { - "epoch": 0.2587948213922592, - "grad_norm": 2.0036923709334964, - "learning_rate": 1.7379115712247675e-06, - "loss": 1.2011, - "step": 1909 - }, - { - "epoch": 0.2589303870399241, - "grad_norm": 1.4852610218076565, - "learning_rate": 1.7376151270420186e-06, - "loss": 1.1761, - "step": 1910 - }, - { - "epoch": 0.259065952687589, - "grad_norm": 1.3702449279427933, - "learning_rate": 1.737318540616015e-06, - "loss": 1.1806, - "step": 1911 - }, - { - "epoch": 0.25920151833525384, - "grad_norm": 2.119310443418174, - "learning_rate": 1.7370218120039512e-06, - "loss": 1.1565, - "step": 1912 - }, - { - "epoch": 0.2593370839829187, - "grad_norm": 1.4777002582062737, - "learning_rate": 1.7367249412630484e-06, - "loss": 1.1854, - "step": 1913 - }, - { - "epoch": 0.2594726496305836, - "grad_norm": 2.2001975424656997, - "learning_rate": 1.7364279284505564e-06, - "loss": 1.1993, - "step": 1914 - }, - { - "epoch": 0.2596082152782485, - "grad_norm": 2.458182272053733, - "learning_rate": 1.736130773623751e-06, - "loss": 1.1365, - "step": 1915 - }, - { - "epoch": 0.2597437809259134, - "grad_norm": 1.562020078578069, - "learning_rate": 1.7358334768399368e-06, - "loss": 1.2149, - "step": 1916 - }, - { - "epoch": 0.25987934657357825, - "grad_norm": 1.6105973046841557, - "learning_rate": 1.7355360381564449e-06, - "loss": 1.2031, - "step": 1917 - }, - { - "epoch": 0.2600149122212431, - "grad_norm": 1.5189268595765233, - "learning_rate": 1.7352384576306336e-06, - "loss": 1.1936, - "step": 1918 - }, - { - "epoch": 0.260150477868908, - "grad_norm": 1.880591860972907, - "learning_rate": 1.7349407353198898e-06, - "loss": 1.2086, - "step": 1919 - }, - { - "epoch": 0.2602860435165729, - "grad_norm": 1.6743979957835917, - "learning_rate": 1.7346428712816262e-06, - "loss": 1.1613, - "step": 1920 - }, - { - "epoch": 0.2604216091642378, - "grad_norm": 1.9872728341646062, - "learning_rate": 1.734344865573284e-06, - "loss": 1.2136, - "step": 1921 - }, - { - "epoch": 0.26055717481190266, - "grad_norm": 2.268637977879567, - "learning_rate": 1.734046718252331e-06, - "loss": 1.1816, - "step": 1922 - }, - { - "epoch": 0.26069274045956753, - "grad_norm": 1.8785686317423995, - "learning_rate": 1.7337484293762627e-06, - "loss": 1.1635, - "step": 1923 - }, - { - "epoch": 0.26082830610723245, - "grad_norm": 1.6830443513280406, - "learning_rate": 1.7334499990026014e-06, - "loss": 1.1738, - "step": 1924 - }, - { - "epoch": 0.2609638717548973, - "grad_norm": 1.5578132445282213, - "learning_rate": 1.7331514271888973e-06, - "loss": 1.1892, - "step": 1925 - }, - { - "epoch": 0.2610994374025622, - "grad_norm": 1.5266502040744918, - "learning_rate": 1.7328527139927278e-06, - "loss": 1.1657, - "step": 1926 - }, - { - "epoch": 0.26123500305022707, - "grad_norm": 1.653365193232319, - "learning_rate": 1.7325538594716971e-06, - "loss": 1.199, - "step": 1927 - }, - { - "epoch": 0.26137056869789194, - "grad_norm": 2.9146467579667625, - "learning_rate": 1.7322548636834372e-06, - "loss": 1.2631, - "step": 1928 - }, - { - "epoch": 0.26150613434555686, - "grad_norm": 9.030360399092245, - "learning_rate": 1.7319557266856067e-06, - "loss": 1.1873, - "step": 1929 - }, - { - "epoch": 0.26164169999322173, - "grad_norm": 1.8895664567206647, - "learning_rate": 1.731656448535892e-06, - "loss": 1.1889, - "step": 1930 - }, - { - "epoch": 0.2617772656408866, - "grad_norm": 1.69212756737808, - "learning_rate": 1.7313570292920065e-06, - "loss": 1.1456, - "step": 1931 - }, - { - "epoch": 0.2619128312885515, - "grad_norm": 1.55934895472438, - "learning_rate": 1.731057469011691e-06, - "loss": 1.1954, - "step": 1932 - }, - { - "epoch": 0.26204839693621634, - "grad_norm": 1.4238402245932757, - "learning_rate": 1.7307577677527135e-06, - "loss": 1.2062, - "step": 1933 - }, - { - "epoch": 0.26218396258388127, - "grad_norm": 1.7953814389761782, - "learning_rate": 1.7304579255728684e-06, - "loss": 1.2016, - "step": 1934 - }, - { - "epoch": 0.26231952823154614, - "grad_norm": 1.4109062293297472, - "learning_rate": 1.7301579425299782e-06, - "loss": 1.1712, - "step": 1935 - }, - { - "epoch": 0.262455093879211, - "grad_norm": 2.148570769901404, - "learning_rate": 1.7298578186818925e-06, - "loss": 1.1801, - "step": 1936 - }, - { - "epoch": 0.2625906595268759, - "grad_norm": 1.610953342059103, - "learning_rate": 1.7295575540864875e-06, - "loss": 1.1946, - "step": 1937 - }, - { - "epoch": 0.26272622517454075, - "grad_norm": 2.5846969144524516, - "learning_rate": 1.729257148801667e-06, - "loss": 1.1541, - "step": 1938 - }, - { - "epoch": 0.2628617908222057, - "grad_norm": 1.630289677762051, - "learning_rate": 1.7289566028853616e-06, - "loss": 1.1656, - "step": 1939 - }, - { - "epoch": 0.26299735646987055, - "grad_norm": 1.6695357196690213, - "learning_rate": 1.7286559163955297e-06, - "loss": 1.2318, - "step": 1940 - }, - { - "epoch": 0.2631329221175354, - "grad_norm": 1.5489853930953343, - "learning_rate": 1.7283550893901557e-06, - "loss": 1.1972, - "step": 1941 - }, - { - "epoch": 0.2632684877652003, - "grad_norm": 1.8694701251980057, - "learning_rate": 1.728054121927252e-06, - "loss": 1.1545, - "step": 1942 - }, - { - "epoch": 0.26340405341286516, - "grad_norm": 2.2065872356442786, - "learning_rate": 1.727753014064858e-06, - "loss": 1.2294, - "step": 1943 - }, - { - "epoch": 0.2635396190605301, - "grad_norm": 1.7749296194891162, - "learning_rate": 1.7274517658610397e-06, - "loss": 1.1787, - "step": 1944 - }, - { - "epoch": 0.26367518470819495, - "grad_norm": 1.5695302537397104, - "learning_rate": 1.7271503773738906e-06, - "loss": 1.1754, - "step": 1945 - }, - { - "epoch": 0.2638107503558598, - "grad_norm": 1.472538467148062, - "learning_rate": 1.7268488486615307e-06, - "loss": 1.1677, - "step": 1946 - }, - { - "epoch": 0.2639463160035247, - "grad_norm": 2.7061770699601477, - "learning_rate": 1.726547179782108e-06, - "loss": 1.1804, - "step": 1947 - }, - { - "epoch": 0.26408188165118957, - "grad_norm": 2.5972446727747522, - "learning_rate": 1.7262453707937964e-06, - "loss": 1.1761, - "step": 1948 - }, - { - "epoch": 0.2642174472988545, - "grad_norm": 1.5812911586825953, - "learning_rate": 1.725943421754798e-06, - "loss": 1.2051, - "step": 1949 - }, - { - "epoch": 0.26435301294651936, - "grad_norm": 1.7871580041661546, - "learning_rate": 1.7256413327233408e-06, - "loss": 1.1811, - "step": 1950 - }, - { - "epoch": 0.26448857859418423, - "grad_norm": 2.2977873622357703, - "learning_rate": 1.7253391037576806e-06, - "loss": 1.1679, - "step": 1951 - }, - { - "epoch": 0.2646241442418491, - "grad_norm": 1.4162446167856595, - "learning_rate": 1.7250367349160994e-06, - "loss": 1.1833, - "step": 1952 - }, - { - "epoch": 0.26475970988951397, - "grad_norm": 1.6948537226921172, - "learning_rate": 1.724734226256907e-06, - "loss": 1.2328, - "step": 1953 - }, - { - "epoch": 0.2648952755371789, - "grad_norm": 1.9185820877845787, - "learning_rate": 1.7244315778384403e-06, - "loss": 1.199, - "step": 1954 - }, - { - "epoch": 0.26503084118484377, - "grad_norm": 3.940088186294935, - "learning_rate": 1.7241287897190616e-06, - "loss": 1.1909, - "step": 1955 - }, - { - "epoch": 0.26516640683250864, - "grad_norm": 1.9741516417306701, - "learning_rate": 1.7238258619571616e-06, - "loss": 1.2153, - "step": 1956 - }, - { - "epoch": 0.2653019724801735, - "grad_norm": 1.659230328574816, - "learning_rate": 1.7235227946111582e-06, - "loss": 1.1843, - "step": 1957 - }, - { - "epoch": 0.2654375381278384, - "grad_norm": 1.4416609341995261, - "learning_rate": 1.7232195877394948e-06, - "loss": 1.2289, - "step": 1958 - }, - { - "epoch": 0.2655731037755033, - "grad_norm": 1.999871336464743, - "learning_rate": 1.7229162414006426e-06, - "loss": 1.2091, - "step": 1959 - }, - { - "epoch": 0.2657086694231682, - "grad_norm": 1.6407417007496632, - "learning_rate": 1.7226127556530997e-06, - "loss": 1.201, - "step": 1960 - }, - { - "epoch": 0.26584423507083305, - "grad_norm": 1.6533415972684002, - "learning_rate": 1.7223091305553905e-06, - "loss": 1.2276, - "step": 1961 - }, - { - "epoch": 0.2659798007184979, - "grad_norm": 1.668007442997153, - "learning_rate": 1.7220053661660673e-06, - "loss": 1.1978, - "step": 1962 - }, - { - "epoch": 0.2661153663661628, - "grad_norm": 1.9750930637928574, - "learning_rate": 1.7217014625437085e-06, - "loss": 1.1756, - "step": 1963 - }, - { - "epoch": 0.2662509320138277, - "grad_norm": 1.8416910598082321, - "learning_rate": 1.721397419746919e-06, - "loss": 1.1984, - "step": 1964 - }, - { - "epoch": 0.2663864976614926, - "grad_norm": 1.6742950333274575, - "learning_rate": 1.721093237834332e-06, - "loss": 1.1772, - "step": 1965 - }, - { - "epoch": 0.26652206330915745, - "grad_norm": 1.7342107952510437, - "learning_rate": 1.7207889168646056e-06, - "loss": 1.1949, - "step": 1966 - }, - { - "epoch": 0.2666576289568223, - "grad_norm": 1.5776670772389574, - "learning_rate": 1.7204844568964262e-06, - "loss": 1.1836, - "step": 1967 - }, - { - "epoch": 0.26679319460448725, - "grad_norm": 2.4337526266627307, - "learning_rate": 1.7201798579885067e-06, - "loss": 1.1801, - "step": 1968 - }, - { - "epoch": 0.2669287602521521, - "grad_norm": 1.5401607386204241, - "learning_rate": 1.7198751201995862e-06, - "loss": 1.2301, - "step": 1969 - }, - { - "epoch": 0.267064325899817, - "grad_norm": 1.97544889062139, - "learning_rate": 1.7195702435884312e-06, - "loss": 1.1522, - "step": 1970 - }, - { - "epoch": 0.26719989154748186, - "grad_norm": 1.3907376262451205, - "learning_rate": 1.7192652282138346e-06, - "loss": 1.1853, - "step": 1971 - }, - { - "epoch": 0.26733545719514673, - "grad_norm": 1.6056729771749296, - "learning_rate": 1.7189600741346164e-06, - "loss": 1.186, - "step": 1972 - }, - { - "epoch": 0.26747102284281166, - "grad_norm": 1.6254880721500145, - "learning_rate": 1.7186547814096232e-06, - "loss": 1.192, - "step": 1973 - }, - { - "epoch": 0.2676065884904765, - "grad_norm": 1.8042638019186525, - "learning_rate": 1.7183493500977275e-06, - "loss": 1.2228, - "step": 1974 - }, - { - "epoch": 0.2677421541381414, - "grad_norm": 2.1272322029119954, - "learning_rate": 1.7180437802578302e-06, - "loss": 1.2055, - "step": 1975 - }, - { - "epoch": 0.26787771978580627, - "grad_norm": 1.4275244900274735, - "learning_rate": 1.717738071948858e-06, - "loss": 1.188, - "step": 1976 - }, - { - "epoch": 0.26801328543347114, - "grad_norm": 1.8545478393390604, - "learning_rate": 1.7174322252297638e-06, - "loss": 1.2351, - "step": 1977 - }, - { - "epoch": 0.26814885108113606, - "grad_norm": 1.4872109487518286, - "learning_rate": 1.7171262401595282e-06, - "loss": 1.1411, - "step": 1978 - }, - { - "epoch": 0.26828441672880093, - "grad_norm": 1.6750643679294117, - "learning_rate": 1.7168201167971579e-06, - "loss": 1.1697, - "step": 1979 - }, - { - "epoch": 0.2684199823764658, - "grad_norm": 1.696007116366721, - "learning_rate": 1.7165138552016861e-06, - "loss": 1.2039, - "step": 1980 - }, - { - "epoch": 0.2685555480241307, - "grad_norm": 1.4211552375257703, - "learning_rate": 1.7162074554321736e-06, - "loss": 1.1858, - "step": 1981 - }, - { - "epoch": 0.26869111367179555, - "grad_norm": 1.5223419114179875, - "learning_rate": 1.7159009175477061e-06, - "loss": 1.1931, - "step": 1982 - }, - { - "epoch": 0.26882667931946047, - "grad_norm": 1.6713043753262893, - "learning_rate": 1.715594241607398e-06, - "loss": 1.1766, - "step": 1983 - }, - { - "epoch": 0.26896224496712534, - "grad_norm": 2.004327858016542, - "learning_rate": 1.7152874276703888e-06, - "loss": 1.2229, - "step": 1984 - }, - { - "epoch": 0.2690978106147902, - "grad_norm": 1.5088331168131832, - "learning_rate": 1.7149804757958456e-06, - "loss": 1.1647, - "step": 1985 - }, - { - "epoch": 0.2692333762624551, - "grad_norm": 4.393546907090314, - "learning_rate": 1.714673386042961e-06, - "loss": 1.1888, - "step": 1986 - }, - { - "epoch": 0.26936894191011995, - "grad_norm": 2.142208610405135, - "learning_rate": 1.7143661584709553e-06, - "loss": 1.2261, - "step": 1987 - }, - { - "epoch": 0.2695045075577849, - "grad_norm": 1.6431224218901093, - "learning_rate": 1.714058793139075e-06, - "loss": 1.1691, - "step": 1988 - }, - { - "epoch": 0.26964007320544975, - "grad_norm": 1.9581167821880678, - "learning_rate": 1.7137512901065924e-06, - "loss": 1.2012, - "step": 1989 - }, - { - "epoch": 0.2697756388531146, - "grad_norm": 1.936656157322999, - "learning_rate": 1.713443649432808e-06, - "loss": 1.1963, - "step": 1990 - }, - { - "epoch": 0.2699112045007795, - "grad_norm": 1.6017263986061223, - "learning_rate": 1.7131358711770472e-06, - "loss": 1.1818, - "step": 1991 - }, - { - "epoch": 0.27004677014844436, - "grad_norm": 1.4596365210906028, - "learning_rate": 1.7128279553986626e-06, - "loss": 1.2157, - "step": 1992 - }, - { - "epoch": 0.2701823357961093, - "grad_norm": 1.5463918163931358, - "learning_rate": 1.7125199021570339e-06, - "loss": 1.1643, - "step": 1993 - }, - { - "epoch": 0.27031790144377416, - "grad_norm": 2.598697279830573, - "learning_rate": 1.712211711511566e-06, - "loss": 1.1969, - "step": 1994 - }, - { - "epoch": 0.270453467091439, - "grad_norm": 5.697155207364408, - "learning_rate": 1.7119033835216916e-06, - "loss": 1.1602, - "step": 1995 - }, - { - "epoch": 0.2705890327391039, - "grad_norm": 1.4922258914912339, - "learning_rate": 1.7115949182468693e-06, - "loss": 1.1798, - "step": 1996 - }, - { - "epoch": 0.27072459838676877, - "grad_norm": 3.8196694220345786, - "learning_rate": 1.7112863157465838e-06, - "loss": 1.1809, - "step": 1997 - }, - { - "epoch": 0.2708601640344337, - "grad_norm": 2.4054557958232308, - "learning_rate": 1.7109775760803466e-06, - "loss": 1.1768, - "step": 1998 - }, - { - "epoch": 0.27099572968209856, - "grad_norm": 1.67102754116193, - "learning_rate": 1.7106686993076962e-06, - "loss": 1.1802, - "step": 1999 - }, - { - "epoch": 0.27113129532976343, - "grad_norm": 1.5835503580826054, - "learning_rate": 1.710359685488197e-06, - "loss": 1.1893, - "step": 2000 - }, - { - "epoch": 0.2712668609774283, - "grad_norm": 1.3898760657863136, - "learning_rate": 1.7100505346814396e-06, - "loss": 1.1888, - "step": 2001 - }, - { - "epoch": 0.2714024266250932, - "grad_norm": 1.7653024645489073, - "learning_rate": 1.709741246947041e-06, - "loss": 1.1528, - "step": 2002 - }, - { - "epoch": 0.2715379922727581, - "grad_norm": 1.4898545875198783, - "learning_rate": 1.709431822344646e-06, - "loss": 1.1568, - "step": 2003 - }, - { - "epoch": 0.27167355792042297, - "grad_norm": 1.4750672539757093, - "learning_rate": 1.7091222609339234e-06, - "loss": 1.1762, - "step": 2004 - }, - { - "epoch": 0.27180912356808784, - "grad_norm": 15.037890770712737, - "learning_rate": 1.7088125627745704e-06, - "loss": 1.1656, - "step": 2005 - }, - { - "epoch": 0.2719446892157527, - "grad_norm": 1.640911715201682, - "learning_rate": 1.7085027279263098e-06, - "loss": 1.1885, - "step": 2006 - }, - { - "epoch": 0.27208025486341764, - "grad_norm": 2.2533330416567585, - "learning_rate": 1.7081927564488908e-06, - "loss": 1.2016, - "step": 2007 - }, - { - "epoch": 0.2722158205110825, - "grad_norm": 1.5611126126275892, - "learning_rate": 1.7078826484020886e-06, - "loss": 1.2347, - "step": 2008 - }, - { - "epoch": 0.2723513861587474, - "grad_norm": 1.4630660388287986, - "learning_rate": 1.7075724038457053e-06, - "loss": 1.1271, - "step": 2009 - }, - { - "epoch": 0.27248695180641225, - "grad_norm": 1.715012287834882, - "learning_rate": 1.7072620228395693e-06, - "loss": 1.1695, - "step": 2010 - }, - { - "epoch": 0.2726225174540771, - "grad_norm": 1.8098977198841975, - "learning_rate": 1.7069515054435351e-06, - "loss": 1.2131, - "step": 2011 - }, - { - "epoch": 0.27275808310174204, - "grad_norm": 1.814793796728554, - "learning_rate": 1.7066408517174832e-06, - "loss": 1.2183, - "step": 2012 - }, - { - "epoch": 0.2728936487494069, - "grad_norm": 1.6197766881787514, - "learning_rate": 1.706330061721321e-06, - "loss": 1.1772, - "step": 2013 - }, - { - "epoch": 0.2730292143970718, - "grad_norm": 2.2079284857824, - "learning_rate": 1.7060191355149817e-06, - "loss": 1.1769, - "step": 2014 - }, - { - "epoch": 0.27316478004473665, - "grad_norm": 2.657007119554157, - "learning_rate": 1.7057080731584252e-06, - "loss": 1.2444, - "step": 2015 - }, - { - "epoch": 0.2733003456924015, - "grad_norm": 1.9038406041899838, - "learning_rate": 1.7053968747116374e-06, - "loss": 1.1678, - "step": 2016 - }, - { - "epoch": 0.27343591134006645, - "grad_norm": 1.4638568466325355, - "learning_rate": 1.7050855402346303e-06, - "loss": 1.1553, - "step": 2017 - }, - { - "epoch": 0.2735714769877313, - "grad_norm": 1.5759010306113495, - "learning_rate": 1.7047740697874425e-06, - "loss": 1.1912, - "step": 2018 - }, - { - "epoch": 0.2737070426353962, - "grad_norm": 1.942771339768464, - "learning_rate": 1.7044624634301382e-06, - "loss": 1.2057, - "step": 2019 - }, - { - "epoch": 0.27384260828306106, - "grad_norm": 1.8149382338391082, - "learning_rate": 1.7041507212228088e-06, - "loss": 1.178, - "step": 2020 - }, - { - "epoch": 0.27397817393072593, - "grad_norm": 1.7434494756144343, - "learning_rate": 1.7038388432255709e-06, - "loss": 1.1832, - "step": 2021 - }, - { - "epoch": 0.27411373957839086, - "grad_norm": 1.5760589969470857, - "learning_rate": 1.7035268294985677e-06, - "loss": 1.2014, - "step": 2022 - }, - { - "epoch": 0.27424930522605573, - "grad_norm": 1.6174131772668392, - "learning_rate": 1.703214680101969e-06, - "loss": 1.1904, - "step": 2023 - }, - { - "epoch": 0.2743848708737206, - "grad_norm": 2.1288264063097384, - "learning_rate": 1.70290239509597e-06, - "loss": 1.1987, - "step": 2024 - }, - { - "epoch": 0.27452043652138547, - "grad_norm": 1.6448856810238437, - "learning_rate": 1.7025899745407925e-06, - "loss": 1.1788, - "step": 2025 - }, - { - "epoch": 0.27465600216905034, - "grad_norm": 1.7298974951110495, - "learning_rate": 1.7022774184966845e-06, - "loss": 1.1861, - "step": 2026 - }, - { - "epoch": 0.27479156781671527, - "grad_norm": 1.8391302030430474, - "learning_rate": 1.7019647270239194e-06, - "loss": 1.2012, - "step": 2027 - }, - { - "epoch": 0.27492713346438014, - "grad_norm": 2.3091252882676665, - "learning_rate": 1.7016519001827977e-06, - "loss": 1.2049, - "step": 2028 - }, - { - "epoch": 0.275062699112045, - "grad_norm": 1.5317250361791064, - "learning_rate": 1.7013389380336458e-06, - "loss": 1.206, - "step": 2029 - }, - { - "epoch": 0.2751982647597099, - "grad_norm": 1.6881415669918651, - "learning_rate": 1.7010258406368157e-06, - "loss": 1.2019, - "step": 2030 - }, - { - "epoch": 0.27533383040737475, - "grad_norm": 3.378315573417716, - "learning_rate": 1.7007126080526857e-06, - "loss": 1.1634, - "step": 2031 - }, - { - "epoch": 0.2754693960550397, - "grad_norm": 1.6460833025406625, - "learning_rate": 1.7003992403416603e-06, - "loss": 1.2428, - "step": 2032 - }, - { - "epoch": 0.27560496170270454, - "grad_norm": 3.5171393792539116, - "learning_rate": 1.70008573756417e-06, - "loss": 1.1731, - "step": 2033 - }, - { - "epoch": 0.2757405273503694, - "grad_norm": 1.8206227223668643, - "learning_rate": 1.6997720997806714e-06, - "loss": 1.2453, - "step": 2034 - }, - { - "epoch": 0.2758760929980343, - "grad_norm": 2.3371221758725427, - "learning_rate": 1.699458327051647e-06, - "loss": 1.2155, - "step": 2035 - }, - { - "epoch": 0.27601165864569915, - "grad_norm": 2.6366083846899664, - "learning_rate": 1.6991444194376054e-06, - "loss": 1.2362, - "step": 2036 - }, - { - "epoch": 0.2761472242933641, - "grad_norm": 1.5174168858232495, - "learning_rate": 1.6988303769990813e-06, - "loss": 1.1502, - "step": 2037 - }, - { - "epoch": 0.27628278994102895, - "grad_norm": 1.949013473519719, - "learning_rate": 1.6985161997966352e-06, - "loss": 1.1827, - "step": 2038 - }, - { - "epoch": 0.2764183555886938, - "grad_norm": 1.6671072179711797, - "learning_rate": 1.6982018878908536e-06, - "loss": 1.2027, - "step": 2039 - }, - { - "epoch": 0.2765539212363587, - "grad_norm": 1.4350800723338488, - "learning_rate": 1.6978874413423495e-06, - "loss": 1.1893, - "step": 2040 - }, - { - "epoch": 0.27668948688402356, - "grad_norm": 1.7796166021201278, - "learning_rate": 1.6975728602117609e-06, - "loss": 1.1769, - "step": 2041 - }, - { - "epoch": 0.2768250525316885, - "grad_norm": 2.2447580322391776, - "learning_rate": 1.6972581445597527e-06, - "loss": 1.1975, - "step": 2042 - }, - { - "epoch": 0.27696061817935336, - "grad_norm": 1.7888584592527919, - "learning_rate": 1.6969432944470148e-06, - "loss": 1.2046, - "step": 2043 - }, - { - "epoch": 0.2770961838270182, - "grad_norm": 1.8046316654247059, - "learning_rate": 1.6966283099342643e-06, - "loss": 1.188, - "step": 2044 - }, - { - "epoch": 0.2772317494746831, - "grad_norm": 2.172512058120931, - "learning_rate": 1.6963131910822427e-06, - "loss": 1.1563, - "step": 2045 - }, - { - "epoch": 0.277367315122348, - "grad_norm": 2.528516575409953, - "learning_rate": 1.6959979379517186e-06, - "loss": 1.1568, - "step": 2046 - }, - { - "epoch": 0.2775028807700129, - "grad_norm": 1.7834674794986773, - "learning_rate": 1.6956825506034863e-06, - "loss": 1.1759, - "step": 2047 - }, - { - "epoch": 0.27763844641767776, - "grad_norm": 1.786700554001862, - "learning_rate": 1.6953670290983656e-06, - "loss": 1.1463, - "step": 2048 - }, - { - "epoch": 0.27777401206534263, - "grad_norm": 4.178710594563023, - "learning_rate": 1.6950513734972018e-06, - "loss": 1.1599, - "step": 2049 - }, - { - "epoch": 0.2779095777130075, - "grad_norm": 1.7127750032941875, - "learning_rate": 1.6947355838608672e-06, - "loss": 1.1477, - "step": 2050 - }, - { - "epoch": 0.27804514336067243, - "grad_norm": 4.095253908226092, - "learning_rate": 1.6944196602502593e-06, - "loss": 1.2064, - "step": 2051 - }, - { - "epoch": 0.2781807090083373, - "grad_norm": 1.5697711673112849, - "learning_rate": 1.694103602726301e-06, - "loss": 1.2093, - "step": 2052 - }, - { - "epoch": 0.27831627465600217, - "grad_norm": 4.893848928021499, - "learning_rate": 1.6937874113499425e-06, - "loss": 1.1714, - "step": 2053 - }, - { - "epoch": 0.27845184030366704, - "grad_norm": 9.28167250276754, - "learning_rate": 1.6934710861821575e-06, - "loss": 1.2232, - "step": 2054 - }, - { - "epoch": 0.2785874059513319, - "grad_norm": 1.686751735802921, - "learning_rate": 1.6931546272839477e-06, - "loss": 1.1992, - "step": 2055 - }, - { - "epoch": 0.27872297159899684, - "grad_norm": 3.0594556793124186, - "learning_rate": 1.6928380347163396e-06, - "loss": 1.2004, - "step": 2056 - }, - { - "epoch": 0.2788585372466617, - "grad_norm": 1.6990238126974933, - "learning_rate": 1.6925213085403849e-06, - "loss": 1.1935, - "step": 2057 - }, - { - "epoch": 0.2789941028943266, - "grad_norm": 1.5683180071753926, - "learning_rate": 1.6922044488171627e-06, - "loss": 1.192, - "step": 2058 - }, - { - "epoch": 0.27912966854199145, - "grad_norm": 3.3309688802269917, - "learning_rate": 1.6918874556077764e-06, - "loss": 1.2156, - "step": 2059 - }, - { - "epoch": 0.2792652341896563, - "grad_norm": 1.4490199165508548, - "learning_rate": 1.6915703289733558e-06, - "loss": 1.2215, - "step": 2060 - }, - { - "epoch": 0.27940079983732125, - "grad_norm": 1.5701985791839532, - "learning_rate": 1.6912530689750559e-06, - "loss": 1.2142, - "step": 2061 - }, - { - "epoch": 0.2795363654849861, - "grad_norm": 2.558326140837903, - "learning_rate": 1.6909356756740586e-06, - "loss": 1.118, - "step": 2062 - }, - { - "epoch": 0.279671931132651, - "grad_norm": 2.1780762527410613, - "learning_rate": 1.6906181491315697e-06, - "loss": 1.1775, - "step": 2063 - }, - { - "epoch": 0.27980749678031586, - "grad_norm": 1.8496798506907293, - "learning_rate": 1.6903004894088223e-06, - "loss": 1.1885, - "step": 2064 - }, - { - "epoch": 0.2799430624279807, - "grad_norm": 1.9290254902910118, - "learning_rate": 1.6899826965670742e-06, - "loss": 1.2211, - "step": 2065 - }, - { - "epoch": 0.28007862807564565, - "grad_norm": 1.6166362894190358, - "learning_rate": 1.6896647706676098e-06, - "loss": 1.2058, - "step": 2066 - }, - { - "epoch": 0.2802141937233105, - "grad_norm": 1.5203334244429207, - "learning_rate": 1.6893467117717383e-06, - "loss": 1.1915, - "step": 2067 - }, - { - "epoch": 0.2803497593709754, - "grad_norm": 2.280717729373391, - "learning_rate": 1.6890285199407945e-06, - "loss": 1.2263, - "step": 2068 - }, - { - "epoch": 0.28048532501864026, - "grad_norm": 5.010421966459476, - "learning_rate": 1.6887101952361395e-06, - "loss": 1.221, - "step": 2069 - }, - { - "epoch": 0.28062089066630513, - "grad_norm": 1.545694125094765, - "learning_rate": 1.6883917377191602e-06, - "loss": 1.1985, - "step": 2070 - }, - { - "epoch": 0.28075645631397006, - "grad_norm": 2.7954683249671595, - "learning_rate": 1.6880731474512677e-06, - "loss": 1.1948, - "step": 2071 - }, - { - "epoch": 0.28089202196163493, - "grad_norm": 2.0459396812208754, - "learning_rate": 1.6877544244938998e-06, - "loss": 1.1922, - "step": 2072 - }, - { - "epoch": 0.2810275876092998, - "grad_norm": 2.906883807657313, - "learning_rate": 1.6874355689085205e-06, - "loss": 1.1972, - "step": 2073 - }, - { - "epoch": 0.28116315325696467, - "grad_norm": 2.1161013906393933, - "learning_rate": 1.6871165807566174e-06, - "loss": 1.1629, - "step": 2074 - }, - { - "epoch": 0.28129871890462954, - "grad_norm": 3.9794767986569353, - "learning_rate": 1.686797460099706e-06, - "loss": 1.165, - "step": 2075 - }, - { - "epoch": 0.28143428455229447, - "grad_norm": 2.464204241781635, - "learning_rate": 1.6864782069993252e-06, - "loss": 1.1957, - "step": 2076 - }, - { - "epoch": 0.28156985019995934, - "grad_norm": 1.6119431308239913, - "learning_rate": 1.6861588215170413e-06, - "loss": 1.1992, - "step": 2077 - }, - { - "epoch": 0.2817054158476242, - "grad_norm": 1.717908910873342, - "learning_rate": 1.6858393037144447e-06, - "loss": 1.146, - "step": 2078 - }, - { - "epoch": 0.2818409814952891, - "grad_norm": 2.184480232540292, - "learning_rate": 1.6855196536531522e-06, - "loss": 1.2074, - "step": 2079 - }, - { - "epoch": 0.28197654714295395, - "grad_norm": 1.8856832755093356, - "learning_rate": 1.6851998713948055e-06, - "loss": 1.1717, - "step": 2080 - }, - { - "epoch": 0.2821121127906189, - "grad_norm": 2.0382240288207885, - "learning_rate": 1.6848799570010725e-06, - "loss": 1.2074, - "step": 2081 - }, - { - "epoch": 0.28224767843828374, - "grad_norm": 1.7280610321556569, - "learning_rate": 1.6845599105336456e-06, - "loss": 1.1928, - "step": 2082 - }, - { - "epoch": 0.2823832440859486, - "grad_norm": 1.5030412120601344, - "learning_rate": 1.6842397320542436e-06, - "loss": 1.1865, - "step": 2083 - }, - { - "epoch": 0.2825188097336135, - "grad_norm": 1.7962922884141603, - "learning_rate": 1.6839194216246107e-06, - "loss": 1.2085, - "step": 2084 - }, - { - "epoch": 0.2826543753812784, - "grad_norm": 1.8042564241210972, - "learning_rate": 1.6835989793065152e-06, - "loss": 1.172, - "step": 2085 - }, - { - "epoch": 0.2827899410289433, - "grad_norm": 1.6194942194558668, - "learning_rate": 1.683278405161753e-06, - "loss": 1.161, - "step": 2086 - }, - { - "epoch": 0.28292550667660815, - "grad_norm": 1.7400490532123518, - "learning_rate": 1.682957699252144e-06, - "loss": 1.1601, - "step": 2087 - }, - { - "epoch": 0.283061072324273, - "grad_norm": 3.237773363237634, - "learning_rate": 1.6826368616395331e-06, - "loss": 1.1862, - "step": 2088 - }, - { - "epoch": 0.2831966379719379, - "grad_norm": 1.7728150507346314, - "learning_rate": 1.6823158923857924e-06, - "loss": 1.1451, - "step": 2089 - }, - { - "epoch": 0.2833322036196028, - "grad_norm": 2.082199964289682, - "learning_rate": 1.6819947915528173e-06, - "loss": 1.1518, - "step": 2090 - }, - { - "epoch": 0.2834677692672677, - "grad_norm": 2.8217225438763345, - "learning_rate": 1.6816735592025303e-06, - "loss": 1.2393, - "step": 2091 - }, - { - "epoch": 0.28360333491493256, - "grad_norm": 1.8377490005443708, - "learning_rate": 1.681352195396878e-06, - "loss": 1.175, - "step": 2092 - }, - { - "epoch": 0.28373890056259743, - "grad_norm": 1.4920119925549888, - "learning_rate": 1.681030700197833e-06, - "loss": 1.1869, - "step": 2093 - }, - { - "epoch": 0.2838744662102623, - "grad_norm": 1.5529914400384028, - "learning_rate": 1.6807090736673932e-06, - "loss": 1.2275, - "step": 2094 - }, - { - "epoch": 0.2840100318579272, - "grad_norm": 1.645941063645527, - "learning_rate": 1.6803873158675823e-06, - "loss": 1.1629, - "step": 2095 - }, - { - "epoch": 0.2841455975055921, - "grad_norm": 1.6619067871978555, - "learning_rate": 1.6800654268604478e-06, - "loss": 1.1484, - "step": 2096 - }, - { - "epoch": 0.28428116315325697, - "grad_norm": 2.2615904965712037, - "learning_rate": 1.6797434067080635e-06, - "loss": 1.1769, - "step": 2097 - }, - { - "epoch": 0.28441672880092184, - "grad_norm": 1.6340220397476881, - "learning_rate": 1.679421255472529e-06, - "loss": 1.1831, - "step": 2098 - }, - { - "epoch": 0.2845522944485867, - "grad_norm": 1.775883142388447, - "learning_rate": 1.6790989732159685e-06, - "loss": 1.1621, - "step": 2099 - }, - { - "epoch": 0.28468786009625163, - "grad_norm": 3.8930587643355543, - "learning_rate": 1.6787765600005317e-06, - "loss": 1.2275, - "step": 2100 - }, - { - "epoch": 0.2848234257439165, - "grad_norm": 1.7523969781256725, - "learning_rate": 1.6784540158883928e-06, - "loss": 1.1634, - "step": 2101 - }, - { - "epoch": 0.2849589913915814, - "grad_norm": 1.5565211990477799, - "learning_rate": 1.6781313409417527e-06, - "loss": 1.1781, - "step": 2102 - }, - { - "epoch": 0.28509455703924624, - "grad_norm": 3.119861519653099, - "learning_rate": 1.6778085352228362e-06, - "loss": 1.2036, - "step": 2103 - }, - { - "epoch": 0.2852301226869111, - "grad_norm": 1.6790522687523852, - "learning_rate": 1.6774855987938938e-06, - "loss": 1.1884, - "step": 2104 - }, - { - "epoch": 0.28536568833457604, - "grad_norm": 1.4709659294610595, - "learning_rate": 1.6771625317172018e-06, - "loss": 1.1593, - "step": 2105 - }, - { - "epoch": 0.2855012539822409, - "grad_norm": 1.9091410608829393, - "learning_rate": 1.6768393340550607e-06, - "loss": 1.1675, - "step": 2106 - }, - { - "epoch": 0.2856368196299058, - "grad_norm": 1.4070639264123368, - "learning_rate": 1.6765160058697962e-06, - "loss": 1.1908, - "step": 2107 - }, - { - "epoch": 0.28577238527757065, - "grad_norm": 2.2128526491370284, - "learning_rate": 1.6761925472237604e-06, - "loss": 1.1719, - "step": 2108 - }, - { - "epoch": 0.2859079509252355, - "grad_norm": 1.4968119108618396, - "learning_rate": 1.6758689581793295e-06, - "loss": 1.1482, - "step": 2109 - }, - { - "epoch": 0.28604351657290045, - "grad_norm": 5.187870739938606, - "learning_rate": 1.675545238798905e-06, - "loss": 1.1709, - "step": 2110 - }, - { - "epoch": 0.2861790822205653, - "grad_norm": 1.4261266927896192, - "learning_rate": 1.6752213891449134e-06, - "loss": 1.185, - "step": 2111 - }, - { - "epoch": 0.2863146478682302, - "grad_norm": 5.845974474682294, - "learning_rate": 1.674897409279807e-06, - "loss": 1.2078, - "step": 2112 - }, - { - "epoch": 0.28645021351589506, - "grad_norm": 1.96166965591953, - "learning_rate": 1.6745732992660622e-06, - "loss": 1.2013, - "step": 2113 - }, - { - "epoch": 0.28658577916355993, - "grad_norm": 1.8087376311109828, - "learning_rate": 1.6742490591661817e-06, - "loss": 1.2061, - "step": 2114 - }, - { - "epoch": 0.28672134481122485, - "grad_norm": 1.7975921609168997, - "learning_rate": 1.6739246890426922e-06, - "loss": 1.2, - "step": 2115 - }, - { - "epoch": 0.2868569104588897, - "grad_norm": 1.6047152817047308, - "learning_rate": 1.673600188958146e-06, - "loss": 1.1654, - "step": 2116 - }, - { - "epoch": 0.2869924761065546, - "grad_norm": 4.062464731824118, - "learning_rate": 1.6732755589751208e-06, - "loss": 1.2023, - "step": 2117 - }, - { - "epoch": 0.28712804175421947, - "grad_norm": 1.5666667008158446, - "learning_rate": 1.6729507991562181e-06, - "loss": 1.1764, - "step": 2118 - }, - { - "epoch": 0.28726360740188434, - "grad_norm": 1.5268301253635783, - "learning_rate": 1.6726259095640663e-06, - "loss": 1.1531, - "step": 2119 - }, - { - "epoch": 0.28739917304954926, - "grad_norm": 1.6898355069265987, - "learning_rate": 1.6723008902613168e-06, - "loss": 1.1791, - "step": 2120 - }, - { - "epoch": 0.28753473869721413, - "grad_norm": 4.177735032764537, - "learning_rate": 1.6719757413106475e-06, - "loss": 1.1904, - "step": 2121 - }, - { - "epoch": 0.287670304344879, - "grad_norm": 1.380986445747835, - "learning_rate": 1.6716504627747608e-06, - "loss": 1.1832, - "step": 2122 - }, - { - "epoch": 0.2878058699925439, - "grad_norm": 1.4552098426724664, - "learning_rate": 1.6713250547163839e-06, - "loss": 1.1643, - "step": 2123 - }, - { - "epoch": 0.2879414356402088, - "grad_norm": 1.6387652859248818, - "learning_rate": 1.6709995171982697e-06, - "loss": 1.1878, - "step": 2124 - }, - { - "epoch": 0.28807700128787367, - "grad_norm": 1.6984165428851645, - "learning_rate": 1.6706738502831948e-06, - "loss": 1.145, - "step": 2125 - }, - { - "epoch": 0.28821256693553854, - "grad_norm": 2.6044731378958854, - "learning_rate": 1.6703480540339617e-06, - "loss": 1.1729, - "step": 2126 - }, - { - "epoch": 0.2883481325832034, - "grad_norm": 1.4222194919326705, - "learning_rate": 1.670022128513398e-06, - "loss": 1.1843, - "step": 2127 - }, - { - "epoch": 0.2884836982308683, - "grad_norm": 1.3728353188492077, - "learning_rate": 1.6696960737843556e-06, - "loss": 1.1385, - "step": 2128 - }, - { - "epoch": 0.2886192638785332, - "grad_norm": 1.5321557658790825, - "learning_rate": 1.6693698899097117e-06, - "loss": 1.1906, - "step": 2129 - }, - { - "epoch": 0.2887548295261981, - "grad_norm": 1.7766364015141909, - "learning_rate": 1.6690435769523684e-06, - "loss": 1.1931, - "step": 2130 - }, - { - "epoch": 0.28889039517386295, - "grad_norm": 1.6487778872864252, - "learning_rate": 1.668717134975252e-06, - "loss": 1.2152, - "step": 2131 - }, - { - "epoch": 0.2890259608215278, - "grad_norm": 4.019791972647947, - "learning_rate": 1.668390564041315e-06, - "loss": 1.2107, - "step": 2132 - }, - { - "epoch": 0.2891615264691927, - "grad_norm": 1.5591095992501522, - "learning_rate": 1.6680638642135334e-06, - "loss": 1.1818, - "step": 2133 - }, - { - "epoch": 0.2892970921168576, - "grad_norm": 1.3721178565236052, - "learning_rate": 1.667737035554909e-06, - "loss": 1.1982, - "step": 2134 - }, - { - "epoch": 0.2894326577645225, - "grad_norm": 2.1408560051119547, - "learning_rate": 1.6674100781284683e-06, - "loss": 1.2319, - "step": 2135 - }, - { - "epoch": 0.28956822341218735, - "grad_norm": 1.4730397269423732, - "learning_rate": 1.6670829919972622e-06, - "loss": 1.1836, - "step": 2136 - }, - { - "epoch": 0.2897037890598522, - "grad_norm": 1.8443844826724085, - "learning_rate": 1.6667557772243668e-06, - "loss": 1.1861, - "step": 2137 - }, - { - "epoch": 0.2898393547075171, - "grad_norm": 1.7111041133565996, - "learning_rate": 1.6664284338728824e-06, - "loss": 1.1807, - "step": 2138 - }, - { - "epoch": 0.289974920355182, - "grad_norm": 2.559662952107668, - "learning_rate": 1.6661009620059355e-06, - "loss": 1.1728, - "step": 2139 - }, - { - "epoch": 0.2901104860028469, - "grad_norm": 1.5372400928420669, - "learning_rate": 1.6657733616866755e-06, - "loss": 1.1953, - "step": 2140 - }, - { - "epoch": 0.29024605165051176, - "grad_norm": 1.6569429956121038, - "learning_rate": 1.6654456329782783e-06, - "loss": 1.1588, - "step": 2141 - }, - { - "epoch": 0.29038161729817663, - "grad_norm": 1.5884050184071286, - "learning_rate": 1.6651177759439432e-06, - "loss": 1.1682, - "step": 2142 - }, - { - "epoch": 0.2905171829458415, - "grad_norm": 2.6739860281851193, - "learning_rate": 1.6647897906468953e-06, - "loss": 1.1696, - "step": 2143 - }, - { - "epoch": 0.2906527485935064, - "grad_norm": 1.5579988696209868, - "learning_rate": 1.6644616771503838e-06, - "loss": 1.1611, - "step": 2144 - }, - { - "epoch": 0.2907883142411713, - "grad_norm": 1.7716224654329218, - "learning_rate": 1.6641334355176827e-06, - "loss": 1.2092, - "step": 2145 - }, - { - "epoch": 0.29092387988883617, - "grad_norm": 1.5655497093316786, - "learning_rate": 1.6638050658120913e-06, - "loss": 1.2361, - "step": 2146 - }, - { - "epoch": 0.29105944553650104, - "grad_norm": 1.7029908271407974, - "learning_rate": 1.6634765680969323e-06, - "loss": 1.1855, - "step": 2147 - }, - { - "epoch": 0.2911950111841659, - "grad_norm": 1.9260714273325592, - "learning_rate": 1.6631479424355548e-06, - "loss": 1.1692, - "step": 2148 - }, - { - "epoch": 0.29133057683183083, - "grad_norm": 1.5062672763471887, - "learning_rate": 1.6628191888913308e-06, - "loss": 1.197, - "step": 2149 - }, - { - "epoch": 0.2914661424794957, - "grad_norm": 3.11811261303228, - "learning_rate": 1.662490307527658e-06, - "loss": 1.1906, - "step": 2150 - }, - { - "epoch": 0.2916017081271606, - "grad_norm": 1.7081963715599247, - "learning_rate": 1.6621612984079592e-06, - "loss": 1.1999, - "step": 2151 - }, - { - "epoch": 0.29173727377482545, - "grad_norm": 2.395185711486267, - "learning_rate": 1.6618321615956808e-06, - "loss": 1.176, - "step": 2152 - }, - { - "epoch": 0.2918728394224903, - "grad_norm": 1.6723119294442945, - "learning_rate": 1.661502897154294e-06, - "loss": 1.183, - "step": 2153 - }, - { - "epoch": 0.29200840507015524, - "grad_norm": 1.7508338087184163, - "learning_rate": 1.6611735051472948e-06, - "loss": 1.1923, - "step": 2154 - }, - { - "epoch": 0.2921439707178201, - "grad_norm": 1.508588602519883, - "learning_rate": 1.6608439856382046e-06, - "loss": 1.1501, - "step": 2155 - }, - { - "epoch": 0.292279536365485, - "grad_norm": 3.1739705310889264, - "learning_rate": 1.660514338690568e-06, - "loss": 1.1949, - "step": 2156 - }, - { - "epoch": 0.29241510201314985, - "grad_norm": 1.5114834393965324, - "learning_rate": 1.6601845643679548e-06, - "loss": 1.1444, - "step": 2157 - }, - { - "epoch": 0.2925506676608147, - "grad_norm": 1.5722158081646018, - "learning_rate": 1.6598546627339598e-06, - "loss": 1.1815, - "step": 2158 - }, - { - "epoch": 0.29268623330847965, - "grad_norm": 1.470640863523126, - "learning_rate": 1.6595246338522016e-06, - "loss": 1.2152, - "step": 2159 - }, - { - "epoch": 0.2928217989561445, - "grad_norm": 1.5030645395086537, - "learning_rate": 1.6591944777863237e-06, - "loss": 1.2109, - "step": 2160 - }, - { - "epoch": 0.2929573646038094, - "grad_norm": 1.6135586666415747, - "learning_rate": 1.6588641945999937e-06, - "loss": 1.1725, - "step": 2161 - }, - { - "epoch": 0.29309293025147426, - "grad_norm": 1.5460996107594243, - "learning_rate": 1.658533784356905e-06, - "loss": 1.1998, - "step": 2162 - }, - { - "epoch": 0.2932284958991392, - "grad_norm": 1.8738597638642738, - "learning_rate": 1.658203247120774e-06, - "loss": 1.2696, - "step": 2163 - }, - { - "epoch": 0.29336406154680406, - "grad_norm": 1.6534484402706113, - "learning_rate": 1.6578725829553425e-06, - "loss": 1.1811, - "step": 2164 - }, - { - "epoch": 0.2934996271944689, - "grad_norm": 1.900974075409964, - "learning_rate": 1.6575417919243765e-06, - "loss": 1.1601, - "step": 2165 - }, - { - "epoch": 0.2936351928421338, - "grad_norm": 1.6522487226577396, - "learning_rate": 1.6572108740916657e-06, - "loss": 1.1678, - "step": 2166 - }, - { - "epoch": 0.29377075848979867, - "grad_norm": 1.718593012018825, - "learning_rate": 1.656879829521026e-06, - "loss": 1.197, - "step": 2167 - }, - { - "epoch": 0.2939063241374636, - "grad_norm": 2.111875872103038, - "learning_rate": 1.656548658276296e-06, - "loss": 1.1904, - "step": 2168 - }, - { - "epoch": 0.29404188978512846, - "grad_norm": 1.4324709795612764, - "learning_rate": 1.6562173604213396e-06, - "loss": 1.1802, - "step": 2169 - }, - { - "epoch": 0.29417745543279333, - "grad_norm": 1.4635490634486161, - "learning_rate": 1.6558859360200454e-06, - "loss": 1.1881, - "step": 2170 - }, - { - "epoch": 0.2943130210804582, - "grad_norm": 1.5151935076499172, - "learning_rate": 1.6555543851363256e-06, - "loss": 1.1768, - "step": 2171 - }, - { - "epoch": 0.2944485867281231, - "grad_norm": 1.4948681815967815, - "learning_rate": 1.6552227078341171e-06, - "loss": 1.1738, - "step": 2172 - }, - { - "epoch": 0.294584152375788, - "grad_norm": 1.4509915760047964, - "learning_rate": 1.6548909041773817e-06, - "loss": 1.1357, - "step": 2173 - }, - { - "epoch": 0.29471971802345287, - "grad_norm": 2.368909656427332, - "learning_rate": 1.6545589742301048e-06, - "loss": 1.1752, - "step": 2174 - }, - { - "epoch": 0.29485528367111774, - "grad_norm": 1.7545878134828194, - "learning_rate": 1.6542269180562961e-06, - "loss": 1.2181, - "step": 2175 - }, - { - "epoch": 0.2949908493187826, - "grad_norm": 1.680492589709372, - "learning_rate": 1.6538947357199907e-06, - "loss": 1.141, - "step": 2176 - }, - { - "epoch": 0.2951264149664475, - "grad_norm": 1.5475363547760452, - "learning_rate": 1.6535624272852471e-06, - "loss": 1.181, - "step": 2177 - }, - { - "epoch": 0.2952619806141124, - "grad_norm": 1.7041851493867874, - "learning_rate": 1.653229992816148e-06, - "loss": 1.16, - "step": 2178 - }, - { - "epoch": 0.2953975462617773, - "grad_norm": 3.644869217193937, - "learning_rate": 1.6528974323768016e-06, - "loss": 1.166, - "step": 2179 - }, - { - "epoch": 0.29553311190944215, - "grad_norm": 1.7230281952566826, - "learning_rate": 1.6525647460313388e-06, - "loss": 1.1692, - "step": 2180 - }, - { - "epoch": 0.295668677557107, - "grad_norm": 1.8032940899520207, - "learning_rate": 1.6522319338439156e-06, - "loss": 1.1904, - "step": 2181 - }, - { - "epoch": 0.2958042432047719, - "grad_norm": 2.1457730185841046, - "learning_rate": 1.6518989958787125e-06, - "loss": 1.1765, - "step": 2182 - }, - { - "epoch": 0.2959398088524368, - "grad_norm": 1.5941099345681002, - "learning_rate": 1.6515659321999337e-06, - "loss": 1.1971, - "step": 2183 - }, - { - "epoch": 0.2960753745001017, - "grad_norm": 1.3964662902888996, - "learning_rate": 1.6512327428718082e-06, - "loss": 1.1685, - "step": 2184 - }, - { - "epoch": 0.29621094014776655, - "grad_norm": 1.7166562236447311, - "learning_rate": 1.6508994279585885e-06, - "loss": 1.136, - "step": 2185 - }, - { - "epoch": 0.2963465057954314, - "grad_norm": 1.461270427278781, - "learning_rate": 1.6505659875245524e-06, - "loss": 1.1894, - "step": 2186 - }, - { - "epoch": 0.2964820714430963, - "grad_norm": 9.03191809812402, - "learning_rate": 1.6502324216340004e-06, - "loss": 1.1818, - "step": 2187 - }, - { - "epoch": 0.2966176370907612, - "grad_norm": 1.6312968356736324, - "learning_rate": 1.6498987303512588e-06, - "loss": 1.1686, - "step": 2188 - }, - { - "epoch": 0.2967532027384261, - "grad_norm": 2.0184580944241417, - "learning_rate": 1.649564913740677e-06, - "loss": 1.1684, - "step": 2189 - }, - { - "epoch": 0.29688876838609096, - "grad_norm": 1.5069293586223662, - "learning_rate": 1.6492309718666289e-06, - "loss": 1.1953, - "step": 2190 - }, - { - "epoch": 0.29702433403375583, - "grad_norm": 1.5504198492317949, - "learning_rate": 1.6488969047935125e-06, - "loss": 1.1642, - "step": 2191 - }, - { - "epoch": 0.2971598996814207, - "grad_norm": 1.682234718643412, - "learning_rate": 1.6485627125857504e-06, - "loss": 1.1725, - "step": 2192 - }, - { - "epoch": 0.29729546532908563, - "grad_norm": 1.9877740814869254, - "learning_rate": 1.6482283953077884e-06, - "loss": 1.1729, - "step": 2193 - }, - { - "epoch": 0.2974310309767505, - "grad_norm": 1.510768049065923, - "learning_rate": 1.6478939530240971e-06, - "loss": 1.1829, - "step": 2194 - }, - { - "epoch": 0.29756659662441537, - "grad_norm": 1.7763455433091624, - "learning_rate": 1.6475593857991714e-06, - "loss": 1.1851, - "step": 2195 - }, - { - "epoch": 0.29770216227208024, - "grad_norm": 7.674691708094931, - "learning_rate": 1.6472246936975293e-06, - "loss": 1.1766, - "step": 2196 - }, - { - "epoch": 0.2978377279197451, - "grad_norm": 1.593162385666502, - "learning_rate": 1.6468898767837142e-06, - "loss": 1.1717, - "step": 2197 - }, - { - "epoch": 0.29797329356741004, - "grad_norm": 1.5909070494527071, - "learning_rate": 1.6465549351222924e-06, - "loss": 1.2426, - "step": 2198 - }, - { - "epoch": 0.2981088592150749, - "grad_norm": 1.8738212662421183, - "learning_rate": 1.646219868777855e-06, - "loss": 1.145, - "step": 2199 - }, - { - "epoch": 0.2982444248627398, - "grad_norm": 1.620098181552219, - "learning_rate": 1.645884677815017e-06, - "loss": 1.1588, - "step": 2200 - }, - { - "epoch": 0.29837999051040465, - "grad_norm": 1.5399506366532598, - "learning_rate": 1.645549362298417e-06, - "loss": 1.1879, - "step": 2201 - }, - { - "epoch": 0.2985155561580696, - "grad_norm": 1.6601486043868814, - "learning_rate": 1.6452139222927181e-06, - "loss": 1.1962, - "step": 2202 - }, - { - "epoch": 0.29865112180573444, - "grad_norm": 1.5109089007318248, - "learning_rate": 1.6448783578626076e-06, - "loss": 1.1531, - "step": 2203 - }, - { - "epoch": 0.2987866874533993, - "grad_norm": 1.7941075209668806, - "learning_rate": 1.6445426690727959e-06, - "loss": 1.1493, - "step": 2204 - }, - { - "epoch": 0.2989222531010642, - "grad_norm": 1.5155919435176042, - "learning_rate": 1.6442068559880182e-06, - "loss": 1.167, - "step": 2205 - }, - { - "epoch": 0.29905781874872905, - "grad_norm": 1.5373517836757657, - "learning_rate": 1.6438709186730333e-06, - "loss": 1.1437, - "step": 2206 - }, - { - "epoch": 0.299193384396394, - "grad_norm": 2.865111793900049, - "learning_rate": 1.6435348571926245e-06, - "loss": 1.1712, - "step": 2207 - }, - { - "epoch": 0.29932895004405885, - "grad_norm": 1.8922695981444253, - "learning_rate": 1.6431986716115982e-06, - "loss": 1.1873, - "step": 2208 - }, - { - "epoch": 0.2994645156917237, - "grad_norm": 1.7455592257429309, - "learning_rate": 1.6428623619947848e-06, - "loss": 1.2345, - "step": 2209 - }, - { - "epoch": 0.2996000813393886, - "grad_norm": 1.4032804638091825, - "learning_rate": 1.6425259284070395e-06, - "loss": 1.1882, - "step": 2210 - }, - { - "epoch": 0.29973564698705346, - "grad_norm": 1.7367070218942253, - "learning_rate": 1.6421893709132405e-06, - "loss": 1.1394, - "step": 2211 - }, - { - "epoch": 0.2998712126347184, - "grad_norm": 1.5492656383007153, - "learning_rate": 1.641852689578291e-06, - "loss": 1.1957, - "step": 2212 - }, - { - "epoch": 0.30000677828238326, - "grad_norm": 1.8646349835115428, - "learning_rate": 1.6415158844671163e-06, - "loss": 1.1967, - "step": 2213 - }, - { - "epoch": 0.3001423439300481, - "grad_norm": 2.772527790230075, - "learning_rate": 1.6411789556446673e-06, - "loss": 1.1583, - "step": 2214 - }, - { - "epoch": 0.300277909577713, - "grad_norm": 1.4405273525513895, - "learning_rate": 1.640841903175918e-06, - "loss": 1.2045, - "step": 2215 - }, - { - "epoch": 0.30041347522537787, - "grad_norm": 1.5489151854552172, - "learning_rate": 1.640504727125866e-06, - "loss": 1.1688, - "step": 2216 - }, - { - "epoch": 0.3005490408730428, - "grad_norm": 2.007738357966311, - "learning_rate": 1.640167427559533e-06, - "loss": 1.2325, - "step": 2217 - }, - { - "epoch": 0.30068460652070766, - "grad_norm": 1.7308720487844722, - "learning_rate": 1.639830004541965e-06, - "loss": 1.1649, - "step": 2218 - }, - { - "epoch": 0.30082017216837253, - "grad_norm": 1.7311825807519343, - "learning_rate": 1.6394924581382312e-06, - "loss": 1.194, - "step": 2219 - }, - { - "epoch": 0.3009557378160374, - "grad_norm": 1.6473467606238767, - "learning_rate": 1.6391547884134247e-06, - "loss": 1.199, - "step": 2220 - }, - { - "epoch": 0.3010913034637023, - "grad_norm": 1.8465730520814907, - "learning_rate": 1.6388169954326623e-06, - "loss": 1.1959, - "step": 2221 - }, - { - "epoch": 0.3012268691113672, - "grad_norm": 1.5214972186551377, - "learning_rate": 1.6384790792610849e-06, - "loss": 1.1933, - "step": 2222 - }, - { - "epoch": 0.30136243475903207, - "grad_norm": 1.8155207582150754, - "learning_rate": 1.6381410399638571e-06, - "loss": 1.1555, - "step": 2223 - }, - { - "epoch": 0.30149800040669694, - "grad_norm": 1.7678329106837112, - "learning_rate": 1.6378028776061666e-06, - "loss": 1.203, - "step": 2224 - }, - { - "epoch": 0.3016335660543618, - "grad_norm": 4.5321879325399195, - "learning_rate": 1.6374645922532257e-06, - "loss": 1.1559, - "step": 2225 - }, - { - "epoch": 0.3017691317020267, - "grad_norm": 1.5634660993110618, - "learning_rate": 1.63712618397027e-06, - "loss": 1.1903, - "step": 2226 - }, - { - "epoch": 0.3019046973496916, - "grad_norm": 1.4662694618989305, - "learning_rate": 1.636787652822559e-06, - "loss": 1.1724, - "step": 2227 - }, - { - "epoch": 0.3020402629973565, - "grad_norm": 2.004891687870438, - "learning_rate": 1.6364489988753757e-06, - "loss": 1.1679, - "step": 2228 - }, - { - "epoch": 0.30217582864502135, - "grad_norm": 1.4021933123751527, - "learning_rate": 1.6361102221940268e-06, - "loss": 1.1911, - "step": 2229 - }, - { - "epoch": 0.3023113942926862, - "grad_norm": 1.551558396233573, - "learning_rate": 1.6357713228438428e-06, - "loss": 1.1306, - "step": 2230 - }, - { - "epoch": 0.3024469599403511, - "grad_norm": 1.4830443959178379, - "learning_rate": 1.6354323008901773e-06, - "loss": 1.1599, - "step": 2231 - }, - { - "epoch": 0.302582525588016, - "grad_norm": 1.7488268826619244, - "learning_rate": 1.6350931563984087e-06, - "loss": 1.1852, - "step": 2232 - }, - { - "epoch": 0.3027180912356809, - "grad_norm": 2.0679886265604455, - "learning_rate": 1.6347538894339379e-06, - "loss": 1.1634, - "step": 2233 - }, - { - "epoch": 0.30285365688334576, - "grad_norm": 1.6356591324010776, - "learning_rate": 1.6344145000621898e-06, - "loss": 1.1501, - "step": 2234 - }, - { - "epoch": 0.3029892225310106, - "grad_norm": 1.878891681889651, - "learning_rate": 1.6340749883486136e-06, - "loss": 1.2101, - "step": 2235 - }, - { - "epoch": 0.3031247881786755, - "grad_norm": 1.5239402565036233, - "learning_rate": 1.6337353543586808e-06, - "loss": 1.1959, - "step": 2236 - }, - { - "epoch": 0.3032603538263404, - "grad_norm": 1.6148709851262455, - "learning_rate": 1.6333955981578868e-06, - "loss": 1.189, - "step": 2237 - }, - { - "epoch": 0.3033959194740053, - "grad_norm": 2.124616178582552, - "learning_rate": 1.633055719811752e-06, - "loss": 1.1857, - "step": 2238 - }, - { - "epoch": 0.30353148512167016, - "grad_norm": 2.4759417794089758, - "learning_rate": 1.6327157193858182e-06, - "loss": 1.2211, - "step": 2239 - }, - { - "epoch": 0.30366705076933503, - "grad_norm": 2.074203608020143, - "learning_rate": 1.6323755969456526e-06, - "loss": 1.1702, - "step": 2240 - }, - { - "epoch": 0.30380261641699996, - "grad_norm": 1.5284584807615968, - "learning_rate": 1.6320353525568447e-06, - "loss": 1.1445, - "step": 2241 - }, - { - "epoch": 0.30393818206466483, - "grad_norm": 1.6983740572292625, - "learning_rate": 1.6316949862850082e-06, - "loss": 1.1657, - "step": 2242 - }, - { - "epoch": 0.3040737477123297, - "grad_norm": 1.7479912591183382, - "learning_rate": 1.6313544981957797e-06, - "loss": 1.215, - "step": 2243 - }, - { - "epoch": 0.30420931335999457, - "grad_norm": 1.6664489850291668, - "learning_rate": 1.6310138883548199e-06, - "loss": 1.162, - "step": 2244 - }, - { - "epoch": 0.30434487900765944, - "grad_norm": 2.0383598258729143, - "learning_rate": 1.6306731568278126e-06, - "loss": 1.1748, - "step": 2245 - }, - { - "epoch": 0.30448044465532437, - "grad_norm": 2.871595089557037, - "learning_rate": 1.6303323036804652e-06, - "loss": 1.2126, - "step": 2246 - }, - { - "epoch": 0.30461601030298924, - "grad_norm": 1.4793349557650004, - "learning_rate": 1.6299913289785087e-06, - "loss": 1.1531, - "step": 2247 - }, - { - "epoch": 0.3047515759506541, - "grad_norm": 3.9711181230727575, - "learning_rate": 1.6296502327876974e-06, - "loss": 1.1944, - "step": 2248 - }, - { - "epoch": 0.304887141598319, - "grad_norm": 2.028232427994707, - "learning_rate": 1.6293090151738086e-06, - "loss": 1.1784, - "step": 2249 - }, - { - "epoch": 0.30502270724598385, - "grad_norm": 1.9069654033596326, - "learning_rate": 1.6289676762026438e-06, - "loss": 1.1558, - "step": 2250 - }, - { - "epoch": 0.3051582728936488, - "grad_norm": 2.076427219593821, - "learning_rate": 1.6286262159400275e-06, - "loss": 1.1779, - "step": 2251 - }, - { - "epoch": 0.30529383854131364, - "grad_norm": 1.56477639107368, - "learning_rate": 1.6282846344518073e-06, - "loss": 1.1843, - "step": 2252 - }, - { - "epoch": 0.3054294041889785, - "grad_norm": 2.0519512216032108, - "learning_rate": 1.627942931803855e-06, - "loss": 1.1703, - "step": 2253 - }, - { - "epoch": 0.3055649698366434, - "grad_norm": 1.7306838426181153, - "learning_rate": 1.627601108062065e-06, - "loss": 1.1693, - "step": 2254 - }, - { - "epoch": 0.30570053548430826, - "grad_norm": 1.63197554643986, - "learning_rate": 1.6272591632923548e-06, - "loss": 1.1796, - "step": 2255 - }, - { - "epoch": 0.3058361011319732, - "grad_norm": 1.7189607065223398, - "learning_rate": 1.6269170975606665e-06, - "loss": 1.1537, - "step": 2256 - }, - { - "epoch": 0.30597166677963805, - "grad_norm": 1.5092914459386333, - "learning_rate": 1.6265749109329647e-06, - "loss": 1.185, - "step": 2257 - }, - { - "epoch": 0.3061072324273029, - "grad_norm": 1.4780163930503543, - "learning_rate": 1.6262326034752371e-06, - "loss": 1.1372, - "step": 2258 - }, - { - "epoch": 0.3062427980749678, - "grad_norm": 1.4498386758978106, - "learning_rate": 1.6258901752534947e-06, - "loss": 1.1997, - "step": 2259 - }, - { - "epoch": 0.30637836372263266, - "grad_norm": 1.6832735097674074, - "learning_rate": 1.625547626333773e-06, - "loss": 1.1954, - "step": 2260 - }, - { - "epoch": 0.3065139293702976, - "grad_norm": 1.8843429826472684, - "learning_rate": 1.6252049567821294e-06, - "loss": 1.1965, - "step": 2261 - }, - { - "epoch": 0.30664949501796246, - "grad_norm": 1.9281034762771578, - "learning_rate": 1.6248621666646448e-06, - "loss": 1.1686, - "step": 2262 - }, - { - "epoch": 0.30678506066562733, - "grad_norm": 1.7006632762529028, - "learning_rate": 1.6245192560474237e-06, - "loss": 1.1401, - "step": 2263 - }, - { - "epoch": 0.3069206263132922, - "grad_norm": 1.4912200993312832, - "learning_rate": 1.6241762249965935e-06, - "loss": 1.1645, - "step": 2264 - }, - { - "epoch": 0.30705619196095707, - "grad_norm": 1.7979518635513156, - "learning_rate": 1.6238330735783054e-06, - "loss": 1.132, - "step": 2265 - }, - { - "epoch": 0.307191757608622, - "grad_norm": 1.8731677505572497, - "learning_rate": 1.6234898018587336e-06, - "loss": 1.1831, - "step": 2266 - }, - { - "epoch": 0.30732732325628687, - "grad_norm": 1.6164338712643667, - "learning_rate": 1.6231464099040748e-06, - "loss": 1.1631, - "step": 2267 - }, - { - "epoch": 0.30746288890395174, - "grad_norm": 1.9767680923414377, - "learning_rate": 1.6228028977805495e-06, - "loss": 1.1824, - "step": 2268 - }, - { - "epoch": 0.3075984545516166, - "grad_norm": 1.4911346375283991, - "learning_rate": 1.6224592655544016e-06, - "loss": 1.1813, - "step": 2269 - }, - { - "epoch": 0.3077340201992815, - "grad_norm": 1.885006011944974, - "learning_rate": 1.6221155132918979e-06, - "loss": 1.2209, - "step": 2270 - }, - { - "epoch": 0.3078695858469464, - "grad_norm": 4.8203723055687, - "learning_rate": 1.6217716410593281e-06, - "loss": 1.1846, - "step": 2271 - }, - { - "epoch": 0.3080051514946113, - "grad_norm": 1.3461631958768916, - "learning_rate": 1.621427648923005e-06, - "loss": 1.1815, - "step": 2272 - }, - { - "epoch": 0.30814071714227614, - "grad_norm": 3.57680347768065, - "learning_rate": 1.6210835369492652e-06, - "loss": 1.1605, - "step": 2273 - }, - { - "epoch": 0.308276282789941, - "grad_norm": 1.9309547235826354, - "learning_rate": 1.6207393052044678e-06, - "loss": 1.1853, - "step": 2274 - }, - { - "epoch": 0.3084118484376059, - "grad_norm": 2.01111197167501, - "learning_rate": 1.6203949537549954e-06, - "loss": 1.1899, - "step": 2275 - }, - { - "epoch": 0.3085474140852708, - "grad_norm": 1.5124705205819138, - "learning_rate": 1.6200504826672533e-06, - "loss": 1.1737, - "step": 2276 - }, - { - "epoch": 0.3086829797329357, - "grad_norm": 1.408477043646839, - "learning_rate": 1.6197058920076696e-06, - "loss": 1.1879, - "step": 2277 - }, - { - "epoch": 0.30881854538060055, - "grad_norm": 4.409721317152355, - "learning_rate": 1.6193611818426968e-06, - "loss": 1.1963, - "step": 2278 - }, - { - "epoch": 0.3089541110282654, - "grad_norm": 1.5985974845674158, - "learning_rate": 1.6190163522388088e-06, - "loss": 1.1715, - "step": 2279 - }, - { - "epoch": 0.3090896766759303, - "grad_norm": 1.5413308932422158, - "learning_rate": 1.6186714032625033e-06, - "loss": 1.1363, - "step": 2280 - }, - { - "epoch": 0.3092252423235952, - "grad_norm": 1.685002545307349, - "learning_rate": 1.6183263349803014e-06, - "loss": 1.1964, - "step": 2281 - }, - { - "epoch": 0.3093608079712601, - "grad_norm": 1.691496011624455, - "learning_rate": 1.6179811474587464e-06, - "loss": 1.1899, - "step": 2282 - }, - { - "epoch": 0.30949637361892496, - "grad_norm": 2.2323438100246644, - "learning_rate": 1.6176358407644055e-06, - "loss": 1.1892, - "step": 2283 - }, - { - "epoch": 0.30963193926658983, - "grad_norm": 1.6909169945917093, - "learning_rate": 1.6172904149638677e-06, - "loss": 1.2243, - "step": 2284 - }, - { - "epoch": 0.30976750491425475, - "grad_norm": 1.8975948950418353, - "learning_rate": 1.616944870123746e-06, - "loss": 1.1819, - "step": 2285 - }, - { - "epoch": 0.3099030705619196, - "grad_norm": 1.5005015795620948, - "learning_rate": 1.616599206310676e-06, - "loss": 1.1792, - "step": 2286 - }, - { - "epoch": 0.3100386362095845, - "grad_norm": 1.839744109878868, - "learning_rate": 1.616253423591316e-06, - "loss": 1.1805, - "step": 2287 - }, - { - "epoch": 0.31017420185724937, - "grad_norm": 2.3728940794234297, - "learning_rate": 1.6159075220323482e-06, - "loss": 1.1871, - "step": 2288 - }, - { - "epoch": 0.31030976750491424, - "grad_norm": 1.7662184364437283, - "learning_rate": 1.6155615017004762e-06, - "loss": 1.1649, - "step": 2289 - }, - { - "epoch": 0.31044533315257916, - "grad_norm": 1.5642968713253589, - "learning_rate": 1.6152153626624275e-06, - "loss": 1.1907, - "step": 2290 - }, - { - "epoch": 0.31058089880024403, - "grad_norm": 1.5109249948360797, - "learning_rate": 1.6148691049849523e-06, - "loss": 1.1485, - "step": 2291 - }, - { - "epoch": 0.3107164644479089, - "grad_norm": 1.8389085247563501, - "learning_rate": 1.6145227287348238e-06, - "loss": 1.1912, - "step": 2292 - }, - { - "epoch": 0.3108520300955738, - "grad_norm": 2.111970094811141, - "learning_rate": 1.6141762339788376e-06, - "loss": 1.1853, - "step": 2293 - }, - { - "epoch": 0.31098759574323864, - "grad_norm": 1.6186001360672304, - "learning_rate": 1.6138296207838127e-06, - "loss": 1.2111, - "step": 2294 - }, - { - "epoch": 0.31112316139090357, - "grad_norm": 1.6850691507314608, - "learning_rate": 1.6134828892165907e-06, - "loss": 1.1937, - "step": 2295 - }, - { - "epoch": 0.31125872703856844, - "grad_norm": 1.6744835537414455, - "learning_rate": 1.6131360393440362e-06, - "loss": 1.1416, - "step": 2296 - }, - { - "epoch": 0.3113942926862333, - "grad_norm": 1.4488850293180109, - "learning_rate": 1.6127890712330364e-06, - "loss": 1.1585, - "step": 2297 - }, - { - "epoch": 0.3115298583338982, - "grad_norm": 1.723607085187055, - "learning_rate": 1.6124419849505013e-06, - "loss": 1.1602, - "step": 2298 - }, - { - "epoch": 0.31166542398156305, - "grad_norm": 1.5097934806168283, - "learning_rate": 1.6120947805633636e-06, - "loss": 1.1837, - "step": 2299 - }, - { - "epoch": 0.311800989629228, - "grad_norm": 6.539789627040813, - "learning_rate": 1.6117474581385788e-06, - "loss": 1.1544, - "step": 2300 - }, - { - "epoch": 0.31193655527689285, - "grad_norm": 2.4504632990273407, - "learning_rate": 1.611400017743126e-06, - "loss": 1.1586, - "step": 2301 - }, - { - "epoch": 0.3120721209245577, - "grad_norm": 1.9615578692670996, - "learning_rate": 1.6110524594440055e-06, - "loss": 1.1561, - "step": 2302 - }, - { - "epoch": 0.3122076865722226, - "grad_norm": 2.390178530444945, - "learning_rate": 1.6107047833082418e-06, - "loss": 1.1514, - "step": 2303 - }, - { - "epoch": 0.31234325221988746, - "grad_norm": 1.5989217776436109, - "learning_rate": 1.6103569894028813e-06, - "loss": 1.1907, - "step": 2304 - }, - { - "epoch": 0.3124788178675524, - "grad_norm": 1.6686825721764456, - "learning_rate": 1.6100090777949928e-06, - "loss": 1.1441, - "step": 2305 - }, - { - "epoch": 0.31261438351521725, - "grad_norm": 1.5825243599452143, - "learning_rate": 1.6096610485516693e-06, - "loss": 1.1769, - "step": 2306 - }, - { - "epoch": 0.3127499491628821, - "grad_norm": 1.5986638031577636, - "learning_rate": 1.6093129017400248e-06, - "loss": 1.1932, - "step": 2307 - }, - { - "epoch": 0.312885514810547, - "grad_norm": 1.7706742056834075, - "learning_rate": 1.6089646374271965e-06, - "loss": 1.2051, - "step": 2308 - }, - { - "epoch": 0.31302108045821186, - "grad_norm": 1.5425079632167962, - "learning_rate": 1.6086162556803453e-06, - "loss": 1.1647, - "step": 2309 - }, - { - "epoch": 0.3131566461058768, - "grad_norm": 1.941703076602271, - "learning_rate": 1.608267756566653e-06, - "loss": 1.176, - "step": 2310 - }, - { - "epoch": 0.31329221175354166, - "grad_norm": 2.137755517178777, - "learning_rate": 1.607919140153325e-06, - "loss": 1.1631, - "step": 2311 - }, - { - "epoch": 0.31342777740120653, - "grad_norm": 1.7538880700143313, - "learning_rate": 1.6075704065075897e-06, - "loss": 1.1917, - "step": 2312 - }, - { - "epoch": 0.3135633430488714, - "grad_norm": 1.6779084593612938, - "learning_rate": 1.6072215556966975e-06, - "loss": 1.2134, - "step": 2313 - }, - { - "epoch": 0.31369890869653627, - "grad_norm": 1.9731799999535071, - "learning_rate": 1.6068725877879213e-06, - "loss": 1.1526, - "step": 2314 - }, - { - "epoch": 0.3138344743442012, - "grad_norm": 1.3490883368221591, - "learning_rate": 1.6065235028485567e-06, - "loss": 1.1379, - "step": 2315 - }, - { - "epoch": 0.31397003999186607, - "grad_norm": 1.5038960888663697, - "learning_rate": 1.6061743009459225e-06, - "loss": 1.1801, - "step": 2316 - }, - { - "epoch": 0.31410560563953094, - "grad_norm": 1.5136817650087178, - "learning_rate": 1.605824982147359e-06, - "loss": 1.1607, - "step": 2317 - }, - { - "epoch": 0.3142411712871958, - "grad_norm": 1.824990317987802, - "learning_rate": 1.6054755465202296e-06, - "loss": 1.1551, - "step": 2318 - }, - { - "epoch": 0.3143767369348607, - "grad_norm": 1.8924377687369396, - "learning_rate": 1.6051259941319209e-06, - "loss": 1.1643, - "step": 2319 - }, - { - "epoch": 0.3145123025825256, - "grad_norm": 1.6092038909894297, - "learning_rate": 1.6047763250498405e-06, - "loss": 1.1862, - "step": 2320 - }, - { - "epoch": 0.3146478682301905, - "grad_norm": 3.031015956269415, - "learning_rate": 1.6044265393414196e-06, - "loss": 1.2246, - "step": 2321 - }, - { - "epoch": 0.31478343387785535, - "grad_norm": 1.5633691301533488, - "learning_rate": 1.6040766370741117e-06, - "loss": 1.1778, - "step": 2322 - }, - { - "epoch": 0.3149189995255202, - "grad_norm": 3.4225138557255486, - "learning_rate": 1.6037266183153925e-06, - "loss": 1.2127, - "step": 2323 - }, - { - "epoch": 0.31505456517318514, - "grad_norm": 1.4276908880565657, - "learning_rate": 1.6033764831327607e-06, - "loss": 1.2107, - "step": 2324 - }, - { - "epoch": 0.31519013082085, - "grad_norm": 1.5155189556066262, - "learning_rate": 1.6030262315937368e-06, - "loss": 1.2392, - "step": 2325 - }, - { - "epoch": 0.3153256964685149, - "grad_norm": 1.8993223174071954, - "learning_rate": 1.6026758637658642e-06, - "loss": 1.2028, - "step": 2326 - }, - { - "epoch": 0.31546126211617975, - "grad_norm": 1.9254866665681047, - "learning_rate": 1.6023253797167084e-06, - "loss": 1.1944, - "step": 2327 - }, - { - "epoch": 0.3155968277638446, - "grad_norm": 1.965991989257432, - "learning_rate": 1.6019747795138576e-06, - "loss": 1.1696, - "step": 2328 - }, - { - "epoch": 0.31573239341150955, - "grad_norm": 2.1414909685138013, - "learning_rate": 1.6016240632249222e-06, - "loss": 1.2023, - "step": 2329 - }, - { - "epoch": 0.3158679590591744, - "grad_norm": 1.524831322906403, - "learning_rate": 1.6012732309175356e-06, - "loss": 1.1878, - "step": 2330 - }, - { - "epoch": 0.3160035247068393, - "grad_norm": 1.8951859979922014, - "learning_rate": 1.600922282659352e-06, - "loss": 1.1598, - "step": 2331 - }, - { - "epoch": 0.31613909035450416, - "grad_norm": 1.5073687768359594, - "learning_rate": 1.60057121851805e-06, - "loss": 1.1498, - "step": 2332 - }, - { - "epoch": 0.31627465600216903, - "grad_norm": 1.5444546314373493, - "learning_rate": 1.600220038561329e-06, - "loss": 1.1875, - "step": 2333 - }, - { - "epoch": 0.31641022164983396, - "grad_norm": 1.8203232350187495, - "learning_rate": 1.5998687428569113e-06, - "loss": 1.187, - "step": 2334 - }, - { - "epoch": 0.3165457872974988, - "grad_norm": 1.4044320621981543, - "learning_rate": 1.5995173314725419e-06, - "loss": 1.1589, - "step": 2335 - }, - { - "epoch": 0.3166813529451637, - "grad_norm": 1.6866627277627506, - "learning_rate": 1.5991658044759871e-06, - "loss": 1.1758, - "step": 2336 - }, - { - "epoch": 0.31681691859282857, - "grad_norm": 2.2999233963117955, - "learning_rate": 1.5988141619350363e-06, - "loss": 1.1606, - "step": 2337 - }, - { - "epoch": 0.31695248424049344, - "grad_norm": 2.090396372651635, - "learning_rate": 1.5984624039175016e-06, - "loss": 1.2313, - "step": 2338 - }, - { - "epoch": 0.31708804988815836, - "grad_norm": 2.0072713314362884, - "learning_rate": 1.5981105304912159e-06, - "loss": 1.1887, - "step": 2339 - }, - { - "epoch": 0.31722361553582323, - "grad_norm": 1.6152399846838474, - "learning_rate": 1.5977585417240358e-06, - "loss": 1.1494, - "step": 2340 - }, - { - "epoch": 0.3173591811834881, - "grad_norm": 1.7517249866509867, - "learning_rate": 1.5974064376838392e-06, - "loss": 1.1634, - "step": 2341 - }, - { - "epoch": 0.317494746831153, - "grad_norm": 1.933758024227007, - "learning_rate": 1.5970542184385268e-06, - "loss": 1.1779, - "step": 2342 - }, - { - "epoch": 0.31763031247881784, - "grad_norm": 1.7450828836738248, - "learning_rate": 1.5967018840560212e-06, - "loss": 1.154, - "step": 2343 - }, - { - "epoch": 0.31776587812648277, - "grad_norm": 1.6042586475211897, - "learning_rate": 1.5963494346042674e-06, - "loss": 1.1915, - "step": 2344 - }, - { - "epoch": 0.31790144377414764, - "grad_norm": 1.887246980986904, - "learning_rate": 1.5959968701512326e-06, - "loss": 1.1731, - "step": 2345 - }, - { - "epoch": 0.3180370094218125, - "grad_norm": 1.7624973461811497, - "learning_rate": 1.5956441907649057e-06, - "loss": 1.1658, - "step": 2346 - }, - { - "epoch": 0.3181725750694774, - "grad_norm": 1.7648269486975643, - "learning_rate": 1.595291396513298e-06, - "loss": 1.1995, - "step": 2347 - }, - { - "epoch": 0.31830814071714225, - "grad_norm": 1.6450161068654188, - "learning_rate": 1.594938487464444e-06, - "loss": 1.176, - "step": 2348 - }, - { - "epoch": 0.3184437063648072, - "grad_norm": 2.197566408688202, - "learning_rate": 1.5945854636863987e-06, - "loss": 1.2011, - "step": 2349 - }, - { - "epoch": 0.31857927201247205, - "grad_norm": 2.0331364452339744, - "learning_rate": 1.59423232524724e-06, - "loss": 1.2104, - "step": 2350 - }, - { - "epoch": 0.3187148376601369, - "grad_norm": 1.8748133646770986, - "learning_rate": 1.593879072215068e-06, - "loss": 1.1867, - "step": 2351 - }, - { - "epoch": 0.3188504033078018, - "grad_norm": 3.3401255785047854, - "learning_rate": 1.5935257046580048e-06, - "loss": 1.1741, - "step": 2352 - }, - { - "epoch": 0.31898596895546666, - "grad_norm": 1.6990653858963403, - "learning_rate": 1.5931722226441945e-06, - "loss": 1.1815, - "step": 2353 - }, - { - "epoch": 0.3191215346031316, - "grad_norm": 1.6894254354442597, - "learning_rate": 1.5928186262418032e-06, - "loss": 1.1835, - "step": 2354 - }, - { - "epoch": 0.31925710025079646, - "grad_norm": 3.2985981614948887, - "learning_rate": 1.5924649155190191e-06, - "loss": 1.1814, - "step": 2355 - }, - { - "epoch": 0.3193926658984613, - "grad_norm": 1.6182994367226786, - "learning_rate": 1.5921110905440526e-06, - "loss": 1.2003, - "step": 2356 - }, - { - "epoch": 0.3195282315461262, - "grad_norm": 3.2417193514481237, - "learning_rate": 1.5917571513851364e-06, - "loss": 1.1754, - "step": 2357 - }, - { - "epoch": 0.31966379719379107, - "grad_norm": 5.2990410514098025, - "learning_rate": 1.5914030981105246e-06, - "loss": 1.1666, - "step": 2358 - }, - { - "epoch": 0.319799362841456, - "grad_norm": 2.306250304500467, - "learning_rate": 1.5910489307884936e-06, - "loss": 1.1297, - "step": 2359 - }, - { - "epoch": 0.31993492848912086, - "grad_norm": 2.2765956514294685, - "learning_rate": 1.5906946494873415e-06, - "loss": 1.1801, - "step": 2360 - }, - { - "epoch": 0.32007049413678573, - "grad_norm": 1.623784165003658, - "learning_rate": 1.590340254275389e-06, - "loss": 1.2186, - "step": 2361 - }, - { - "epoch": 0.3202060597844506, - "grad_norm": 1.7071773159171726, - "learning_rate": 1.5899857452209787e-06, - "loss": 1.1502, - "step": 2362 - }, - { - "epoch": 0.32034162543211553, - "grad_norm": 2.243936224892669, - "learning_rate": 1.589631122392474e-06, - "loss": 1.1745, - "step": 2363 - }, - { - "epoch": 0.3204771910797804, - "grad_norm": 1.3851470312317449, - "learning_rate": 1.5892763858582618e-06, - "loss": 1.1608, - "step": 2364 - }, - { - "epoch": 0.32061275672744527, - "grad_norm": 1.5858681806269863, - "learning_rate": 1.58892153568675e-06, - "loss": 1.1592, - "step": 2365 - }, - { - "epoch": 0.32074832237511014, - "grad_norm": 1.5875087502734546, - "learning_rate": 1.588566571946369e-06, - "loss": 1.1688, - "step": 2366 - }, - { - "epoch": 0.320883888022775, - "grad_norm": 1.5638234038575933, - "learning_rate": 1.58821149470557e-06, - "loss": 1.1585, - "step": 2367 - }, - { - "epoch": 0.32101945367043994, - "grad_norm": 1.4957368603944114, - "learning_rate": 1.5878563040328276e-06, - "loss": 1.1799, - "step": 2368 - }, - { - "epoch": 0.3211550193181048, - "grad_norm": 1.6655081246978516, - "learning_rate": 1.5875009999966371e-06, - "loss": 1.1893, - "step": 2369 - }, - { - "epoch": 0.3212905849657697, - "grad_norm": 1.8077750281122111, - "learning_rate": 1.5871455826655163e-06, - "loss": 1.1582, - "step": 2370 - }, - { - "epoch": 0.32142615061343455, - "grad_norm": 1.6220647991940846, - "learning_rate": 1.5867900521080044e-06, - "loss": 1.1661, - "step": 2371 - }, - { - "epoch": 0.3215617162610994, - "grad_norm": 1.5371450599673544, - "learning_rate": 1.586434408392663e-06, - "loss": 1.188, - "step": 2372 - }, - { - "epoch": 0.32169728190876434, - "grad_norm": 1.6296124512695802, - "learning_rate": 1.5860786515880745e-06, - "loss": 1.1622, - "step": 2373 - }, - { - "epoch": 0.3218328475564292, - "grad_norm": 1.457812276425042, - "learning_rate": 1.5857227817628447e-06, - "loss": 1.1849, - "step": 2374 - }, - { - "epoch": 0.3219684132040941, - "grad_norm": 1.9339609349430458, - "learning_rate": 1.5853667989855999e-06, - "loss": 1.1533, - "step": 2375 - }, - { - "epoch": 0.32210397885175895, - "grad_norm": 1.5226320037673682, - "learning_rate": 1.5850107033249884e-06, - "loss": 1.2102, - "step": 2376 - }, - { - "epoch": 0.3222395444994238, - "grad_norm": 1.8975768345338047, - "learning_rate": 1.5846544948496807e-06, - "loss": 1.1539, - "step": 2377 - }, - { - "epoch": 0.32237511014708875, - "grad_norm": 1.6742247894037718, - "learning_rate": 1.5842981736283685e-06, - "loss": 1.1751, - "step": 2378 - }, - { - "epoch": 0.3225106757947536, - "grad_norm": 1.8963518925654999, - "learning_rate": 1.5839417397297656e-06, - "loss": 1.1614, - "step": 2379 - }, - { - "epoch": 0.3226462414424185, - "grad_norm": 1.5578793019319777, - "learning_rate": 1.5835851932226074e-06, - "loss": 1.2505, - "step": 2380 - }, - { - "epoch": 0.32278180709008336, - "grad_norm": 2.054467570308797, - "learning_rate": 1.5832285341756517e-06, - "loss": 1.1718, - "step": 2381 - }, - { - "epoch": 0.32291737273774823, - "grad_norm": 2.1064876163794226, - "learning_rate": 1.5828717626576766e-06, - "loss": 1.1581, - "step": 2382 - }, - { - "epoch": 0.32305293838541316, - "grad_norm": 1.3495249853088787, - "learning_rate": 1.582514878737483e-06, - "loss": 1.1551, - "step": 2383 - }, - { - "epoch": 0.32318850403307803, - "grad_norm": 1.4937041585474513, - "learning_rate": 1.5821578824838932e-06, - "loss": 1.1608, - "step": 2384 - }, - { - "epoch": 0.3233240696807429, - "grad_norm": 2.894016989008943, - "learning_rate": 1.5818007739657512e-06, - "loss": 1.2006, - "step": 2385 - }, - { - "epoch": 0.32345963532840777, - "grad_norm": 1.8886973283213486, - "learning_rate": 1.5814435532519221e-06, - "loss": 1.1565, - "step": 2386 - }, - { - "epoch": 0.32359520097607264, - "grad_norm": 1.6017218377698488, - "learning_rate": 1.5810862204112933e-06, - "loss": 1.1723, - "step": 2387 - }, - { - "epoch": 0.32373076662373756, - "grad_norm": 1.3930688957896256, - "learning_rate": 1.580728775512774e-06, - "loss": 1.1771, - "step": 2388 - }, - { - "epoch": 0.32386633227140244, - "grad_norm": 1.4645276182434142, - "learning_rate": 1.5803712186252943e-06, - "loss": 1.1658, - "step": 2389 - }, - { - "epoch": 0.3240018979190673, - "grad_norm": 1.8249369847945909, - "learning_rate": 1.5800135498178065e-06, - "loss": 1.1976, - "step": 2390 - }, - { - "epoch": 0.3241374635667322, - "grad_norm": 1.6426730291732201, - "learning_rate": 1.5796557691592835e-06, - "loss": 1.1876, - "step": 2391 - }, - { - "epoch": 0.32427302921439705, - "grad_norm": 3.0008985068475953, - "learning_rate": 1.579297876718721e-06, - "loss": 1.1748, - "step": 2392 - }, - { - "epoch": 0.32440859486206197, - "grad_norm": 2.1120758083173192, - "learning_rate": 1.5789398725651358e-06, - "loss": 1.1522, - "step": 2393 - }, - { - "epoch": 0.32454416050972684, - "grad_norm": 1.4801114783726053, - "learning_rate": 1.5785817567675661e-06, - "loss": 1.1738, - "step": 2394 - }, - { - "epoch": 0.3246797261573917, - "grad_norm": 1.5150684369414056, - "learning_rate": 1.5782235293950717e-06, - "loss": 1.1512, - "step": 2395 - }, - { - "epoch": 0.3248152918050566, - "grad_norm": 2.0736155853334846, - "learning_rate": 1.5778651905167334e-06, - "loss": 1.1481, - "step": 2396 - }, - { - "epoch": 0.32495085745272145, - "grad_norm": 1.645577856732359, - "learning_rate": 1.577506740201655e-06, - "loss": 1.1541, - "step": 2397 - }, - { - "epoch": 0.3250864231003864, - "grad_norm": 1.6399533061237859, - "learning_rate": 1.5771481785189601e-06, - "loss": 1.1121, - "step": 2398 - }, - { - "epoch": 0.32522198874805125, - "grad_norm": 1.4212925617606895, - "learning_rate": 1.5767895055377948e-06, - "loss": 1.1868, - "step": 2399 - }, - { - "epoch": 0.3253575543957161, - "grad_norm": 1.8091578425409152, - "learning_rate": 1.5764307213273264e-06, - "loss": 1.1901, - "step": 2400 - }, - { - "epoch": 0.325493120043381, - "grad_norm": 1.535521360318581, - "learning_rate": 1.5760718259567432e-06, - "loss": 1.196, - "step": 2401 - }, - { - "epoch": 0.3256286856910459, - "grad_norm": 2.0655018866299146, - "learning_rate": 1.5757128194952557e-06, - "loss": 1.1482, - "step": 2402 - }, - { - "epoch": 0.3257642513387108, - "grad_norm": 2.056248585639207, - "learning_rate": 1.5753537020120952e-06, - "loss": 1.2007, - "step": 2403 - }, - { - "epoch": 0.32589981698637566, - "grad_norm": 2.1610810112457743, - "learning_rate": 1.5749944735765153e-06, - "loss": 1.1941, - "step": 2404 - }, - { - "epoch": 0.3260353826340405, - "grad_norm": 2.2927141040183274, - "learning_rate": 1.5746351342577895e-06, - "loss": 1.1849, - "step": 2405 - }, - { - "epoch": 0.3261709482817054, - "grad_norm": 1.8259938559396405, - "learning_rate": 1.5742756841252143e-06, - "loss": 1.1709, - "step": 2406 - }, - { - "epoch": 0.3263065139293703, - "grad_norm": 1.525156547300931, - "learning_rate": 1.573916123248106e-06, - "loss": 1.1799, - "step": 2407 - }, - { - "epoch": 0.3264420795770352, - "grad_norm": 1.5150914763073366, - "learning_rate": 1.5735564516958039e-06, - "loss": 1.1828, - "step": 2408 - }, - { - "epoch": 0.32657764522470006, - "grad_norm": 1.5417327140608577, - "learning_rate": 1.5731966695376672e-06, - "loss": 1.1764, - "step": 2409 - }, - { - "epoch": 0.32671321087236493, - "grad_norm": 1.6972880074054852, - "learning_rate": 1.5728367768430775e-06, - "loss": 1.1398, - "step": 2410 - }, - { - "epoch": 0.3268487765200298, - "grad_norm": 2.0346956846178736, - "learning_rate": 1.572476773681437e-06, - "loss": 1.1858, - "step": 2411 - }, - { - "epoch": 0.32698434216769473, - "grad_norm": 2.0227328838770573, - "learning_rate": 1.5721166601221695e-06, - "loss": 1.1843, - "step": 2412 - }, - { - "epoch": 0.3271199078153596, - "grad_norm": 2.011001560730901, - "learning_rate": 1.5717564362347203e-06, - "loss": 1.1855, - "step": 2413 - }, - { - "epoch": 0.32725547346302447, - "grad_norm": 1.6069357693962427, - "learning_rate": 1.5713961020885553e-06, - "loss": 1.1732, - "step": 2414 - }, - { - "epoch": 0.32739103911068934, - "grad_norm": 1.5054250754300262, - "learning_rate": 1.5710356577531628e-06, - "loss": 1.1523, - "step": 2415 - }, - { - "epoch": 0.3275266047583542, - "grad_norm": 2.743787149706714, - "learning_rate": 1.5706751032980506e-06, - "loss": 1.1865, - "step": 2416 - }, - { - "epoch": 0.32766217040601914, - "grad_norm": 2.302912367189527, - "learning_rate": 1.5703144387927499e-06, - "loss": 1.1821, - "step": 2417 - }, - { - "epoch": 0.327797736053684, - "grad_norm": 1.42021082169721, - "learning_rate": 1.5699536643068113e-06, - "loss": 1.2046, - "step": 2418 - }, - { - "epoch": 0.3279333017013489, - "grad_norm": 2.664306656732138, - "learning_rate": 1.5695927799098071e-06, - "loss": 1.188, - "step": 2419 - }, - { - "epoch": 0.32806886734901375, - "grad_norm": 2.1705036351352844, - "learning_rate": 1.5692317856713318e-06, - "loss": 1.1993, - "step": 2420 - }, - { - "epoch": 0.3282044329966786, - "grad_norm": 2.5842735758955317, - "learning_rate": 1.5688706816609995e-06, - "loss": 1.1808, - "step": 2421 - }, - { - "epoch": 0.32833999864434354, - "grad_norm": 1.8483898436947468, - "learning_rate": 1.5685094679484472e-06, - "loss": 1.1838, - "step": 2422 - }, - { - "epoch": 0.3284755642920084, - "grad_norm": 1.6030608777532764, - "learning_rate": 1.5681481446033312e-06, - "loss": 1.1961, - "step": 2423 - }, - { - "epoch": 0.3286111299396733, - "grad_norm": 2.6417704120693846, - "learning_rate": 1.56778671169533e-06, - "loss": 1.2259, - "step": 2424 - }, - { - "epoch": 0.32874669558733816, - "grad_norm": 1.5902744067164605, - "learning_rate": 1.5674251692941436e-06, - "loss": 1.1925, - "step": 2425 - }, - { - "epoch": 0.328882261235003, - "grad_norm": 1.5350277061523991, - "learning_rate": 1.5670635174694923e-06, - "loss": 1.1898, - "step": 2426 - }, - { - "epoch": 0.32901782688266795, - "grad_norm": 1.5788224172512797, - "learning_rate": 1.5667017562911176e-06, - "loss": 1.1882, - "step": 2427 - }, - { - "epoch": 0.3291533925303328, - "grad_norm": 1.4664354902078172, - "learning_rate": 1.5663398858287824e-06, - "loss": 1.1899, - "step": 2428 - }, - { - "epoch": 0.3292889581779977, - "grad_norm": 3.159941625186901, - "learning_rate": 1.565977906152271e-06, - "loss": 1.1313, - "step": 2429 - }, - { - "epoch": 0.32942452382566256, - "grad_norm": 1.5223465378928511, - "learning_rate": 1.5656158173313876e-06, - "loss": 1.1611, - "step": 2430 - }, - { - "epoch": 0.32956008947332743, - "grad_norm": 1.9838471669717157, - "learning_rate": 1.5652536194359586e-06, - "loss": 1.1907, - "step": 2431 - }, - { - "epoch": 0.32969565512099236, - "grad_norm": 1.5405536111166134, - "learning_rate": 1.5648913125358312e-06, - "loss": 1.1635, - "step": 2432 - }, - { - "epoch": 0.32983122076865723, - "grad_norm": 1.5667194011447718, - "learning_rate": 1.564528896700873e-06, - "loss": 1.188, - "step": 2433 - }, - { - "epoch": 0.3299667864163221, - "grad_norm": 1.5868210256804955, - "learning_rate": 1.5641663720009732e-06, - "loss": 1.1692, - "step": 2434 - }, - { - "epoch": 0.33010235206398697, - "grad_norm": 2.3149739011782713, - "learning_rate": 1.5638037385060416e-06, - "loss": 1.2011, - "step": 2435 - }, - { - "epoch": 0.33023791771165184, - "grad_norm": 1.695726942726153, - "learning_rate": 1.5634409962860096e-06, - "loss": 1.1867, - "step": 2436 - }, - { - "epoch": 0.33037348335931677, - "grad_norm": 1.5609236874808228, - "learning_rate": 1.5630781454108291e-06, - "loss": 1.1605, - "step": 2437 - }, - { - "epoch": 0.33050904900698164, - "grad_norm": 3.4510973065814867, - "learning_rate": 1.5627151859504726e-06, - "loss": 1.1863, - "step": 2438 - }, - { - "epoch": 0.3306446146546465, - "grad_norm": 1.6859709070395692, - "learning_rate": 1.5623521179749346e-06, - "loss": 1.1863, - "step": 2439 - }, - { - "epoch": 0.3307801803023114, - "grad_norm": 2.013174630471076, - "learning_rate": 1.5619889415542296e-06, - "loss": 1.1978, - "step": 2440 - }, - { - "epoch": 0.3309157459499763, - "grad_norm": 1.5723782647334572, - "learning_rate": 1.5616256567583932e-06, - "loss": 1.1935, - "step": 2441 - }, - { - "epoch": 0.3310513115976412, - "grad_norm": 1.623091182875469, - "learning_rate": 1.561262263657482e-06, - "loss": 1.2109, - "step": 2442 - }, - { - "epoch": 0.33118687724530604, - "grad_norm": 1.9675913954818742, - "learning_rate": 1.5608987623215736e-06, - "loss": 1.1789, - "step": 2443 - }, - { - "epoch": 0.3313224428929709, - "grad_norm": 1.4205579394382242, - "learning_rate": 1.5605351528207664e-06, - "loss": 1.1901, - "step": 2444 - }, - { - "epoch": 0.3314580085406358, - "grad_norm": 1.5363328390314617, - "learning_rate": 1.5601714352251798e-06, - "loss": 1.2263, - "step": 2445 - }, - { - "epoch": 0.3315935741883007, - "grad_norm": 2.6178367687286572, - "learning_rate": 1.5598076096049533e-06, - "loss": 1.1753, - "step": 2446 - }, - { - "epoch": 0.3317291398359656, - "grad_norm": 1.5437201157609632, - "learning_rate": 1.5594436760302483e-06, - "loss": 1.1678, - "step": 2447 - }, - { - "epoch": 0.33186470548363045, - "grad_norm": 1.519128811164236, - "learning_rate": 1.5590796345712465e-06, - "loss": 1.1851, - "step": 2448 - }, - { - "epoch": 0.3320002711312953, - "grad_norm": 2.0852787949411606, - "learning_rate": 1.55871548529815e-06, - "loss": 1.1467, - "step": 2449 - }, - { - "epoch": 0.3321358367789602, - "grad_norm": 1.9039829909751242, - "learning_rate": 1.5583512282811826e-06, - "loss": 1.1755, - "step": 2450 - }, - { - "epoch": 0.3322714024266251, - "grad_norm": 1.6213708410077348, - "learning_rate": 1.557986863590588e-06, - "loss": 1.1266, - "step": 2451 - }, - { - "epoch": 0.33240696807429, - "grad_norm": 1.6125414810141137, - "learning_rate": 1.5576223912966313e-06, - "loss": 1.1572, - "step": 2452 - }, - { - "epoch": 0.33254253372195486, - "grad_norm": 2.0045319330219704, - "learning_rate": 1.557257811469598e-06, - "loss": 1.1484, - "step": 2453 - }, - { - "epoch": 0.33267809936961973, - "grad_norm": 2.024883789818294, - "learning_rate": 1.5568931241797947e-06, - "loss": 1.1952, - "step": 2454 - }, - { - "epoch": 0.3328136650172846, - "grad_norm": 1.717904007534209, - "learning_rate": 1.556528329497548e-06, - "loss": 1.1652, - "step": 2455 - }, - { - "epoch": 0.3329492306649495, - "grad_norm": 1.4518009489660013, - "learning_rate": 1.5561634274932061e-06, - "loss": 1.1628, - "step": 2456 - }, - { - "epoch": 0.3330847963126144, - "grad_norm": 1.43067597014406, - "learning_rate": 1.555798418237137e-06, - "loss": 1.1693, - "step": 2457 - }, - { - "epoch": 0.33322036196027927, - "grad_norm": 4.8694453739639885, - "learning_rate": 1.5554333017997306e-06, - "loss": 1.185, - "step": 2458 - }, - { - "epoch": 0.33335592760794414, - "grad_norm": 1.8795835037245296, - "learning_rate": 1.5550680782513962e-06, - "loss": 1.1588, - "step": 2459 - }, - { - "epoch": 0.333491493255609, - "grad_norm": 1.7179070252605535, - "learning_rate": 1.554702747662564e-06, - "loss": 1.1931, - "step": 2460 - }, - { - "epoch": 0.33362705890327393, - "grad_norm": 3.77732312194548, - "learning_rate": 1.5543373101036856e-06, - "loss": 1.1846, - "step": 2461 - }, - { - "epoch": 0.3337626245509388, - "grad_norm": 1.4782603736428397, - "learning_rate": 1.5539717656452327e-06, - "loss": 1.1679, - "step": 2462 - }, - { - "epoch": 0.3338981901986037, - "grad_norm": 1.4162565188289116, - "learning_rate": 1.5536061143576978e-06, - "loss": 1.1697, - "step": 2463 - }, - { - "epoch": 0.33403375584626854, - "grad_norm": 1.9523545826049042, - "learning_rate": 1.5532403563115932e-06, - "loss": 1.1854, - "step": 2464 - }, - { - "epoch": 0.3341693214939334, - "grad_norm": 2.793284211111756, - "learning_rate": 1.5528744915774532e-06, - "loss": 1.1699, - "step": 2465 - }, - { - "epoch": 0.33430488714159834, - "grad_norm": 1.5375877435188245, - "learning_rate": 1.5525085202258316e-06, - "loss": 1.1743, - "step": 2466 - }, - { - "epoch": 0.3344404527892632, - "grad_norm": 1.458781594400697, - "learning_rate": 1.552142442327303e-06, - "loss": 1.1625, - "step": 2467 - }, - { - "epoch": 0.3345760184369281, - "grad_norm": 2.848634658433064, - "learning_rate": 1.5517762579524628e-06, - "loss": 1.1371, - "step": 2468 - }, - { - "epoch": 0.33471158408459295, - "grad_norm": 1.4733233348566235, - "learning_rate": 1.5514099671719267e-06, - "loss": 1.1903, - "step": 2469 - }, - { - "epoch": 0.3348471497322578, - "grad_norm": 1.6231390822779852, - "learning_rate": 1.551043570056331e-06, - "loss": 1.1904, - "step": 2470 - }, - { - "epoch": 0.33498271537992275, - "grad_norm": 1.387384446871848, - "learning_rate": 1.5506770666763324e-06, - "loss": 1.1425, - "step": 2471 - }, - { - "epoch": 0.3351182810275876, - "grad_norm": 1.789215845983205, - "learning_rate": 1.5503104571026084e-06, - "loss": 1.1885, - "step": 2472 - }, - { - "epoch": 0.3352538466752525, - "grad_norm": 1.6376428233867075, - "learning_rate": 1.5499437414058564e-06, - "loss": 1.1822, - "step": 2473 - }, - { - "epoch": 0.33538941232291736, - "grad_norm": 2.1071659509489256, - "learning_rate": 1.5495769196567955e-06, - "loss": 1.162, - "step": 2474 - }, - { - "epoch": 0.3355249779705822, - "grad_norm": 2.694708877945794, - "learning_rate": 1.5492099919261632e-06, - "loss": 1.2231, - "step": 2475 - }, - { - "epoch": 0.33566054361824715, - "grad_norm": 1.7996605542018287, - "learning_rate": 1.5488429582847192e-06, - "loss": 1.186, - "step": 2476 - }, - { - "epoch": 0.335796109265912, - "grad_norm": 1.4860243335394454, - "learning_rate": 1.5484758188032433e-06, - "loss": 1.1727, - "step": 2477 - }, - { - "epoch": 0.3359316749135769, - "grad_norm": 3.041463545264503, - "learning_rate": 1.5481085735525348e-06, - "loss": 1.2026, - "step": 2478 - }, - { - "epoch": 0.33606724056124176, - "grad_norm": 1.7865892972747621, - "learning_rate": 1.5477412226034145e-06, - "loss": 1.1745, - "step": 2479 - }, - { - "epoch": 0.3362028062089067, - "grad_norm": 2.724890390905319, - "learning_rate": 1.547373766026723e-06, - "loss": 1.2254, - "step": 2480 - }, - { - "epoch": 0.33633837185657156, - "grad_norm": 1.8847462630361331, - "learning_rate": 1.5470062038933213e-06, - "loss": 1.1825, - "step": 2481 - }, - { - "epoch": 0.33647393750423643, - "grad_norm": 1.7636095449982443, - "learning_rate": 1.5466385362740911e-06, - "loss": 1.1863, - "step": 2482 - }, - { - "epoch": 0.3366095031519013, - "grad_norm": 1.5912222753111827, - "learning_rate": 1.5462707632399342e-06, - "loss": 1.1385, - "step": 2483 - }, - { - "epoch": 0.33674506879956617, - "grad_norm": 2.0725028742949854, - "learning_rate": 1.5459028848617726e-06, - "loss": 1.1757, - "step": 2484 - }, - { - "epoch": 0.3368806344472311, - "grad_norm": 1.6811715023806568, - "learning_rate": 1.5455349012105486e-06, - "loss": 1.1996, - "step": 2485 - }, - { - "epoch": 0.33701620009489597, - "grad_norm": 2.138084763125434, - "learning_rate": 1.545166812357225e-06, - "loss": 1.1475, - "step": 2486 - }, - { - "epoch": 0.33715176574256084, - "grad_norm": 1.8614744277495263, - "learning_rate": 1.5447986183727852e-06, - "loss": 1.1936, - "step": 2487 - }, - { - "epoch": 0.3372873313902257, - "grad_norm": 1.8532908290761294, - "learning_rate": 1.5444303193282324e-06, - "loss": 1.1874, - "step": 2488 - }, - { - "epoch": 0.3374228970378906, - "grad_norm": 1.6072316612163118, - "learning_rate": 1.5440619152945896e-06, - "loss": 1.1238, - "step": 2489 - }, - { - "epoch": 0.3375584626855555, - "grad_norm": 1.779336481222247, - "learning_rate": 1.5436934063429013e-06, - "loss": 1.2176, - "step": 2490 - }, - { - "epoch": 0.3376940283332204, - "grad_norm": 1.866141743533603, - "learning_rate": 1.5433247925442308e-06, - "loss": 1.1547, - "step": 2491 - }, - { - "epoch": 0.33782959398088525, - "grad_norm": 1.677248425844946, - "learning_rate": 1.542956073969663e-06, - "loss": 1.1523, - "step": 2492 - }, - { - "epoch": 0.3379651596285501, - "grad_norm": 1.414584296905524, - "learning_rate": 1.5425872506903024e-06, - "loss": 1.1388, - "step": 2493 - }, - { - "epoch": 0.338100725276215, - "grad_norm": 1.5103118399048494, - "learning_rate": 1.542218322777273e-06, - "loss": 1.1656, - "step": 2494 - }, - { - "epoch": 0.3382362909238799, - "grad_norm": 1.6262971582968297, - "learning_rate": 1.5418492903017204e-06, - "loss": 1.1831, - "step": 2495 - }, - { - "epoch": 0.3383718565715448, - "grad_norm": 4.9528617762390095, - "learning_rate": 1.5414801533348091e-06, - "loss": 1.2187, - "step": 2496 - }, - { - "epoch": 0.33850742221920965, - "grad_norm": 1.7424828956375824, - "learning_rate": 1.5411109119477247e-06, - "loss": 1.2044, - "step": 2497 - }, - { - "epoch": 0.3386429878668745, - "grad_norm": 1.6747450334596354, - "learning_rate": 1.5407415662116718e-06, - "loss": 1.2084, - "step": 2498 - }, - { - "epoch": 0.3387785535145394, - "grad_norm": 1.4823345879102376, - "learning_rate": 1.5403721161978764e-06, - "loss": 1.155, - "step": 2499 - }, - { - "epoch": 0.3389141191622043, - "grad_norm": 2.1644350441087963, - "learning_rate": 1.5400025619775838e-06, - "loss": 1.1368, - "step": 2500 - }, - { - "epoch": 0.3390496848098692, - "grad_norm": 1.4653870772719526, - "learning_rate": 1.5396329036220598e-06, - "loss": 1.1756, - "step": 2501 - }, - { - "epoch": 0.33918525045753406, - "grad_norm": 1.395342112381541, - "learning_rate": 1.5392631412025898e-06, - "loss": 1.1704, - "step": 2502 - }, - { - "epoch": 0.33932081610519893, - "grad_norm": 1.4299158852173628, - "learning_rate": 1.5388932747904797e-06, - "loss": 1.1719, - "step": 2503 - }, - { - "epoch": 0.3394563817528638, - "grad_norm": 1.5549226997827177, - "learning_rate": 1.5385233044570554e-06, - "loss": 1.169, - "step": 2504 - }, - { - "epoch": 0.3395919474005287, - "grad_norm": 2.241707468736357, - "learning_rate": 1.5381532302736627e-06, - "loss": 1.1835, - "step": 2505 - }, - { - "epoch": 0.3397275130481936, - "grad_norm": 1.4914693436815392, - "learning_rate": 1.5377830523116675e-06, - "loss": 1.1728, - "step": 2506 - }, - { - "epoch": 0.33986307869585847, - "grad_norm": 2.8773001417485293, - "learning_rate": 1.5374127706424553e-06, - "loss": 1.1611, - "step": 2507 - }, - { - "epoch": 0.33999864434352334, - "grad_norm": 1.7267867579365823, - "learning_rate": 1.5370423853374325e-06, - "loss": 1.1762, - "step": 2508 - }, - { - "epoch": 0.3401342099911882, - "grad_norm": 2.259543963609326, - "learning_rate": 1.5366718964680253e-06, - "loss": 1.1483, - "step": 2509 - }, - { - "epoch": 0.34026977563885313, - "grad_norm": 1.4967040716016933, - "learning_rate": 1.5363013041056787e-06, - "loss": 1.1948, - "step": 2510 - }, - { - "epoch": 0.340405341286518, - "grad_norm": 1.3780298561868003, - "learning_rate": 1.5359306083218588e-06, - "loss": 1.1557, - "step": 2511 - }, - { - "epoch": 0.3405409069341829, - "grad_norm": 1.6046577039637295, - "learning_rate": 1.5355598091880517e-06, - "loss": 1.1429, - "step": 2512 - }, - { - "epoch": 0.34067647258184774, - "grad_norm": 1.5508988052623558, - "learning_rate": 1.5351889067757627e-06, - "loss": 1.2082, - "step": 2513 - }, - { - "epoch": 0.3408120382295126, - "grad_norm": 1.4921958529181265, - "learning_rate": 1.5348179011565176e-06, - "loss": 1.2185, - "step": 2514 - }, - { - "epoch": 0.34094760387717754, - "grad_norm": 2.8223432121667194, - "learning_rate": 1.5344467924018619e-06, - "loss": 1.119, - "step": 2515 - }, - { - "epoch": 0.3410831695248424, - "grad_norm": 1.756732120846369, - "learning_rate": 1.534075580583361e-06, - "loss": 1.1302, - "step": 2516 - }, - { - "epoch": 0.3412187351725073, - "grad_norm": 2.864295136149504, - "learning_rate": 1.5337042657726e-06, - "loss": 1.1926, - "step": 2517 - }, - { - "epoch": 0.34135430082017215, - "grad_norm": 1.6905958170357525, - "learning_rate": 1.5333328480411842e-06, - "loss": 1.1366, - "step": 2518 - }, - { - "epoch": 0.3414898664678371, - "grad_norm": 1.8048699616793222, - "learning_rate": 1.5329613274607387e-06, - "loss": 1.1926, - "step": 2519 - }, - { - "epoch": 0.34162543211550195, - "grad_norm": 1.8553406742078025, - "learning_rate": 1.5325897041029078e-06, - "loss": 1.1991, - "step": 2520 - }, - { - "epoch": 0.3417609977631668, - "grad_norm": 2.098655324178943, - "learning_rate": 1.5322179780393567e-06, - "loss": 1.1544, - "step": 2521 - }, - { - "epoch": 0.3418965634108317, - "grad_norm": 1.6852362315562086, - "learning_rate": 1.5318461493417694e-06, - "loss": 1.1837, - "step": 2522 - }, - { - "epoch": 0.34203212905849656, - "grad_norm": 1.5156483614554936, - "learning_rate": 1.5314742180818504e-06, - "loss": 1.2024, - "step": 2523 - }, - { - "epoch": 0.3421676947061615, - "grad_norm": 1.808453115831744, - "learning_rate": 1.5311021843313238e-06, - "loss": 1.1648, - "step": 2524 - }, - { - "epoch": 0.34230326035382636, - "grad_norm": 4.060134911734256, - "learning_rate": 1.5307300481619332e-06, - "loss": 1.2094, - "step": 2525 - }, - { - "epoch": 0.3424388260014912, - "grad_norm": 1.5878831808262936, - "learning_rate": 1.5303578096454422e-06, - "loss": 1.1558, - "step": 2526 - }, - { - "epoch": 0.3425743916491561, - "grad_norm": 1.9932507375469515, - "learning_rate": 1.5299854688536339e-06, - "loss": 1.1625, - "step": 2527 - }, - { - "epoch": 0.34270995729682097, - "grad_norm": 1.7845201018686696, - "learning_rate": 1.5296130258583113e-06, - "loss": 1.1752, - "step": 2528 - }, - { - "epoch": 0.3428455229444859, - "grad_norm": 1.6182848093081028, - "learning_rate": 1.5292404807312971e-06, - "loss": 1.1724, - "step": 2529 - }, - { - "epoch": 0.34298108859215076, - "grad_norm": 1.9114662878221582, - "learning_rate": 1.5288678335444342e-06, - "loss": 1.1783, - "step": 2530 - }, - { - "epoch": 0.34311665423981563, - "grad_norm": 2.13137595556096, - "learning_rate": 1.5284950843695838e-06, - "loss": 1.132, - "step": 2531 - }, - { - "epoch": 0.3432522198874805, - "grad_norm": 2.3929772499321214, - "learning_rate": 1.5281222332786282e-06, - "loss": 1.183, - "step": 2532 - }, - { - "epoch": 0.3433877855351454, - "grad_norm": 1.7716415564978079, - "learning_rate": 1.527749280343469e-06, - "loss": 1.1614, - "step": 2533 - }, - { - "epoch": 0.3435233511828103, - "grad_norm": 2.09980812283455, - "learning_rate": 1.527376225636026e-06, - "loss": 1.1854, - "step": 2534 - }, - { - "epoch": 0.34365891683047517, - "grad_norm": 1.5614142256696892, - "learning_rate": 1.5270030692282415e-06, - "loss": 1.166, - "step": 2535 - }, - { - "epoch": 0.34379448247814004, - "grad_norm": 1.5078887046774765, - "learning_rate": 1.526629811192075e-06, - "loss": 1.1267, - "step": 2536 - }, - { - "epoch": 0.3439300481258049, - "grad_norm": 1.642819765764812, - "learning_rate": 1.5262564515995062e-06, - "loss": 1.1978, - "step": 2537 - }, - { - "epoch": 0.3440656137734698, - "grad_norm": 1.5237180085200164, - "learning_rate": 1.5258829905225348e-06, - "loss": 1.1492, - "step": 2538 - }, - { - "epoch": 0.3442011794211347, - "grad_norm": 1.5141178622976035, - "learning_rate": 1.5255094280331795e-06, - "loss": 1.1766, - "step": 2539 - }, - { - "epoch": 0.3443367450687996, - "grad_norm": 5.619721061171953, - "learning_rate": 1.5251357642034793e-06, - "loss": 1.1449, - "step": 2540 - }, - { - "epoch": 0.34447231071646445, - "grad_norm": 1.6241353015196955, - "learning_rate": 1.524761999105492e-06, - "loss": 1.1358, - "step": 2541 - }, - { - "epoch": 0.3446078763641293, - "grad_norm": 1.6926103549732228, - "learning_rate": 1.5243881328112953e-06, - "loss": 1.1738, - "step": 2542 - }, - { - "epoch": 0.3447434420117942, - "grad_norm": 1.9681743824040814, - "learning_rate": 1.5240141653929868e-06, - "loss": 1.1917, - "step": 2543 - }, - { - "epoch": 0.3448790076594591, - "grad_norm": 2.217168395222342, - "learning_rate": 1.5236400969226828e-06, - "loss": 1.1587, - "step": 2544 - }, - { - "epoch": 0.345014573307124, - "grad_norm": 1.9416355869891382, - "learning_rate": 1.5232659274725195e-06, - "loss": 1.1598, - "step": 2545 - }, - { - "epoch": 0.34515013895478885, - "grad_norm": 1.4402059356037482, - "learning_rate": 1.5228916571146522e-06, - "loss": 1.1672, - "step": 2546 - }, - { - "epoch": 0.3452857046024537, - "grad_norm": 1.6183820138757468, - "learning_rate": 1.5225172859212565e-06, - "loss": 1.1933, - "step": 2547 - }, - { - "epoch": 0.3454212702501186, - "grad_norm": 1.8486492836004675, - "learning_rate": 1.5221428139645266e-06, - "loss": 1.1569, - "step": 2548 - }, - { - "epoch": 0.3455568358977835, - "grad_norm": 2.546437437519464, - "learning_rate": 1.5217682413166767e-06, - "loss": 1.1722, - "step": 2549 - }, - { - "epoch": 0.3456924015454484, - "grad_norm": 1.7171944837968884, - "learning_rate": 1.5213935680499397e-06, - "loss": 1.1202, - "step": 2550 - }, - { - "epoch": 0.34582796719311326, - "grad_norm": 1.4917218700641648, - "learning_rate": 1.521018794236569e-06, - "loss": 1.1391, - "step": 2551 - }, - { - "epoch": 0.34596353284077813, - "grad_norm": 1.6290184684525204, - "learning_rate": 1.5206439199488366e-06, - "loss": 1.1948, - "step": 2552 - }, - { - "epoch": 0.346099098488443, - "grad_norm": 2.8265169201285634, - "learning_rate": 1.5202689452590339e-06, - "loss": 1.1738, - "step": 2553 - }, - { - "epoch": 0.34623466413610793, - "grad_norm": 1.6065619320293492, - "learning_rate": 1.5198938702394717e-06, - "loss": 1.1376, - "step": 2554 - }, - { - "epoch": 0.3463702297837728, - "grad_norm": 1.556238190013172, - "learning_rate": 1.5195186949624804e-06, - "loss": 1.1587, - "step": 2555 - }, - { - "epoch": 0.34650579543143767, - "grad_norm": 1.421661396832942, - "learning_rate": 1.5191434195004098e-06, - "loss": 1.1451, - "step": 2556 - }, - { - "epoch": 0.34664136107910254, - "grad_norm": 1.5504893398726336, - "learning_rate": 1.5187680439256285e-06, - "loss": 1.1392, - "step": 2557 - }, - { - "epoch": 0.34677692672676746, - "grad_norm": 2.303879566344356, - "learning_rate": 1.5183925683105251e-06, - "loss": 1.1717, - "step": 2558 - }, - { - "epoch": 0.34691249237443234, - "grad_norm": 1.554204394951351, - "learning_rate": 1.5180169927275066e-06, - "loss": 1.1598, - "step": 2559 - }, - { - "epoch": 0.3470480580220972, - "grad_norm": 1.4422866830862724, - "learning_rate": 1.517641317249e-06, - "loss": 1.2145, - "step": 2560 - }, - { - "epoch": 0.3471836236697621, - "grad_norm": 1.7845357057228097, - "learning_rate": 1.5172655419474514e-06, - "loss": 1.1684, - "step": 2561 - }, - { - "epoch": 0.34731918931742695, - "grad_norm": 1.502789015374646, - "learning_rate": 1.5168896668953261e-06, - "loss": 1.15, - "step": 2562 - }, - { - "epoch": 0.34745475496509187, - "grad_norm": 1.7295365680916088, - "learning_rate": 1.5165136921651084e-06, - "loss": 1.147, - "step": 2563 - }, - { - "epoch": 0.34759032061275674, - "grad_norm": 1.444049198985322, - "learning_rate": 1.5161376178293028e-06, - "loss": 1.1719, - "step": 2564 - }, - { - "epoch": 0.3477258862604216, - "grad_norm": 1.7690747832997413, - "learning_rate": 1.5157614439604313e-06, - "loss": 1.1451, - "step": 2565 - }, - { - "epoch": 0.3478614519080865, - "grad_norm": 1.640830262567967, - "learning_rate": 1.5153851706310367e-06, - "loss": 1.2154, - "step": 2566 - }, - { - "epoch": 0.34799701755575135, - "grad_norm": 1.7095111064878008, - "learning_rate": 1.51500879791368e-06, - "loss": 1.1573, - "step": 2567 - }, - { - "epoch": 0.3481325832034163, - "grad_norm": 1.5583161837792532, - "learning_rate": 1.5146323258809423e-06, - "loss": 1.1586, - "step": 2568 - }, - { - "epoch": 0.34826814885108115, - "grad_norm": 1.5212248087193019, - "learning_rate": 1.5142557546054224e-06, - "loss": 1.2024, - "step": 2569 - }, - { - "epoch": 0.348403714498746, - "grad_norm": 1.815975967976842, - "learning_rate": 1.5138790841597398e-06, - "loss": 1.1905, - "step": 2570 - }, - { - "epoch": 0.3485392801464109, - "grad_norm": 1.7936911150149335, - "learning_rate": 1.5135023146165317e-06, - "loss": 1.2275, - "step": 2571 - }, - { - "epoch": 0.34867484579407576, - "grad_norm": 1.5879685888431645, - "learning_rate": 1.513125446048456e-06, - "loss": 1.198, - "step": 2572 - }, - { - "epoch": 0.3488104114417407, - "grad_norm": 1.812959768474053, - "learning_rate": 1.5127484785281884e-06, - "loss": 1.1703, - "step": 2573 - }, - { - "epoch": 0.34894597708940556, - "grad_norm": 9.456260472523303, - "learning_rate": 1.5123714121284237e-06, - "loss": 1.1845, - "step": 2574 - }, - { - "epoch": 0.3490815427370704, - "grad_norm": 1.555701307340489, - "learning_rate": 1.5119942469218768e-06, - "loss": 1.194, - "step": 2575 - }, - { - "epoch": 0.3492171083847353, - "grad_norm": 1.5805052149630412, - "learning_rate": 1.5116169829812807e-06, - "loss": 1.1989, - "step": 2576 - }, - { - "epoch": 0.34935267403240017, - "grad_norm": 2.006200982505679, - "learning_rate": 1.511239620379388e-06, - "loss": 1.1831, - "step": 2577 - }, - { - "epoch": 0.3494882396800651, - "grad_norm": 1.410119377687749, - "learning_rate": 1.51086215918897e-06, - "loss": 1.1841, - "step": 2578 - }, - { - "epoch": 0.34962380532772996, - "grad_norm": 1.6914081412295234, - "learning_rate": 1.510484599482817e-06, - "loss": 1.1409, - "step": 2579 - }, - { - "epoch": 0.34975937097539483, - "grad_norm": 2.229864755581548, - "learning_rate": 1.5101069413337386e-06, - "loss": 1.1461, - "step": 2580 - }, - { - "epoch": 0.3498949366230597, - "grad_norm": 2.4279656970330596, - "learning_rate": 1.5097291848145631e-06, - "loss": 1.1791, - "step": 2581 - }, - { - "epoch": 0.3500305022707246, - "grad_norm": 1.5196804626418745, - "learning_rate": 1.5093513299981378e-06, - "loss": 1.1623, - "step": 2582 - }, - { - "epoch": 0.3501660679183895, - "grad_norm": 1.6335671396799987, - "learning_rate": 1.5089733769573292e-06, - "loss": 1.2023, - "step": 2583 - }, - { - "epoch": 0.35030163356605437, - "grad_norm": 1.4514494445632573, - "learning_rate": 1.5085953257650223e-06, - "loss": 1.1579, - "step": 2584 - }, - { - "epoch": 0.35043719921371924, - "grad_norm": 1.382426184860584, - "learning_rate": 1.5082171764941216e-06, - "loss": 1.1788, - "step": 2585 - }, - { - "epoch": 0.3505727648613841, - "grad_norm": 1.582656130171159, - "learning_rate": 1.5078389292175499e-06, - "loss": 1.1792, - "step": 2586 - }, - { - "epoch": 0.350708330509049, - "grad_norm": 1.4269047092156713, - "learning_rate": 1.5074605840082494e-06, - "loss": 1.1553, - "step": 2587 - }, - { - "epoch": 0.3508438961567139, - "grad_norm": 1.8867114122806237, - "learning_rate": 1.5070821409391812e-06, - "loss": 1.1917, - "step": 2588 - }, - { - "epoch": 0.3509794618043788, - "grad_norm": 2.0566925428007834, - "learning_rate": 1.5067036000833242e-06, - "loss": 1.1725, - "step": 2589 - }, - { - "epoch": 0.35111502745204365, - "grad_norm": 2.2461253662337235, - "learning_rate": 1.5063249615136782e-06, - "loss": 1.1457, - "step": 2590 - }, - { - "epoch": 0.3512505930997085, - "grad_norm": 1.4567243202906368, - "learning_rate": 1.5059462253032595e-06, - "loss": 1.198, - "step": 2591 - }, - { - "epoch": 0.3513861587473734, - "grad_norm": 1.456370806969757, - "learning_rate": 1.5055673915251052e-06, - "loss": 1.1314, - "step": 2592 - }, - { - "epoch": 0.3515217243950383, - "grad_norm": 1.69199709753919, - "learning_rate": 1.5051884602522702e-06, - "loss": 1.1419, - "step": 2593 - }, - { - "epoch": 0.3516572900427032, - "grad_norm": 1.6902121083878445, - "learning_rate": 1.5048094315578284e-06, - "loss": 1.1767, - "step": 2594 - }, - { - "epoch": 0.35179285569036806, - "grad_norm": 2.829407612821414, - "learning_rate": 1.5044303055148722e-06, - "loss": 1.1298, - "step": 2595 - }, - { - "epoch": 0.3519284213380329, - "grad_norm": 1.8621016415936875, - "learning_rate": 1.5040510821965135e-06, - "loss": 1.1906, - "step": 2596 - }, - { - "epoch": 0.3520639869856978, - "grad_norm": 1.42644507353959, - "learning_rate": 1.5036717616758824e-06, - "loss": 1.1353, - "step": 2597 - }, - { - "epoch": 0.3521995526333627, - "grad_norm": 2.986860449195858, - "learning_rate": 1.5032923440261276e-06, - "loss": 1.1749, - "step": 2598 - }, - { - "epoch": 0.3523351182810276, - "grad_norm": 1.6744503535993989, - "learning_rate": 1.5029128293204174e-06, - "loss": 1.1748, - "step": 2599 - }, - { - "epoch": 0.35247068392869246, - "grad_norm": 1.9020136274752257, - "learning_rate": 1.5025332176319373e-06, - "loss": 1.1637, - "step": 2600 - }, - { - "epoch": 0.35260624957635733, - "grad_norm": 1.617007796143291, - "learning_rate": 1.5021535090338932e-06, - "loss": 1.1474, - "step": 2601 - }, - { - "epoch": 0.35274181522402226, - "grad_norm": 1.641163905145374, - "learning_rate": 1.5017737035995087e-06, - "loss": 1.1568, - "step": 2602 - }, - { - "epoch": 0.35287738087168713, - "grad_norm": 1.7116085402724026, - "learning_rate": 1.5013938014020262e-06, - "loss": 1.158, - "step": 2603 - }, - { - "epoch": 0.353012946519352, - "grad_norm": 1.6092847179237328, - "learning_rate": 1.501013802514707e-06, - "loss": 1.1586, - "step": 2604 - }, - { - "epoch": 0.35314851216701687, - "grad_norm": 1.3522334242409297, - "learning_rate": 1.5006337070108304e-06, - "loss": 1.226, - "step": 2605 - }, - { - "epoch": 0.35328407781468174, - "grad_norm": 1.949933208991432, - "learning_rate": 1.5002535149636952e-06, - "loss": 1.1681, - "step": 2606 - }, - { - "epoch": 0.35341964346234667, - "grad_norm": 1.5059996110551774, - "learning_rate": 1.4998732264466186e-06, - "loss": 1.1212, - "step": 2607 - }, - { - "epoch": 0.35355520911001154, - "grad_norm": 1.8096207275950944, - "learning_rate": 1.499492841532936e-06, - "loss": 1.1652, - "step": 2608 - }, - { - "epoch": 0.3536907747576764, - "grad_norm": 1.3935979112776484, - "learning_rate": 1.4991123602960017e-06, - "loss": 1.1858, - "step": 2609 - }, - { - "epoch": 0.3538263404053413, - "grad_norm": 1.6413332160041496, - "learning_rate": 1.4987317828091882e-06, - "loss": 1.194, - "step": 2610 - }, - { - "epoch": 0.35396190605300615, - "grad_norm": 1.5220159948431358, - "learning_rate": 1.4983511091458874e-06, - "loss": 1.1656, - "step": 2611 - }, - { - "epoch": 0.3540974717006711, - "grad_norm": 1.6628804682471179, - "learning_rate": 1.4979703393795086e-06, - "loss": 1.1838, - "step": 2612 - }, - { - "epoch": 0.35423303734833594, - "grad_norm": 1.7277515314341483, - "learning_rate": 1.4975894735834809e-06, - "loss": 1.1005, - "step": 2613 - }, - { - "epoch": 0.3543686029960008, - "grad_norm": 2.498333622509205, - "learning_rate": 1.4972085118312511e-06, - "loss": 1.1407, - "step": 2614 - }, - { - "epoch": 0.3545041686436657, - "grad_norm": 1.4829501892534938, - "learning_rate": 1.4968274541962845e-06, - "loss": 1.1729, - "step": 2615 - }, - { - "epoch": 0.35463973429133056, - "grad_norm": 1.8082218582473992, - "learning_rate": 1.4964463007520647e-06, - "loss": 1.1768, - "step": 2616 - }, - { - "epoch": 0.3547752999389955, - "grad_norm": 2.218279636327135, - "learning_rate": 1.4960650515720947e-06, - "loss": 1.1579, - "step": 2617 - }, - { - "epoch": 0.35491086558666035, - "grad_norm": 8.338122575372838, - "learning_rate": 1.4956837067298954e-06, - "loss": 1.1927, - "step": 2618 - }, - { - "epoch": 0.3550464312343252, - "grad_norm": 1.6389055402395662, - "learning_rate": 1.4953022662990057e-06, - "loss": 1.1424, - "step": 2619 - }, - { - "epoch": 0.3551819968819901, - "grad_norm": 1.8754516584790053, - "learning_rate": 1.4949207303529835e-06, - "loss": 1.1943, - "step": 2620 - }, - { - "epoch": 0.35531756252965496, - "grad_norm": 1.7544957315491456, - "learning_rate": 1.4945390989654054e-06, - "loss": 1.1813, - "step": 2621 - }, - { - "epoch": 0.3554531281773199, - "grad_norm": 1.5513847430497485, - "learning_rate": 1.4941573722098655e-06, - "loss": 1.1841, - "step": 2622 - }, - { - "epoch": 0.35558869382498476, - "grad_norm": 1.6228288687048842, - "learning_rate": 1.4937755501599772e-06, - "loss": 1.1401, - "step": 2623 - }, - { - "epoch": 0.35572425947264963, - "grad_norm": 1.732401383353003, - "learning_rate": 1.4933936328893714e-06, - "loss": 1.1638, - "step": 2624 - }, - { - "epoch": 0.3558598251203145, - "grad_norm": 1.6561927837817787, - "learning_rate": 1.4930116204716984e-06, - "loss": 1.1838, - "step": 2625 - }, - { - "epoch": 0.35599539076797937, - "grad_norm": 1.6215881394865508, - "learning_rate": 1.492629512980626e-06, - "loss": 1.144, - "step": 2626 - }, - { - "epoch": 0.3561309564156443, - "grad_norm": 1.517726743805834, - "learning_rate": 1.4922473104898404e-06, - "loss": 1.1547, - "step": 2627 - }, - { - "epoch": 0.35626652206330917, - "grad_norm": 1.4326048213493696, - "learning_rate": 1.4918650130730467e-06, - "loss": 1.1593, - "step": 2628 - }, - { - "epoch": 0.35640208771097404, - "grad_norm": 1.6211379927951852, - "learning_rate": 1.491482620803968e-06, - "loss": 1.1617, - "step": 2629 - }, - { - "epoch": 0.3565376533586389, - "grad_norm": 2.048579414086659, - "learning_rate": 1.491100133756345e-06, - "loss": 1.1657, - "step": 2630 - }, - { - "epoch": 0.3566732190063038, - "grad_norm": 1.4810531516457066, - "learning_rate": 1.490717552003938e-06, - "loss": 1.1905, - "step": 2631 - }, - { - "epoch": 0.3568087846539687, - "grad_norm": 2.054141332738419, - "learning_rate": 1.4903348756205242e-06, - "loss": 1.1781, - "step": 2632 - }, - { - "epoch": 0.3569443503016336, - "grad_norm": 1.636274616456946, - "learning_rate": 1.4899521046799005e-06, - "loss": 1.1906, - "step": 2633 - }, - { - "epoch": 0.35707991594929844, - "grad_norm": 1.4463960948216283, - "learning_rate": 1.4895692392558806e-06, - "loss": 1.1612, - "step": 2634 - }, - { - "epoch": 0.3572154815969633, - "grad_norm": 1.69005728935076, - "learning_rate": 1.4891862794222976e-06, - "loss": 1.1679, - "step": 2635 - }, - { - "epoch": 0.3573510472446282, - "grad_norm": 1.492695415241083, - "learning_rate": 1.4888032252530017e-06, - "loss": 1.1852, - "step": 2636 - }, - { - "epoch": 0.3574866128922931, - "grad_norm": 1.4455297598554233, - "learning_rate": 1.4884200768218625e-06, - "loss": 1.1925, - "step": 2637 - }, - { - "epoch": 0.357622178539958, - "grad_norm": 1.5109543644058567, - "learning_rate": 1.4880368342027665e-06, - "loss": 1.1737, - "step": 2638 - }, - { - "epoch": 0.35775774418762285, - "grad_norm": 1.5689054597352117, - "learning_rate": 1.4876534974696196e-06, - "loss": 1.1856, - "step": 2639 - }, - { - "epoch": 0.3578933098352877, - "grad_norm": 1.9962714194800582, - "learning_rate": 1.487270066696345e-06, - "loss": 1.1375, - "step": 2640 - }, - { - "epoch": 0.35802887548295265, - "grad_norm": 1.8058997121070783, - "learning_rate": 1.4868865419568841e-06, - "loss": 1.1196, - "step": 2641 - }, - { - "epoch": 0.3581644411306175, - "grad_norm": 1.5141522141726145, - "learning_rate": 1.4865029233251971e-06, - "loss": 1.168, - "step": 2642 - }, - { - "epoch": 0.3583000067782824, - "grad_norm": 1.476717533108141, - "learning_rate": 1.4861192108752617e-06, - "loss": 1.1935, - "step": 2643 - }, - { - "epoch": 0.35843557242594726, - "grad_norm": 2.703030261936946, - "learning_rate": 1.485735404681073e-06, - "loss": 1.1608, - "step": 2644 - }, - { - "epoch": 0.35857113807361213, - "grad_norm": 1.898779611935178, - "learning_rate": 1.4853515048166463e-06, - "loss": 1.1927, - "step": 2645 - }, - { - "epoch": 0.35870670372127705, - "grad_norm": 1.950449628391994, - "learning_rate": 1.4849675113560128e-06, - "loss": 1.125, - "step": 2646 - }, - { - "epoch": 0.3588422693689419, - "grad_norm": 7.609229216719428, - "learning_rate": 1.4845834243732228e-06, - "loss": 1.1733, - "step": 2647 - }, - { - "epoch": 0.3589778350166068, - "grad_norm": 1.6598753266371196, - "learning_rate": 1.4841992439423445e-06, - "loss": 1.1431, - "step": 2648 - }, - { - "epoch": 0.35911340066427166, - "grad_norm": 1.5982399382278576, - "learning_rate": 1.483814970137464e-06, - "loss": 1.1305, - "step": 2649 - }, - { - "epoch": 0.35924896631193654, - "grad_norm": 1.525270162105972, - "learning_rate": 1.4834306030326855e-06, - "loss": 1.1502, - "step": 2650 - }, - { - "epoch": 0.35938453195960146, - "grad_norm": 1.6641512463010923, - "learning_rate": 1.4830461427021311e-06, - "loss": 1.1942, - "step": 2651 - }, - { - "epoch": 0.35952009760726633, - "grad_norm": 1.3609225403667926, - "learning_rate": 1.4826615892199415e-06, - "loss": 1.1502, - "step": 2652 - }, - { - "epoch": 0.3596556632549312, - "grad_norm": 1.3240542645303324, - "learning_rate": 1.482276942660274e-06, - "loss": 1.1138, - "step": 2653 - }, - { - "epoch": 0.35979122890259607, - "grad_norm": 1.3975066078441705, - "learning_rate": 1.481892203097305e-06, - "loss": 1.1467, - "step": 2654 - }, - { - "epoch": 0.35992679455026094, - "grad_norm": 2.029336325609661, - "learning_rate": 1.481507370605228e-06, - "loss": 1.1797, - "step": 2655 - }, - { - "epoch": 0.36006236019792587, - "grad_norm": 1.5001549699594299, - "learning_rate": 1.481122445258256e-06, - "loss": 1.1843, - "step": 2656 - }, - { - "epoch": 0.36019792584559074, - "grad_norm": 1.618504789382162, - "learning_rate": 1.4807374271306182e-06, - "loss": 1.1735, - "step": 2657 - }, - { - "epoch": 0.3603334914932556, - "grad_norm": 1.9499612071448267, - "learning_rate": 1.4803523162965618e-06, - "loss": 1.1426, - "step": 2658 - }, - { - "epoch": 0.3604690571409205, - "grad_norm": 1.3618743902976063, - "learning_rate": 1.4799671128303533e-06, - "loss": 1.1408, - "step": 2659 - }, - { - "epoch": 0.36060462278858535, - "grad_norm": 1.788661516120287, - "learning_rate": 1.4795818168062755e-06, - "loss": 1.1535, - "step": 2660 - }, - { - "epoch": 0.3607401884362503, - "grad_norm": 4.736508785158781, - "learning_rate": 1.47919642829863e-06, - "loss": 1.1855, - "step": 2661 - }, - { - "epoch": 0.36087575408391515, - "grad_norm": 1.9843319143338536, - "learning_rate": 1.4788109473817359e-06, - "loss": 1.1813, - "step": 2662 - }, - { - "epoch": 0.36101131973158, - "grad_norm": 1.9136566941430528, - "learning_rate": 1.4784253741299298e-06, - "loss": 1.177, - "step": 2663 - }, - { - "epoch": 0.3611468853792449, - "grad_norm": 1.3647472889171828, - "learning_rate": 1.4780397086175672e-06, - "loss": 1.1285, - "step": 2664 - }, - { - "epoch": 0.36128245102690976, - "grad_norm": 1.3730753360843175, - "learning_rate": 1.4776539509190198e-06, - "loss": 1.1702, - "step": 2665 - }, - { - "epoch": 0.3614180166745747, - "grad_norm": 1.4336975348514496, - "learning_rate": 1.4772681011086788e-06, - "loss": 1.141, - "step": 2666 - }, - { - "epoch": 0.36155358232223955, - "grad_norm": 1.541120201273637, - "learning_rate": 1.4768821592609513e-06, - "loss": 1.1562, - "step": 2667 - }, - { - "epoch": 0.3616891479699044, - "grad_norm": 1.6555035906673434, - "learning_rate": 1.4764961254502639e-06, - "loss": 1.1991, - "step": 2668 - }, - { - "epoch": 0.3618247136175693, - "grad_norm": 1.8525512304244152, - "learning_rate": 1.47610999975106e-06, - "loss": 1.1596, - "step": 2669 - }, - { - "epoch": 0.36196027926523416, - "grad_norm": 1.7768786227129278, - "learning_rate": 1.4757237822378009e-06, - "loss": 1.1602, - "step": 2670 - }, - { - "epoch": 0.3620958449128991, - "grad_norm": 3.939037369301901, - "learning_rate": 1.4753374729849656e-06, - "loss": 1.1899, - "step": 2671 - }, - { - "epoch": 0.36223141056056396, - "grad_norm": 1.7575313204059744, - "learning_rate": 1.4749510720670503e-06, - "loss": 1.1343, - "step": 2672 - }, - { - "epoch": 0.36236697620822883, - "grad_norm": 1.578567985133941, - "learning_rate": 1.47456457955857e-06, - "loss": 1.1443, - "step": 2673 - }, - { - "epoch": 0.3625025418558937, - "grad_norm": 1.5396381306932578, - "learning_rate": 1.4741779955340565e-06, - "loss": 1.1262, - "step": 2674 - }, - { - "epoch": 0.36263810750355857, - "grad_norm": 2.8833840762682477, - "learning_rate": 1.4737913200680596e-06, - "loss": 1.1393, - "step": 2675 - }, - { - "epoch": 0.3627736731512235, - "grad_norm": 1.7680226556399328, - "learning_rate": 1.4734045532351463e-06, - "loss": 1.2197, - "step": 2676 - }, - { - "epoch": 0.36290923879888837, - "grad_norm": 1.7081557341720084, - "learning_rate": 1.473017695109902e-06, - "loss": 1.1425, - "step": 2677 - }, - { - "epoch": 0.36304480444655324, - "grad_norm": 1.8734224000525042, - "learning_rate": 1.472630745766929e-06, - "loss": 1.1902, - "step": 2678 - }, - { - "epoch": 0.3631803700942181, - "grad_norm": 2.293544196026003, - "learning_rate": 1.4722437052808472e-06, - "loss": 1.1724, - "step": 2679 - }, - { - "epoch": 0.36331593574188303, - "grad_norm": 1.6461253430764882, - "learning_rate": 1.4718565737262945e-06, - "loss": 1.1533, - "step": 2680 - }, - { - "epoch": 0.3634515013895479, - "grad_norm": 1.7012907618929891, - "learning_rate": 1.4714693511779262e-06, - "loss": 1.1618, - "step": 2681 - }, - { - "epoch": 0.3635870670372128, - "grad_norm": 1.6192203253896096, - "learning_rate": 1.471082037710415e-06, - "loss": 1.1833, - "step": 2682 - }, - { - "epoch": 0.36372263268487764, - "grad_norm": 1.4083848894603994, - "learning_rate": 1.4706946333984514e-06, - "loss": 1.1541, - "step": 2683 - }, - { - "epoch": 0.3638581983325425, - "grad_norm": 1.540624138247834, - "learning_rate": 1.4703071383167433e-06, - "loss": 1.1601, - "step": 2684 - }, - { - "epoch": 0.36399376398020744, - "grad_norm": 2.247881257183708, - "learning_rate": 1.4699195525400158e-06, - "loss": 1.1798, - "step": 2685 - }, - { - "epoch": 0.3641293296278723, - "grad_norm": 1.5638328371116674, - "learning_rate": 1.469531876143012e-06, - "loss": 1.1931, - "step": 2686 - }, - { - "epoch": 0.3642648952755372, - "grad_norm": 1.6333906232835949, - "learning_rate": 1.4691441092004921e-06, - "loss": 1.1425, - "step": 2687 - }, - { - "epoch": 0.36440046092320205, - "grad_norm": 1.6152983902891416, - "learning_rate": 1.4687562517872342e-06, - "loss": 1.1752, - "step": 2688 - }, - { - "epoch": 0.3645360265708669, - "grad_norm": 1.8313478628575959, - "learning_rate": 1.4683683039780328e-06, - "loss": 1.1909, - "step": 2689 - }, - { - "epoch": 0.36467159221853185, - "grad_norm": 2.679128731466459, - "learning_rate": 1.4679802658477013e-06, - "loss": 1.1697, - "step": 2690 - }, - { - "epoch": 0.3648071578661967, - "grad_norm": 6.753283924530054, - "learning_rate": 1.4675921374710696e-06, - "loss": 1.1629, - "step": 2691 - }, - { - "epoch": 0.3649427235138616, - "grad_norm": 1.4216753453823174, - "learning_rate": 1.467203918922985e-06, - "loss": 1.1626, - "step": 2692 - }, - { - "epoch": 0.36507828916152646, - "grad_norm": 1.9760819969022407, - "learning_rate": 1.4668156102783125e-06, - "loss": 1.1352, - "step": 2693 - }, - { - "epoch": 0.36521385480919133, - "grad_norm": 1.6073711014251932, - "learning_rate": 1.4664272116119345e-06, - "loss": 1.1484, - "step": 2694 - }, - { - "epoch": 0.36534942045685626, - "grad_norm": 1.6345763109718703, - "learning_rate": 1.4660387229987504e-06, - "loss": 1.1306, - "step": 2695 - }, - { - "epoch": 0.3654849861045211, - "grad_norm": 2.4928566282945708, - "learning_rate": 1.4656501445136774e-06, - "loss": 1.193, - "step": 2696 - }, - { - "epoch": 0.365620551752186, - "grad_norm": 2.1972041566387652, - "learning_rate": 1.4652614762316495e-06, - "loss": 1.1321, - "step": 2697 - }, - { - "epoch": 0.36575611739985087, - "grad_norm": 1.4602277677590003, - "learning_rate": 1.4648727182276186e-06, - "loss": 1.1898, - "step": 2698 - }, - { - "epoch": 0.36589168304751574, - "grad_norm": 1.401122869535631, - "learning_rate": 1.4644838705765534e-06, - "loss": 1.1607, - "step": 2699 - }, - { - "epoch": 0.36602724869518066, - "grad_norm": 1.549340835051294, - "learning_rate": 1.46409493335344e-06, - "loss": 1.1585, - "step": 2700 - }, - { - "epoch": 0.36616281434284553, - "grad_norm": 2.083453949802379, - "learning_rate": 1.4637059066332824e-06, - "loss": 1.1817, - "step": 2701 - }, - { - "epoch": 0.3662983799905104, - "grad_norm": 1.5353936920011488, - "learning_rate": 1.4633167904911008e-06, - "loss": 1.1849, - "step": 2702 - }, - { - "epoch": 0.3664339456381753, - "grad_norm": 1.4694783857342484, - "learning_rate": 1.4629275850019336e-06, - "loss": 1.167, - "step": 2703 - }, - { - "epoch": 0.36656951128584014, - "grad_norm": 1.8538216918000776, - "learning_rate": 1.4625382902408354e-06, - "loss": 1.1791, - "step": 2704 - }, - { - "epoch": 0.36670507693350507, - "grad_norm": 1.987606862086196, - "learning_rate": 1.4621489062828788e-06, - "loss": 1.1516, - "step": 2705 - }, - { - "epoch": 0.36684064258116994, - "grad_norm": 1.678344312207345, - "learning_rate": 1.461759433203154e-06, - "loss": 1.1449, - "step": 2706 - }, - { - "epoch": 0.3669762082288348, - "grad_norm": 1.7523857112068422, - "learning_rate": 1.4613698710767674e-06, - "loss": 1.1699, - "step": 2707 - }, - { - "epoch": 0.3671117738764997, - "grad_norm": 1.4245468151012477, - "learning_rate": 1.4609802199788427e-06, - "loss": 1.1605, - "step": 2708 - }, - { - "epoch": 0.36724733952416455, - "grad_norm": 1.5498498574434827, - "learning_rate": 1.4605904799845218e-06, - "loss": 1.1512, - "step": 2709 - }, - { - "epoch": 0.3673829051718295, - "grad_norm": 1.6221057289562324, - "learning_rate": 1.4602006511689623e-06, - "loss": 1.1799, - "step": 2710 - }, - { - "epoch": 0.36751847081949435, - "grad_norm": 1.560958347284891, - "learning_rate": 1.4598107336073396e-06, - "loss": 1.1352, - "step": 2711 - }, - { - "epoch": 0.3676540364671592, - "grad_norm": 2.0584691154729886, - "learning_rate": 1.4594207273748467e-06, - "loss": 1.1442, - "step": 2712 - }, - { - "epoch": 0.3677896021148241, - "grad_norm": 1.570543900221731, - "learning_rate": 1.459030632546693e-06, - "loss": 1.1917, - "step": 2713 - }, - { - "epoch": 0.36792516776248896, - "grad_norm": 1.7043370759474203, - "learning_rate": 1.458640449198105e-06, - "loss": 1.1886, - "step": 2714 - }, - { - "epoch": 0.3680607334101539, - "grad_norm": 1.5445081300652688, - "learning_rate": 1.4582501774043268e-06, - "loss": 1.2099, - "step": 2715 - }, - { - "epoch": 0.36819629905781875, - "grad_norm": 1.550816099092105, - "learning_rate": 1.4578598172406189e-06, - "loss": 1.1643, - "step": 2716 - }, - { - "epoch": 0.3683318647054836, - "grad_norm": 2.739707662416962, - "learning_rate": 1.4574693687822594e-06, - "loss": 1.1648, - "step": 2717 - }, - { - "epoch": 0.3684674303531485, - "grad_norm": 2.3007387096578467, - "learning_rate": 1.4570788321045432e-06, - "loss": 1.1352, - "step": 2718 - }, - { - "epoch": 0.3686029960008134, - "grad_norm": 1.625210015977814, - "learning_rate": 1.4566882072827824e-06, - "loss": 1.1404, - "step": 2719 - }, - { - "epoch": 0.3687385616484783, - "grad_norm": 1.3942560086227505, - "learning_rate": 1.4562974943923054e-06, - "loss": 1.1996, - "step": 2720 - }, - { - "epoch": 0.36887412729614316, - "grad_norm": 1.6253800163027943, - "learning_rate": 1.4559066935084588e-06, - "loss": 1.1483, - "step": 2721 - }, - { - "epoch": 0.36900969294380803, - "grad_norm": 1.4809094884735345, - "learning_rate": 1.4555158047066047e-06, - "loss": 1.1728, - "step": 2722 - }, - { - "epoch": 0.3691452585914729, - "grad_norm": 1.7007893145949957, - "learning_rate": 1.4551248280621234e-06, - "loss": 1.1505, - "step": 2723 - }, - { - "epoch": 0.36928082423913783, - "grad_norm": 3.170640423971189, - "learning_rate": 1.4547337636504116e-06, - "loss": 1.1973, - "step": 2724 - }, - { - "epoch": 0.3694163898868027, - "grad_norm": 1.9667372866432289, - "learning_rate": 1.4543426115468829e-06, - "loss": 1.1352, - "step": 2725 - }, - { - "epoch": 0.36955195553446757, - "grad_norm": 1.4645855255181286, - "learning_rate": 1.453951371826968e-06, - "loss": 1.1382, - "step": 2726 - }, - { - "epoch": 0.36968752118213244, - "grad_norm": 1.611839684995952, - "learning_rate": 1.4535600445661143e-06, - "loss": 1.2117, - "step": 2727 - }, - { - "epoch": 0.3698230868297973, - "grad_norm": 6.521376993789329, - "learning_rate": 1.453168629839786e-06, - "loss": 1.1844, - "step": 2728 - }, - { - "epoch": 0.36995865247746224, - "grad_norm": 1.6457127001467826, - "learning_rate": 1.4527771277234648e-06, - "loss": 1.2082, - "step": 2729 - }, - { - "epoch": 0.3700942181251271, - "grad_norm": 1.6601472053421304, - "learning_rate": 1.4523855382926483e-06, - "loss": 1.2099, - "step": 2730 - }, - { - "epoch": 0.370229783772792, - "grad_norm": 1.5007357714298144, - "learning_rate": 1.4519938616228518e-06, - "loss": 1.1591, - "step": 2731 - }, - { - "epoch": 0.37036534942045685, - "grad_norm": 1.6830521179323668, - "learning_rate": 1.4516020977896067e-06, - "loss": 1.1685, - "step": 2732 - }, - { - "epoch": 0.3705009150681217, - "grad_norm": 1.6553329746508367, - "learning_rate": 1.4512102468684621e-06, - "loss": 1.1799, - "step": 2733 - }, - { - "epoch": 0.37063648071578664, - "grad_norm": 1.4562088837869065, - "learning_rate": 1.4508183089349828e-06, - "loss": 1.1496, - "step": 2734 - }, - { - "epoch": 0.3707720463634515, - "grad_norm": 1.617960202713426, - "learning_rate": 1.4504262840647512e-06, - "loss": 1.1275, - "step": 2735 - }, - { - "epoch": 0.3709076120111164, - "grad_norm": 1.4177137596930662, - "learning_rate": 1.4500341723333663e-06, - "loss": 1.1855, - "step": 2736 - }, - { - "epoch": 0.37104317765878125, - "grad_norm": 1.3988461259865945, - "learning_rate": 1.4496419738164434e-06, - "loss": 1.1586, - "step": 2737 - }, - { - "epoch": 0.3711787433064461, - "grad_norm": 2.521803783387221, - "learning_rate": 1.449249688589615e-06, - "loss": 1.1288, - "step": 2738 - }, - { - "epoch": 0.37131430895411105, - "grad_norm": 2.194761253198728, - "learning_rate": 1.4488573167285307e-06, - "loss": 1.1664, - "step": 2739 - }, - { - "epoch": 0.3714498746017759, - "grad_norm": 1.864805239577867, - "learning_rate": 1.448464858308856e-06, - "loss": 1.178, - "step": 2740 - }, - { - "epoch": 0.3715854402494408, - "grad_norm": 1.7336757709708022, - "learning_rate": 1.4480723134062732e-06, - "loss": 1.1279, - "step": 2741 - }, - { - "epoch": 0.37172100589710566, - "grad_norm": 2.2076014722787325, - "learning_rate": 1.4476796820964814e-06, - "loss": 1.1564, - "step": 2742 - }, - { - "epoch": 0.37185657154477053, - "grad_norm": 2.2552869483442586, - "learning_rate": 1.4472869644551966e-06, - "loss": 1.1697, - "step": 2743 - }, - { - "epoch": 0.37199213719243546, - "grad_norm": 1.4144394525579016, - "learning_rate": 1.4468941605581518e-06, - "loss": 1.1388, - "step": 2744 - }, - { - "epoch": 0.3721277028401003, - "grad_norm": 2.385013353517025, - "learning_rate": 1.4465012704810952e-06, - "loss": 1.1782, - "step": 2745 - }, - { - "epoch": 0.3722632684877652, - "grad_norm": 1.7620320118311337, - "learning_rate": 1.4461082942997936e-06, - "loss": 1.1664, - "step": 2746 - }, - { - "epoch": 0.37239883413543007, - "grad_norm": 1.9821329333901359, - "learning_rate": 1.4457152320900283e-06, - "loss": 1.1646, - "step": 2747 - }, - { - "epoch": 0.37253439978309494, - "grad_norm": 1.6355533646171287, - "learning_rate": 1.445322083927599e-06, - "loss": 1.1546, - "step": 2748 - }, - { - "epoch": 0.37266996543075986, - "grad_norm": 1.4563067611909064, - "learning_rate": 1.444928849888321e-06, - "loss": 1.144, - "step": 2749 - }, - { - "epoch": 0.37280553107842473, - "grad_norm": 2.066800687620252, - "learning_rate": 1.4445355300480262e-06, - "loss": 1.105, - "step": 2750 - }, - { - "epoch": 0.3729410967260896, - "grad_norm": 1.5970715835534157, - "learning_rate": 1.4441421244825636e-06, - "loss": 1.1606, - "step": 2751 - }, - { - "epoch": 0.3730766623737545, - "grad_norm": 1.5288751793718627, - "learning_rate": 1.443748633267798e-06, - "loss": 1.179, - "step": 2752 - }, - { - "epoch": 0.37321222802141935, - "grad_norm": 1.4850752210606024, - "learning_rate": 1.443355056479611e-06, - "loss": 1.1857, - "step": 2753 - }, - { - "epoch": 0.37334779366908427, - "grad_norm": 2.999854428786684, - "learning_rate": 1.4429613941939016e-06, - "loss": 1.1613, - "step": 2754 - }, - { - "epoch": 0.37348335931674914, - "grad_norm": 1.5169384826054657, - "learning_rate": 1.4425676464865835e-06, - "loss": 1.2191, - "step": 2755 - }, - { - "epoch": 0.373618924964414, - "grad_norm": 1.8646520958786716, - "learning_rate": 1.442173813433588e-06, - "loss": 1.1693, - "step": 2756 - }, - { - "epoch": 0.3737544906120789, - "grad_norm": 2.6900789268856564, - "learning_rate": 1.4417798951108632e-06, - "loss": 1.2018, - "step": 2757 - }, - { - "epoch": 0.3738900562597438, - "grad_norm": 1.498842219490377, - "learning_rate": 1.4413858915943728e-06, - "loss": 1.1784, - "step": 2758 - }, - { - "epoch": 0.3740256219074087, - "grad_norm": 1.4582280748645529, - "learning_rate": 1.4409918029600972e-06, - "loss": 1.1986, - "step": 2759 - }, - { - "epoch": 0.37416118755507355, - "grad_norm": 2.802451553917144, - "learning_rate": 1.4405976292840332e-06, - "loss": 1.1657, - "step": 2760 - }, - { - "epoch": 0.3742967532027384, - "grad_norm": 1.6250789091739706, - "learning_rate": 1.4402033706421945e-06, - "loss": 1.1343, - "step": 2761 - }, - { - "epoch": 0.3744323188504033, - "grad_norm": 1.545422975223051, - "learning_rate": 1.4398090271106104e-06, - "loss": 1.1815, - "step": 2762 - }, - { - "epoch": 0.3745678844980682, - "grad_norm": 2.0476824632080595, - "learning_rate": 1.4394145987653272e-06, - "loss": 1.1729, - "step": 2763 - }, - { - "epoch": 0.3747034501457331, - "grad_norm": 1.5275166535597073, - "learning_rate": 1.4390200856824072e-06, - "loss": 1.1519, - "step": 2764 - }, - { - "epoch": 0.37483901579339796, - "grad_norm": 2.281607861973894, - "learning_rate": 1.438625487937929e-06, - "loss": 1.2188, - "step": 2765 - }, - { - "epoch": 0.3749745814410628, - "grad_norm": 1.3657368675486452, - "learning_rate": 1.4382308056079876e-06, - "loss": 1.1518, - "step": 2766 - }, - { - "epoch": 0.3751101470887277, - "grad_norm": 1.4402786953655446, - "learning_rate": 1.4378360387686948e-06, - "loss": 1.1871, - "step": 2767 - }, - { - "epoch": 0.3752457127363926, - "grad_norm": 1.507309898553813, - "learning_rate": 1.4374411874961777e-06, - "loss": 1.1639, - "step": 2768 - }, - { - "epoch": 0.3753812783840575, - "grad_norm": 1.3346213048229865, - "learning_rate": 1.437046251866581e-06, - "loss": 1.1583, - "step": 2769 - }, - { - "epoch": 0.37551684403172236, - "grad_norm": 1.8142667398115502, - "learning_rate": 1.436651231956064e-06, - "loss": 1.1631, - "step": 2770 - }, - { - "epoch": 0.37565240967938723, - "grad_norm": 1.3890426144393107, - "learning_rate": 1.4362561278408038e-06, - "loss": 1.1875, - "step": 2771 - }, - { - "epoch": 0.3757879753270521, - "grad_norm": 1.5638339865458988, - "learning_rate": 1.435860939596993e-06, - "loss": 1.1867, - "step": 2772 - }, - { - "epoch": 0.37592354097471703, - "grad_norm": 1.7711571853676347, - "learning_rate": 1.43546566730084e-06, - "loss": 1.1247, - "step": 2773 - }, - { - "epoch": 0.3760591066223819, - "grad_norm": 1.766265656206971, - "learning_rate": 1.4350703110285709e-06, - "loss": 1.1743, - "step": 2774 - }, - { - "epoch": 0.37619467227004677, - "grad_norm": 1.5642211881676202, - "learning_rate": 1.4346748708564264e-06, - "loss": 1.1943, - "step": 2775 - }, - { - "epoch": 0.37633023791771164, - "grad_norm": 2.047967602826698, - "learning_rate": 1.4342793468606643e-06, - "loss": 1.1559, - "step": 2776 - }, - { - "epoch": 0.3764658035653765, - "grad_norm": 2.006996651248735, - "learning_rate": 1.433883739117558e-06, - "loss": 1.1308, - "step": 2777 - }, - { - "epoch": 0.37660136921304144, - "grad_norm": 1.5186452992241666, - "learning_rate": 1.4334880477033976e-06, - "loss": 1.1715, - "step": 2778 - }, - { - "epoch": 0.3767369348607063, - "grad_norm": 1.6607362567734327, - "learning_rate": 1.4330922726944889e-06, - "loss": 1.1301, - "step": 2779 - }, - { - "epoch": 0.3768725005083712, - "grad_norm": 1.5453222295281377, - "learning_rate": 1.432696414167154e-06, - "loss": 1.2095, - "step": 2780 - }, - { - "epoch": 0.37700806615603605, - "grad_norm": 1.792396594101511, - "learning_rate": 1.4323004721977312e-06, - "loss": 1.1287, - "step": 2781 - }, - { - "epoch": 0.3771436318037009, - "grad_norm": 3.1082710768684962, - "learning_rate": 1.4319044468625748e-06, - "loss": 1.1102, - "step": 2782 - }, - { - "epoch": 0.37727919745136584, - "grad_norm": 1.9713543794933754, - "learning_rate": 1.4315083382380552e-06, - "loss": 1.1734, - "step": 2783 - }, - { - "epoch": 0.3774147630990307, - "grad_norm": 1.7442480204491544, - "learning_rate": 1.4311121464005582e-06, - "loss": 1.1717, - "step": 2784 - }, - { - "epoch": 0.3775503287466956, - "grad_norm": 1.5122265404592905, - "learning_rate": 1.430715871426487e-06, - "loss": 1.1914, - "step": 2785 - }, - { - "epoch": 0.37768589439436046, - "grad_norm": 1.5204944970032126, - "learning_rate": 1.43031951339226e-06, - "loss": 1.1836, - "step": 2786 - }, - { - "epoch": 0.3778214600420253, - "grad_norm": 1.9529502867331507, - "learning_rate": 1.4299230723743112e-06, - "loss": 1.2126, - "step": 2787 - }, - { - "epoch": 0.37795702568969025, - "grad_norm": 1.778852281482099, - "learning_rate": 1.4295265484490918e-06, - "loss": 1.1605, - "step": 2788 - }, - { - "epoch": 0.3780925913373551, - "grad_norm": 1.4864297961303838, - "learning_rate": 1.429129941693068e-06, - "loss": 1.1824, - "step": 2789 - }, - { - "epoch": 0.37822815698502, - "grad_norm": 1.5960510938654302, - "learning_rate": 1.428733252182722e-06, - "loss": 1.1389, - "step": 2790 - }, - { - "epoch": 0.37836372263268486, - "grad_norm": 1.5167815357069063, - "learning_rate": 1.4283364799945527e-06, - "loss": 1.1194, - "step": 2791 - }, - { - "epoch": 0.37849928828034973, - "grad_norm": 1.7848490350515618, - "learning_rate": 1.4279396252050747e-06, - "loss": 1.1639, - "step": 2792 - }, - { - "epoch": 0.37863485392801466, - "grad_norm": 1.3359202176708957, - "learning_rate": 1.4275426878908174e-06, - "loss": 1.1498, - "step": 2793 - }, - { - "epoch": 0.37877041957567953, - "grad_norm": 2.3674322986010736, - "learning_rate": 1.4271456681283275e-06, - "loss": 1.1603, - "step": 2794 - }, - { - "epoch": 0.3789059852233444, - "grad_norm": 1.359766048932401, - "learning_rate": 1.4267485659941676e-06, - "loss": 1.1589, - "step": 2795 - }, - { - "epoch": 0.37904155087100927, - "grad_norm": 1.5939151875662965, - "learning_rate": 1.4263513815649152e-06, - "loss": 1.1489, - "step": 2796 - }, - { - "epoch": 0.3791771165186742, - "grad_norm": 3.0727943411257757, - "learning_rate": 1.4259541149171643e-06, - "loss": 1.1617, - "step": 2797 - }, - { - "epoch": 0.37931268216633907, - "grad_norm": 3.3112961852151908, - "learning_rate": 1.4255567661275247e-06, - "loss": 1.1536, - "step": 2798 - }, - { - "epoch": 0.37944824781400394, - "grad_norm": 1.938616387147304, - "learning_rate": 1.4251593352726217e-06, - "loss": 1.1737, - "step": 2799 - }, - { - "epoch": 0.3795838134616688, - "grad_norm": 6.050452952233669, - "learning_rate": 1.4247618224290968e-06, - "loss": 1.1849, - "step": 2800 - }, - { - "epoch": 0.3797193791093337, - "grad_norm": 1.5210417409490518, - "learning_rate": 1.4243642276736076e-06, - "loss": 1.1666, - "step": 2801 - }, - { - "epoch": 0.3798549447569986, - "grad_norm": 2.062975898183793, - "learning_rate": 1.4239665510828266e-06, - "loss": 1.2055, - "step": 2802 - }, - { - "epoch": 0.3799905104046635, - "grad_norm": 1.605288343327243, - "learning_rate": 1.423568792733443e-06, - "loss": 1.1483, - "step": 2803 - }, - { - "epoch": 0.38012607605232834, - "grad_norm": 1.4507483767338765, - "learning_rate": 1.423170952702161e-06, - "loss": 1.1534, - "step": 2804 - }, - { - "epoch": 0.3802616416999932, - "grad_norm": 1.4515498793616715, - "learning_rate": 1.422773031065701e-06, - "loss": 1.1898, - "step": 2805 - }, - { - "epoch": 0.3803972073476581, - "grad_norm": 1.514980659908831, - "learning_rate": 1.4223750279007993e-06, - "loss": 1.1514, - "step": 2806 - }, - { - "epoch": 0.380532772995323, - "grad_norm": 1.556497991888125, - "learning_rate": 1.4219769432842075e-06, - "loss": 1.1817, - "step": 2807 - }, - { - "epoch": 0.3806683386429879, - "grad_norm": 1.6070197668394395, - "learning_rate": 1.4215787772926931e-06, - "loss": 1.1577, - "step": 2808 - }, - { - "epoch": 0.38080390429065275, - "grad_norm": 1.439184088336358, - "learning_rate": 1.4211805300030389e-06, - "loss": 1.132, - "step": 2809 - }, - { - "epoch": 0.3809394699383176, - "grad_norm": 1.5014660350209088, - "learning_rate": 1.4207822014920443e-06, - "loss": 1.1486, - "step": 2810 - }, - { - "epoch": 0.3810750355859825, - "grad_norm": 2.271999104279936, - "learning_rate": 1.420383791836524e-06, - "loss": 1.1726, - "step": 2811 - }, - { - "epoch": 0.3812106012336474, - "grad_norm": 1.593736085399741, - "learning_rate": 1.419985301113307e-06, - "loss": 1.1614, - "step": 2812 - }, - { - "epoch": 0.3813461668813123, - "grad_norm": 1.573277696128947, - "learning_rate": 1.4195867293992405e-06, - "loss": 1.1839, - "step": 2813 - }, - { - "epoch": 0.38148173252897716, - "grad_norm": 1.4693870652003027, - "learning_rate": 1.419188076771185e-06, - "loss": 1.1818, - "step": 2814 - }, - { - "epoch": 0.38161729817664203, - "grad_norm": 1.9838664104176267, - "learning_rate": 1.4187893433060176e-06, - "loss": 1.1466, - "step": 2815 - }, - { - "epoch": 0.3817528638243069, - "grad_norm": 1.6339054825705612, - "learning_rate": 1.4183905290806313e-06, - "loss": 1.1721, - "step": 2816 - }, - { - "epoch": 0.3818884294719718, - "grad_norm": 1.771101620208098, - "learning_rate": 1.4179916341719339e-06, - "loss": 1.1665, - "step": 2817 - }, - { - "epoch": 0.3820239951196367, - "grad_norm": 1.4680352031402837, - "learning_rate": 1.4175926586568493e-06, - "loss": 1.1578, - "step": 2818 - }, - { - "epoch": 0.38215956076730156, - "grad_norm": 1.8037288665695252, - "learning_rate": 1.4171936026123168e-06, - "loss": 1.1694, - "step": 2819 - }, - { - "epoch": 0.38229512641496644, - "grad_norm": 1.6703585960298812, - "learning_rate": 1.4167944661152911e-06, - "loss": 1.1833, - "step": 2820 - }, - { - "epoch": 0.3824306920626313, - "grad_norm": 1.9509561287145525, - "learning_rate": 1.4163952492427424e-06, - "loss": 1.1768, - "step": 2821 - }, - { - "epoch": 0.38256625771029623, - "grad_norm": 1.5296067377296414, - "learning_rate": 1.415995952071657e-06, - "loss": 1.1926, - "step": 2822 - }, - { - "epoch": 0.3827018233579611, - "grad_norm": 1.5787528569878315, - "learning_rate": 1.415596574679036e-06, - "loss": 1.1582, - "step": 2823 - }, - { - "epoch": 0.38283738900562597, - "grad_norm": 1.4771473276512048, - "learning_rate": 1.4151971171418959e-06, - "loss": 1.1471, - "step": 2824 - }, - { - "epoch": 0.38297295465329084, - "grad_norm": 1.462319358778104, - "learning_rate": 1.4147975795372694e-06, - "loss": 1.1279, - "step": 2825 - }, - { - "epoch": 0.3831085203009557, - "grad_norm": 2.272576307997905, - "learning_rate": 1.4143979619422035e-06, - "loss": 1.1804, - "step": 2826 - }, - { - "epoch": 0.38324408594862064, - "grad_norm": 1.3750311165685134, - "learning_rate": 1.4139982644337617e-06, - "loss": 1.1548, - "step": 2827 - }, - { - "epoch": 0.3833796515962855, - "grad_norm": 1.5630439347123788, - "learning_rate": 1.4135984870890228e-06, - "loss": 1.112, - "step": 2828 - }, - { - "epoch": 0.3835152172439504, - "grad_norm": 1.6064647658951947, - "learning_rate": 1.4131986299850803e-06, - "loss": 1.2068, - "step": 2829 - }, - { - "epoch": 0.38365078289161525, - "grad_norm": 1.6814441809724268, - "learning_rate": 1.4127986931990437e-06, - "loss": 1.1124, - "step": 2830 - }, - { - "epoch": 0.3837863485392801, - "grad_norm": 1.659809146381489, - "learning_rate": 1.4123986768080375e-06, - "loss": 1.1398, - "step": 2831 - }, - { - "epoch": 0.38392191418694505, - "grad_norm": 1.739252464929412, - "learning_rate": 1.4119985808892016e-06, - "loss": 1.1957, - "step": 2832 - }, - { - "epoch": 0.3840574798346099, - "grad_norm": 1.7205162995875132, - "learning_rate": 1.4115984055196918e-06, - "loss": 1.1416, - "step": 2833 - }, - { - "epoch": 0.3841930454822748, - "grad_norm": 2.419135482554706, - "learning_rate": 1.4111981507766782e-06, - "loss": 1.1631, - "step": 2834 - }, - { - "epoch": 0.38432861112993966, - "grad_norm": 2.430771888359287, - "learning_rate": 1.4107978167373469e-06, - "loss": 1.1893, - "step": 2835 - }, - { - "epoch": 0.3844641767776046, - "grad_norm": 1.4687859092262687, - "learning_rate": 1.4103974034788994e-06, - "loss": 1.1398, - "step": 2836 - }, - { - "epoch": 0.38459974242526945, - "grad_norm": 1.5287193327902664, - "learning_rate": 1.4099969110785521e-06, - "loss": 1.1674, - "step": 2837 - }, - { - "epoch": 0.3847353080729343, - "grad_norm": 1.663613394681787, - "learning_rate": 1.409596339613537e-06, - "loss": 1.1683, - "step": 2838 - }, - { - "epoch": 0.3848708737205992, - "grad_norm": 1.683140308729507, - "learning_rate": 1.409195689161101e-06, - "loss": 1.1778, - "step": 2839 - }, - { - "epoch": 0.38500643936826406, - "grad_norm": 1.55034548735741, - "learning_rate": 1.4087949597985062e-06, - "loss": 1.1898, - "step": 2840 - }, - { - "epoch": 0.385142005015929, - "grad_norm": 2.1240340575140007, - "learning_rate": 1.4083941516030303e-06, - "loss": 1.1963, - "step": 2841 - }, - { - "epoch": 0.38527757066359386, - "grad_norm": 1.6122656313664805, - "learning_rate": 1.407993264651966e-06, - "loss": 1.139, - "step": 2842 - }, - { - "epoch": 0.38541313631125873, - "grad_norm": 1.7973892877832507, - "learning_rate": 1.4075922990226209e-06, - "loss": 1.1571, - "step": 2843 - }, - { - "epoch": 0.3855487019589236, - "grad_norm": 1.5595611002580205, - "learning_rate": 1.407191254792318e-06, - "loss": 1.1208, - "step": 2844 - }, - { - "epoch": 0.38568426760658847, - "grad_norm": 1.7303972267487238, - "learning_rate": 1.4067901320383962e-06, - "loss": 1.1401, - "step": 2845 - }, - { - "epoch": 0.3858198332542534, - "grad_norm": 1.4602619799089978, - "learning_rate": 1.4063889308382084e-06, - "loss": 1.1356, - "step": 2846 - }, - { - "epoch": 0.38595539890191827, - "grad_norm": 5.912755060816053, - "learning_rate": 1.405987651269123e-06, - "loss": 1.1651, - "step": 2847 - }, - { - "epoch": 0.38609096454958314, - "grad_norm": 1.468187479131227, - "learning_rate": 1.4055862934085239e-06, - "loss": 1.1908, - "step": 2848 - }, - { - "epoch": 0.386226530197248, - "grad_norm": 2.032308973452589, - "learning_rate": 1.4051848573338095e-06, - "loss": 1.1506, - "step": 2849 - }, - { - "epoch": 0.3863620958449129, - "grad_norm": 1.9065344099718349, - "learning_rate": 1.4047833431223936e-06, - "loss": 1.1346, - "step": 2850 - }, - { - "epoch": 0.3864976614925778, - "grad_norm": 1.5646153956305318, - "learning_rate": 1.4043817508517053e-06, - "loss": 1.2145, - "step": 2851 - }, - { - "epoch": 0.3866332271402427, - "grad_norm": 2.3350765558369537, - "learning_rate": 1.4039800805991883e-06, - "loss": 1.1438, - "step": 2852 - }, - { - "epoch": 0.38676879278790754, - "grad_norm": 1.960640282858717, - "learning_rate": 1.403578332442302e-06, - "loss": 1.1888, - "step": 2853 - }, - { - "epoch": 0.3869043584355724, - "grad_norm": 1.722859454804138, - "learning_rate": 1.4031765064585196e-06, - "loss": 1.2039, - "step": 2854 - }, - { - "epoch": 0.3870399240832373, - "grad_norm": 4.763504354624624, - "learning_rate": 1.4027746027253301e-06, - "loss": 1.1486, - "step": 2855 - }, - { - "epoch": 0.3871754897309022, - "grad_norm": 1.953915042025117, - "learning_rate": 1.402372621320238e-06, - "loss": 1.2267, - "step": 2856 - }, - { - "epoch": 0.3873110553785671, - "grad_norm": 1.7048647882690227, - "learning_rate": 1.401970562320762e-06, - "loss": 1.1844, - "step": 2857 - }, - { - "epoch": 0.38744662102623195, - "grad_norm": 1.5869268974344535, - "learning_rate": 1.4015684258044363e-06, - "loss": 1.1266, - "step": 2858 - }, - { - "epoch": 0.3875821866738968, - "grad_norm": 2.1777112685571196, - "learning_rate": 1.401166211848809e-06, - "loss": 1.1201, - "step": 2859 - }, - { - "epoch": 0.3877177523215617, - "grad_norm": 1.5964885574440972, - "learning_rate": 1.4007639205314448e-06, - "loss": 1.1492, - "step": 2860 - }, - { - "epoch": 0.3878533179692266, - "grad_norm": 1.557816694305163, - "learning_rate": 1.4003615519299216e-06, - "loss": 1.1376, - "step": 2861 - }, - { - "epoch": 0.3879888836168915, - "grad_norm": 1.6541381384829568, - "learning_rate": 1.3999591061218334e-06, - "loss": 1.1471, - "step": 2862 - }, - { - "epoch": 0.38812444926455636, - "grad_norm": 1.4192962732636445, - "learning_rate": 1.399556583184789e-06, - "loss": 1.1747, - "step": 2863 - }, - { - "epoch": 0.38826001491222123, - "grad_norm": 1.8912671716612806, - "learning_rate": 1.3991539831964114e-06, - "loss": 1.1209, - "step": 2864 - }, - { - "epoch": 0.3883955805598861, - "grad_norm": 1.851004432886314, - "learning_rate": 1.3987513062343385e-06, - "loss": 1.1865, - "step": 2865 - }, - { - "epoch": 0.388531146207551, - "grad_norm": 3.714269895667919, - "learning_rate": 1.3983485523762243e-06, - "loss": 1.1715, - "step": 2866 - }, - { - "epoch": 0.3886667118552159, - "grad_norm": 1.7590417170206345, - "learning_rate": 1.3979457216997358e-06, - "loss": 1.1667, - "step": 2867 - }, - { - "epoch": 0.38880227750288077, - "grad_norm": 1.53919376052351, - "learning_rate": 1.397542814282556e-06, - "loss": 1.1451, - "step": 2868 - }, - { - "epoch": 0.38893784315054564, - "grad_norm": 2.1993843749852506, - "learning_rate": 1.3971398302023824e-06, - "loss": 1.1583, - "step": 2869 - }, - { - "epoch": 0.3890734087982105, - "grad_norm": 1.6972561291139334, - "learning_rate": 1.3967367695369276e-06, - "loss": 1.1885, - "step": 2870 - }, - { - "epoch": 0.38920897444587543, - "grad_norm": 1.6110741947190144, - "learning_rate": 1.3963336323639183e-06, - "loss": 1.1874, - "step": 2871 - }, - { - "epoch": 0.3893445400935403, - "grad_norm": 2.4217238783890216, - "learning_rate": 1.3959304187610967e-06, - "loss": 1.1262, - "step": 2872 - }, - { - "epoch": 0.3894801057412052, - "grad_norm": 3.6583943371089007, - "learning_rate": 1.3955271288062188e-06, - "loss": 1.1768, - "step": 2873 - }, - { - "epoch": 0.38961567138887004, - "grad_norm": 1.7348174040132258, - "learning_rate": 1.3951237625770564e-06, - "loss": 1.1436, - "step": 2874 - }, - { - "epoch": 0.3897512370365349, - "grad_norm": 2.2319838188109054, - "learning_rate": 1.3947203201513953e-06, - "loss": 1.167, - "step": 2875 - }, - { - "epoch": 0.38988680268419984, - "grad_norm": 1.3440004308366116, - "learning_rate": 1.3943168016070361e-06, - "loss": 1.1587, - "step": 2876 - }, - { - "epoch": 0.3900223683318647, - "grad_norm": 1.6829786304331738, - "learning_rate": 1.3939132070217942e-06, - "loss": 1.1479, - "step": 2877 - }, - { - "epoch": 0.3901579339795296, - "grad_norm": 1.5602532432458818, - "learning_rate": 1.3935095364734998e-06, - "loss": 1.1644, - "step": 2878 - }, - { - "epoch": 0.39029349962719445, - "grad_norm": 2.0652739771439044, - "learning_rate": 1.3931057900399976e-06, - "loss": 1.1777, - "step": 2879 - }, - { - "epoch": 0.3904290652748594, - "grad_norm": 1.6789162853314994, - "learning_rate": 1.3927019677991466e-06, - "loss": 1.1767, - "step": 2880 - }, - { - "epoch": 0.39056463092252425, - "grad_norm": 1.4786200085944008, - "learning_rate": 1.3922980698288212e-06, - "loss": 1.1443, - "step": 2881 - }, - { - "epoch": 0.3907001965701891, - "grad_norm": 1.6133007358015177, - "learning_rate": 1.3918940962069093e-06, - "loss": 1.2077, - "step": 2882 - }, - { - "epoch": 0.390835762217854, - "grad_norm": 2.077044504825278, - "learning_rate": 1.3914900470113144e-06, - "loss": 1.1566, - "step": 2883 - }, - { - "epoch": 0.39097132786551886, - "grad_norm": 1.5059348100292025, - "learning_rate": 1.3910859223199545e-06, - "loss": 1.182, - "step": 2884 - }, - { - "epoch": 0.3911068935131838, - "grad_norm": 1.6441667476776818, - "learning_rate": 1.3906817222107611e-06, - "loss": 1.1853, - "step": 2885 - }, - { - "epoch": 0.39124245916084865, - "grad_norm": 1.6273875730492553, - "learning_rate": 1.3902774467616817e-06, - "loss": 1.1535, - "step": 2886 - }, - { - "epoch": 0.3913780248085135, - "grad_norm": 2.1041340297572857, - "learning_rate": 1.3898730960506772e-06, - "loss": 1.193, - "step": 2887 - }, - { - "epoch": 0.3915135904561784, - "grad_norm": 2.3837881528394416, - "learning_rate": 1.3894686701557237e-06, - "loss": 1.1562, - "step": 2888 - }, - { - "epoch": 0.39164915610384327, - "grad_norm": 2.5358096117213336, - "learning_rate": 1.3890641691548113e-06, - "loss": 1.1878, - "step": 2889 - }, - { - "epoch": 0.3917847217515082, - "grad_norm": 1.769766446914845, - "learning_rate": 1.3886595931259451e-06, - "loss": 1.1568, - "step": 2890 - }, - { - "epoch": 0.39192028739917306, - "grad_norm": 1.697012730275507, - "learning_rate": 1.3882549421471442e-06, - "loss": 1.1964, - "step": 2891 - }, - { - "epoch": 0.39205585304683793, - "grad_norm": 2.008216127379292, - "learning_rate": 1.3878502162964422e-06, - "loss": 1.1401, - "step": 2892 - }, - { - "epoch": 0.3921914186945028, - "grad_norm": 2.131718553934897, - "learning_rate": 1.3874454156518877e-06, - "loss": 1.1467, - "step": 2893 - }, - { - "epoch": 0.3923269843421677, - "grad_norm": 1.8073738374387083, - "learning_rate": 1.3870405402915436e-06, - "loss": 1.1843, - "step": 2894 - }, - { - "epoch": 0.3924625499898326, - "grad_norm": 1.7597850311135184, - "learning_rate": 1.3866355902934856e-06, - "loss": 1.1494, - "step": 2895 - }, - { - "epoch": 0.39259811563749747, - "grad_norm": 1.7196738852766922, - "learning_rate": 1.3862305657358065e-06, - "loss": 1.1617, - "step": 2896 - }, - { - "epoch": 0.39273368128516234, - "grad_norm": 1.620564139548848, - "learning_rate": 1.385825466696611e-06, - "loss": 1.1435, - "step": 2897 - }, - { - "epoch": 0.3928692469328272, - "grad_norm": 1.5533674435527773, - "learning_rate": 1.3854202932540202e-06, - "loss": 1.1689, - "step": 2898 - }, - { - "epoch": 0.3930048125804921, - "grad_norm": 1.9526508040501527, - "learning_rate": 1.3850150454861682e-06, - "loss": 1.1692, - "step": 2899 - }, - { - "epoch": 0.393140378228157, - "grad_norm": 4.0511027052084305, - "learning_rate": 1.3846097234712034e-06, - "loss": 1.1305, - "step": 2900 - }, - { - "epoch": 0.3932759438758219, - "grad_norm": 1.3881770108330898, - "learning_rate": 1.3842043272872896e-06, - "loss": 1.1707, - "step": 2901 - }, - { - "epoch": 0.39341150952348675, - "grad_norm": 2.9452558568559226, - "learning_rate": 1.383798857012604e-06, - "loss": 1.1938, - "step": 2902 - }, - { - "epoch": 0.3935470751711516, - "grad_norm": 1.8139675021921795, - "learning_rate": 1.3833933127253383e-06, - "loss": 1.1583, - "step": 2903 - }, - { - "epoch": 0.3936826408188165, - "grad_norm": 1.6966298641360933, - "learning_rate": 1.3829876945036987e-06, - "loss": 1.1508, - "step": 2904 - }, - { - "epoch": 0.3938182064664814, - "grad_norm": 1.828601933779395, - "learning_rate": 1.3825820024259052e-06, - "loss": 1.1564, - "step": 2905 - }, - { - "epoch": 0.3939537721141463, - "grad_norm": 1.6276557530401048, - "learning_rate": 1.3821762365701926e-06, - "loss": 1.159, - "step": 2906 - }, - { - "epoch": 0.39408933776181115, - "grad_norm": 1.6373057377384896, - "learning_rate": 1.3817703970148092e-06, - "loss": 1.173, - "step": 2907 - }, - { - "epoch": 0.394224903409476, - "grad_norm": 1.5005677370245436, - "learning_rate": 1.3813644838380184e-06, - "loss": 1.1837, - "step": 2908 - }, - { - "epoch": 0.3943604690571409, - "grad_norm": 1.9014083895892977, - "learning_rate": 1.3809584971180975e-06, - "loss": 1.1602, - "step": 2909 - }, - { - "epoch": 0.3944960347048058, - "grad_norm": 1.4106759286088375, - "learning_rate": 1.3805524369333371e-06, - "loss": 1.1794, - "step": 2910 - }, - { - "epoch": 0.3946316003524707, - "grad_norm": 1.665078968449005, - "learning_rate": 1.3801463033620433e-06, - "loss": 1.1674, - "step": 2911 - }, - { - "epoch": 0.39476716600013556, - "grad_norm": 2.3539886591753616, - "learning_rate": 1.3797400964825357e-06, - "loss": 1.1628, - "step": 2912 - }, - { - "epoch": 0.39490273164780043, - "grad_norm": 1.5468405646208678, - "learning_rate": 1.3793338163731476e-06, - "loss": 1.172, - "step": 2913 - }, - { - "epoch": 0.3950382972954653, - "grad_norm": 1.3619832253614186, - "learning_rate": 1.3789274631122277e-06, - "loss": 1.1424, - "step": 2914 - }, - { - "epoch": 0.3951738629431302, - "grad_norm": 1.4089555660920028, - "learning_rate": 1.3785210367781375e-06, - "loss": 1.1659, - "step": 2915 - }, - { - "epoch": 0.3953094285907951, - "grad_norm": 1.6534910604019812, - "learning_rate": 1.378114537449253e-06, - "loss": 1.1539, - "step": 2916 - }, - { - "epoch": 0.39544499423845997, - "grad_norm": 1.6802042294786912, - "learning_rate": 1.3777079652039646e-06, - "loss": 1.1964, - "step": 2917 - }, - { - "epoch": 0.39558055988612484, - "grad_norm": 2.0181628931180993, - "learning_rate": 1.3773013201206768e-06, - "loss": 1.2102, - "step": 2918 - }, - { - "epoch": 0.39571612553378976, - "grad_norm": 1.6093728355565118, - "learning_rate": 1.3768946022778075e-06, - "loss": 1.186, - "step": 2919 - }, - { - "epoch": 0.39585169118145463, - "grad_norm": 2.2887939059251776, - "learning_rate": 1.3764878117537895e-06, - "loss": 1.1468, - "step": 2920 - }, - { - "epoch": 0.3959872568291195, - "grad_norm": 1.7763276159349957, - "learning_rate": 1.3760809486270684e-06, - "loss": 1.1688, - "step": 2921 - }, - { - "epoch": 0.3961228224767844, - "grad_norm": 1.7209920560787226, - "learning_rate": 1.3756740129761053e-06, - "loss": 1.2155, - "step": 2922 - }, - { - "epoch": 0.39625838812444925, - "grad_norm": 1.4698348934716852, - "learning_rate": 1.3752670048793743e-06, - "loss": 1.2136, - "step": 2923 - }, - { - "epoch": 0.39639395377211417, - "grad_norm": 1.4954826335745455, - "learning_rate": 1.3748599244153632e-06, - "loss": 1.1275, - "step": 2924 - }, - { - "epoch": 0.39652951941977904, - "grad_norm": 2.7486124319846947, - "learning_rate": 1.3744527716625746e-06, - "loss": 1.1518, - "step": 2925 - }, - { - "epoch": 0.3966650850674439, - "grad_norm": 1.858455766066703, - "learning_rate": 1.3740455466995248e-06, - "loss": 1.1443, - "step": 2926 - }, - { - "epoch": 0.3968006507151088, - "grad_norm": 1.5975256607675536, - "learning_rate": 1.373638249604744e-06, - "loss": 1.1919, - "step": 2927 - }, - { - "epoch": 0.39693621636277365, - "grad_norm": 1.9632931797466104, - "learning_rate": 1.3732308804567761e-06, - "loss": 1.1586, - "step": 2928 - }, - { - "epoch": 0.3970717820104386, - "grad_norm": 1.5821228807001735, - "learning_rate": 1.3728234393341789e-06, - "loss": 1.1025, - "step": 2929 - }, - { - "epoch": 0.39720734765810345, - "grad_norm": 1.7383154711250326, - "learning_rate": 1.3724159263155246e-06, - "loss": 1.1192, - "step": 2930 - }, - { - "epoch": 0.3973429133057683, - "grad_norm": 1.4810305802599066, - "learning_rate": 1.3720083414793984e-06, - "loss": 1.1696, - "step": 2931 - }, - { - "epoch": 0.3974784789534332, - "grad_norm": 1.4849575254618865, - "learning_rate": 1.3716006849043998e-06, - "loss": 1.1723, - "step": 2932 - }, - { - "epoch": 0.39761404460109806, - "grad_norm": 5.019997044318018, - "learning_rate": 1.3711929566691424e-06, - "loss": 1.1475, - "step": 2933 - }, - { - "epoch": 0.397749610248763, - "grad_norm": 1.6334647975835166, - "learning_rate": 1.3707851568522534e-06, - "loss": 1.1712, - "step": 2934 - }, - { - "epoch": 0.39788517589642786, - "grad_norm": 5.448050018329099, - "learning_rate": 1.3703772855323739e-06, - "loss": 1.1226, - "step": 2935 - }, - { - "epoch": 0.3980207415440927, - "grad_norm": 1.5252027590123771, - "learning_rate": 1.3699693427881582e-06, - "loss": 1.1475, - "step": 2936 - }, - { - "epoch": 0.3981563071917576, - "grad_norm": 1.5007526886757023, - "learning_rate": 1.3695613286982754e-06, - "loss": 1.1312, - "step": 2937 - }, - { - "epoch": 0.39829187283942247, - "grad_norm": 1.4671660672214968, - "learning_rate": 1.3691532433414073e-06, - "loss": 1.1763, - "step": 2938 - }, - { - "epoch": 0.3984274384870874, - "grad_norm": 1.456291440662813, - "learning_rate": 1.36874508679625e-06, - "loss": 1.1163, - "step": 2939 - }, - { - "epoch": 0.39856300413475226, - "grad_norm": 1.8556237199225252, - "learning_rate": 1.3683368591415137e-06, - "loss": 1.1167, - "step": 2940 - }, - { - "epoch": 0.39869856978241713, - "grad_norm": 1.5608833360170438, - "learning_rate": 1.3679285604559211e-06, - "loss": 1.1523, - "step": 2941 - }, - { - "epoch": 0.398834135430082, - "grad_norm": 1.6937871043535924, - "learning_rate": 1.3675201908182103e-06, - "loss": 1.1631, - "step": 2942 - }, - { - "epoch": 0.3989697010777469, - "grad_norm": 1.582177444873064, - "learning_rate": 1.3671117503071317e-06, - "loss": 1.1608, - "step": 2943 - }, - { - "epoch": 0.3991052667254118, - "grad_norm": 1.4342281188732642, - "learning_rate": 1.3667032390014497e-06, - "loss": 1.161, - "step": 2944 - }, - { - "epoch": 0.39924083237307667, - "grad_norm": 2.1830147250567, - "learning_rate": 1.3662946569799426e-06, - "loss": 1.1808, - "step": 2945 - }, - { - "epoch": 0.39937639802074154, - "grad_norm": 1.618749165415772, - "learning_rate": 1.3658860043214024e-06, - "loss": 1.2023, - "step": 2946 - }, - { - "epoch": 0.3995119636684064, - "grad_norm": 1.6403535266157367, - "learning_rate": 1.3654772811046344e-06, - "loss": 1.1531, - "step": 2947 - }, - { - "epoch": 0.3996475293160713, - "grad_norm": 1.6309386479020063, - "learning_rate": 1.3650684874084577e-06, - "loss": 1.165, - "step": 2948 - }, - { - "epoch": 0.3997830949637362, - "grad_norm": 1.3680705573137304, - "learning_rate": 1.3646596233117047e-06, - "loss": 1.1093, - "step": 2949 - }, - { - "epoch": 0.3999186606114011, - "grad_norm": 1.705299133856725, - "learning_rate": 1.364250688893222e-06, - "loss": 1.1386, - "step": 2950 - }, - { - "epoch": 0.40005422625906595, - "grad_norm": 1.817826824670329, - "learning_rate": 1.3638416842318691e-06, - "loss": 1.1404, - "step": 2951 - }, - { - "epoch": 0.4001897919067308, - "grad_norm": 1.543778521773758, - "learning_rate": 1.3634326094065194e-06, - "loss": 1.1666, - "step": 2952 - }, - { - "epoch": 0.4003253575543957, - "grad_norm": 1.4875320474474816, - "learning_rate": 1.3630234644960597e-06, - "loss": 1.1707, - "step": 2953 - }, - { - "epoch": 0.4004609232020606, - "grad_norm": 2.391640724631687, - "learning_rate": 1.3626142495793902e-06, - "loss": 1.1752, - "step": 2954 - }, - { - "epoch": 0.4005964888497255, - "grad_norm": 1.5074333318442203, - "learning_rate": 1.3622049647354252e-06, - "loss": 1.1693, - "step": 2955 - }, - { - "epoch": 0.40073205449739036, - "grad_norm": 2.423258827205519, - "learning_rate": 1.361795610043092e-06, - "loss": 1.1489, - "step": 2956 - }, - { - "epoch": 0.4008676201450552, - "grad_norm": 1.7766385485254983, - "learning_rate": 1.3613861855813308e-06, - "loss": 1.1778, - "step": 2957 - }, - { - "epoch": 0.40100318579272015, - "grad_norm": 1.4707697143132035, - "learning_rate": 1.3609766914290965e-06, - "loss": 1.1509, - "step": 2958 - }, - { - "epoch": 0.401138751440385, - "grad_norm": 1.9958112302291842, - "learning_rate": 1.3605671276653565e-06, - "loss": 1.183, - "step": 2959 - }, - { - "epoch": 0.4012743170880499, - "grad_norm": 1.949409904889143, - "learning_rate": 1.3601574943690924e-06, - "loss": 1.2306, - "step": 2960 - }, - { - "epoch": 0.40140988273571476, - "grad_norm": 1.5556201584728675, - "learning_rate": 1.3597477916192985e-06, - "loss": 1.1905, - "step": 2961 - }, - { - "epoch": 0.40154544838337963, - "grad_norm": 1.4379553521059147, - "learning_rate": 1.3593380194949823e-06, - "loss": 1.167, - "step": 2962 - }, - { - "epoch": 0.40168101403104456, - "grad_norm": 1.3911873519179774, - "learning_rate": 1.3589281780751659e-06, - "loss": 1.1426, - "step": 2963 - }, - { - "epoch": 0.40181657967870943, - "grad_norm": 1.5030159497242004, - "learning_rate": 1.358518267438883e-06, - "loss": 1.1624, - "step": 2964 - }, - { - "epoch": 0.4019521453263743, - "grad_norm": 1.6750322556222297, - "learning_rate": 1.3581082876651824e-06, - "loss": 1.206, - "step": 2965 - }, - { - "epoch": 0.40208771097403917, - "grad_norm": 1.9965189814847508, - "learning_rate": 1.3576982388331258e-06, - "loss": 1.1712, - "step": 2966 - }, - { - "epoch": 0.40222327662170404, - "grad_norm": 2.025000175823773, - "learning_rate": 1.3572881210217869e-06, - "loss": 1.1368, - "step": 2967 - }, - { - "epoch": 0.40235884226936897, - "grad_norm": 1.8972174296372128, - "learning_rate": 1.3568779343102539e-06, - "loss": 1.1891, - "step": 2968 - }, - { - "epoch": 0.40249440791703384, - "grad_norm": 1.4308764717888318, - "learning_rate": 1.3564676787776282e-06, - "loss": 1.1129, - "step": 2969 - }, - { - "epoch": 0.4026299735646987, - "grad_norm": 1.4537409126888967, - "learning_rate": 1.356057354503025e-06, - "loss": 1.1768, - "step": 2970 - }, - { - "epoch": 0.4027655392123636, - "grad_norm": 1.8119229602360758, - "learning_rate": 1.3556469615655713e-06, - "loss": 1.2113, - "step": 2971 - }, - { - "epoch": 0.40290110486002845, - "grad_norm": 4.364344594971192, - "learning_rate": 1.355236500044408e-06, - "loss": 1.1596, - "step": 2972 - }, - { - "epoch": 0.4030366705076934, - "grad_norm": 4.24985190217303, - "learning_rate": 1.3548259700186901e-06, - "loss": 1.1584, - "step": 2973 - }, - { - "epoch": 0.40317223615535824, - "grad_norm": 2.4172476085522594, - "learning_rate": 1.3544153715675848e-06, - "loss": 1.2085, - "step": 2974 - }, - { - "epoch": 0.4033078018030231, - "grad_norm": 1.414267639127324, - "learning_rate": 1.3540047047702725e-06, - "loss": 1.1452, - "step": 2975 - }, - { - "epoch": 0.403443367450688, - "grad_norm": 1.7153493047614972, - "learning_rate": 1.353593969705947e-06, - "loss": 1.1437, - "step": 2976 - }, - { - "epoch": 0.40357893309835285, - "grad_norm": 2.0081981841408205, - "learning_rate": 1.353183166453816e-06, - "loss": 1.187, - "step": 2977 - }, - { - "epoch": 0.4037144987460178, - "grad_norm": 2.579701491391784, - "learning_rate": 1.352772295093099e-06, - "loss": 1.1836, - "step": 2978 - }, - { - "epoch": 0.40385006439368265, - "grad_norm": 1.5218429184248605, - "learning_rate": 1.3523613557030298e-06, - "loss": 1.1693, - "step": 2979 - }, - { - "epoch": 0.4039856300413475, - "grad_norm": 1.482985648336377, - "learning_rate": 1.3519503483628541e-06, - "loss": 1.1582, - "step": 2980 - }, - { - "epoch": 0.4041211956890124, - "grad_norm": 1.6578596576152131, - "learning_rate": 1.351539273151832e-06, - "loss": 1.1295, - "step": 2981 - }, - { - "epoch": 0.40425676133667726, - "grad_norm": 1.5236561442131458, - "learning_rate": 1.3511281301492358e-06, - "loss": 1.1846, - "step": 2982 - }, - { - "epoch": 0.4043923269843422, - "grad_norm": 1.4209968689140218, - "learning_rate": 1.3507169194343514e-06, - "loss": 1.1618, - "step": 2983 - }, - { - "epoch": 0.40452789263200706, - "grad_norm": 1.7711222021577624, - "learning_rate": 1.3503056410864777e-06, - "loss": 1.1712, - "step": 2984 - }, - { - "epoch": 0.40466345827967193, - "grad_norm": 1.573985053656633, - "learning_rate": 1.349894295184926e-06, - "loss": 1.1726, - "step": 2985 - }, - { - "epoch": 0.4047990239273368, - "grad_norm": 1.8008048734857227, - "learning_rate": 1.3494828818090215e-06, - "loss": 1.1556, - "step": 2986 - }, - { - "epoch": 0.40493458957500167, - "grad_norm": 1.7127172276083944, - "learning_rate": 1.349071401038102e-06, - "loss": 1.1636, - "step": 2987 - }, - { - "epoch": 0.4050701552226666, - "grad_norm": 1.7790089198373586, - "learning_rate": 1.348659852951518e-06, - "loss": 1.1284, - "step": 2988 - }, - { - "epoch": 0.40520572087033147, - "grad_norm": 1.6540583780597542, - "learning_rate": 1.3482482376286338e-06, - "loss": 1.1365, - "step": 2989 - }, - { - "epoch": 0.40534128651799634, - "grad_norm": 1.7482892199023257, - "learning_rate": 1.3478365551488256e-06, - "loss": 1.1715, - "step": 2990 - }, - { - "epoch": 0.4054768521656612, - "grad_norm": 1.621260201466407, - "learning_rate": 1.3474248055914834e-06, - "loss": 1.1599, - "step": 2991 - }, - { - "epoch": 0.4056124178133261, - "grad_norm": 1.6008358173348922, - "learning_rate": 1.3470129890360103e-06, - "loss": 1.142, - "step": 2992 - }, - { - "epoch": 0.405747983460991, - "grad_norm": 1.4446118753847812, - "learning_rate": 1.3466011055618207e-06, - "loss": 1.1253, - "step": 2993 - }, - { - "epoch": 0.40588354910865587, - "grad_norm": 1.6821957075353624, - "learning_rate": 1.3461891552483442e-06, - "loss": 1.1678, - "step": 2994 - }, - { - "epoch": 0.40601911475632074, - "grad_norm": 2.4361166858420136, - "learning_rate": 1.3457771381750217e-06, - "loss": 1.1479, - "step": 2995 - }, - { - "epoch": 0.4061546804039856, - "grad_norm": 1.5904096485020944, - "learning_rate": 1.3453650544213076e-06, - "loss": 1.1559, - "step": 2996 - }, - { - "epoch": 0.40629024605165054, - "grad_norm": 1.8171258367911183, - "learning_rate": 1.344952904066669e-06, - "loss": 1.1524, - "step": 2997 - }, - { - "epoch": 0.4064258116993154, - "grad_norm": 1.490680958988263, - "learning_rate": 1.3445406871905855e-06, - "loss": 1.1572, - "step": 2998 - }, - { - "epoch": 0.4065613773469803, - "grad_norm": 1.5139356001230093, - "learning_rate": 1.34412840387255e-06, - "loss": 1.1474, - "step": 2999 - }, - { - "epoch": 0.40669694299464515, - "grad_norm": 1.6335278425770918, - "learning_rate": 1.3437160541920685e-06, - "loss": 1.1577, - "step": 3000 - }, - { - "epoch": 0.40683250864231, - "grad_norm": 1.4820501388631733, - "learning_rate": 1.3433036382286589e-06, - "loss": 1.1694, - "step": 3001 - }, - { - "epoch": 0.40696807428997495, - "grad_norm": 1.5655418855615892, - "learning_rate": 1.3428911560618525e-06, - "loss": 1.183, - "step": 3002 - }, - { - "epoch": 0.4071036399376398, - "grad_norm": 1.429386320774816, - "learning_rate": 1.3424786077711933e-06, - "loss": 1.1682, - "step": 3003 - }, - { - "epoch": 0.4072392055853047, - "grad_norm": 1.4824716555677548, - "learning_rate": 1.342065993436238e-06, - "loss": 1.1156, - "step": 3004 - }, - { - "epoch": 0.40737477123296956, - "grad_norm": 9.037844752533024, - "learning_rate": 1.3416533131365563e-06, - "loss": 1.15, - "step": 3005 - }, - { - "epoch": 0.4075103368806344, - "grad_norm": 1.4736608220927312, - "learning_rate": 1.3412405669517296e-06, - "loss": 1.157, - "step": 3006 - }, - { - "epoch": 0.40764590252829935, - "grad_norm": 1.913598186432291, - "learning_rate": 1.3408277549613534e-06, - "loss": 1.1517, - "step": 3007 - }, - { - "epoch": 0.4077814681759642, - "grad_norm": 1.319910064254687, - "learning_rate": 1.3404148772450348e-06, - "loss": 1.1707, - "step": 3008 - }, - { - "epoch": 0.4079170338236291, - "grad_norm": 1.3941334320343979, - "learning_rate": 1.340001933882394e-06, - "loss": 1.1348, - "step": 3009 - }, - { - "epoch": 0.40805259947129396, - "grad_norm": 1.5806683754088922, - "learning_rate": 1.3395889249530642e-06, - "loss": 1.1788, - "step": 3010 - }, - { - "epoch": 0.40818816511895883, - "grad_norm": 1.824847761334936, - "learning_rate": 1.339175850536691e-06, - "loss": 1.1533, - "step": 3011 - }, - { - "epoch": 0.40832373076662376, - "grad_norm": 1.50896343686145, - "learning_rate": 1.338762710712932e-06, - "loss": 1.1604, - "step": 3012 - }, - { - "epoch": 0.40845929641428863, - "grad_norm": 1.4420071746377094, - "learning_rate": 1.3383495055614586e-06, - "loss": 1.1482, - "step": 3013 - }, - { - "epoch": 0.4085948620619535, - "grad_norm": 1.426725595670434, - "learning_rate": 1.3379362351619537e-06, - "loss": 1.1684, - "step": 3014 - }, - { - "epoch": 0.40873042770961837, - "grad_norm": 1.4223784678318407, - "learning_rate": 1.3375228995941132e-06, - "loss": 1.2014, - "step": 3015 - }, - { - "epoch": 0.40886599335728324, - "grad_norm": 1.7523576359619732, - "learning_rate": 1.337109498937646e-06, - "loss": 1.1913, - "step": 3016 - }, - { - "epoch": 0.40900155900494817, - "grad_norm": 4.73984162821829, - "learning_rate": 1.3366960332722728e-06, - "loss": 1.1363, - "step": 3017 - }, - { - "epoch": 0.40913712465261304, - "grad_norm": 1.8833208454846375, - "learning_rate": 1.3362825026777272e-06, - "loss": 1.1915, - "step": 3018 - }, - { - "epoch": 0.4092726903002779, - "grad_norm": 1.410841023499633, - "learning_rate": 1.3358689072337554e-06, - "loss": 1.1661, - "step": 3019 - }, - { - "epoch": 0.4094082559479428, - "grad_norm": 1.619929943771246, - "learning_rate": 1.3354552470201161e-06, - "loss": 1.1318, - "step": 3020 - }, - { - "epoch": 0.40954382159560765, - "grad_norm": 2.779923050188363, - "learning_rate": 1.3350415221165805e-06, - "loss": 1.1537, - "step": 3021 - }, - { - "epoch": 0.4096793872432726, - "grad_norm": 1.5230873987998803, - "learning_rate": 1.3346277326029317e-06, - "loss": 1.1482, - "step": 3022 - }, - { - "epoch": 0.40981495289093745, - "grad_norm": 1.689604048714454, - "learning_rate": 1.3342138785589666e-06, - "loss": 1.1446, - "step": 3023 - }, - { - "epoch": 0.4099505185386023, - "grad_norm": 1.4062020003720674, - "learning_rate": 1.3337999600644928e-06, - "loss": 1.2011, - "step": 3024 - }, - { - "epoch": 0.4100860841862672, - "grad_norm": 3.0183277639508415, - "learning_rate": 1.3333859771993315e-06, - "loss": 1.1586, - "step": 3025 - }, - { - "epoch": 0.41022164983393206, - "grad_norm": 1.4482351939597709, - "learning_rate": 1.332971930043316e-06, - "loss": 1.1429, - "step": 3026 - }, - { - "epoch": 0.410357215481597, - "grad_norm": 2.1634725770068024, - "learning_rate": 1.3325578186762923e-06, - "loss": 1.1752, - "step": 3027 - }, - { - "epoch": 0.41049278112926185, - "grad_norm": 1.824843380878061, - "learning_rate": 1.3321436431781183e-06, - "loss": 1.1589, - "step": 3028 - }, - { - "epoch": 0.4106283467769267, - "grad_norm": 1.546505938653725, - "learning_rate": 1.3317294036286644e-06, - "loss": 1.1171, - "step": 3029 - }, - { - "epoch": 0.4107639124245916, - "grad_norm": 1.6712608849364268, - "learning_rate": 1.3313151001078135e-06, - "loss": 1.1344, - "step": 3030 - }, - { - "epoch": 0.41089947807225646, - "grad_norm": 2.013583970927868, - "learning_rate": 1.3309007326954608e-06, - "loss": 1.1347, - "step": 3031 - }, - { - "epoch": 0.4110350437199214, - "grad_norm": 1.506368310524576, - "learning_rate": 1.330486301471514e-06, - "loss": 1.1812, - "step": 3032 - }, - { - "epoch": 0.41117060936758626, - "grad_norm": 2.014179564165211, - "learning_rate": 1.3300718065158924e-06, - "loss": 1.1216, - "step": 3033 - }, - { - "epoch": 0.41130617501525113, - "grad_norm": 1.9893124243438511, - "learning_rate": 1.3296572479085284e-06, - "loss": 1.2167, - "step": 3034 - }, - { - "epoch": 0.411441740662916, - "grad_norm": 1.4361196392121307, - "learning_rate": 1.3292426257293668e-06, - "loss": 1.1328, - "step": 3035 - }, - { - "epoch": 0.4115773063105809, - "grad_norm": 2.0658066984512895, - "learning_rate": 1.3288279400583631e-06, - "loss": 1.172, - "step": 3036 - }, - { - "epoch": 0.4117128719582458, - "grad_norm": 1.5196156729294548, - "learning_rate": 1.3284131909754868e-06, - "loss": 1.1572, - "step": 3037 - }, - { - "epoch": 0.41184843760591067, - "grad_norm": 2.2117400603866093, - "learning_rate": 1.3279983785607192e-06, - "loss": 1.1556, - "step": 3038 - }, - { - "epoch": 0.41198400325357554, - "grad_norm": 1.6185009024302024, - "learning_rate": 1.327583502894053e-06, - "loss": 1.1278, - "step": 3039 - }, - { - "epoch": 0.4121195689012404, - "grad_norm": 1.5764578662916684, - "learning_rate": 1.3271685640554943e-06, - "loss": 1.1326, - "step": 3040 - }, - { - "epoch": 0.41225513454890533, - "grad_norm": 1.4127489966857139, - "learning_rate": 1.3267535621250604e-06, - "loss": 1.1457, - "step": 3041 - }, - { - "epoch": 0.4123907001965702, - "grad_norm": 1.6004494035225776, - "learning_rate": 1.3263384971827816e-06, - "loss": 1.1548, - "step": 3042 - }, - { - "epoch": 0.4125262658442351, - "grad_norm": 1.3806129811082926, - "learning_rate": 1.3259233693086993e-06, - "loss": 1.1428, - "step": 3043 - }, - { - "epoch": 0.41266183149189994, - "grad_norm": 1.7035808085016433, - "learning_rate": 1.3255081785828678e-06, - "loss": 1.1634, - "step": 3044 - }, - { - "epoch": 0.4127973971395648, - "grad_norm": 1.4791083051538811, - "learning_rate": 1.3250929250853537e-06, - "loss": 1.1793, - "step": 3045 - }, - { - "epoch": 0.41293296278722974, - "grad_norm": 1.9553326337111017, - "learning_rate": 1.324677608896235e-06, - "loss": 1.1555, - "step": 3046 - }, - { - "epoch": 0.4130685284348946, - "grad_norm": 1.4772360884770326, - "learning_rate": 1.3242622300956027e-06, - "loss": 1.1438, - "step": 3047 - }, - { - "epoch": 0.4132040940825595, - "grad_norm": 1.4811318763209484, - "learning_rate": 1.3238467887635583e-06, - "loss": 1.1618, - "step": 3048 - }, - { - "epoch": 0.41333965973022435, - "grad_norm": 3.351399129027451, - "learning_rate": 1.3234312849802173e-06, - "loss": 1.166, - "step": 3049 - }, - { - "epoch": 0.4134752253778892, - "grad_norm": 2.1841089130053395, - "learning_rate": 1.323015718825706e-06, - "loss": 1.133, - "step": 3050 - }, - { - "epoch": 0.41361079102555415, - "grad_norm": 2.7924505354012052, - "learning_rate": 1.3226000903801632e-06, - "loss": 1.1298, - "step": 3051 - }, - { - "epoch": 0.413746356673219, - "grad_norm": 1.8691480453541558, - "learning_rate": 1.322184399723739e-06, - "loss": 1.1872, - "step": 3052 - }, - { - "epoch": 0.4138819223208839, - "grad_norm": 1.4580262222271165, - "learning_rate": 1.3217686469365967e-06, - "loss": 1.1805, - "step": 3053 - }, - { - "epoch": 0.41401748796854876, - "grad_norm": 1.7338173901392382, - "learning_rate": 1.3213528320989107e-06, - "loss": 1.169, - "step": 3054 - }, - { - "epoch": 0.41415305361621363, - "grad_norm": 1.47936257799703, - "learning_rate": 1.3209369552908676e-06, - "loss": 1.127, - "step": 3055 - }, - { - "epoch": 0.41428861926387855, - "grad_norm": 2.5743683461216467, - "learning_rate": 1.320521016592666e-06, - "loss": 1.1396, - "step": 3056 - }, - { - "epoch": 0.4144241849115434, - "grad_norm": 1.727185728144914, - "learning_rate": 1.3201050160845164e-06, - "loss": 1.1605, - "step": 3057 - }, - { - "epoch": 0.4145597505592083, - "grad_norm": 1.8638213145180602, - "learning_rate": 1.3196889538466413e-06, - "loss": 1.13, - "step": 3058 - }, - { - "epoch": 0.41469531620687317, - "grad_norm": 1.5935752825833525, - "learning_rate": 1.319272829959275e-06, - "loss": 1.1442, - "step": 3059 - }, - { - "epoch": 0.41483088185453804, - "grad_norm": 2.3861144183150005, - "learning_rate": 1.3188566445026635e-06, - "loss": 1.1706, - "step": 3060 - }, - { - "epoch": 0.41496644750220296, - "grad_norm": 1.7932350091450062, - "learning_rate": 1.3184403975570648e-06, - "loss": 1.1385, - "step": 3061 - }, - { - "epoch": 0.41510201314986783, - "grad_norm": 1.4526245091815548, - "learning_rate": 1.3180240892027494e-06, - "loss": 1.1508, - "step": 3062 - }, - { - "epoch": 0.4152375787975327, - "grad_norm": 1.5430268659187045, - "learning_rate": 1.3176077195199984e-06, - "loss": 1.1505, - "step": 3063 - }, - { - "epoch": 0.4153731444451976, - "grad_norm": 1.772463411362096, - "learning_rate": 1.3171912885891061e-06, - "loss": 1.1704, - "step": 3064 - }, - { - "epoch": 0.41550871009286244, - "grad_norm": 1.5908375376393264, - "learning_rate": 1.3167747964903775e-06, - "loss": 1.183, - "step": 3065 - }, - { - "epoch": 0.41564427574052737, - "grad_norm": 2.356156565589939, - "learning_rate": 1.3163582433041296e-06, - "loss": 1.1567, - "step": 3066 - }, - { - "epoch": 0.41577984138819224, - "grad_norm": 1.8248926885676873, - "learning_rate": 1.3159416291106916e-06, - "loss": 1.1846, - "step": 3067 - }, - { - "epoch": 0.4159154070358571, - "grad_norm": 1.5008855269755847, - "learning_rate": 1.3155249539904049e-06, - "loss": 1.1746, - "step": 3068 - }, - { - "epoch": 0.416050972683522, - "grad_norm": 1.6132698774320378, - "learning_rate": 1.3151082180236209e-06, - "loss": 1.175, - "step": 3069 - }, - { - "epoch": 0.41618653833118685, - "grad_norm": 1.908583824256697, - "learning_rate": 1.3146914212907042e-06, - "loss": 1.1355, - "step": 3070 - }, - { - "epoch": 0.4163221039788518, - "grad_norm": 1.9766885636598193, - "learning_rate": 1.3142745638720314e-06, - "loss": 1.1541, - "step": 3071 - }, - { - "epoch": 0.41645766962651665, - "grad_norm": 1.5695378613134423, - "learning_rate": 1.3138576458479893e-06, - "loss": 1.1756, - "step": 3072 - }, - { - "epoch": 0.4165932352741815, - "grad_norm": 2.867925659609644, - "learning_rate": 1.3134406672989779e-06, - "loss": 1.1414, - "step": 3073 - }, - { - "epoch": 0.4167288009218464, - "grad_norm": 1.6486689906895649, - "learning_rate": 1.313023628305408e-06, - "loss": 1.1728, - "step": 3074 - }, - { - "epoch": 0.4168643665695113, - "grad_norm": 1.6532423690513778, - "learning_rate": 1.3126065289477019e-06, - "loss": 1.2033, - "step": 3075 - }, - { - "epoch": 0.4169999322171762, - "grad_norm": 1.5634942692465632, - "learning_rate": 1.3121893693062947e-06, - "loss": 1.1552, - "step": 3076 - }, - { - "epoch": 0.41713549786484105, - "grad_norm": 85.4996727769988, - "learning_rate": 1.3117721494616319e-06, - "loss": 1.1499, - "step": 3077 - }, - { - "epoch": 0.4172710635125059, - "grad_norm": 1.4608761150844305, - "learning_rate": 1.3113548694941708e-06, - "loss": 1.0957, - "step": 3078 - }, - { - "epoch": 0.4174066291601708, - "grad_norm": 1.5891287066359499, - "learning_rate": 1.3109375294843808e-06, - "loss": 1.1311, - "step": 3079 - }, - { - "epoch": 0.4175421948078357, - "grad_norm": 1.4566246421178983, - "learning_rate": 1.3105201295127426e-06, - "loss": 1.2088, - "step": 3080 - }, - { - "epoch": 0.4176777604555006, - "grad_norm": 1.6296087811308304, - "learning_rate": 1.3101026696597487e-06, - "loss": 1.1721, - "step": 3081 - }, - { - "epoch": 0.41781332610316546, - "grad_norm": 1.4840653142866052, - "learning_rate": 1.3096851500059028e-06, - "loss": 1.1334, - "step": 3082 - }, - { - "epoch": 0.41794889175083033, - "grad_norm": 2.2283947559752844, - "learning_rate": 1.3092675706317197e-06, - "loss": 1.1813, - "step": 3083 - }, - { - "epoch": 0.4180844573984952, - "grad_norm": 1.7710971902079036, - "learning_rate": 1.3088499316177272e-06, - "loss": 1.1977, - "step": 3084 - }, - { - "epoch": 0.4182200230461601, - "grad_norm": 1.7749363426409754, - "learning_rate": 1.3084322330444635e-06, - "loss": 1.1343, - "step": 3085 - }, - { - "epoch": 0.418355588693825, - "grad_norm": 2.3600671997237153, - "learning_rate": 1.3080144749924782e-06, - "loss": 1.1364, - "step": 3086 - }, - { - "epoch": 0.41849115434148987, - "grad_norm": 2.529652083597637, - "learning_rate": 1.3075966575423326e-06, - "loss": 1.1978, - "step": 3087 - }, - { - "epoch": 0.41862671998915474, - "grad_norm": 2.5869066570465766, - "learning_rate": 1.3071787807745996e-06, - "loss": 1.1489, - "step": 3088 - }, - { - "epoch": 0.4187622856368196, - "grad_norm": 1.7117098585518964, - "learning_rate": 1.3067608447698633e-06, - "loss": 1.1252, - "step": 3089 - }, - { - "epoch": 0.41889785128448453, - "grad_norm": 1.601865974853601, - "learning_rate": 1.3063428496087196e-06, - "loss": 1.1706, - "step": 3090 - }, - { - "epoch": 0.4190334169321494, - "grad_norm": 2.6335143970976085, - "learning_rate": 1.3059247953717758e-06, - "loss": 1.1729, - "step": 3091 - }, - { - "epoch": 0.4191689825798143, - "grad_norm": 2.1963969427207686, - "learning_rate": 1.3055066821396498e-06, - "loss": 1.1762, - "step": 3092 - }, - { - "epoch": 0.41930454822747915, - "grad_norm": 1.4590742760191215, - "learning_rate": 1.3050885099929716e-06, - "loss": 1.1318, - "step": 3093 - }, - { - "epoch": 0.419440113875144, - "grad_norm": 1.8051605357817924, - "learning_rate": 1.3046702790123824e-06, - "loss": 1.1271, - "step": 3094 - }, - { - "epoch": 0.41957567952280894, - "grad_norm": 1.4351830853783298, - "learning_rate": 1.3042519892785353e-06, - "loss": 1.1613, - "step": 3095 - }, - { - "epoch": 0.4197112451704738, - "grad_norm": 1.5343789029306965, - "learning_rate": 1.3038336408720932e-06, - "loss": 1.1435, - "step": 3096 - }, - { - "epoch": 0.4198468108181387, - "grad_norm": 2.424919846356393, - "learning_rate": 1.303415233873732e-06, - "loss": 1.1789, - "step": 3097 - }, - { - "epoch": 0.41998237646580355, - "grad_norm": 6.107785821685712, - "learning_rate": 1.3029967683641378e-06, - "loss": 1.1649, - "step": 3098 - }, - { - "epoch": 0.4201179421134684, - "grad_norm": 2.912580751317892, - "learning_rate": 1.3025782444240085e-06, - "loss": 1.1671, - "step": 3099 - }, - { - "epoch": 0.42025350776113335, - "grad_norm": 1.6444689255496552, - "learning_rate": 1.3021596621340533e-06, - "loss": 1.143, - "step": 3100 - }, - { - "epoch": 0.4203890734087982, - "grad_norm": 1.4708546316661175, - "learning_rate": 1.3017410215749924e-06, - "loss": 1.1267, - "step": 3101 - }, - { - "epoch": 0.4205246390564631, - "grad_norm": 1.563970051967962, - "learning_rate": 1.3013223228275571e-06, - "loss": 1.2191, - "step": 3102 - }, - { - "epoch": 0.42066020470412796, - "grad_norm": 1.8372974467957068, - "learning_rate": 1.3009035659724904e-06, - "loss": 1.171, - "step": 3103 - }, - { - "epoch": 0.42079577035179283, - "grad_norm": 1.6203704006667359, - "learning_rate": 1.3004847510905463e-06, - "loss": 1.1661, - "step": 3104 - }, - { - "epoch": 0.42093133599945776, - "grad_norm": 1.7299620928043133, - "learning_rate": 1.30006587826249e-06, - "loss": 1.1665, - "step": 3105 - }, - { - "epoch": 0.4210669016471226, - "grad_norm": 1.527243927916385, - "learning_rate": 1.2996469475690975e-06, - "loss": 1.1295, - "step": 3106 - }, - { - "epoch": 0.4212024672947875, - "grad_norm": 1.5459111647293593, - "learning_rate": 1.2992279590911563e-06, - "loss": 1.189, - "step": 3107 - }, - { - "epoch": 0.42133803294245237, - "grad_norm": 1.657715763685031, - "learning_rate": 1.298808912909465e-06, - "loss": 1.1381, - "step": 3108 - }, - { - "epoch": 0.42147359859011724, - "grad_norm": 1.4198176550989168, - "learning_rate": 1.298389809104834e-06, - "loss": 1.1699, - "step": 3109 - }, - { - "epoch": 0.42160916423778216, - "grad_norm": 1.3858170308571767, - "learning_rate": 1.297970647758083e-06, - "loss": 1.1491, - "step": 3110 - }, - { - "epoch": 0.42174472988544703, - "grad_norm": 1.7501739595821408, - "learning_rate": 1.2975514289500451e-06, - "loss": 1.1715, - "step": 3111 - }, - { - "epoch": 0.4218802955331119, - "grad_norm": 1.4504465095563333, - "learning_rate": 1.2971321527615629e-06, - "loss": 1.1978, - "step": 3112 - }, - { - "epoch": 0.4220158611807768, - "grad_norm": 1.5088708445488614, - "learning_rate": 1.2967128192734902e-06, - "loss": 1.1709, - "step": 3113 - }, - { - "epoch": 0.4221514268284417, - "grad_norm": 2.741104596764835, - "learning_rate": 1.2962934285666924e-06, - "loss": 1.1764, - "step": 3114 - }, - { - "epoch": 0.42228699247610657, - "grad_norm": 4.6771413040022765, - "learning_rate": 1.295873980722046e-06, - "loss": 1.1879, - "step": 3115 - }, - { - "epoch": 0.42242255812377144, - "grad_norm": 1.530633143429659, - "learning_rate": 1.2954544758204374e-06, - "loss": 1.17, - "step": 3116 - }, - { - "epoch": 0.4225581237714363, - "grad_norm": 1.4784226482616707, - "learning_rate": 1.2950349139427659e-06, - "loss": 1.1415, - "step": 3117 - }, - { - "epoch": 0.4226936894191012, - "grad_norm": 1.54450023954128, - "learning_rate": 1.2946152951699398e-06, - "loss": 1.1624, - "step": 3118 - }, - { - "epoch": 0.4228292550667661, - "grad_norm": 1.4511824616975326, - "learning_rate": 1.2941956195828797e-06, - "loss": 1.1734, - "step": 3119 - }, - { - "epoch": 0.422964820714431, - "grad_norm": 1.6147612751281493, - "learning_rate": 1.2937758872625166e-06, - "loss": 1.1502, - "step": 3120 - }, - { - "epoch": 0.42310038636209585, - "grad_norm": 1.5402848112110301, - "learning_rate": 1.2933560982897924e-06, - "loss": 1.1409, - "step": 3121 - }, - { - "epoch": 0.4232359520097607, - "grad_norm": 1.4737170827031436, - "learning_rate": 1.2929362527456604e-06, - "loss": 1.1757, - "step": 3122 - }, - { - "epoch": 0.4233715176574256, - "grad_norm": 1.4902436644037902, - "learning_rate": 1.2925163507110843e-06, - "loss": 1.1545, - "step": 3123 - }, - { - "epoch": 0.4235070833050905, - "grad_norm": 1.7205064561456682, - "learning_rate": 1.292096392267039e-06, - "loss": 1.1538, - "step": 3124 - }, - { - "epoch": 0.4236426489527554, - "grad_norm": 1.556970950006168, - "learning_rate": 1.2916763774945101e-06, - "loss": 1.1475, - "step": 3125 - }, - { - "epoch": 0.42377821460042026, - "grad_norm": 1.539234995742151, - "learning_rate": 1.2912563064744938e-06, - "loss": 1.1565, - "step": 3126 - }, - { - "epoch": 0.4239137802480851, - "grad_norm": 1.667291249478388, - "learning_rate": 1.2908361792879984e-06, - "loss": 1.1554, - "step": 3127 - }, - { - "epoch": 0.42404934589575, - "grad_norm": 1.3827416009142899, - "learning_rate": 1.2904159960160415e-06, - "loss": 1.1361, - "step": 3128 - }, - { - "epoch": 0.4241849115434149, - "grad_norm": 1.5181675078583197, - "learning_rate": 1.289995756739652e-06, - "loss": 1.1634, - "step": 3129 - }, - { - "epoch": 0.4243204771910798, - "grad_norm": 1.5866631101497217, - "learning_rate": 1.2895754615398697e-06, - "loss": 1.126, - "step": 3130 - }, - { - "epoch": 0.42445604283874466, - "grad_norm": 1.547753519187641, - "learning_rate": 1.2891551104977457e-06, - "loss": 1.1183, - "step": 3131 - }, - { - "epoch": 0.42459160848640953, - "grad_norm": 1.51790355562571, - "learning_rate": 1.2887347036943407e-06, - "loss": 1.1515, - "step": 3132 - }, - { - "epoch": 0.4247271741340744, - "grad_norm": 1.4547865896027543, - "learning_rate": 1.288314241210728e-06, - "loss": 1.1792, - "step": 3133 - }, - { - "epoch": 0.42486273978173933, - "grad_norm": 1.474068101032312, - "learning_rate": 1.2878937231279892e-06, - "loss": 1.1627, - "step": 3134 - }, - { - "epoch": 0.4249983054294042, - "grad_norm": 2.145227035254027, - "learning_rate": 1.2874731495272181e-06, - "loss": 1.1346, - "step": 3135 - }, - { - "epoch": 0.42513387107706907, - "grad_norm": 1.8826671200680443, - "learning_rate": 1.2870525204895197e-06, - "loss": 1.1925, - "step": 3136 - }, - { - "epoch": 0.42526943672473394, - "grad_norm": 1.5974365936444923, - "learning_rate": 1.2866318360960084e-06, - "loss": 1.1391, - "step": 3137 - }, - { - "epoch": 0.4254050023723988, - "grad_norm": 1.8383636896941988, - "learning_rate": 1.2862110964278102e-06, - "loss": 1.1609, - "step": 3138 - }, - { - "epoch": 0.42554056802006374, - "grad_norm": 2.1510786628948293, - "learning_rate": 1.2857903015660612e-06, - "loss": 1.1862, - "step": 3139 - }, - { - "epoch": 0.4256761336677286, - "grad_norm": 1.7471968913884803, - "learning_rate": 1.2853694515919082e-06, - "loss": 1.1658, - "step": 3140 - }, - { - "epoch": 0.4258116993153935, - "grad_norm": 2.524841813929566, - "learning_rate": 1.2849485465865092e-06, - "loss": 1.1512, - "step": 3141 - }, - { - "epoch": 0.42594726496305835, - "grad_norm": 6.996042888041468, - "learning_rate": 1.2845275866310324e-06, - "loss": 1.1706, - "step": 3142 - }, - { - "epoch": 0.4260828306107232, - "grad_norm": 1.5019775257781363, - "learning_rate": 1.2841065718066563e-06, - "loss": 1.1421, - "step": 3143 - }, - { - "epoch": 0.42621839625838814, - "grad_norm": 1.6731008909155871, - "learning_rate": 1.2836855021945705e-06, - "loss": 1.1647, - "step": 3144 - }, - { - "epoch": 0.426353961906053, - "grad_norm": 1.7948555567328346, - "learning_rate": 1.283264377875975e-06, - "loss": 1.1671, - "step": 3145 - }, - { - "epoch": 0.4264895275537179, - "grad_norm": 1.5185440132364287, - "learning_rate": 1.2828431989320797e-06, - "loss": 1.1316, - "step": 3146 - }, - { - "epoch": 0.42662509320138275, - "grad_norm": 1.808833546727928, - "learning_rate": 1.2824219654441067e-06, - "loss": 1.1414, - "step": 3147 - }, - { - "epoch": 0.4267606588490476, - "grad_norm": 1.4101271540775084, - "learning_rate": 1.2820006774932866e-06, - "loss": 1.1764, - "step": 3148 - }, - { - "epoch": 0.42689622449671255, - "grad_norm": 1.5825629389496412, - "learning_rate": 1.281579335160862e-06, - "loss": 1.1473, - "step": 3149 - }, - { - "epoch": 0.4270317901443774, - "grad_norm": 1.522857814055657, - "learning_rate": 1.281157938528085e-06, - "loss": 1.1283, - "step": 3150 - }, - { - "epoch": 0.4271673557920423, - "grad_norm": 1.6255331376117934, - "learning_rate": 1.280736487676219e-06, - "loss": 1.154, - "step": 3151 - }, - { - "epoch": 0.42730292143970716, - "grad_norm": 1.3665781703786546, - "learning_rate": 1.2803149826865375e-06, - "loss": 1.1532, - "step": 3152 - }, - { - "epoch": 0.4274384870873721, - "grad_norm": 1.7816047670527708, - "learning_rate": 1.279893423640324e-06, - "loss": 1.1437, - "step": 3153 - }, - { - "epoch": 0.42757405273503696, - "grad_norm": 1.8035484631902388, - "learning_rate": 1.2794718106188734e-06, - "loss": 1.1868, - "step": 3154 - }, - { - "epoch": 0.42770961838270183, - "grad_norm": 1.3529656519595523, - "learning_rate": 1.27905014370349e-06, - "loss": 1.1685, - "step": 3155 - }, - { - "epoch": 0.4278451840303667, - "grad_norm": 1.9089637435330438, - "learning_rate": 1.2786284229754892e-06, - "loss": 1.1399, - "step": 3156 - }, - { - "epoch": 0.42798074967803157, - "grad_norm": 1.7611622450196192, - "learning_rate": 1.2782066485161961e-06, - "loss": 1.1578, - "step": 3157 - }, - { - "epoch": 0.4281163153256965, - "grad_norm": 1.5898400788087077, - "learning_rate": 1.2777848204069473e-06, - "loss": 1.18, - "step": 3158 - }, - { - "epoch": 0.42825188097336137, - "grad_norm": 1.72988417636035, - "learning_rate": 1.2773629387290883e-06, - "loss": 1.1542, - "step": 3159 - }, - { - "epoch": 0.42838744662102624, - "grad_norm": 37.194487590736294, - "learning_rate": 1.276941003563976e-06, - "loss": 1.1229, - "step": 3160 - }, - { - "epoch": 0.4285230122686911, - "grad_norm": 1.3467072532638165, - "learning_rate": 1.276519014992977e-06, - "loss": 1.1755, - "step": 3161 - }, - { - "epoch": 0.428658577916356, - "grad_norm": 1.5473168372354684, - "learning_rate": 1.276096973097469e-06, - "loss": 1.1152, - "step": 3162 - }, - { - "epoch": 0.4287941435640209, - "grad_norm": 1.6144015928885658, - "learning_rate": 1.275674877958839e-06, - "loss": 1.1764, - "step": 3163 - }, - { - "epoch": 0.4289297092116858, - "grad_norm": 1.7478721157148958, - "learning_rate": 1.2752527296584847e-06, - "loss": 1.1493, - "step": 3164 - }, - { - "epoch": 0.42906527485935064, - "grad_norm": 1.697762983095258, - "learning_rate": 1.2748305282778142e-06, - "loss": 1.13, - "step": 3165 - }, - { - "epoch": 0.4292008405070155, - "grad_norm": 1.4184532505484515, - "learning_rate": 1.2744082738982457e-06, - "loss": 1.149, - "step": 3166 - }, - { - "epoch": 0.4293364061546804, - "grad_norm": 1.7300709602832642, - "learning_rate": 1.2739859666012076e-06, - "loss": 1.1586, - "step": 3167 - }, - { - "epoch": 0.4294719718023453, - "grad_norm": 1.6235020270879368, - "learning_rate": 1.2735636064681387e-06, - "loss": 1.1453, - "step": 3168 - }, - { - "epoch": 0.4296075374500102, - "grad_norm": 1.692007137402705, - "learning_rate": 1.2731411935804877e-06, - "loss": 1.1432, - "step": 3169 - }, - { - "epoch": 0.42974310309767505, - "grad_norm": 1.6773269126656398, - "learning_rate": 1.2727187280197133e-06, - "loss": 1.1398, - "step": 3170 - }, - { - "epoch": 0.4298786687453399, - "grad_norm": 1.7469366860660391, - "learning_rate": 1.272296209867285e-06, - "loss": 1.1742, - "step": 3171 - }, - { - "epoch": 0.4300142343930048, - "grad_norm": 4.558604276506105, - "learning_rate": 1.2718736392046824e-06, - "loss": 1.1503, - "step": 3172 - }, - { - "epoch": 0.4301498000406697, - "grad_norm": 1.5226736308947884, - "learning_rate": 1.271451016113394e-06, - "loss": 1.1235, - "step": 3173 - }, - { - "epoch": 0.4302853656883346, - "grad_norm": 1.4817786839337552, - "learning_rate": 1.27102834067492e-06, - "loss": 1.1595, - "step": 3174 - }, - { - "epoch": 0.43042093133599946, - "grad_norm": 1.559793427090105, - "learning_rate": 1.2706056129707703e-06, - "loss": 1.1848, - "step": 3175 - }, - { - "epoch": 0.4305564969836643, - "grad_norm": 2.3657512218012497, - "learning_rate": 1.2701828330824638e-06, - "loss": 1.1796, - "step": 3176 - }, - { - "epoch": 0.4306920626313292, - "grad_norm": 1.5075021426407098, - "learning_rate": 1.2697600010915306e-06, - "loss": 1.1917, - "step": 3177 - }, - { - "epoch": 0.4308276282789941, - "grad_norm": 1.6510234303800357, - "learning_rate": 1.2693371170795107e-06, - "loss": 1.1215, - "step": 3178 - }, - { - "epoch": 0.430963193926659, - "grad_norm": 1.4656668652059128, - "learning_rate": 1.2689141811279536e-06, - "loss": 1.1403, - "step": 3179 - }, - { - "epoch": 0.43109875957432386, - "grad_norm": 1.5060450817364808, - "learning_rate": 1.2684911933184193e-06, - "loss": 1.141, - "step": 3180 - }, - { - "epoch": 0.43123432522198873, - "grad_norm": 1.5336204307736274, - "learning_rate": 1.2680681537324779e-06, - "loss": 1.1674, - "step": 3181 - }, - { - "epoch": 0.4313698908696536, - "grad_norm": 1.4706252163066993, - "learning_rate": 1.267645062451709e-06, - "loss": 1.1216, - "step": 3182 - }, - { - "epoch": 0.43150545651731853, - "grad_norm": 1.548410665605616, - "learning_rate": 1.2672219195577023e-06, - "loss": 1.2431, - "step": 3183 - }, - { - "epoch": 0.4316410221649834, - "grad_norm": 2.189684840287999, - "learning_rate": 1.266798725132058e-06, - "loss": 1.1556, - "step": 3184 - }, - { - "epoch": 0.43177658781264827, - "grad_norm": 1.4528497420314503, - "learning_rate": 1.2663754792563852e-06, - "loss": 1.1307, - "step": 3185 - }, - { - "epoch": 0.43191215346031314, - "grad_norm": 1.563784882372342, - "learning_rate": 1.2659521820123042e-06, - "loss": 1.1812, - "step": 3186 - }, - { - "epoch": 0.432047719107978, - "grad_norm": 1.7687160879267911, - "learning_rate": 1.265528833481444e-06, - "loss": 1.1607, - "step": 3187 - }, - { - "epoch": 0.43218328475564294, - "grad_norm": 1.536900977789783, - "learning_rate": 1.2651054337454443e-06, - "loss": 1.1575, - "step": 3188 - }, - { - "epoch": 0.4323188504033078, - "grad_norm": 1.485390608522885, - "learning_rate": 1.2646819828859545e-06, - "loss": 1.1634, - "step": 3189 - }, - { - "epoch": 0.4324544160509727, - "grad_norm": 1.8918687038917237, - "learning_rate": 1.2642584809846333e-06, - "loss": 1.1568, - "step": 3190 - }, - { - "epoch": 0.43258998169863755, - "grad_norm": 2.602486985966941, - "learning_rate": 1.2638349281231503e-06, - "loss": 1.1788, - "step": 3191 - }, - { - "epoch": 0.4327255473463024, - "grad_norm": 1.579691753701753, - "learning_rate": 1.2634113243831836e-06, - "loss": 1.1737, - "step": 3192 - }, - { - "epoch": 0.43286111299396735, - "grad_norm": 1.6524643967184733, - "learning_rate": 1.2629876698464223e-06, - "loss": 1.1383, - "step": 3193 - }, - { - "epoch": 0.4329966786416322, - "grad_norm": 1.9185081776555426, - "learning_rate": 1.2625639645945652e-06, - "loss": 1.1631, - "step": 3194 - }, - { - "epoch": 0.4331322442892971, - "grad_norm": 1.5662633973464515, - "learning_rate": 1.2621402087093195e-06, - "loss": 1.1696, - "step": 3195 - }, - { - "epoch": 0.43326780993696196, - "grad_norm": 2.0359133323833984, - "learning_rate": 1.261716402272404e-06, - "loss": 1.1485, - "step": 3196 - }, - { - "epoch": 0.4334033755846269, - "grad_norm": 1.6597370912669163, - "learning_rate": 1.2612925453655462e-06, - "loss": 1.1382, - "step": 3197 - }, - { - "epoch": 0.43353894123229175, - "grad_norm": 1.555343681590784, - "learning_rate": 1.2608686380704838e-06, - "loss": 1.1022, - "step": 3198 - }, - { - "epoch": 0.4336745068799566, - "grad_norm": 3.2298364626811913, - "learning_rate": 1.2604446804689635e-06, - "loss": 1.1697, - "step": 3199 - }, - { - "epoch": 0.4338100725276215, - "grad_norm": 4.232577542417798, - "learning_rate": 1.2600206726427422e-06, - "loss": 1.1521, - "step": 3200 - }, - { - "epoch": 0.43394563817528636, - "grad_norm": 1.611605569922186, - "learning_rate": 1.2595966146735868e-06, - "loss": 1.1568, - "step": 3201 - }, - { - "epoch": 0.4340812038229513, - "grad_norm": 1.9346191649569062, - "learning_rate": 1.2591725066432734e-06, - "loss": 1.1403, - "step": 3202 - }, - { - "epoch": 0.43421676947061616, - "grad_norm": 1.596272402065424, - "learning_rate": 1.258748348633588e-06, - "loss": 1.1324, - "step": 3203 - }, - { - "epoch": 0.43435233511828103, - "grad_norm": 1.7013131973406388, - "learning_rate": 1.2583241407263259e-06, - "loss": 1.142, - "step": 3204 - }, - { - "epoch": 0.4344879007659459, - "grad_norm": 1.507977351316248, - "learning_rate": 1.2578998830032924e-06, - "loss": 1.1692, - "step": 3205 - }, - { - "epoch": 0.43462346641361077, - "grad_norm": 2.4878004295675478, - "learning_rate": 1.257475575546302e-06, - "loss": 1.1361, - "step": 3206 - }, - { - "epoch": 0.4347590320612757, - "grad_norm": 1.4530813183050282, - "learning_rate": 1.2570512184371796e-06, - "loss": 1.1372, - "step": 3207 - }, - { - "epoch": 0.43489459770894057, - "grad_norm": 5.642950804893734, - "learning_rate": 1.2566268117577583e-06, - "loss": 1.1584, - "step": 3208 - }, - { - "epoch": 0.43503016335660544, - "grad_norm": 1.5505491416945127, - "learning_rate": 1.2562023555898823e-06, - "loss": 1.1506, - "step": 3209 - }, - { - "epoch": 0.4351657290042703, - "grad_norm": 1.498696198310259, - "learning_rate": 1.2557778500154044e-06, - "loss": 1.1418, - "step": 3210 - }, - { - "epoch": 0.4353012946519352, - "grad_norm": 1.9902097112031991, - "learning_rate": 1.2553532951161868e-06, - "loss": 1.1913, - "step": 3211 - }, - { - "epoch": 0.4354368602996001, - "grad_norm": 1.4471133256828637, - "learning_rate": 1.2549286909741024e-06, - "loss": 1.1039, - "step": 3212 - }, - { - "epoch": 0.435572425947265, - "grad_norm": 1.4673989695038776, - "learning_rate": 1.254504037671032e-06, - "loss": 1.1837, - "step": 3213 - }, - { - "epoch": 0.43570799159492984, - "grad_norm": 6.571570594319828, - "learning_rate": 1.2540793352888667e-06, - "loss": 1.1284, - "step": 3214 - }, - { - "epoch": 0.4358435572425947, - "grad_norm": 1.4986502085861304, - "learning_rate": 1.2536545839095072e-06, - "loss": 1.1809, - "step": 3215 - }, - { - "epoch": 0.4359791228902596, - "grad_norm": 1.5705099429770013, - "learning_rate": 1.2532297836148636e-06, - "loss": 1.1369, - "step": 3216 - }, - { - "epoch": 0.4361146885379245, - "grad_norm": 1.4791293868196709, - "learning_rate": 1.2528049344868553e-06, - "loss": 1.158, - "step": 3217 - }, - { - "epoch": 0.4362502541855894, - "grad_norm": 1.6579404694495858, - "learning_rate": 1.2523800366074104e-06, - "loss": 1.1553, - "step": 3218 - }, - { - "epoch": 0.43638581983325425, - "grad_norm": 1.600338864299649, - "learning_rate": 1.251955090058468e-06, - "loss": 1.1705, - "step": 3219 - }, - { - "epoch": 0.4365213854809191, - "grad_norm": 2.312358584403603, - "learning_rate": 1.251530094921975e-06, - "loss": 1.1863, - "step": 3220 - }, - { - "epoch": 0.436656951128584, - "grad_norm": 1.7349811012669276, - "learning_rate": 1.2511050512798889e-06, - "loss": 1.1256, - "step": 3221 - }, - { - "epoch": 0.4367925167762489, - "grad_norm": 3.53424531356544, - "learning_rate": 1.2506799592141754e-06, - "loss": 1.1262, - "step": 3222 - }, - { - "epoch": 0.4369280824239138, - "grad_norm": 1.5239093203996437, - "learning_rate": 1.2502548188068109e-06, - "loss": 1.1756, - "step": 3223 - }, - { - "epoch": 0.43706364807157866, - "grad_norm": 1.4966105055694634, - "learning_rate": 1.24982963013978e-06, - "loss": 1.1218, - "step": 3224 - }, - { - "epoch": 0.43719921371924353, - "grad_norm": 1.55485872690755, - "learning_rate": 1.2494043932950768e-06, - "loss": 1.1682, - "step": 3225 - }, - { - "epoch": 0.4373347793669084, - "grad_norm": 1.601325191815758, - "learning_rate": 1.248979108354705e-06, - "loss": 1.1508, - "step": 3226 - }, - { - "epoch": 0.4374703450145733, - "grad_norm": 1.747882938437111, - "learning_rate": 1.2485537754006776e-06, - "loss": 1.1301, - "step": 3227 - }, - { - "epoch": 0.4376059106622382, - "grad_norm": 2.6330048791586886, - "learning_rate": 1.2481283945150164e-06, - "loss": 1.1722, - "step": 3228 - }, - { - "epoch": 0.43774147630990307, - "grad_norm": 2.0052923526765976, - "learning_rate": 1.2477029657797531e-06, - "loss": 1.1838, - "step": 3229 - }, - { - "epoch": 0.43787704195756794, - "grad_norm": 1.440005714740513, - "learning_rate": 1.247277489276928e-06, - "loss": 1.1789, - "step": 3230 - }, - { - "epoch": 0.4380126076052328, - "grad_norm": 1.8840509712574505, - "learning_rate": 1.2468519650885912e-06, - "loss": 1.1743, - "step": 3231 - }, - { - "epoch": 0.43814817325289773, - "grad_norm": 16.33458105594972, - "learning_rate": 1.2464263932968012e-06, - "loss": 1.1334, - "step": 3232 - }, - { - "epoch": 0.4382837389005626, - "grad_norm": 1.7766821117190994, - "learning_rate": 1.2460007739836265e-06, - "loss": 1.1638, - "step": 3233 - }, - { - "epoch": 0.4384193045482275, - "grad_norm": 1.8215923336766116, - "learning_rate": 1.2455751072311443e-06, - "loss": 1.1338, - "step": 3234 - }, - { - "epoch": 0.43855487019589234, - "grad_norm": 2.0103642597187235, - "learning_rate": 1.245149393121441e-06, - "loss": 1.1835, - "step": 3235 - }, - { - "epoch": 0.43869043584355727, - "grad_norm": 1.651919186041763, - "learning_rate": 1.2447236317366124e-06, - "loss": 1.164, - "step": 3236 - }, - { - "epoch": 0.43882600149122214, - "grad_norm": 1.7080742911521356, - "learning_rate": 1.2442978231587633e-06, - "loss": 1.1488, - "step": 3237 - }, - { - "epoch": 0.438961567138887, - "grad_norm": 1.5284977541376796, - "learning_rate": 1.2438719674700073e-06, - "loss": 1.1616, - "step": 3238 - }, - { - "epoch": 0.4390971327865519, - "grad_norm": 1.5481020576483937, - "learning_rate": 1.2434460647524675e-06, - "loss": 1.1114, - "step": 3239 - }, - { - "epoch": 0.43923269843421675, - "grad_norm": 2.071447032571576, - "learning_rate": 1.2430201150882755e-06, - "loss": 1.1277, - "step": 3240 - }, - { - "epoch": 0.4393682640818817, - "grad_norm": 1.4723496682261221, - "learning_rate": 1.2425941185595726e-06, - "loss": 1.1858, - "step": 3241 - }, - { - "epoch": 0.43950382972954655, - "grad_norm": 1.5376443649286868, - "learning_rate": 1.2421680752485092e-06, - "loss": 1.1505, - "step": 3242 - }, - { - "epoch": 0.4396393953772114, - "grad_norm": 2.30511740724904, - "learning_rate": 1.241741985237244e-06, - "loss": 1.0963, - "step": 3243 - }, - { - "epoch": 0.4397749610248763, - "grad_norm": 1.5235653493440653, - "learning_rate": 1.241315848607945e-06, - "loss": 1.1756, - "step": 3244 - }, - { - "epoch": 0.43991052667254116, - "grad_norm": 2.5077791047346016, - "learning_rate": 1.2408896654427894e-06, - "loss": 1.1488, - "step": 3245 - }, - { - "epoch": 0.4400460923202061, - "grad_norm": 1.9233372184503001, - "learning_rate": 1.2404634358239632e-06, - "loss": 1.209, - "step": 3246 - }, - { - "epoch": 0.44018165796787095, - "grad_norm": 1.6030113042174845, - "learning_rate": 1.2400371598336617e-06, - "loss": 1.1953, - "step": 3247 - }, - { - "epoch": 0.4403172236155358, - "grad_norm": 2.2097170084505966, - "learning_rate": 1.2396108375540885e-06, - "loss": 1.1729, - "step": 3248 - }, - { - "epoch": 0.4404527892632007, - "grad_norm": 1.7824253758388764, - "learning_rate": 1.2391844690674567e-06, - "loss": 1.1405, - "step": 3249 - }, - { - "epoch": 0.44058835491086557, - "grad_norm": 1.9686298213157343, - "learning_rate": 1.2387580544559881e-06, - "loss": 1.1471, - "step": 3250 - }, - { - "epoch": 0.4407239205585305, - "grad_norm": 1.6418082181826572, - "learning_rate": 1.2383315938019132e-06, - "loss": 1.1515, - "step": 3251 - }, - { - "epoch": 0.44085948620619536, - "grad_norm": 1.464348900551424, - "learning_rate": 1.2379050871874719e-06, - "loss": 1.1789, - "step": 3252 - }, - { - "epoch": 0.44099505185386023, - "grad_norm": 1.6436375450091674, - "learning_rate": 1.2374785346949125e-06, - "loss": 1.1246, - "step": 3253 - }, - { - "epoch": 0.4411306175015251, - "grad_norm": 2.0390049753146755, - "learning_rate": 1.2370519364064919e-06, - "loss": 1.1627, - "step": 3254 - }, - { - "epoch": 0.44126618314918997, - "grad_norm": 1.6895838627855406, - "learning_rate": 1.2366252924044767e-06, - "loss": 1.1499, - "step": 3255 - }, - { - "epoch": 0.4414017487968549, - "grad_norm": 1.7608190544120044, - "learning_rate": 1.236198602771142e-06, - "loss": 1.1505, - "step": 3256 - }, - { - "epoch": 0.44153731444451977, - "grad_norm": 3.991215205470924, - "learning_rate": 1.2357718675887707e-06, - "loss": 1.1434, - "step": 3257 - }, - { - "epoch": 0.44167288009218464, - "grad_norm": 1.6242560095037306, - "learning_rate": 1.235345086939656e-06, - "loss": 1.1144, - "step": 3258 - }, - { - "epoch": 0.4418084457398495, - "grad_norm": 1.4079850078321585, - "learning_rate": 1.234918260906099e-06, - "loss": 1.1523, - "step": 3259 - }, - { - "epoch": 0.4419440113875144, - "grad_norm": 2.1354154694076595, - "learning_rate": 1.2344913895704096e-06, - "loss": 1.1559, - "step": 3260 - }, - { - "epoch": 0.4420795770351793, - "grad_norm": 1.720468108687745, - "learning_rate": 1.234064473014907e-06, - "loss": 1.1666, - "step": 3261 - }, - { - "epoch": 0.4422151426828442, - "grad_norm": 2.5153818569895314, - "learning_rate": 1.2336375113219182e-06, - "loss": 1.1752, - "step": 3262 - }, - { - "epoch": 0.44235070833050905, - "grad_norm": 5.742628085912226, - "learning_rate": 1.2332105045737796e-06, - "loss": 1.1513, - "step": 3263 - }, - { - "epoch": 0.4424862739781739, - "grad_norm": 1.942575073983917, - "learning_rate": 1.2327834528528357e-06, - "loss": 1.1534, - "step": 3264 - }, - { - "epoch": 0.4426218396258388, - "grad_norm": 1.6247650040007169, - "learning_rate": 1.2323563562414407e-06, - "loss": 1.1502, - "step": 3265 - }, - { - "epoch": 0.4427574052735037, - "grad_norm": 1.4842343108399836, - "learning_rate": 1.2319292148219566e-06, - "loss": 1.1631, - "step": 3266 - }, - { - "epoch": 0.4428929709211686, - "grad_norm": 1.8318447518478602, - "learning_rate": 1.2315020286767538e-06, - "loss": 1.1519, - "step": 3267 - }, - { - "epoch": 0.44302853656883345, - "grad_norm": 1.4444951922378129, - "learning_rate": 1.2310747978882126e-06, - "loss": 1.1627, - "step": 3268 - }, - { - "epoch": 0.4431641022164983, - "grad_norm": 1.785447186010145, - "learning_rate": 1.2306475225387203e-06, - "loss": 1.1815, - "step": 3269 - }, - { - "epoch": 0.4432996678641632, - "grad_norm": 1.7508032686291954, - "learning_rate": 1.2302202027106739e-06, - "loss": 1.1502, - "step": 3270 - }, - { - "epoch": 0.4434352335118281, - "grad_norm": 2.5133820054776645, - "learning_rate": 1.2297928384864787e-06, - "loss": 1.1832, - "step": 3271 - }, - { - "epoch": 0.443570799159493, - "grad_norm": 2.9687017472843023, - "learning_rate": 1.2293654299485485e-06, - "loss": 1.1337, - "step": 3272 - }, - { - "epoch": 0.44370636480715786, - "grad_norm": 2.9283044129033486, - "learning_rate": 1.2289379771793059e-06, - "loss": 1.158, - "step": 3273 - }, - { - "epoch": 0.44384193045482273, - "grad_norm": 1.680816267725318, - "learning_rate": 1.2285104802611812e-06, - "loss": 1.1661, - "step": 3274 - }, - { - "epoch": 0.44397749610248766, - "grad_norm": 1.8214446284725856, - "learning_rate": 1.2280829392766143e-06, - "loss": 1.1957, - "step": 3275 - }, - { - "epoch": 0.4441130617501525, - "grad_norm": 1.6344554722226396, - "learning_rate": 1.2276553543080527e-06, - "loss": 1.1541, - "step": 3276 - }, - { - "epoch": 0.4442486273978174, - "grad_norm": 1.6416107902517707, - "learning_rate": 1.2272277254379533e-06, - "loss": 1.1536, - "step": 3277 - }, - { - "epoch": 0.44438419304548227, - "grad_norm": 1.7975116243119598, - "learning_rate": 1.2268000527487803e-06, - "loss": 1.1708, - "step": 3278 - }, - { - "epoch": 0.44451975869314714, - "grad_norm": 1.6721700640249624, - "learning_rate": 1.2263723363230076e-06, - "loss": 1.1538, - "step": 3279 - }, - { - "epoch": 0.44465532434081206, - "grad_norm": 1.5662034890834837, - "learning_rate": 1.2259445762431168e-06, - "loss": 1.1282, - "step": 3280 - }, - { - "epoch": 0.44479088998847693, - "grad_norm": 1.8444268726796649, - "learning_rate": 1.2255167725915981e-06, - "loss": 1.1584, - "step": 3281 - }, - { - "epoch": 0.4449264556361418, - "grad_norm": 1.607833260256158, - "learning_rate": 1.2250889254509496e-06, - "loss": 1.1673, - "step": 3282 - }, - { - "epoch": 0.4450620212838067, - "grad_norm": 1.7379573735629854, - "learning_rate": 1.2246610349036785e-06, - "loss": 1.1468, - "step": 3283 - }, - { - "epoch": 0.44519758693147155, - "grad_norm": 1.7223481273856827, - "learning_rate": 1.2242331010323005e-06, - "loss": 1.1536, - "step": 3284 - }, - { - "epoch": 0.44533315257913647, - "grad_norm": 1.7395335208965865, - "learning_rate": 1.2238051239193387e-06, - "loss": 1.1553, - "step": 3285 - }, - { - "epoch": 0.44546871822680134, - "grad_norm": 1.608388248875753, - "learning_rate": 1.2233771036473255e-06, - "loss": 1.1446, - "step": 3286 - }, - { - "epoch": 0.4456042838744662, - "grad_norm": 1.7390980593178567, - "learning_rate": 1.2229490402988014e-06, - "loss": 1.1595, - "step": 3287 - }, - { - "epoch": 0.4457398495221311, - "grad_norm": 1.7824982538032725, - "learning_rate": 1.2225209339563143e-06, - "loss": 1.1586, - "step": 3288 - }, - { - "epoch": 0.44587541516979595, - "grad_norm": 1.5331277766042948, - "learning_rate": 1.2220927847024218e-06, - "loss": 1.1766, - "step": 3289 - }, - { - "epoch": 0.4460109808174609, - "grad_norm": 1.6205217814494162, - "learning_rate": 1.2216645926196886e-06, - "loss": 1.1623, - "step": 3290 - }, - { - "epoch": 0.44614654646512575, - "grad_norm": 1.6425414034066592, - "learning_rate": 1.2212363577906889e-06, - "loss": 1.1265, - "step": 3291 - }, - { - "epoch": 0.4462821121127906, - "grad_norm": 1.7301649363924163, - "learning_rate": 1.2208080802980037e-06, - "loss": 1.1276, - "step": 3292 - }, - { - "epoch": 0.4464176777604555, - "grad_norm": 1.9506606813635394, - "learning_rate": 1.220379760224223e-06, - "loss": 1.1655, - "step": 3293 - }, - { - "epoch": 0.44655324340812036, - "grad_norm": 1.5954290294752294, - "learning_rate": 1.2199513976519451e-06, - "loss": 1.1423, - "step": 3294 - }, - { - "epoch": 0.4466888090557853, - "grad_norm": 2.0475042441318294, - "learning_rate": 1.2195229926637764e-06, - "loss": 1.1749, - "step": 3295 - }, - { - "epoch": 0.44682437470345016, - "grad_norm": 1.4886456816435272, - "learning_rate": 1.2190945453423315e-06, - "loss": 1.0945, - "step": 3296 - }, - { - "epoch": 0.446959940351115, - "grad_norm": 1.4685200540995849, - "learning_rate": 1.2186660557702328e-06, - "loss": 1.1816, - "step": 3297 - }, - { - "epoch": 0.4470955059987799, - "grad_norm": 1.5392291684623705, - "learning_rate": 1.2182375240301114e-06, - "loss": 1.1292, - "step": 3298 - }, - { - "epoch": 0.44723107164644477, - "grad_norm": 1.9044477523336485, - "learning_rate": 1.217808950204606e-06, - "loss": 1.1676, - "step": 3299 - }, - { - "epoch": 0.4473666372941097, - "grad_norm": 2.675918377513272, - "learning_rate": 1.217380334376364e-06, - "loss": 1.1525, - "step": 3300 - }, - { - "epoch": 0.44750220294177456, - "grad_norm": 1.7857823923563956, - "learning_rate": 1.2169516766280404e-06, - "loss": 1.1934, - "step": 3301 - }, - { - "epoch": 0.44763776858943943, - "grad_norm": 1.7439104273997204, - "learning_rate": 1.2165229770422986e-06, - "loss": 1.1987, - "step": 3302 - }, - { - "epoch": 0.4477733342371043, - "grad_norm": 1.7788413834454972, - "learning_rate": 1.2160942357018096e-06, - "loss": 1.1473, - "step": 3303 - }, - { - "epoch": 0.4479088998847692, - "grad_norm": 1.3903410097462903, - "learning_rate": 1.215665452689253e-06, - "loss": 1.135, - "step": 3304 - }, - { - "epoch": 0.4480444655324341, - "grad_norm": 1.4785474686672486, - "learning_rate": 1.2152366280873163e-06, - "loss": 1.1714, - "step": 3305 - }, - { - "epoch": 0.44818003118009897, - "grad_norm": 1.665863666355688, - "learning_rate": 1.2148077619786948e-06, - "loss": 1.1715, - "step": 3306 - }, - { - "epoch": 0.44831559682776384, - "grad_norm": 1.3672070056462164, - "learning_rate": 1.214378854446092e-06, - "loss": 1.144, - "step": 3307 - }, - { - "epoch": 0.4484511624754287, - "grad_norm": 1.5607947189036087, - "learning_rate": 1.2139499055722193e-06, - "loss": 1.1775, - "step": 3308 - }, - { - "epoch": 0.4485867281230936, - "grad_norm": 1.792482544467519, - "learning_rate": 1.213520915439796e-06, - "loss": 1.0883, - "step": 3309 - }, - { - "epoch": 0.4487222937707585, - "grad_norm": 1.8839115733404674, - "learning_rate": 1.2130918841315496e-06, - "loss": 1.1603, - "step": 3310 - }, - { - "epoch": 0.4488578594184234, - "grad_norm": 6.1033074495148165, - "learning_rate": 1.2126628117302156e-06, - "loss": 1.1326, - "step": 3311 - }, - { - "epoch": 0.44899342506608825, - "grad_norm": 1.7119958481581332, - "learning_rate": 1.212233698318537e-06, - "loss": 1.1645, - "step": 3312 - }, - { - "epoch": 0.4491289907137531, - "grad_norm": 1.547338697328333, - "learning_rate": 1.2118045439792648e-06, - "loss": 1.1554, - "step": 3313 - }, - { - "epoch": 0.44926455636141804, - "grad_norm": 2.964396021602258, - "learning_rate": 1.2113753487951584e-06, - "loss": 1.1437, - "step": 3314 - }, - { - "epoch": 0.4494001220090829, - "grad_norm": 1.7718205734956578, - "learning_rate": 1.2109461128489842e-06, - "loss": 1.1579, - "step": 3315 - }, - { - "epoch": 0.4495356876567478, - "grad_norm": 1.547725392783264, - "learning_rate": 1.2105168362235176e-06, - "loss": 1.152, - "step": 3316 - }, - { - "epoch": 0.44967125330441265, - "grad_norm": 1.7142684813329903, - "learning_rate": 1.2100875190015405e-06, - "loss": 1.1293, - "step": 3317 - }, - { - "epoch": 0.4498068189520775, - "grad_norm": 1.4330504597041882, - "learning_rate": 1.2096581612658438e-06, - "loss": 1.1734, - "step": 3318 - }, - { - "epoch": 0.44994238459974245, - "grad_norm": 1.7318090156575805, - "learning_rate": 1.2092287630992257e-06, - "loss": 1.1831, - "step": 3319 - }, - { - "epoch": 0.4500779502474073, - "grad_norm": 1.4397016206181021, - "learning_rate": 1.208799324584492e-06, - "loss": 1.1232, - "step": 3320 - }, - { - "epoch": 0.4502135158950722, - "grad_norm": 1.8340632720464851, - "learning_rate": 1.2083698458044572e-06, - "loss": 1.1886, - "step": 3321 - }, - { - "epoch": 0.45034908154273706, - "grad_norm": 1.8205679266246426, - "learning_rate": 1.207940326841942e-06, - "loss": 1.1451, - "step": 3322 - }, - { - "epoch": 0.45048464719040193, - "grad_norm": 1.5575606975658127, - "learning_rate": 1.2075107677797763e-06, - "loss": 1.1329, - "step": 3323 - }, - { - "epoch": 0.45062021283806686, - "grad_norm": 1.6471653336499343, - "learning_rate": 1.2070811687007969e-06, - "loss": 1.1407, - "step": 3324 - }, - { - "epoch": 0.45075577848573173, - "grad_norm": 1.6487416319869697, - "learning_rate": 1.2066515296878488e-06, - "loss": 1.1749, - "step": 3325 - }, - { - "epoch": 0.4508913441333966, - "grad_norm": 1.5447305743387372, - "learning_rate": 1.2062218508237845e-06, - "loss": 1.1831, - "step": 3326 - }, - { - "epoch": 0.45102690978106147, - "grad_norm": 1.552864351033013, - "learning_rate": 1.2057921321914638e-06, - "loss": 1.1056, - "step": 3327 - }, - { - "epoch": 0.45116247542872634, - "grad_norm": 1.4556329271799824, - "learning_rate": 1.205362373873755e-06, - "loss": 1.1276, - "step": 3328 - }, - { - "epoch": 0.45129804107639127, - "grad_norm": 1.547389685310996, - "learning_rate": 1.2049325759535334e-06, - "loss": 1.151, - "step": 3329 - }, - { - "epoch": 0.45143360672405614, - "grad_norm": 2.0746634710983995, - "learning_rate": 1.2045027385136823e-06, - "loss": 1.1345, - "step": 3330 - }, - { - "epoch": 0.451569172371721, - "grad_norm": 1.9199974406318976, - "learning_rate": 1.2040728616370924e-06, - "loss": 1.1524, - "step": 3331 - }, - { - "epoch": 0.4517047380193859, - "grad_norm": 2.7571291562833755, - "learning_rate": 1.2036429454066616e-06, - "loss": 1.1672, - "step": 3332 - }, - { - "epoch": 0.45184030366705075, - "grad_norm": 3.9844818432773272, - "learning_rate": 1.2032129899052965e-06, - "loss": 1.1983, - "step": 3333 - }, - { - "epoch": 0.4519758693147157, - "grad_norm": 1.7121079259425511, - "learning_rate": 1.2027829952159104e-06, - "loss": 1.1775, - "step": 3334 - }, - { - "epoch": 0.45211143496238054, - "grad_norm": 1.4977541180913634, - "learning_rate": 1.2023529614214242e-06, - "loss": 1.1428, - "step": 3335 - }, - { - "epoch": 0.4522470006100454, - "grad_norm": 1.80431070848047, - "learning_rate": 1.2019228886047666e-06, - "loss": 1.1642, - "step": 3336 - }, - { - "epoch": 0.4523825662577103, - "grad_norm": 2.1593292777049355, - "learning_rate": 1.2014927768488739e-06, - "loss": 1.1259, - "step": 3337 - }, - { - "epoch": 0.45251813190537515, - "grad_norm": 1.5109637946841203, - "learning_rate": 1.2010626262366896e-06, - "loss": 1.1413, - "step": 3338 - }, - { - "epoch": 0.4526536975530401, - "grad_norm": 2.8966970215984857, - "learning_rate": 1.2006324368511651e-06, - "loss": 1.1706, - "step": 3339 - }, - { - "epoch": 0.45278926320070495, - "grad_norm": 1.7841079914665035, - "learning_rate": 1.200202208775259e-06, - "loss": 1.1612, - "step": 3340 - }, - { - "epoch": 0.4529248288483698, - "grad_norm": 1.5216754077132102, - "learning_rate": 1.1997719420919368e-06, - "loss": 1.1553, - "step": 3341 - }, - { - "epoch": 0.4530603944960347, - "grad_norm": 1.557661184347607, - "learning_rate": 1.1993416368841727e-06, - "loss": 1.1772, - "step": 3342 - }, - { - "epoch": 0.45319596014369956, - "grad_norm": 2.2319256928960165, - "learning_rate": 1.1989112932349473e-06, - "loss": 1.1461, - "step": 3343 - }, - { - "epoch": 0.4533315257913645, - "grad_norm": 1.962236803021401, - "learning_rate": 1.1984809112272493e-06, - "loss": 1.1556, - "step": 3344 - }, - { - "epoch": 0.45346709143902936, - "grad_norm": 1.4760737564453223, - "learning_rate": 1.1980504909440743e-06, - "loss": 1.1526, - "step": 3345 - }, - { - "epoch": 0.4536026570866942, - "grad_norm": 2.01301904372849, - "learning_rate": 1.1976200324684253e-06, - "loss": 1.1867, - "step": 3346 - }, - { - "epoch": 0.4537382227343591, - "grad_norm": 1.4131492730910546, - "learning_rate": 1.197189535883313e-06, - "loss": 1.1582, - "step": 3347 - }, - { - "epoch": 0.45387378838202397, - "grad_norm": 1.5826239201200616, - "learning_rate": 1.1967590012717552e-06, - "loss": 1.0993, - "step": 3348 - }, - { - "epoch": 0.4540093540296889, - "grad_norm": 1.5335423911296349, - "learning_rate": 1.1963284287167772e-06, - "loss": 1.2, - "step": 3349 - }, - { - "epoch": 0.45414491967735376, - "grad_norm": 1.6420133495622085, - "learning_rate": 1.1958978183014111e-06, - "loss": 1.1589, - "step": 3350 - }, - { - "epoch": 0.45428048532501863, - "grad_norm": 1.5694745038522437, - "learning_rate": 1.1954671701086976e-06, - "loss": 1.1566, - "step": 3351 - }, - { - "epoch": 0.4544160509726835, - "grad_norm": 1.405865806749544, - "learning_rate": 1.195036484221683e-06, - "loss": 1.1825, - "step": 3352 - }, - { - "epoch": 0.45455161662034843, - "grad_norm": 2.9841549547383477, - "learning_rate": 1.194605760723422e-06, - "loss": 1.1437, - "step": 3353 - }, - { - "epoch": 0.4546871822680133, - "grad_norm": 1.4828440805288003, - "learning_rate": 1.1941749996969762e-06, - "loss": 1.1755, - "step": 3354 - }, - { - "epoch": 0.45482274791567817, - "grad_norm": 1.5813056478637613, - "learning_rate": 1.1937442012254144e-06, - "loss": 1.1478, - "step": 3355 - }, - { - "epoch": 0.45495831356334304, - "grad_norm": 2.045158831463965, - "learning_rate": 1.1933133653918126e-06, - "loss": 1.1681, - "step": 3356 - }, - { - "epoch": 0.4550938792110079, - "grad_norm": 2.537995643909469, - "learning_rate": 1.1928824922792543e-06, - "loss": 1.1682, - "step": 3357 - }, - { - "epoch": 0.45522944485867284, - "grad_norm": 1.4681169147828277, - "learning_rate": 1.1924515819708298e-06, - "loss": 1.1595, - "step": 3358 - }, - { - "epoch": 0.4553650105063377, - "grad_norm": 1.998353359519511, - "learning_rate": 1.1920206345496372e-06, - "loss": 1.1763, - "step": 3359 - }, - { - "epoch": 0.4555005761540026, - "grad_norm": 1.5564454495359157, - "learning_rate": 1.1915896500987809e-06, - "loss": 1.106, - "step": 3360 - }, - { - "epoch": 0.45563614180166745, - "grad_norm": 3.404020772293265, - "learning_rate": 1.1911586287013725e-06, - "loss": 1.1605, - "step": 3361 - }, - { - "epoch": 0.4557717074493323, - "grad_norm": 1.463561729281201, - "learning_rate": 1.1907275704405316e-06, - "loss": 1.1852, - "step": 3362 - }, - { - "epoch": 0.45590727309699725, - "grad_norm": 1.5366917880386286, - "learning_rate": 1.1902964753993842e-06, - "loss": 1.1574, - "step": 3363 - }, - { - "epoch": 0.4560428387446621, - "grad_norm": 2.630766547163037, - "learning_rate": 1.1898653436610637e-06, - "loss": 1.1451, - "step": 3364 - }, - { - "epoch": 0.456178404392327, - "grad_norm": 1.4356278082908327, - "learning_rate": 1.1894341753087105e-06, - "loss": 1.1286, - "step": 3365 - }, - { - "epoch": 0.45631397003999186, - "grad_norm": 2.1643592272010426, - "learning_rate": 1.1890029704254716e-06, - "loss": 1.1443, - "step": 3366 - }, - { - "epoch": 0.4564495356876567, - "grad_norm": 1.8542390086800153, - "learning_rate": 1.188571729094502e-06, - "loss": 1.1208, - "step": 3367 - }, - { - "epoch": 0.45658510133532165, - "grad_norm": 1.8375508848702908, - "learning_rate": 1.1881404513989629e-06, - "loss": 1.1546, - "step": 3368 - }, - { - "epoch": 0.4567206669829865, - "grad_norm": 1.6307993911120482, - "learning_rate": 1.1877091374220228e-06, - "loss": 1.1797, - "step": 3369 - }, - { - "epoch": 0.4568562326306514, - "grad_norm": 2.6750429257379342, - "learning_rate": 1.1872777872468572e-06, - "loss": 1.1434, - "step": 3370 - }, - { - "epoch": 0.45699179827831626, - "grad_norm": 2.985921028839723, - "learning_rate": 1.1868464009566485e-06, - "loss": 1.1472, - "step": 3371 - }, - { - "epoch": 0.45712736392598113, - "grad_norm": 1.6309629769090157, - "learning_rate": 1.1864149786345868e-06, - "loss": 1.135, - "step": 3372 - }, - { - "epoch": 0.45726292957364606, - "grad_norm": 1.4537892923876943, - "learning_rate": 1.1859835203638675e-06, - "loss": 1.1715, - "step": 3373 - }, - { - "epoch": 0.45739849522131093, - "grad_norm": 1.3830602721200975, - "learning_rate": 1.1855520262276943e-06, - "loss": 1.0847, - "step": 3374 - }, - { - "epoch": 0.4575340608689758, - "grad_norm": 1.623009000010444, - "learning_rate": 1.1851204963092775e-06, - "loss": 1.13, - "step": 3375 - }, - { - "epoch": 0.45766962651664067, - "grad_norm": 1.7215118270343857, - "learning_rate": 1.1846889306918344e-06, - "loss": 1.1661, - "step": 3376 - }, - { - "epoch": 0.45780519216430554, - "grad_norm": 1.681022466616578, - "learning_rate": 1.1842573294585889e-06, - "loss": 1.1693, - "step": 3377 - }, - { - "epoch": 0.45794075781197047, - "grad_norm": 1.6517533286039925, - "learning_rate": 1.1838256926927718e-06, - "loss": 1.1152, - "step": 3378 - }, - { - "epoch": 0.45807632345963534, - "grad_norm": 1.6332659624051278, - "learning_rate": 1.1833940204776208e-06, - "loss": 1.1756, - "step": 3379 - }, - { - "epoch": 0.4582118891073002, - "grad_norm": 3.5973810451632238, - "learning_rate": 1.1829623128963807e-06, - "loss": 1.1737, - "step": 3380 - }, - { - "epoch": 0.4583474547549651, - "grad_norm": 2.1063365399170224, - "learning_rate": 1.1825305700323025e-06, - "loss": 1.1402, - "step": 3381 - }, - { - "epoch": 0.45848302040262995, - "grad_norm": 1.7374132837656904, - "learning_rate": 1.182098791968645e-06, - "loss": 1.117, - "step": 3382 - }, - { - "epoch": 0.4586185860502949, - "grad_norm": 1.6492865015413294, - "learning_rate": 1.1816669787886727e-06, - "loss": 1.1144, - "step": 3383 - }, - { - "epoch": 0.45875415169795974, - "grad_norm": 2.748047782514574, - "learning_rate": 1.1812351305756575e-06, - "loss": 1.1702, - "step": 3384 - }, - { - "epoch": 0.4588897173456246, - "grad_norm": 1.539550841693305, - "learning_rate": 1.1808032474128782e-06, - "loss": 1.1779, - "step": 3385 - }, - { - "epoch": 0.4590252829932895, - "grad_norm": 1.6666799411348443, - "learning_rate": 1.1803713293836198e-06, - "loss": 1.1341, - "step": 3386 - }, - { - "epoch": 0.45916084864095436, - "grad_norm": 2.150674927245081, - "learning_rate": 1.179939376571174e-06, - "loss": 1.191, - "step": 3387 - }, - { - "epoch": 0.4592964142886193, - "grad_norm": 1.6829115178898237, - "learning_rate": 1.1795073890588401e-06, - "loss": 1.1577, - "step": 3388 - }, - { - "epoch": 0.45943197993628415, - "grad_norm": 2.7051111700749844, - "learning_rate": 1.179075366929923e-06, - "loss": 1.1416, - "step": 3389 - }, - { - "epoch": 0.459567545583949, - "grad_norm": 3.196928646978494, - "learning_rate": 1.1786433102677348e-06, - "loss": 1.1403, - "step": 3390 - }, - { - "epoch": 0.4597031112316139, - "grad_norm": 1.7499724696914283, - "learning_rate": 1.1782112191555946e-06, - "loss": 1.1539, - "step": 3391 - }, - { - "epoch": 0.4598386768792788, - "grad_norm": 1.883604360177047, - "learning_rate": 1.1777790936768272e-06, - "loss": 1.1766, - "step": 3392 - }, - { - "epoch": 0.4599742425269437, - "grad_norm": 1.947487441941586, - "learning_rate": 1.1773469339147653e-06, - "loss": 1.1304, - "step": 3393 - }, - { - "epoch": 0.46010980817460856, - "grad_norm": 1.5286862851894245, - "learning_rate": 1.1769147399527466e-06, - "loss": 1.1438, - "step": 3394 - }, - { - "epoch": 0.46024537382227343, - "grad_norm": 2.709978568337033, - "learning_rate": 1.176482511874117e-06, - "loss": 1.1828, - "step": 3395 - }, - { - "epoch": 0.4603809394699383, - "grad_norm": 1.4031025757964122, - "learning_rate": 1.1760502497622281e-06, - "loss": 1.1569, - "step": 3396 - }, - { - "epoch": 0.4605165051176032, - "grad_norm": 1.9752835552067582, - "learning_rate": 1.1756179537004383e-06, - "loss": 1.1641, - "step": 3397 - }, - { - "epoch": 0.4606520707652681, - "grad_norm": 2.0542190341804694, - "learning_rate": 1.175185623772112e-06, - "loss": 1.1227, - "step": 3398 - }, - { - "epoch": 0.46078763641293297, - "grad_norm": 1.8945649203690382, - "learning_rate": 1.1747532600606213e-06, - "loss": 1.1413, - "step": 3399 - }, - { - "epoch": 0.46092320206059784, - "grad_norm": 1.6039852749787311, - "learning_rate": 1.174320862649344e-06, - "loss": 1.1533, - "step": 3400 - }, - { - "epoch": 0.4610587677082627, - "grad_norm": 2.214731351963026, - "learning_rate": 1.173888431621664e-06, - "loss": 1.1535, - "step": 3401 - }, - { - "epoch": 0.46119433335592763, - "grad_norm": 1.513109189506774, - "learning_rate": 1.1734559670609727e-06, - "loss": 1.1252, - "step": 3402 - }, - { - "epoch": 0.4613298990035925, - "grad_norm": 1.4613202784349688, - "learning_rate": 1.1730234690506671e-06, - "loss": 1.1235, - "step": 3403 - }, - { - "epoch": 0.4614654646512574, - "grad_norm": 1.5618015867180446, - "learning_rate": 1.1725909376741515e-06, - "loss": 1.1321, - "step": 3404 - }, - { - "epoch": 0.46160103029892224, - "grad_norm": 1.6136561156407137, - "learning_rate": 1.1721583730148356e-06, - "loss": 1.1483, - "step": 3405 - }, - { - "epoch": 0.4617365959465871, - "grad_norm": 1.5026375219832422, - "learning_rate": 1.1717257751561367e-06, - "loss": 1.1403, - "step": 3406 - }, - { - "epoch": 0.46187216159425204, - "grad_norm": 1.5582720373011743, - "learning_rate": 1.1712931441814775e-06, - "loss": 1.1147, - "step": 3407 - }, - { - "epoch": 0.4620077272419169, - "grad_norm": 2.05741881952399, - "learning_rate": 1.1708604801742877e-06, - "loss": 1.158, - "step": 3408 - }, - { - "epoch": 0.4621432928895818, - "grad_norm": 1.713945328475039, - "learning_rate": 1.1704277832180027e-06, - "loss": 1.145, - "step": 3409 - }, - { - "epoch": 0.46227885853724665, - "grad_norm": 1.7931190270837247, - "learning_rate": 1.1699950533960652e-06, - "loss": 1.1443, - "step": 3410 - }, - { - "epoch": 0.4624144241849115, - "grad_norm": 1.5947320443964803, - "learning_rate": 1.1695622907919233e-06, - "loss": 1.1502, - "step": 3411 - }, - { - "epoch": 0.46254998983257645, - "grad_norm": 2.3789703888511182, - "learning_rate": 1.1691294954890323e-06, - "loss": 1.1518, - "step": 3412 - }, - { - "epoch": 0.4626855554802413, - "grad_norm": 1.4346717169947016, - "learning_rate": 1.168696667570853e-06, - "loss": 1.1454, - "step": 3413 - }, - { - "epoch": 0.4628211211279062, - "grad_norm": 1.4551661726266283, - "learning_rate": 1.1682638071208532e-06, - "loss": 1.1286, - "step": 3414 - }, - { - "epoch": 0.46295668677557106, - "grad_norm": 1.6380596965761713, - "learning_rate": 1.1678309142225062e-06, - "loss": 1.159, - "step": 3415 - }, - { - "epoch": 0.46309225242323593, - "grad_norm": 1.4816350233813613, - "learning_rate": 1.1673979889592923e-06, - "loss": 1.1453, - "step": 3416 - }, - { - "epoch": 0.46322781807090085, - "grad_norm": 2.0795376549099465, - "learning_rate": 1.1669650314146973e-06, - "loss": 1.1836, - "step": 3417 - }, - { - "epoch": 0.4633633837185657, - "grad_norm": 1.7188072156364993, - "learning_rate": 1.166532041672214e-06, - "loss": 1.183, - "step": 3418 - }, - { - "epoch": 0.4634989493662306, - "grad_norm": 1.9669977990323493, - "learning_rate": 1.166099019815341e-06, - "loss": 1.1177, - "step": 3419 - }, - { - "epoch": 0.46363451501389547, - "grad_norm": 1.5229918611264304, - "learning_rate": 1.1656659659275835e-06, - "loss": 1.1537, - "step": 3420 - }, - { - "epoch": 0.46377008066156034, - "grad_norm": 1.754452511779192, - "learning_rate": 1.1652328800924517e-06, - "loss": 1.1291, - "step": 3421 - }, - { - "epoch": 0.46390564630922526, - "grad_norm": 2.090700064945903, - "learning_rate": 1.1647997623934636e-06, - "loss": 1.1486, - "step": 3422 - }, - { - "epoch": 0.46404121195689013, - "grad_norm": 1.4184816729364818, - "learning_rate": 1.164366612914142e-06, - "loss": 1.137, - "step": 3423 - }, - { - "epoch": 0.464176777604555, - "grad_norm": 2.539394226587885, - "learning_rate": 1.1639334317380164e-06, - "loss": 1.1209, - "step": 3424 - }, - { - "epoch": 0.4643123432522199, - "grad_norm": 1.714356764268565, - "learning_rate": 1.1635002189486228e-06, - "loss": 1.1663, - "step": 3425 - }, - { - "epoch": 0.46444790889988474, - "grad_norm": 2.154329039942759, - "learning_rate": 1.1630669746295022e-06, - "loss": 1.1853, - "step": 3426 - }, - { - "epoch": 0.46458347454754967, - "grad_norm": 1.4852294414370182, - "learning_rate": 1.1626336988642029e-06, - "loss": 1.1504, - "step": 3427 - }, - { - "epoch": 0.46471904019521454, - "grad_norm": 2.305827956312661, - "learning_rate": 1.1622003917362788e-06, - "loss": 1.1335, - "step": 3428 - }, - { - "epoch": 0.4648546058428794, - "grad_norm": 1.6923961469874318, - "learning_rate": 1.1617670533292892e-06, - "loss": 1.1414, - "step": 3429 - }, - { - "epoch": 0.4649901714905443, - "grad_norm": 1.5822904527013693, - "learning_rate": 1.1613336837268001e-06, - "loss": 1.1421, - "step": 3430 - }, - { - "epoch": 0.4651257371382092, - "grad_norm": 1.892925694541542, - "learning_rate": 1.1609002830123837e-06, - "loss": 1.1676, - "step": 3431 - }, - { - "epoch": 0.4652613027858741, - "grad_norm": 2.2258424193966246, - "learning_rate": 1.1604668512696179e-06, - "loss": 1.1927, - "step": 3432 - }, - { - "epoch": 0.46539686843353895, - "grad_norm": 1.7296071023994972, - "learning_rate": 1.1600333885820867e-06, - "loss": 1.146, - "step": 3433 - }, - { - "epoch": 0.4655324340812038, - "grad_norm": 2.4518343035333463, - "learning_rate": 1.1595998950333793e-06, - "loss": 1.1488, - "step": 3434 - }, - { - "epoch": 0.4656679997288687, - "grad_norm": 1.7391401923269687, - "learning_rate": 1.159166370707092e-06, - "loss": 1.174, - "step": 3435 - }, - { - "epoch": 0.4658035653765336, - "grad_norm": 1.552300165, - "learning_rate": 1.1587328156868266e-06, - "loss": 1.2169, - "step": 3436 - }, - { - "epoch": 0.4659391310241985, - "grad_norm": 2.0678483330885933, - "learning_rate": 1.1582992300561906e-06, - "loss": 1.1984, - "step": 3437 - }, - { - "epoch": 0.46607469667186335, - "grad_norm": 3.927103582864385, - "learning_rate": 1.157865613898798e-06, - "loss": 1.1383, - "step": 3438 - }, - { - "epoch": 0.4662102623195282, - "grad_norm": 2.1019650022546186, - "learning_rate": 1.1574319672982673e-06, - "loss": 1.1243, - "step": 3439 - }, - { - "epoch": 0.4663458279671931, - "grad_norm": 1.420555724650725, - "learning_rate": 1.1569982903382247e-06, - "loss": 1.1385, - "step": 3440 - }, - { - "epoch": 0.466481393614858, - "grad_norm": 1.4983658405408697, - "learning_rate": 1.156564583102301e-06, - "loss": 1.0997, - "step": 3441 - }, - { - "epoch": 0.4666169592625229, - "grad_norm": 1.3944877221710248, - "learning_rate": 1.1561308456741336e-06, - "loss": 1.145, - "step": 3442 - }, - { - "epoch": 0.46675252491018776, - "grad_norm": 1.6939676094159934, - "learning_rate": 1.1556970781373648e-06, - "loss": 1.1692, - "step": 3443 - }, - { - "epoch": 0.46688809055785263, - "grad_norm": 1.6577709222647992, - "learning_rate": 1.1552632805756436e-06, - "loss": 1.1551, - "step": 3444 - }, - { - "epoch": 0.4670236562055175, - "grad_norm": 1.6221387141361685, - "learning_rate": 1.154829453072624e-06, - "loss": 1.1744, - "step": 3445 - }, - { - "epoch": 0.4671592218531824, - "grad_norm": 1.4934217912831786, - "learning_rate": 1.1543955957119667e-06, - "loss": 1.1541, - "step": 3446 - }, - { - "epoch": 0.4672947875008473, - "grad_norm": 1.5753337785725665, - "learning_rate": 1.1539617085773373e-06, - "loss": 1.1297, - "step": 3447 - }, - { - "epoch": 0.46743035314851217, - "grad_norm": 2.042005952754216, - "learning_rate": 1.1535277917524079e-06, - "loss": 1.1635, - "step": 3448 - }, - { - "epoch": 0.46756591879617704, - "grad_norm": 1.9486029801667553, - "learning_rate": 1.153093845320856e-06, - "loss": 1.1567, - "step": 3449 - }, - { - "epoch": 0.4677014844438419, - "grad_norm": 2.897086707218251, - "learning_rate": 1.152659869366364e-06, - "loss": 1.1482, - "step": 3450 - }, - { - "epoch": 0.46783705009150683, - "grad_norm": 1.675148485588197, - "learning_rate": 1.1522258639726215e-06, - "loss": 1.1651, - "step": 3451 - }, - { - "epoch": 0.4679726157391717, - "grad_norm": 1.449241037723842, - "learning_rate": 1.1517918292233226e-06, - "loss": 1.1369, - "step": 3452 - }, - { - "epoch": 0.4681081813868366, - "grad_norm": 2.42959256914325, - "learning_rate": 1.1513577652021678e-06, - "loss": 1.1761, - "step": 3453 - }, - { - "epoch": 0.46824374703450145, - "grad_norm": 2.146067508865502, - "learning_rate": 1.1509236719928627e-06, - "loss": 1.1762, - "step": 3454 - }, - { - "epoch": 0.4683793126821663, - "grad_norm": 1.5734806144850175, - "learning_rate": 1.1504895496791185e-06, - "loss": 1.1689, - "step": 3455 - }, - { - "epoch": 0.46851487832983124, - "grad_norm": 2.440662532404865, - "learning_rate": 1.1500553983446526e-06, - "loss": 1.1709, - "step": 3456 - }, - { - "epoch": 0.4686504439774961, - "grad_norm": 1.8476485891967291, - "learning_rate": 1.1496212180731877e-06, - "loss": 1.1846, - "step": 3457 - }, - { - "epoch": 0.468786009625161, - "grad_norm": 1.598923796792217, - "learning_rate": 1.149187008948452e-06, - "loss": 1.1983, - "step": 3458 - }, - { - "epoch": 0.46892157527282585, - "grad_norm": 1.5757200734586951, - "learning_rate": 1.1487527710541794e-06, - "loss": 1.1323, - "step": 3459 - }, - { - "epoch": 0.4690571409204907, - "grad_norm": 1.5817369966169483, - "learning_rate": 1.1483185044741088e-06, - "loss": 1.1255, - "step": 3460 - }, - { - "epoch": 0.46919270656815565, - "grad_norm": 1.4610154660583967, - "learning_rate": 1.1478842092919854e-06, - "loss": 1.1443, - "step": 3461 - }, - { - "epoch": 0.4693282722158205, - "grad_norm": 1.9515605073465088, - "learning_rate": 1.1474498855915596e-06, - "loss": 1.1242, - "step": 3462 - }, - { - "epoch": 0.4694638378634854, - "grad_norm": 1.9907881192487036, - "learning_rate": 1.1470155334565869e-06, - "loss": 1.1843, - "step": 3463 - }, - { - "epoch": 0.46959940351115026, - "grad_norm": 1.7901217422935565, - "learning_rate": 1.1465811529708295e-06, - "loss": 1.1478, - "step": 3464 - }, - { - "epoch": 0.46973496915881513, - "grad_norm": 2.5006089395308853, - "learning_rate": 1.1461467442180537e-06, - "loss": 1.1617, - "step": 3465 - }, - { - "epoch": 0.46987053480648006, - "grad_norm": 2.414645502083628, - "learning_rate": 1.1457123072820319e-06, - "loss": 1.1659, - "step": 3466 - }, - { - "epoch": 0.4700061004541449, - "grad_norm": 1.641511730048037, - "learning_rate": 1.1452778422465416e-06, - "loss": 1.1494, - "step": 3467 - }, - { - "epoch": 0.4701416661018098, - "grad_norm": 1.6361453295987942, - "learning_rate": 1.1448433491953665e-06, - "loss": 1.1805, - "step": 3468 - }, - { - "epoch": 0.47027723174947467, - "grad_norm": 1.5832396902165105, - "learning_rate": 1.1444088282122945e-06, - "loss": 1.1744, - "step": 3469 - }, - { - "epoch": 0.47041279739713954, - "grad_norm": 1.6795600103588808, - "learning_rate": 1.1439742793811205e-06, - "loss": 1.1667, - "step": 3470 - }, - { - "epoch": 0.47054836304480446, - "grad_norm": 1.9030930538895539, - "learning_rate": 1.1435397027856425e-06, - "loss": 1.1455, - "step": 3471 - }, - { - "epoch": 0.47068392869246933, - "grad_norm": 1.4848307805859566, - "learning_rate": 1.1431050985096663e-06, - "loss": 1.135, - "step": 3472 - }, - { - "epoch": 0.4708194943401342, - "grad_norm": 1.5981666669549186, - "learning_rate": 1.142670466637001e-06, - "loss": 1.1856, - "step": 3473 - }, - { - "epoch": 0.4709550599877991, - "grad_norm": 1.4113213201107069, - "learning_rate": 1.142235807251463e-06, - "loss": 1.1278, - "step": 3474 - }, - { - "epoch": 0.471090625635464, - "grad_norm": 2.1632224331304153, - "learning_rate": 1.1418011204368717e-06, - "loss": 1.1493, - "step": 3475 - }, - { - "epoch": 0.47122619128312887, - "grad_norm": 2.0273461490755165, - "learning_rate": 1.1413664062770538e-06, - "loss": 1.1192, - "step": 3476 - }, - { - "epoch": 0.47136175693079374, - "grad_norm": 1.5991104647776848, - "learning_rate": 1.1409316648558404e-06, - "loss": 1.1343, - "step": 3477 - }, - { - "epoch": 0.4714973225784586, - "grad_norm": 1.4689919523373078, - "learning_rate": 1.140496896257068e-06, - "loss": 1.1631, - "step": 3478 - }, - { - "epoch": 0.4716328882261235, - "grad_norm": 1.7555138779640804, - "learning_rate": 1.140062100564578e-06, - "loss": 1.1303, - "step": 3479 - }, - { - "epoch": 0.4717684538737884, - "grad_norm": 1.5018546144556142, - "learning_rate": 1.1396272778622175e-06, - "loss": 1.1531, - "step": 3480 - }, - { - "epoch": 0.4719040195214533, - "grad_norm": 1.4964562484280601, - "learning_rate": 1.1391924282338388e-06, - "loss": 1.1312, - "step": 3481 - }, - { - "epoch": 0.47203958516911815, - "grad_norm": 1.5680142847535083, - "learning_rate": 1.1387575517632987e-06, - "loss": 1.1093, - "step": 3482 - }, - { - "epoch": 0.472175150816783, - "grad_norm": 1.7117594713547797, - "learning_rate": 1.1383226485344604e-06, - "loss": 1.1148, - "step": 3483 - }, - { - "epoch": 0.4723107164644479, - "grad_norm": 1.5501612860609972, - "learning_rate": 1.137887718631191e-06, - "loss": 1.1743, - "step": 3484 - }, - { - "epoch": 0.4724462821121128, - "grad_norm": 1.5014904146999122, - "learning_rate": 1.1374527621373636e-06, - "loss": 1.1496, - "step": 3485 - }, - { - "epoch": 0.4725818477597777, - "grad_norm": 1.6575657562580006, - "learning_rate": 1.1370177791368558e-06, - "loss": 1.1271, - "step": 3486 - }, - { - "epoch": 0.47271741340744255, - "grad_norm": 1.5079871303232595, - "learning_rate": 1.136582769713551e-06, - "loss": 1.1437, - "step": 3487 - }, - { - "epoch": 0.4728529790551074, - "grad_norm": 1.3974102484443782, - "learning_rate": 1.136147733951337e-06, - "loss": 1.1295, - "step": 3488 - }, - { - "epoch": 0.4729885447027723, - "grad_norm": 1.6424085656564664, - "learning_rate": 1.1357126719341076e-06, - "loss": 1.1472, - "step": 3489 - }, - { - "epoch": 0.4731241103504372, - "grad_norm": 1.9094533572269357, - "learning_rate": 1.1352775837457605e-06, - "loss": 1.1398, - "step": 3490 - }, - { - "epoch": 0.4732596759981021, - "grad_norm": 1.8811731682995831, - "learning_rate": 1.134842469470199e-06, - "loss": 1.1599, - "step": 3491 - }, - { - "epoch": 0.47339524164576696, - "grad_norm": 1.5803695903490464, - "learning_rate": 1.1344073291913317e-06, - "loss": 1.1865, - "step": 3492 - }, - { - "epoch": 0.47353080729343183, - "grad_norm": 1.7041300096324976, - "learning_rate": 1.133972162993072e-06, - "loss": 1.1512, - "step": 3493 - }, - { - "epoch": 0.4736663729410967, - "grad_norm": 1.899278853905375, - "learning_rate": 1.1335369709593382e-06, - "loss": 1.1306, - "step": 3494 - }, - { - "epoch": 0.47380193858876163, - "grad_norm": 1.4353409014035914, - "learning_rate": 1.1331017531740533e-06, - "loss": 1.1394, - "step": 3495 - }, - { - "epoch": 0.4739375042364265, - "grad_norm": 1.7215127423678345, - "learning_rate": 1.132666509721146e-06, - "loss": 1.1692, - "step": 3496 - }, - { - "epoch": 0.47407306988409137, - "grad_norm": 1.4851389769314325, - "learning_rate": 1.1322312406845498e-06, - "loss": 1.1525, - "step": 3497 - }, - { - "epoch": 0.47420863553175624, - "grad_norm": 1.8688507473367009, - "learning_rate": 1.1317959461482028e-06, - "loss": 1.1738, - "step": 3498 - }, - { - "epoch": 0.4743442011794211, - "grad_norm": 1.7946664777533146, - "learning_rate": 1.1313606261960475e-06, - "loss": 1.1564, - "step": 3499 - }, - { - "epoch": 0.47447976682708604, - "grad_norm": 1.5953237606369206, - "learning_rate": 1.1309252809120324e-06, - "loss": 1.1249, - "step": 3500 - }, - { - "epoch": 0.4746153324747509, - "grad_norm": 1.591117908241102, - "learning_rate": 1.1304899103801105e-06, - "loss": 1.177, - "step": 3501 - }, - { - "epoch": 0.4747508981224158, - "grad_norm": 1.576464722521765, - "learning_rate": 1.1300545146842393e-06, - "loss": 1.1297, - "step": 3502 - }, - { - "epoch": 0.47488646377008065, - "grad_norm": 1.7142510863238611, - "learning_rate": 1.1296190939083815e-06, - "loss": 1.1366, - "step": 3503 - }, - { - "epoch": 0.4750220294177455, - "grad_norm": 1.669074597119181, - "learning_rate": 1.1291836481365045e-06, - "loss": 1.1491, - "step": 3504 - }, - { - "epoch": 0.47515759506541044, - "grad_norm": 1.555550288970278, - "learning_rate": 1.128748177452581e-06, - "loss": 1.1202, - "step": 3505 - }, - { - "epoch": 0.4752931607130753, - "grad_norm": 4.634173967072817, - "learning_rate": 1.1283126819405873e-06, - "loss": 1.1748, - "step": 3506 - }, - { - "epoch": 0.4754287263607402, - "grad_norm": 1.5116326540967229, - "learning_rate": 1.127877161684506e-06, - "loss": 1.1451, - "step": 3507 - }, - { - "epoch": 0.47556429200840505, - "grad_norm": 1.495431225140549, - "learning_rate": 1.1274416167683234e-06, - "loss": 1.1434, - "step": 3508 - }, - { - "epoch": 0.4756998576560699, - "grad_norm": 1.6959891766493962, - "learning_rate": 1.127006047276031e-06, - "loss": 1.1172, - "step": 3509 - }, - { - "epoch": 0.47583542330373485, - "grad_norm": 4.000128354779319, - "learning_rate": 1.126570453291625e-06, - "loss": 1.1503, - "step": 3510 - }, - { - "epoch": 0.4759709889513997, - "grad_norm": 2.8464616867088197, - "learning_rate": 1.126134834899106e-06, - "loss": 1.1731, - "step": 3511 - }, - { - "epoch": 0.4761065545990646, - "grad_norm": 1.6957230803588321, - "learning_rate": 1.1256991921824798e-06, - "loss": 1.1639, - "step": 3512 - }, - { - "epoch": 0.47624212024672946, - "grad_norm": 1.8968857885985984, - "learning_rate": 1.1252635252257567e-06, - "loss": 1.1343, - "step": 3513 - }, - { - "epoch": 0.4763776858943944, - "grad_norm": 1.6626059862878253, - "learning_rate": 1.1248278341129516e-06, - "loss": 1.0962, - "step": 3514 - }, - { - "epoch": 0.47651325154205926, - "grad_norm": 2.225410002522759, - "learning_rate": 1.1243921189280838e-06, - "loss": 1.1434, - "step": 3515 - }, - { - "epoch": 0.4766488171897241, - "grad_norm": 3.4144896655371038, - "learning_rate": 1.1239563797551777e-06, - "loss": 1.1508, - "step": 3516 - }, - { - "epoch": 0.476784382837389, - "grad_norm": 1.7368280834202425, - "learning_rate": 1.1235206166782622e-06, - "loss": 1.1438, - "step": 3517 - }, - { - "epoch": 0.47691994848505387, - "grad_norm": 2.0270855601006685, - "learning_rate": 1.1230848297813712e-06, - "loss": 1.1373, - "step": 3518 - }, - { - "epoch": 0.4770555141327188, - "grad_norm": 1.870470398959643, - "learning_rate": 1.122649019148542e-06, - "loss": 1.1197, - "step": 3519 - }, - { - "epoch": 0.47719107978038366, - "grad_norm": 1.5050994827951414, - "learning_rate": 1.122213184863818e-06, - "loss": 1.1555, - "step": 3520 - }, - { - "epoch": 0.47732664542804853, - "grad_norm": 3.4789121565400585, - "learning_rate": 1.1217773270112454e-06, - "loss": 1.2026, - "step": 3521 - }, - { - "epoch": 0.4774622110757134, - "grad_norm": 1.615078589655677, - "learning_rate": 1.121341445674877e-06, - "loss": 1.1274, - "step": 3522 - }, - { - "epoch": 0.4775977767233783, - "grad_norm": 1.5757036239671598, - "learning_rate": 1.1209055409387682e-06, - "loss": 1.1342, - "step": 3523 - }, - { - "epoch": 0.4777333423710432, - "grad_norm": 1.5746144315243455, - "learning_rate": 1.1204696128869803e-06, - "loss": 1.1086, - "step": 3524 - }, - { - "epoch": 0.47786890801870807, - "grad_norm": 1.8820150395686257, - "learning_rate": 1.1200336616035788e-06, - "loss": 1.1105, - "step": 3525 - }, - { - "epoch": 0.47800447366637294, - "grad_norm": 1.8742291476026685, - "learning_rate": 1.1195976871726332e-06, - "loss": 1.1635, - "step": 3526 - }, - { - "epoch": 0.4781400393140378, - "grad_norm": 1.951524037742861, - "learning_rate": 1.1191616896782172e-06, - "loss": 1.1339, - "step": 3527 - }, - { - "epoch": 0.4782756049617027, - "grad_norm": 1.6014674126459307, - "learning_rate": 1.1187256692044103e-06, - "loss": 1.1659, - "step": 3528 - }, - { - "epoch": 0.4784111706093676, - "grad_norm": 3.475180086771282, - "learning_rate": 1.1182896258352949e-06, - "loss": 1.1619, - "step": 3529 - }, - { - "epoch": 0.4785467362570325, - "grad_norm": 1.6395769996318221, - "learning_rate": 1.1178535596549592e-06, - "loss": 1.119, - "step": 3530 - }, - { - "epoch": 0.47868230190469735, - "grad_norm": 1.6589074269184587, - "learning_rate": 1.1174174707474947e-06, - "loss": 1.1031, - "step": 3531 - }, - { - "epoch": 0.4788178675523622, - "grad_norm": 1.634600205398327, - "learning_rate": 1.116981359196998e-06, - "loss": 1.1775, - "step": 3532 - }, - { - "epoch": 0.4789534332000271, - "grad_norm": 1.6424491905147218, - "learning_rate": 1.116545225087569e-06, - "loss": 1.1547, - "step": 3533 - }, - { - "epoch": 0.479088998847692, - "grad_norm": 1.7856758618082655, - "learning_rate": 1.1161090685033138e-06, - "loss": 1.1723, - "step": 3534 - }, - { - "epoch": 0.4792245644953569, - "grad_norm": 1.6994810065764525, - "learning_rate": 1.1156728895283412e-06, - "loss": 1.1534, - "step": 3535 - }, - { - "epoch": 0.47936013014302176, - "grad_norm": 1.6267079556303796, - "learning_rate": 1.1152366882467647e-06, - "loss": 1.1543, - "step": 3536 - }, - { - "epoch": 0.4794956957906866, - "grad_norm": 1.726620665201554, - "learning_rate": 1.1148004647427027e-06, - "loss": 1.1547, - "step": 3537 - }, - { - "epoch": 0.4796312614383515, - "grad_norm": 1.8660509226275437, - "learning_rate": 1.114364219100277e-06, - "loss": 1.1362, - "step": 3538 - }, - { - "epoch": 0.4797668270860164, - "grad_norm": 1.839066248027463, - "learning_rate": 1.1139279514036147e-06, - "loss": 1.1383, - "step": 3539 - }, - { - "epoch": 0.4799023927336813, - "grad_norm": 1.7585818599694187, - "learning_rate": 1.1134916617368464e-06, - "loss": 1.1361, - "step": 3540 - }, - { - "epoch": 0.48003795838134616, - "grad_norm": 1.6698461643494962, - "learning_rate": 1.1130553501841066e-06, - "loss": 1.1247, - "step": 3541 - }, - { - "epoch": 0.48017352402901103, - "grad_norm": 3.6819199121130897, - "learning_rate": 1.112619016829535e-06, - "loss": 1.1579, - "step": 3542 - }, - { - "epoch": 0.4803090896766759, - "grad_norm": 1.6637489696940353, - "learning_rate": 1.1121826617572752e-06, - "loss": 1.1257, - "step": 3543 - }, - { - "epoch": 0.48044465532434083, - "grad_norm": 14.079769457726842, - "learning_rate": 1.1117462850514744e-06, - "loss": 1.1685, - "step": 3544 - }, - { - "epoch": 0.4805802209720057, - "grad_norm": 1.7584390000318697, - "learning_rate": 1.1113098867962844e-06, - "loss": 1.1958, - "step": 3545 - }, - { - "epoch": 0.48071578661967057, - "grad_norm": 1.6944694402627782, - "learning_rate": 1.1108734670758616e-06, - "loss": 1.1016, - "step": 3546 - }, - { - "epoch": 0.48085135226733544, - "grad_norm": 1.8434701576201205, - "learning_rate": 1.1104370259743659e-06, - "loss": 1.1935, - "step": 3547 - }, - { - "epoch": 0.4809869179150003, - "grad_norm": 1.618594862144222, - "learning_rate": 1.1100005635759612e-06, - "loss": 1.1344, - "step": 3548 - }, - { - "epoch": 0.48112248356266524, - "grad_norm": 1.6581809867725898, - "learning_rate": 1.1095640799648162e-06, - "loss": 1.1199, - "step": 3549 - }, - { - "epoch": 0.4812580492103301, - "grad_norm": 1.5292023469937734, - "learning_rate": 1.1091275752251035e-06, - "loss": 1.1378, - "step": 3550 - }, - { - "epoch": 0.481393614857995, - "grad_norm": 2.547324884475726, - "learning_rate": 1.1086910494409993e-06, - "loss": 1.1242, - "step": 3551 - }, - { - "epoch": 0.48152918050565985, - "grad_norm": 1.4587412753588946, - "learning_rate": 1.1082545026966841e-06, - "loss": 1.1125, - "step": 3552 - }, - { - "epoch": 0.4816647461533248, - "grad_norm": 2.0356807597178372, - "learning_rate": 1.1078179350763424e-06, - "loss": 1.1318, - "step": 3553 - }, - { - "epoch": 0.48180031180098964, - "grad_norm": 1.5597202029501183, - "learning_rate": 1.107381346664163e-06, - "loss": 1.1278, - "step": 3554 - }, - { - "epoch": 0.4819358774486545, - "grad_norm": 1.7286622544531334, - "learning_rate": 1.1069447375443386e-06, - "loss": 1.1483, - "step": 3555 - }, - { - "epoch": 0.4820714430963194, - "grad_norm": 2.060587130749749, - "learning_rate": 1.106508107801066e-06, - "loss": 1.1212, - "step": 3556 - }, - { - "epoch": 0.48220700874398426, - "grad_norm": 1.7196926523993636, - "learning_rate": 1.1060714575185453e-06, - "loss": 1.1518, - "step": 3557 - }, - { - "epoch": 0.4823425743916492, - "grad_norm": 1.41171119181269, - "learning_rate": 1.105634786780981e-06, - "loss": 1.1396, - "step": 3558 - }, - { - "epoch": 0.48247814003931405, - "grad_norm": 1.599562223284275, - "learning_rate": 1.105198095672582e-06, - "loss": 1.119, - "step": 3559 - }, - { - "epoch": 0.4826137056869789, - "grad_norm": 1.4917147032442624, - "learning_rate": 1.104761384277561e-06, - "loss": 1.1186, - "step": 3560 - }, - { - "epoch": 0.4827492713346438, - "grad_norm": 1.6168949740774694, - "learning_rate": 1.1043246526801338e-06, - "loss": 1.1587, - "step": 3561 - }, - { - "epoch": 0.48288483698230866, - "grad_norm": 1.4859103520353631, - "learning_rate": 1.1038879009645205e-06, - "loss": 1.1045, - "step": 3562 - }, - { - "epoch": 0.4830204026299736, - "grad_norm": 3.885561354927812, - "learning_rate": 1.103451129214946e-06, - "loss": 1.161, - "step": 3563 - }, - { - "epoch": 0.48315596827763846, - "grad_norm": 1.9221148348198203, - "learning_rate": 1.1030143375156375e-06, - "loss": 1.1527, - "step": 3564 - }, - { - "epoch": 0.48329153392530333, - "grad_norm": 1.75989723732532, - "learning_rate": 1.1025775259508275e-06, - "loss": 1.1733, - "step": 3565 - }, - { - "epoch": 0.4834270995729682, - "grad_norm": 1.7103540616345116, - "learning_rate": 1.1021406946047508e-06, - "loss": 1.1465, - "step": 3566 - }, - { - "epoch": 0.48356266522063307, - "grad_norm": 1.8311931828727706, - "learning_rate": 1.101703843561648e-06, - "loss": 1.1101, - "step": 3567 - }, - { - "epoch": 0.483698230868298, - "grad_norm": 1.807395435795349, - "learning_rate": 1.1012669729057615e-06, - "loss": 1.199, - "step": 3568 - }, - { - "epoch": 0.48383379651596287, - "grad_norm": 1.855809180092538, - "learning_rate": 1.1008300827213385e-06, - "loss": 1.175, - "step": 3569 - }, - { - "epoch": 0.48396936216362774, - "grad_norm": 2.0357714369253372, - "learning_rate": 1.10039317309263e-06, - "loss": 1.1949, - "step": 3570 - }, - { - "epoch": 0.4841049278112926, - "grad_norm": 1.6849342557682192, - "learning_rate": 1.0999562441038909e-06, - "loss": 1.1453, - "step": 3571 - }, - { - "epoch": 0.4842404934589575, - "grad_norm": 1.5414945499009933, - "learning_rate": 1.0995192958393785e-06, - "loss": 1.1649, - "step": 3572 - }, - { - "epoch": 0.4843760591066224, - "grad_norm": 1.4245061912298957, - "learning_rate": 1.099082328383356e-06, - "loss": 1.1538, - "step": 3573 - }, - { - "epoch": 0.4845116247542873, - "grad_norm": 1.3855599084535017, - "learning_rate": 1.098645341820088e-06, - "loss": 1.1401, - "step": 3574 - }, - { - "epoch": 0.48464719040195214, - "grad_norm": 1.658349376318634, - "learning_rate": 1.098208336233845e-06, - "loss": 1.1459, - "step": 3575 - }, - { - "epoch": 0.484782756049617, - "grad_norm": 2.4413000616109803, - "learning_rate": 1.0977713117088994e-06, - "loss": 1.1445, - "step": 3576 - }, - { - "epoch": 0.4849183216972819, - "grad_norm": 2.196368411649948, - "learning_rate": 1.097334268329528e-06, - "loss": 1.2057, - "step": 3577 - }, - { - "epoch": 0.4850538873449468, - "grad_norm": 1.8163540428859475, - "learning_rate": 1.0968972061800115e-06, - "loss": 1.1323, - "step": 3578 - }, - { - "epoch": 0.4851894529926117, - "grad_norm": 1.5120990782538632, - "learning_rate": 1.0964601253446332e-06, - "loss": 1.115, - "step": 3579 - }, - { - "epoch": 0.48532501864027655, - "grad_norm": 2.3518273107092185, - "learning_rate": 1.0960230259076817e-06, - "loss": 1.1383, - "step": 3580 - }, - { - "epoch": 0.4854605842879414, - "grad_norm": 1.5239199306453117, - "learning_rate": 1.0955859079534473e-06, - "loss": 1.1647, - "step": 3581 - }, - { - "epoch": 0.4855961499356063, - "grad_norm": 2.2092938308389054, - "learning_rate": 1.0951487715662253e-06, - "loss": 1.1311, - "step": 3582 - }, - { - "epoch": 0.4857317155832712, - "grad_norm": 1.852310506404215, - "learning_rate": 1.0947116168303137e-06, - "loss": 1.1266, - "step": 3583 - }, - { - "epoch": 0.4858672812309361, - "grad_norm": 1.3835570279171496, - "learning_rate": 1.0942744438300141e-06, - "loss": 1.1334, - "step": 3584 - }, - { - "epoch": 0.48600284687860096, - "grad_norm": 1.6111095015817956, - "learning_rate": 1.0938372526496324e-06, - "loss": 1.1367, - "step": 3585 - }, - { - "epoch": 0.48613841252626583, - "grad_norm": 1.332272235781251, - "learning_rate": 1.0934000433734772e-06, - "loss": 1.125, - "step": 3586 - }, - { - "epoch": 0.4862739781739307, - "grad_norm": 1.5291006896304649, - "learning_rate": 1.0929628160858611e-06, - "loss": 1.1486, - "step": 3587 - }, - { - "epoch": 0.4864095438215956, - "grad_norm": 1.6830807334701, - "learning_rate": 1.0925255708710994e-06, - "loss": 1.1507, - "step": 3588 - }, - { - "epoch": 0.4865451094692605, - "grad_norm": 1.471632599906325, - "learning_rate": 1.0920883078135118e-06, - "loss": 1.1904, - "step": 3589 - }, - { - "epoch": 0.48668067511692537, - "grad_norm": 1.8609444338989496, - "learning_rate": 1.0916510269974208e-06, - "loss": 1.1764, - "step": 3590 - }, - { - "epoch": 0.48681624076459024, - "grad_norm": 1.7713957800441362, - "learning_rate": 1.091213728507153e-06, - "loss": 1.1315, - "step": 3591 - }, - { - "epoch": 0.48695180641225516, - "grad_norm": 2.4900453775048654, - "learning_rate": 1.0907764124270374e-06, - "loss": 1.1617, - "step": 3592 - }, - { - "epoch": 0.48708737205992003, - "grad_norm": 1.9271141262861093, - "learning_rate": 1.0903390788414072e-06, - "loss": 1.143, - "step": 3593 - }, - { - "epoch": 0.4872229377075849, - "grad_norm": 1.4566165417455617, - "learning_rate": 1.089901727834599e-06, - "loss": 1.0852, - "step": 3594 - }, - { - "epoch": 0.4873585033552498, - "grad_norm": 1.5508365977653122, - "learning_rate": 1.0894643594909518e-06, - "loss": 1.1504, - "step": 3595 - }, - { - "epoch": 0.48749406900291464, - "grad_norm": 1.8506040839194813, - "learning_rate": 1.0890269738948096e-06, - "loss": 1.1416, - "step": 3596 - }, - { - "epoch": 0.48762963465057957, - "grad_norm": 1.4606664964243519, - "learning_rate": 1.088589571130518e-06, - "loss": 1.1479, - "step": 3597 - }, - { - "epoch": 0.48776520029824444, - "grad_norm": 1.986660481361705, - "learning_rate": 1.0881521512824268e-06, - "loss": 1.1576, - "step": 3598 - }, - { - "epoch": 0.4879007659459093, - "grad_norm": 1.8391449564072078, - "learning_rate": 1.0877147144348892e-06, - "loss": 1.1328, - "step": 3599 - }, - { - "epoch": 0.4880363315935742, - "grad_norm": 1.5244862049633476, - "learning_rate": 1.087277260672261e-06, - "loss": 1.1766, - "step": 3600 - }, - { - "epoch": 0.48817189724123905, - "grad_norm": 4.585203862917811, - "learning_rate": 1.0868397900789024e-06, - "loss": 1.155, - "step": 3601 - }, - { - "epoch": 0.488307462888904, - "grad_norm": 1.694488797285962, - "learning_rate": 1.0864023027391753e-06, - "loss": 1.1434, - "step": 3602 - }, - { - "epoch": 0.48844302853656885, - "grad_norm": 10.115804396934161, - "learning_rate": 1.0859647987374464e-06, - "loss": 1.1413, - "step": 3603 - }, - { - "epoch": 0.4885785941842337, - "grad_norm": 2.1493452001448725, - "learning_rate": 1.0855272781580846e-06, - "loss": 1.1608, - "step": 3604 - }, - { - "epoch": 0.4887141598318986, - "grad_norm": 1.5723223528194172, - "learning_rate": 1.0850897410854624e-06, - "loss": 1.118, - "step": 3605 - }, - { - "epoch": 0.48884972547956346, - "grad_norm": 1.5913617391687054, - "learning_rate": 1.084652187603955e-06, - "loss": 1.1671, - "step": 3606 - }, - { - "epoch": 0.4889852911272284, - "grad_norm": 1.751777917364118, - "learning_rate": 1.0842146177979418e-06, - "loss": 1.1794, - "step": 3607 - }, - { - "epoch": 0.48912085677489325, - "grad_norm": 1.719324861346867, - "learning_rate": 1.0837770317518043e-06, - "loss": 1.168, - "step": 3608 - }, - { - "epoch": 0.4892564224225581, - "grad_norm": 1.7670869890836165, - "learning_rate": 1.083339429549927e-06, - "loss": 1.1475, - "step": 3609 - }, - { - "epoch": 0.489391988070223, - "grad_norm": 1.459870902763866, - "learning_rate": 1.0829018112766993e-06, - "loss": 1.1177, - "step": 3610 - }, - { - "epoch": 0.48952755371788786, - "grad_norm": 1.75756387402939, - "learning_rate": 1.0824641770165112e-06, - "loss": 1.1597, - "step": 3611 - }, - { - "epoch": 0.4896631193655528, - "grad_norm": 1.5133482408390362, - "learning_rate": 1.0820265268537578e-06, - "loss": 1.2001, - "step": 3612 - }, - { - "epoch": 0.48979868501321766, - "grad_norm": 1.5308530949473897, - "learning_rate": 1.0815888608728359e-06, - "loss": 1.174, - "step": 3613 - }, - { - "epoch": 0.48993425066088253, - "grad_norm": 1.7066146410517056, - "learning_rate": 1.0811511791581463e-06, - "loss": 1.1538, - "step": 3614 - }, - { - "epoch": 0.4900698163085474, - "grad_norm": 2.216916690854367, - "learning_rate": 1.0807134817940923e-06, - "loss": 1.1613, - "step": 3615 - }, - { - "epoch": 0.49020538195621227, - "grad_norm": 1.652733013116373, - "learning_rate": 1.0802757688650805e-06, - "loss": 1.1849, - "step": 3616 - }, - { - "epoch": 0.4903409476038772, - "grad_norm": 1.5380584692950303, - "learning_rate": 1.0798380404555203e-06, - "loss": 1.1391, - "step": 3617 - }, - { - "epoch": 0.49047651325154207, - "grad_norm": 1.4933788104817227, - "learning_rate": 1.0794002966498246e-06, - "loss": 1.1093, - "step": 3618 - }, - { - "epoch": 0.49061207889920694, - "grad_norm": 2.514099117082194, - "learning_rate": 1.0789625375324078e-06, - "loss": 1.107, - "step": 3619 - }, - { - "epoch": 0.4907476445468718, - "grad_norm": 3.9613565064765224, - "learning_rate": 1.0785247631876892e-06, - "loss": 1.158, - "step": 3620 - }, - { - "epoch": 0.4908832101945367, - "grad_norm": 1.455276633418966, - "learning_rate": 1.0780869737000898e-06, - "loss": 1.1166, - "step": 3621 - }, - { - "epoch": 0.4910187758422016, - "grad_norm": 1.9438681092867243, - "learning_rate": 1.0776491691540342e-06, - "loss": 1.1494, - "step": 3622 - }, - { - "epoch": 0.4911543414898665, - "grad_norm": 1.4988356625843238, - "learning_rate": 1.077211349633949e-06, - "loss": 1.1793, - "step": 3623 - }, - { - "epoch": 0.49128990713753135, - "grad_norm": 1.7715508198827887, - "learning_rate": 1.0767735152242646e-06, - "loss": 1.1659, - "step": 3624 - }, - { - "epoch": 0.4914254727851962, - "grad_norm": 1.480427068908792, - "learning_rate": 1.0763356660094139e-06, - "loss": 1.1432, - "step": 3625 - }, - { - "epoch": 0.4915610384328611, - "grad_norm": 1.9875808485211517, - "learning_rate": 1.0758978020738323e-06, - "loss": 1.16, - "step": 3626 - }, - { - "epoch": 0.491696604080526, - "grad_norm": 1.7478768703554663, - "learning_rate": 1.0754599235019586e-06, - "loss": 1.1718, - "step": 3627 - }, - { - "epoch": 0.4918321697281909, - "grad_norm": 1.9555723776927423, - "learning_rate": 1.0750220303782345e-06, - "loss": 1.1648, - "step": 3628 - }, - { - "epoch": 0.49196773537585575, - "grad_norm": 1.5870595506694363, - "learning_rate": 1.074584122787104e-06, - "loss": 1.1758, - "step": 3629 - }, - { - "epoch": 0.4921033010235206, - "grad_norm": 1.5006773241771592, - "learning_rate": 1.074146200813014e-06, - "loss": 1.153, - "step": 3630 - }, - { - "epoch": 0.49223886667118555, - "grad_norm": 1.5631614136815106, - "learning_rate": 1.0737082645404147e-06, - "loss": 1.1025, - "step": 3631 - }, - { - "epoch": 0.4923744323188504, - "grad_norm": 1.6497289138725941, - "learning_rate": 1.0732703140537583e-06, - "loss": 1.1409, - "step": 3632 - }, - { - "epoch": 0.4925099979665153, - "grad_norm": 2.510305790751615, - "learning_rate": 1.0728323494375e-06, - "loss": 1.1443, - "step": 3633 - }, - { - "epoch": 0.49264556361418016, - "grad_norm": 1.694873716676153, - "learning_rate": 1.0723943707760984e-06, - "loss": 1.1738, - "step": 3634 - }, - { - "epoch": 0.49278112926184503, - "grad_norm": 2.4941820561688637, - "learning_rate": 1.0719563781540135e-06, - "loss": 1.1368, - "step": 3635 - }, - { - "epoch": 0.49291669490950996, - "grad_norm": 1.571842359996238, - "learning_rate": 1.071518371655709e-06, - "loss": 1.1606, - "step": 3636 - }, - { - "epoch": 0.4930522605571748, - "grad_norm": 2.97197829610926, - "learning_rate": 1.0710803513656514e-06, - "loss": 1.1555, - "step": 3637 - }, - { - "epoch": 0.4931878262048397, - "grad_norm": 2.0427206215293348, - "learning_rate": 1.0706423173683092e-06, - "loss": 1.1412, - "step": 3638 - }, - { - "epoch": 0.49332339185250457, - "grad_norm": 1.5917610832403006, - "learning_rate": 1.0702042697481536e-06, - "loss": 1.1419, - "step": 3639 - }, - { - "epoch": 0.49345895750016944, - "grad_norm": 2.189765919938558, - "learning_rate": 1.0697662085896583e-06, - "loss": 1.1527, - "step": 3640 - }, - { - "epoch": 0.49359452314783436, - "grad_norm": 3.8075447735845334, - "learning_rate": 1.0693281339773009e-06, - "loss": 1.1606, - "step": 3641 - }, - { - "epoch": 0.49373008879549923, - "grad_norm": 1.4672893303343444, - "learning_rate": 1.0688900459955596e-06, - "loss": 1.1377, - "step": 3642 - }, - { - "epoch": 0.4938656544431641, - "grad_norm": 4.226370464822672, - "learning_rate": 1.0684519447289171e-06, - "loss": 1.1098, - "step": 3643 - }, - { - "epoch": 0.494001220090829, - "grad_norm": 1.8562753725188814, - "learning_rate": 1.0680138302618572e-06, - "loss": 1.1524, - "step": 3644 - }, - { - "epoch": 0.49413678573849384, - "grad_norm": 1.5195636530093366, - "learning_rate": 1.0675757026788672e-06, - "loss": 1.1975, - "step": 3645 - }, - { - "epoch": 0.49427235138615877, - "grad_norm": 1.5069791121545708, - "learning_rate": 1.0671375620644363e-06, - "loss": 1.1394, - "step": 3646 - }, - { - "epoch": 0.49440791703382364, - "grad_norm": 2.018290895387507, - "learning_rate": 1.0666994085030563e-06, - "loss": 1.1227, - "step": 3647 - }, - { - "epoch": 0.4945434826814885, - "grad_norm": 1.8320517007479566, - "learning_rate": 1.066261242079222e-06, - "loss": 1.1245, - "step": 3648 - }, - { - "epoch": 0.4946790483291534, - "grad_norm": 4.749633734151363, - "learning_rate": 1.0658230628774302e-06, - "loss": 1.2089, - "step": 3649 - }, - { - "epoch": 0.49481461397681825, - "grad_norm": 1.7624989145233199, - "learning_rate": 1.0653848709821806e-06, - "loss": 1.1467, - "step": 3650 - }, - { - "epoch": 0.4949501796244832, - "grad_norm": 1.61451018754693, - "learning_rate": 1.0649466664779744e-06, - "loss": 1.1513, - "step": 3651 - }, - { - "epoch": 0.49508574527214805, - "grad_norm": 1.5505637657610423, - "learning_rate": 1.0645084494493164e-06, - "loss": 1.1617, - "step": 3652 - }, - { - "epoch": 0.4952213109198129, - "grad_norm": 1.7064563990422226, - "learning_rate": 1.064070219980713e-06, - "loss": 1.103, - "step": 3653 - }, - { - "epoch": 0.4953568765674778, - "grad_norm": 1.4640474180440175, - "learning_rate": 1.0636319781566736e-06, - "loss": 1.1331, - "step": 3654 - }, - { - "epoch": 0.49549244221514266, - "grad_norm": 1.8161999973380896, - "learning_rate": 1.0631937240617093e-06, - "loss": 1.1356, - "step": 3655 - }, - { - "epoch": 0.4956280078628076, - "grad_norm": 1.8423959417134634, - "learning_rate": 1.062755457780334e-06, - "loss": 1.151, - "step": 3656 - }, - { - "epoch": 0.49576357351047246, - "grad_norm": 1.9149671267522905, - "learning_rate": 1.0623171793970642e-06, - "loss": 1.0889, - "step": 3657 - }, - { - "epoch": 0.4958991391581373, - "grad_norm": 1.528626502649572, - "learning_rate": 1.0618788889964182e-06, - "loss": 1.1644, - "step": 3658 - }, - { - "epoch": 0.4960347048058022, - "grad_norm": 1.7782142760661765, - "learning_rate": 1.061440586662917e-06, - "loss": 1.1793, - "step": 3659 - }, - { - "epoch": 0.49617027045346707, - "grad_norm": 1.5355297194402997, - "learning_rate": 1.0610022724810837e-06, - "loss": 1.1849, - "step": 3660 - }, - { - "epoch": 0.496305836101132, - "grad_norm": 1.4072073961900664, - "learning_rate": 1.0605639465354435e-06, - "loss": 1.1453, - "step": 3661 - }, - { - "epoch": 0.49644140174879686, - "grad_norm": 1.4463208894312496, - "learning_rate": 1.0601256089105242e-06, - "loss": 1.1659, - "step": 3662 - }, - { - "epoch": 0.49657696739646173, - "grad_norm": 3.2763275339156945, - "learning_rate": 1.059687259690856e-06, - "loss": 1.1483, - "step": 3663 - }, - { - "epoch": 0.4967125330441266, - "grad_norm": 1.7701030785086718, - "learning_rate": 1.0592488989609708e-06, - "loss": 1.1367, - "step": 3664 - }, - { - "epoch": 0.4968480986917915, - "grad_norm": 1.7907577389580676, - "learning_rate": 1.0588105268054032e-06, - "loss": 1.1433, - "step": 3665 - }, - { - "epoch": 0.4969836643394564, - "grad_norm": 1.5954810073016321, - "learning_rate": 1.0583721433086899e-06, - "loss": 1.1688, - "step": 3666 - }, - { - "epoch": 0.49711922998712127, - "grad_norm": 2.4753086292965114, - "learning_rate": 1.0579337485553695e-06, - "loss": 1.1478, - "step": 3667 - }, - { - "epoch": 0.49725479563478614, - "grad_norm": 1.5247498763310616, - "learning_rate": 1.0574953426299825e-06, - "loss": 1.1729, - "step": 3668 - }, - { - "epoch": 0.497390361282451, - "grad_norm": 1.5058008125974485, - "learning_rate": 1.057056925617073e-06, - "loss": 1.1473, - "step": 3669 - }, - { - "epoch": 0.49752592693011594, - "grad_norm": 1.574996421534381, - "learning_rate": 1.0566184976011855e-06, - "loss": 1.131, - "step": 3670 - }, - { - "epoch": 0.4976614925777808, - "grad_norm": 1.6220675743569348, - "learning_rate": 1.0561800586668678e-06, - "loss": 1.1561, - "step": 3671 - }, - { - "epoch": 0.4977970582254457, - "grad_norm": 1.6016358677118139, - "learning_rate": 1.0557416088986692e-06, - "loss": 1.12, - "step": 3672 - }, - { - "epoch": 0.49793262387311055, - "grad_norm": 1.6920729219608819, - "learning_rate": 1.0553031483811414e-06, - "loss": 1.1398, - "step": 3673 - }, - { - "epoch": 0.4980681895207754, - "grad_norm": 1.479376435842318, - "learning_rate": 1.054864677198838e-06, - "loss": 1.13, - "step": 3674 - }, - { - "epoch": 0.49820375516844034, - "grad_norm": 1.5016128915376805, - "learning_rate": 1.0544261954363146e-06, - "loss": 1.1522, - "step": 3675 - }, - { - "epoch": 0.4983393208161052, - "grad_norm": 2.462371473609971, - "learning_rate": 1.0539877031781289e-06, - "loss": 1.147, - "step": 3676 - }, - { - "epoch": 0.4984748864637701, - "grad_norm": 6.884988825597979, - "learning_rate": 1.053549200508841e-06, - "loss": 1.1284, - "step": 3677 - }, - { - "epoch": 0.49861045211143495, - "grad_norm": 1.7378347440513875, - "learning_rate": 1.0531106875130123e-06, - "loss": 1.1408, - "step": 3678 - }, - { - "epoch": 0.4987460177590998, - "grad_norm": 4.671052269168908, - "learning_rate": 1.0526721642752069e-06, - "loss": 1.13, - "step": 3679 - }, - { - "epoch": 0.49888158340676475, - "grad_norm": 1.5749102780323372, - "learning_rate": 1.0522336308799904e-06, - "loss": 1.1245, - "step": 3680 - }, - { - "epoch": 0.4990171490544296, - "grad_norm": 1.5131360196593595, - "learning_rate": 1.0517950874119304e-06, - "loss": 1.1318, - "step": 3681 - }, - { - "epoch": 0.4991527147020945, - "grad_norm": 2.562763393362881, - "learning_rate": 1.0513565339555965e-06, - "loss": 1.1854, - "step": 3682 - }, - { - "epoch": 0.49928828034975936, - "grad_norm": 2.4026849492594207, - "learning_rate": 1.0509179705955607e-06, - "loss": 1.1602, - "step": 3683 - }, - { - "epoch": 0.49942384599742423, - "grad_norm": 1.84264408437198, - "learning_rate": 1.050479397416396e-06, - "loss": 1.1653, - "step": 3684 - }, - { - "epoch": 0.49955941164508916, - "grad_norm": 1.5782361793280155, - "learning_rate": 1.050040814502678e-06, - "loss": 1.1463, - "step": 3685 - }, - { - "epoch": 0.49969497729275403, - "grad_norm": 1.6220058909962602, - "learning_rate": 1.049602221938984e-06, - "loss": 1.1352, - "step": 3686 - }, - { - "epoch": 0.4998305429404189, - "grad_norm": 1.9568684814025437, - "learning_rate": 1.0491636198098932e-06, - "loss": 1.1538, - "step": 3687 - }, - { - "epoch": 0.49996610858808377, - "grad_norm": 1.6565553467673286, - "learning_rate": 1.048725008199986e-06, - "loss": 1.1485, - "step": 3688 - }, - { - "epoch": 0.5001016742357487, - "grad_norm": 2.242811777789825, - "learning_rate": 1.0482863871938459e-06, - "loss": 1.1412, - "step": 3689 - }, - { - "epoch": 0.5002372398834135, - "grad_norm": 1.6241588631858979, - "learning_rate": 1.047847756876057e-06, - "loss": 1.1695, - "step": 3690 - }, - { - "epoch": 0.5003728055310784, - "grad_norm": 1.5891348004163717, - "learning_rate": 1.0474091173312058e-06, - "loss": 1.161, - "step": 3691 - }, - { - "epoch": 0.5005083711787434, - "grad_norm": 1.7482768549375916, - "learning_rate": 1.0469704686438807e-06, - "loss": 1.1465, - "step": 3692 - }, - { - "epoch": 0.5006439368264082, - "grad_norm": 1.7935797948865637, - "learning_rate": 1.0465318108986713e-06, - "loss": 1.1507, - "step": 3693 - }, - { - "epoch": 0.5007795024740731, - "grad_norm": 1.3745606635336294, - "learning_rate": 1.04609314418017e-06, - "loss": 1.1712, - "step": 3694 - }, - { - "epoch": 0.5009150681217379, - "grad_norm": 1.5633039679717373, - "learning_rate": 1.045654468572969e-06, - "loss": 1.1558, - "step": 3695 - }, - { - "epoch": 0.5010506337694028, - "grad_norm": 1.4327804371129096, - "learning_rate": 1.0452157841616645e-06, - "loss": 1.1095, - "step": 3696 - }, - { - "epoch": 0.5011861994170678, - "grad_norm": 1.60910734150205, - "learning_rate": 1.044777091030853e-06, - "loss": 1.1416, - "step": 3697 - }, - { - "epoch": 0.5013217650647326, - "grad_norm": 1.5456345367732929, - "learning_rate": 1.0443383892651325e-06, - "loss": 1.1227, - "step": 3698 - }, - { - "epoch": 0.5014573307123975, - "grad_norm": 1.8706089301247457, - "learning_rate": 1.043899678949104e-06, - "loss": 1.1406, - "step": 3699 - }, - { - "epoch": 0.5015928963600623, - "grad_norm": 2.1068679581829497, - "learning_rate": 1.0434609601673687e-06, - "loss": 1.1365, - "step": 3700 - }, - { - "epoch": 0.5017284620077272, - "grad_norm": 1.4875713543742959, - "learning_rate": 1.0430222330045304e-06, - "loss": 1.1426, - "step": 3701 - }, - { - "epoch": 0.5018640276553922, - "grad_norm": 2.145547164029504, - "learning_rate": 1.0425834975451942e-06, - "loss": 1.2324, - "step": 3702 - }, - { - "epoch": 0.501999593303057, - "grad_norm": 2.0197221463151322, - "learning_rate": 1.0421447538739664e-06, - "loss": 1.1614, - "step": 3703 - }, - { - "epoch": 0.5021351589507219, - "grad_norm": 1.622471554879056, - "learning_rate": 1.0417060020754555e-06, - "loss": 1.1015, - "step": 3704 - }, - { - "epoch": 0.5022707245983867, - "grad_norm": 1.4906523777739624, - "learning_rate": 1.0412672422342714e-06, - "loss": 1.1422, - "step": 3705 - }, - { - "epoch": 0.5024062902460517, - "grad_norm": 1.4515354655512587, - "learning_rate": 1.0408284744350255e-06, - "loss": 1.1034, - "step": 3706 - }, - { - "epoch": 0.5025418558937166, - "grad_norm": 2.060809831944587, - "learning_rate": 1.0403896987623304e-06, - "loss": 1.154, - "step": 3707 - }, - { - "epoch": 0.5026774215413814, - "grad_norm": 1.4577985200057395, - "learning_rate": 1.039950915300801e-06, - "loss": 1.1582, - "step": 3708 - }, - { - "epoch": 0.5028129871890463, - "grad_norm": 1.4740277895083194, - "learning_rate": 1.039512124135053e-06, - "loss": 1.0987, - "step": 3709 - }, - { - "epoch": 0.5029485528367111, - "grad_norm": 4.0482080494936765, - "learning_rate": 1.0390733253497033e-06, - "loss": 1.1539, - "step": 3710 - }, - { - "epoch": 0.5030841184843761, - "grad_norm": 8.210457208192071, - "learning_rate": 1.0386345190293714e-06, - "loss": 1.1181, - "step": 3711 - }, - { - "epoch": 0.503219684132041, - "grad_norm": 3.575353482584083, - "learning_rate": 1.0381957052586774e-06, - "loss": 1.1223, - "step": 3712 - }, - { - "epoch": 0.5033552497797058, - "grad_norm": 1.7502711882166442, - "learning_rate": 1.037756884122243e-06, - "loss": 1.1383, - "step": 3713 - }, - { - "epoch": 0.5034908154273707, - "grad_norm": 1.4819637585915433, - "learning_rate": 1.037318055704692e-06, - "loss": 1.1308, - "step": 3714 - }, - { - "epoch": 0.5036263810750355, - "grad_norm": 1.4303085087055922, - "learning_rate": 1.0368792200906482e-06, - "loss": 1.1203, - "step": 3715 - }, - { - "epoch": 0.5037619467227005, - "grad_norm": 1.7089172131133035, - "learning_rate": 1.0364403773647379e-06, - "loss": 1.1069, - "step": 3716 - }, - { - "epoch": 0.5038975123703654, - "grad_norm": 1.9181995171101505, - "learning_rate": 1.0360015276115888e-06, - "loss": 1.1598, - "step": 3717 - }, - { - "epoch": 0.5040330780180302, - "grad_norm": 1.7887625357502435, - "learning_rate": 1.035562670915829e-06, - "loss": 1.1428, - "step": 3718 - }, - { - "epoch": 0.5041686436656951, - "grad_norm": 1.4185976702587364, - "learning_rate": 1.0351238073620887e-06, - "loss": 1.1554, - "step": 3719 - }, - { - "epoch": 0.50430420931336, - "grad_norm": 1.8658457628501681, - "learning_rate": 1.0346849370349997e-06, - "loss": 1.1357, - "step": 3720 - }, - { - "epoch": 0.5044397749610249, - "grad_norm": 1.8978550323440646, - "learning_rate": 1.0342460600191942e-06, - "loss": 1.1603, - "step": 3721 - }, - { - "epoch": 0.5045753406086898, - "grad_norm": 2.4026477978676435, - "learning_rate": 1.0338071763993065e-06, - "loss": 1.1338, - "step": 3722 - }, - { - "epoch": 0.5047109062563546, - "grad_norm": 1.5817287276038865, - "learning_rate": 1.0333682862599714e-06, - "loss": 1.1603, - "step": 3723 - }, - { - "epoch": 0.5048464719040195, - "grad_norm": 1.5052837864743007, - "learning_rate": 1.032929389685826e-06, - "loss": 1.1054, - "step": 3724 - }, - { - "epoch": 0.5049820375516844, - "grad_norm": 1.6332221142612835, - "learning_rate": 1.0324904867615077e-06, - "loss": 1.1864, - "step": 3725 - }, - { - "epoch": 0.5051176031993493, - "grad_norm": 1.667317259692694, - "learning_rate": 1.0320515775716554e-06, - "loss": 1.1444, - "step": 3726 - }, - { - "epoch": 0.5052531688470142, - "grad_norm": 2.77999413096949, - "learning_rate": 1.0316126622009092e-06, - "loss": 1.1319, - "step": 3727 - }, - { - "epoch": 0.505388734494679, - "grad_norm": 2.4620416029528505, - "learning_rate": 1.0311737407339106e-06, - "loss": 1.1346, - "step": 3728 - }, - { - "epoch": 0.505524300142344, - "grad_norm": 1.8470012609819948, - "learning_rate": 1.0307348132553024e-06, - "loss": 1.1191, - "step": 3729 - }, - { - "epoch": 0.5056598657900088, - "grad_norm": 1.6012391633145808, - "learning_rate": 1.030295879849728e-06, - "loss": 1.1615, - "step": 3730 - }, - { - "epoch": 0.5057954314376737, - "grad_norm": 1.9865130268321027, - "learning_rate": 1.0298569406018325e-06, - "loss": 1.1564, - "step": 3731 - }, - { - "epoch": 0.5059309970853386, - "grad_norm": 1.7475169806821904, - "learning_rate": 1.0294179955962614e-06, - "loss": 1.1183, - "step": 3732 - }, - { - "epoch": 0.5060665627330034, - "grad_norm": 1.476561925148515, - "learning_rate": 1.0289790449176622e-06, - "loss": 1.1647, - "step": 3733 - }, - { - "epoch": 0.5062021283806684, - "grad_norm": 3.7322238048386684, - "learning_rate": 1.0285400886506828e-06, - "loss": 1.1303, - "step": 3734 - }, - { - "epoch": 0.5063376940283332, - "grad_norm": 1.5802359990046724, - "learning_rate": 1.0281011268799726e-06, - "loss": 1.124, - "step": 3735 - }, - { - "epoch": 0.5064732596759981, - "grad_norm": 1.7648532923324178, - "learning_rate": 1.0276621596901821e-06, - "loss": 1.1369, - "step": 3736 - }, - { - "epoch": 0.506608825323663, - "grad_norm": 1.9678365201587165, - "learning_rate": 1.0272231871659624e-06, - "loss": 1.1269, - "step": 3737 - }, - { - "epoch": 0.5067443909713278, - "grad_norm": 1.6460921394053207, - "learning_rate": 1.026784209391966e-06, - "loss": 1.1502, - "step": 3738 - }, - { - "epoch": 0.5068799566189928, - "grad_norm": 2.1078601090317513, - "learning_rate": 1.026345226452846e-06, - "loss": 1.1538, - "step": 3739 - }, - { - "epoch": 0.5070155222666576, - "grad_norm": 1.8495175400994377, - "learning_rate": 1.0259062384332573e-06, - "loss": 1.137, - "step": 3740 - }, - { - "epoch": 0.5071510879143225, - "grad_norm": 3.233473973081341, - "learning_rate": 1.0254672454178547e-06, - "loss": 1.1552, - "step": 3741 - }, - { - "epoch": 0.5072866535619874, - "grad_norm": 1.4502085510720206, - "learning_rate": 1.0250282474912952e-06, - "loss": 1.1478, - "step": 3742 - }, - { - "epoch": 0.5074222192096522, - "grad_norm": 1.5794972799973739, - "learning_rate": 1.0245892447382354e-06, - "loss": 1.1338, - "step": 3743 - }, - { - "epoch": 0.5075577848573172, - "grad_norm": 3.0150658976470686, - "learning_rate": 1.0241502372433342e-06, - "loss": 1.1419, - "step": 3744 - }, - { - "epoch": 0.507693350504982, - "grad_norm": 1.4966937717247666, - "learning_rate": 1.02371122509125e-06, - "loss": 1.1361, - "step": 3745 - }, - { - "epoch": 0.5078289161526469, - "grad_norm": 1.4680119866043455, - "learning_rate": 1.0232722083666435e-06, - "loss": 1.1444, - "step": 3746 - }, - { - "epoch": 0.5079644818003118, - "grad_norm": 1.4469558645020175, - "learning_rate": 1.022833187154175e-06, - "loss": 1.1421, - "step": 3747 - }, - { - "epoch": 0.5081000474479767, - "grad_norm": 1.7563004155939272, - "learning_rate": 1.022394161538507e-06, - "loss": 1.1327, - "step": 3748 - }, - { - "epoch": 0.5082356130956416, - "grad_norm": 15.751217277738931, - "learning_rate": 1.0219551316043016e-06, - "loss": 1.154, - "step": 3749 - }, - { - "epoch": 0.5083711787433064, - "grad_norm": 3.734338202354972, - "learning_rate": 1.0215160974362223e-06, - "loss": 1.0939, - "step": 3750 - }, - { - "epoch": 0.5085067443909713, - "grad_norm": 1.6821975704703676, - "learning_rate": 1.0210770591189333e-06, - "loss": 1.1108, - "step": 3751 - }, - { - "epoch": 0.5086423100386362, - "grad_norm": 1.5417800380967217, - "learning_rate": 1.0206380167371e-06, - "loss": 1.1462, - "step": 3752 - }, - { - "epoch": 0.5087778756863011, - "grad_norm": 3.676404480567771, - "learning_rate": 1.0201989703753881e-06, - "loss": 1.1555, - "step": 3753 - }, - { - "epoch": 0.508913441333966, - "grad_norm": 1.6165012238473813, - "learning_rate": 1.0197599201184642e-06, - "loss": 1.169, - "step": 3754 - }, - { - "epoch": 0.5090490069816308, - "grad_norm": 1.7526814345463846, - "learning_rate": 1.0193208660509956e-06, - "loss": 1.1454, - "step": 3755 - }, - { - "epoch": 0.5091845726292957, - "grad_norm": 1.761587529927443, - "learning_rate": 1.0188818082576505e-06, - "loss": 1.1381, - "step": 3756 - }, - { - "epoch": 0.5093201382769607, - "grad_norm": 2.379813359444368, - "learning_rate": 1.0184427468230976e-06, - "loss": 1.1485, - "step": 3757 - }, - { - "epoch": 0.5094557039246255, - "grad_norm": 1.739535264095292, - "learning_rate": 1.0180036818320067e-06, - "loss": 1.1512, - "step": 3758 - }, - { - "epoch": 0.5095912695722904, - "grad_norm": 1.5936619348469676, - "learning_rate": 1.0175646133690479e-06, - "loss": 1.1488, - "step": 3759 - }, - { - "epoch": 0.5097268352199552, - "grad_norm": 1.7558292578896764, - "learning_rate": 1.017125541518892e-06, - "loss": 1.142, - "step": 3760 - }, - { - "epoch": 0.5098624008676201, - "grad_norm": 1.6956825410098038, - "learning_rate": 1.0166864663662104e-06, - "loss": 1.1631, - "step": 3761 - }, - { - "epoch": 0.5099979665152851, - "grad_norm": 3.1181943013403512, - "learning_rate": 1.016247387995676e-06, - "loss": 1.1436, - "step": 3762 - }, - { - "epoch": 0.5101335321629499, - "grad_norm": 2.2854720111206923, - "learning_rate": 1.0158083064919605e-06, - "loss": 1.1074, - "step": 3763 - }, - { - "epoch": 0.5102690978106148, - "grad_norm": 1.515114121700042, - "learning_rate": 1.0153692219397385e-06, - "loss": 1.1482, - "step": 3764 - }, - { - "epoch": 0.5104046634582796, - "grad_norm": 1.5072502021012482, - "learning_rate": 1.014930134423683e-06, - "loss": 1.1436, - "step": 3765 - }, - { - "epoch": 0.5105402291059445, - "grad_norm": 2.841311540570994, - "learning_rate": 1.0144910440284689e-06, - "loss": 1.1345, - "step": 3766 - }, - { - "epoch": 0.5106757947536095, - "grad_norm": 1.644298285819212, - "learning_rate": 1.0140519508387713e-06, - "loss": 1.1344, - "step": 3767 - }, - { - "epoch": 0.5108113604012743, - "grad_norm": 1.812476427425424, - "learning_rate": 1.013612854939266e-06, - "loss": 1.1777, - "step": 3768 - }, - { - "epoch": 0.5109469260489392, - "grad_norm": 4.624759798781143, - "learning_rate": 1.013173756414629e-06, - "loss": 1.1257, - "step": 3769 - }, - { - "epoch": 0.5110824916966041, - "grad_norm": 1.3442891029306898, - "learning_rate": 1.0127346553495371e-06, - "loss": 1.1631, - "step": 3770 - }, - { - "epoch": 0.511218057344269, - "grad_norm": 2.07488525940021, - "learning_rate": 1.0122955518286672e-06, - "loss": 1.1076, - "step": 3771 - }, - { - "epoch": 0.5113536229919339, - "grad_norm": 1.6647965337098636, - "learning_rate": 1.0118564459366976e-06, - "loss": 1.1188, - "step": 3772 - }, - { - "epoch": 0.5114891886395987, - "grad_norm": 1.6867358228347287, - "learning_rate": 1.0114173377583057e-06, - "loss": 1.1411, - "step": 3773 - }, - { - "epoch": 0.5116247542872636, - "grad_norm": 1.3723920800727132, - "learning_rate": 1.0109782273781706e-06, - "loss": 1.1216, - "step": 3774 - }, - { - "epoch": 0.5117603199349285, - "grad_norm": 1.7163976377592385, - "learning_rate": 1.0105391148809707e-06, - "loss": 1.1576, - "step": 3775 - }, - { - "epoch": 0.5118958855825934, - "grad_norm": 3.476889427981893, - "learning_rate": 1.010100000351386e-06, - "loss": 1.15, - "step": 3776 - }, - { - "epoch": 0.5120314512302583, - "grad_norm": 2.017557696189528, - "learning_rate": 1.0096608838740956e-06, - "loss": 1.0844, - "step": 3777 - }, - { - "epoch": 0.5121670168779231, - "grad_norm": 1.6129077668893823, - "learning_rate": 1.0092217655337806e-06, - "loss": 1.1847, - "step": 3778 - }, - { - "epoch": 0.512302582525588, - "grad_norm": 1.6490547256148527, - "learning_rate": 1.0087826454151205e-06, - "loss": 1.196, - "step": 3779 - }, - { - "epoch": 0.512438148173253, - "grad_norm": 1.6108935087575438, - "learning_rate": 1.0083435236027967e-06, - "loss": 1.1439, - "step": 3780 - }, - { - "epoch": 0.5125737138209178, - "grad_norm": 1.8722766208353923, - "learning_rate": 1.00790440018149e-06, - "loss": 1.1361, - "step": 3781 - }, - { - "epoch": 0.5127092794685827, - "grad_norm": 1.5677020392575725, - "learning_rate": 1.0074652752358822e-06, - "loss": 1.1033, - "step": 3782 - }, - { - "epoch": 0.5128448451162475, - "grad_norm": 1.6967363382907665, - "learning_rate": 1.0070261488506551e-06, - "loss": 1.1271, - "step": 3783 - }, - { - "epoch": 0.5129804107639124, - "grad_norm": 1.9986352860376846, - "learning_rate": 1.0065870211104906e-06, - "loss": 1.1833, - "step": 3784 - }, - { - "epoch": 0.5131159764115774, - "grad_norm": 2.022572591020284, - "learning_rate": 1.006147892100071e-06, - "loss": 1.1528, - "step": 3785 - }, - { - "epoch": 0.5132515420592422, - "grad_norm": 1.6232828510313575, - "learning_rate": 1.0057087619040792e-06, - "loss": 1.1285, - "step": 3786 - }, - { - "epoch": 0.5133871077069071, - "grad_norm": 1.7491368409401762, - "learning_rate": 1.0052696306071974e-06, - "loss": 1.1485, - "step": 3787 - }, - { - "epoch": 0.5135226733545719, - "grad_norm": 1.498290936046994, - "learning_rate": 1.0048304982941089e-06, - "loss": 1.1584, - "step": 3788 - }, - { - "epoch": 0.5136582390022368, - "grad_norm": 1.840202118310834, - "learning_rate": 1.0043913650494972e-06, - "loss": 1.1388, - "step": 3789 - }, - { - "epoch": 0.5137938046499018, - "grad_norm": 2.124050355158528, - "learning_rate": 1.0039522309580453e-06, - "loss": 1.1352, - "step": 3790 - }, - { - "epoch": 0.5139293702975666, - "grad_norm": 1.39727836655907, - "learning_rate": 1.003513096104437e-06, - "loss": 1.1239, - "step": 3791 - }, - { - "epoch": 0.5140649359452315, - "grad_norm": 1.6178262479559251, - "learning_rate": 1.0030739605733557e-06, - "loss": 1.1634, - "step": 3792 - }, - { - "epoch": 0.5142005015928963, - "grad_norm": 1.7754006135384162, - "learning_rate": 1.0026348244494853e-06, - "loss": 1.159, - "step": 3793 - }, - { - "epoch": 0.5143360672405612, - "grad_norm": 2.8851718217555344, - "learning_rate": 1.0021956878175099e-06, - "loss": 1.13, - "step": 3794 - }, - { - "epoch": 0.5144716328882262, - "grad_norm": 2.420087597140344, - "learning_rate": 1.0017565507621135e-06, - "loss": 1.1298, - "step": 3795 - }, - { - "epoch": 0.514607198535891, - "grad_norm": 1.963089540298795, - "learning_rate": 1.0013174133679801e-06, - "loss": 1.17, - "step": 3796 - }, - { - "epoch": 0.5147427641835559, - "grad_norm": 1.8640347783058695, - "learning_rate": 1.0008782757197939e-06, - "loss": 1.1858, - "step": 3797 - }, - { - "epoch": 0.5148783298312207, - "grad_norm": 1.437103615637163, - "learning_rate": 1.000439137902239e-06, - "loss": 1.1458, - "step": 3798 - }, - { - "epoch": 0.5150138954788857, - "grad_norm": 1.6098972280538904, - "learning_rate": 1e-06, - "loss": 1.1381, - "step": 3799 - }, - { - "epoch": 0.5151494611265506, - "grad_norm": 1.6965140431276151, - "learning_rate": 9.995608620977612e-07, - "loss": 1.1373, - "step": 3800 - }, - { - "epoch": 0.5152850267742154, - "grad_norm": 1.485745212333801, - "learning_rate": 9.991217242802063e-07, - "loss": 1.157, - "step": 3801 - }, - { - "epoch": 0.5154205924218803, - "grad_norm": 2.6078019357956164, - "learning_rate": 9.986825866320202e-07, - "loss": 1.1616, - "step": 3802 - }, - { - "epoch": 0.5155561580695451, - "grad_norm": 1.6940971244328535, - "learning_rate": 9.982434492378864e-07, - "loss": 1.1847, - "step": 3803 - }, - { - "epoch": 0.5156917237172101, - "grad_norm": 1.6090642550238574, - "learning_rate": 9.978043121824903e-07, - "loss": 1.1241, - "step": 3804 - }, - { - "epoch": 0.515827289364875, - "grad_norm": 3.041758742831177, - "learning_rate": 9.973651755505146e-07, - "loss": 1.1145, - "step": 3805 - }, - { - "epoch": 0.5159628550125398, - "grad_norm": 1.5515238635834339, - "learning_rate": 9.969260394266446e-07, - "loss": 1.1687, - "step": 3806 - }, - { - "epoch": 0.5160984206602047, - "grad_norm": 1.7330144741535902, - "learning_rate": 9.96486903895563e-07, - "loss": 1.1734, - "step": 3807 - }, - { - "epoch": 0.5162339863078695, - "grad_norm": 1.7519483874025938, - "learning_rate": 9.960477690419548e-07, - "loss": 1.1349, - "step": 3808 - }, - { - "epoch": 0.5163695519555345, - "grad_norm": 2.1529271550425517, - "learning_rate": 9.956086349505027e-07, - "loss": 1.1421, - "step": 3809 - }, - { - "epoch": 0.5165051176031994, - "grad_norm": 1.7612404075346286, - "learning_rate": 9.95169501705891e-07, - "loss": 1.1223, - "step": 3810 - }, - { - "epoch": 0.5166406832508642, - "grad_norm": 1.3792800202113686, - "learning_rate": 9.947303693928026e-07, - "loss": 1.1614, - "step": 3811 - }, - { - "epoch": 0.5167762488985291, - "grad_norm": 1.6613793020034537, - "learning_rate": 9.94291238095921e-07, - "loss": 1.1562, - "step": 3812 - }, - { - "epoch": 0.516911814546194, - "grad_norm": 3.0935254998930994, - "learning_rate": 9.938521078999288e-07, - "loss": 1.1465, - "step": 3813 - }, - { - "epoch": 0.5170473801938589, - "grad_norm": 1.8286072135606555, - "learning_rate": 9.934129788895093e-07, - "loss": 1.1316, - "step": 3814 - }, - { - "epoch": 0.5171829458415238, - "grad_norm": 2.0204385965987184, - "learning_rate": 9.92973851149345e-07, - "loss": 1.1787, - "step": 3815 - }, - { - "epoch": 0.5173185114891886, - "grad_norm": 1.4790884584841835, - "learning_rate": 9.92534724764118e-07, - "loss": 1.1195, - "step": 3816 - }, - { - "epoch": 0.5174540771368535, - "grad_norm": 2.1788083105156906, - "learning_rate": 9.920955998185102e-07, - "loss": 1.1087, - "step": 3817 - }, - { - "epoch": 0.5175896427845184, - "grad_norm": 1.4886195522952153, - "learning_rate": 9.916564763972035e-07, - "loss": 1.1542, - "step": 3818 - }, - { - "epoch": 0.5177252084321833, - "grad_norm": 1.528986652175169, - "learning_rate": 9.912173545848796e-07, - "loss": 1.1071, - "step": 3819 - }, - { - "epoch": 0.5178607740798482, - "grad_norm": 1.840939645187074, - "learning_rate": 9.907782344662194e-07, - "loss": 1.14, - "step": 3820 - }, - { - "epoch": 0.517996339727513, - "grad_norm": 1.5429265154614986, - "learning_rate": 9.903391161259043e-07, - "loss": 1.1433, - "step": 3821 - }, - { - "epoch": 0.518131905375178, - "grad_norm": 4.156043806667673, - "learning_rate": 9.898999996486137e-07, - "loss": 1.1147, - "step": 3822 - }, - { - "epoch": 0.5182674710228428, - "grad_norm": 3.04561903971447, - "learning_rate": 9.894608851190292e-07, - "loss": 1.1237, - "step": 3823 - }, - { - "epoch": 0.5184030366705077, - "grad_norm": 1.4604267705917786, - "learning_rate": 9.890217726218293e-07, - "loss": 1.1276, - "step": 3824 - }, - { - "epoch": 0.5185386023181726, - "grad_norm": 1.8474085112993734, - "learning_rate": 9.885826622416942e-07, - "loss": 1.1663, - "step": 3825 - }, - { - "epoch": 0.5186741679658374, - "grad_norm": 1.4386563556170044, - "learning_rate": 9.88143554063302e-07, - "loss": 1.1567, - "step": 3826 - }, - { - "epoch": 0.5188097336135024, - "grad_norm": 1.805810281403571, - "learning_rate": 9.877044481713327e-07, - "loss": 1.1354, - "step": 3827 - }, - { - "epoch": 0.5189452992611672, - "grad_norm": 1.5454204218446463, - "learning_rate": 9.872653446504632e-07, - "loss": 1.1121, - "step": 3828 - }, - { - "epoch": 0.5190808649088321, - "grad_norm": 1.7025366363958732, - "learning_rate": 9.86826243585371e-07, - "loss": 1.1789, - "step": 3829 - }, - { - "epoch": 0.519216430556497, - "grad_norm": 1.4620737293193315, - "learning_rate": 9.863871450607342e-07, - "loss": 1.1474, - "step": 3830 - }, - { - "epoch": 0.5193519962041618, - "grad_norm": 2.028261936229309, - "learning_rate": 9.859480491612288e-07, - "loss": 1.1524, - "step": 3831 - }, - { - "epoch": 0.5194875618518268, - "grad_norm": 2.203600315270896, - "learning_rate": 9.855089559715314e-07, - "loss": 1.1313, - "step": 3832 - }, - { - "epoch": 0.5196231274994916, - "grad_norm": 4.836023954474209, - "learning_rate": 9.850698655763171e-07, - "loss": 1.1614, - "step": 3833 - }, - { - "epoch": 0.5197586931471565, - "grad_norm": 1.6069582580220267, - "learning_rate": 9.846307780602619e-07, - "loss": 1.1089, - "step": 3834 - }, - { - "epoch": 0.5198942587948214, - "grad_norm": 1.5927755451057266, - "learning_rate": 9.841916935080392e-07, - "loss": 1.1414, - "step": 3835 - }, - { - "epoch": 0.5200298244424862, - "grad_norm": 1.5722488451444716, - "learning_rate": 9.837526120043242e-07, - "loss": 1.1379, - "step": 3836 - }, - { - "epoch": 0.5201653900901512, - "grad_norm": 2.2233437686814463, - "learning_rate": 9.833135336337893e-07, - "loss": 1.1087, - "step": 3837 - }, - { - "epoch": 0.520300955737816, - "grad_norm": 6.931028654297517, - "learning_rate": 9.82874458481108e-07, - "loss": 1.1435, - "step": 3838 - }, - { - "epoch": 0.5204365213854809, - "grad_norm": 1.7395578328148433, - "learning_rate": 9.82435386630952e-07, - "loss": 1.1872, - "step": 3839 - }, - { - "epoch": 0.5205720870331458, - "grad_norm": 1.5829108907688485, - "learning_rate": 9.819963181679934e-07, - "loss": 1.1053, - "step": 3840 - }, - { - "epoch": 0.5207076526808107, - "grad_norm": 2.4979757414892223, - "learning_rate": 9.81557253176902e-07, - "loss": 1.0986, - "step": 3841 - }, - { - "epoch": 0.5208432183284756, - "grad_norm": 1.4742627658578178, - "learning_rate": 9.811181917423495e-07, - "loss": 1.1363, - "step": 3842 - }, - { - "epoch": 0.5209787839761404, - "grad_norm": 1.5919608816191824, - "learning_rate": 9.806791339490047e-07, - "loss": 1.1009, - "step": 3843 - }, - { - "epoch": 0.5211143496238053, - "grad_norm": 2.1331043987425358, - "learning_rate": 9.802400798815357e-07, - "loss": 1.1566, - "step": 3844 - }, - { - "epoch": 0.5212499152714702, - "grad_norm": 1.8358623117586705, - "learning_rate": 9.79801029624612e-07, - "loss": 1.1097, - "step": 3845 - }, - { - "epoch": 0.5213854809191351, - "grad_norm": 1.4660668135280408, - "learning_rate": 9.793619832629001e-07, - "loss": 1.1616, - "step": 3846 - }, - { - "epoch": 0.5215210465668, - "grad_norm": 1.7713203299248932, - "learning_rate": 9.789229408810668e-07, - "loss": 1.1395, - "step": 3847 - }, - { - "epoch": 0.5216566122144649, - "grad_norm": 1.4747368718706004, - "learning_rate": 9.784839025637778e-07, - "loss": 1.1309, - "step": 3848 - }, - { - "epoch": 0.5217921778621297, - "grad_norm": 1.6294665424592707, - "learning_rate": 9.780448683956983e-07, - "loss": 1.1483, - "step": 3849 - }, - { - "epoch": 0.5219277435097947, - "grad_norm": 1.5160659318156828, - "learning_rate": 9.77605838461493e-07, - "loss": 1.1691, - "step": 3850 - }, - { - "epoch": 0.5220633091574595, - "grad_norm": 1.592660670226848, - "learning_rate": 9.771668128458251e-07, - "loss": 1.089, - "step": 3851 - }, - { - "epoch": 0.5221988748051244, - "grad_norm": 1.8108746137965357, - "learning_rate": 9.767277916333564e-07, - "loss": 1.1121, - "step": 3852 - }, - { - "epoch": 0.5223344404527893, - "grad_norm": 1.7498308158802973, - "learning_rate": 9.762887749087501e-07, - "loss": 1.159, - "step": 3853 - }, - { - "epoch": 0.5224700061004541, - "grad_norm": 1.6004326265373434, - "learning_rate": 9.758497627566657e-07, - "loss": 1.1561, - "step": 3854 - }, - { - "epoch": 0.5226055717481191, - "grad_norm": 1.4932150282247383, - "learning_rate": 9.754107552617645e-07, - "loss": 1.1721, - "step": 3855 - }, - { - "epoch": 0.5227411373957839, - "grad_norm": 1.717654314344305, - "learning_rate": 9.749717525087051e-07, - "loss": 1.1486, - "step": 3856 - }, - { - "epoch": 0.5228767030434488, - "grad_norm": 1.7612172774392394, - "learning_rate": 9.745327545821452e-07, - "loss": 1.139, - "step": 3857 - }, - { - "epoch": 0.5230122686911137, - "grad_norm": 1.7505418906845625, - "learning_rate": 9.74093761566743e-07, - "loss": 1.0943, - "step": 3858 - }, - { - "epoch": 0.5231478343387785, - "grad_norm": 1.8461893927655517, - "learning_rate": 9.736547735471539e-07, - "loss": 1.1417, - "step": 3859 - }, - { - "epoch": 0.5232833999864435, - "grad_norm": 1.7405831498273523, - "learning_rate": 9.732157906080343e-07, - "loss": 1.1777, - "step": 3860 - }, - { - "epoch": 0.5234189656341083, - "grad_norm": 1.4327050090433808, - "learning_rate": 9.727768128340375e-07, - "loss": 1.1303, - "step": 3861 - }, - { - "epoch": 0.5235545312817732, - "grad_norm": 3.152626251475727, - "learning_rate": 9.72337840309818e-07, - "loss": 1.1461, - "step": 3862 - }, - { - "epoch": 0.5236900969294381, - "grad_norm": 1.6474954991029214, - "learning_rate": 9.718988731200271e-07, - "loss": 1.1682, - "step": 3863 - }, - { - "epoch": 0.523825662577103, - "grad_norm": 1.4055151125265204, - "learning_rate": 9.714599113493171e-07, - "loss": 1.1367, - "step": 3864 - }, - { - "epoch": 0.5239612282247679, - "grad_norm": 1.61269235458932, - "learning_rate": 9.710209550823375e-07, - "loss": 1.1397, - "step": 3865 - }, - { - "epoch": 0.5240967938724327, - "grad_norm": 1.7573813927770752, - "learning_rate": 9.705820044037387e-07, - "loss": 1.141, - "step": 3866 - }, - { - "epoch": 0.5242323595200976, - "grad_norm": 1.3891504257161602, - "learning_rate": 9.701430593981674e-07, - "loss": 1.1645, - "step": 3867 - }, - { - "epoch": 0.5243679251677625, - "grad_norm": 1.6418828498550277, - "learning_rate": 9.697041201502718e-07, - "loss": 1.2171, - "step": 3868 - }, - { - "epoch": 0.5245034908154274, - "grad_norm": 1.5063490966419066, - "learning_rate": 9.692651867446973e-07, - "loss": 1.1616, - "step": 3869 - }, - { - "epoch": 0.5246390564630923, - "grad_norm": 2.959613109664356, - "learning_rate": 9.688262592660893e-07, - "loss": 1.1233, - "step": 3870 - }, - { - "epoch": 0.5247746221107571, - "grad_norm": 1.4354524567733877, - "learning_rate": 9.68387337799091e-07, - "loss": 1.1301, - "step": 3871 - }, - { - "epoch": 0.524910187758422, - "grad_norm": 1.5812304587597648, - "learning_rate": 9.679484224283447e-07, - "loss": 1.1841, - "step": 3872 - }, - { - "epoch": 0.525045753406087, - "grad_norm": 1.6514985264557942, - "learning_rate": 9.675095132384927e-07, - "loss": 1.1123, - "step": 3873 - }, - { - "epoch": 0.5251813190537518, - "grad_norm": 1.7538776097634916, - "learning_rate": 9.67070610314174e-07, - "loss": 1.1342, - "step": 3874 - }, - { - "epoch": 0.5253168847014167, - "grad_norm": 2.373865766486846, - "learning_rate": 9.666317137400287e-07, - "loss": 1.1243, - "step": 3875 - }, - { - "epoch": 0.5254524503490815, - "grad_norm": 2.411804763155915, - "learning_rate": 9.661928236006936e-07, - "loss": 1.0929, - "step": 3876 - }, - { - "epoch": 0.5255880159967464, - "grad_norm": 1.7828671352491299, - "learning_rate": 9.65753939980806e-07, - "loss": 1.1973, - "step": 3877 - }, - { - "epoch": 0.5257235816444114, - "grad_norm": 5.300435496526742, - "learning_rate": 9.653150629650004e-07, - "loss": 1.1185, - "step": 3878 - }, - { - "epoch": 0.5258591472920762, - "grad_norm": 1.515301454230536, - "learning_rate": 9.648761926379112e-07, - "loss": 1.1647, - "step": 3879 - }, - { - "epoch": 0.5259947129397411, - "grad_norm": 1.977735375263373, - "learning_rate": 9.644373290841712e-07, - "loss": 1.1563, - "step": 3880 - }, - { - "epoch": 0.5261302785874059, - "grad_norm": 1.5203113143361526, - "learning_rate": 9.639984723884112e-07, - "loss": 1.1437, - "step": 3881 - }, - { - "epoch": 0.5262658442350708, - "grad_norm": 1.5218940097811937, - "learning_rate": 9.635596226352618e-07, - "loss": 1.1517, - "step": 3882 - }, - { - "epoch": 0.5264014098827358, - "grad_norm": 1.7701040326006894, - "learning_rate": 9.63120779909352e-07, - "loss": 1.1545, - "step": 3883 - }, - { - "epoch": 0.5265369755304006, - "grad_norm": 1.900904362418526, - "learning_rate": 9.626819442953081e-07, - "loss": 1.1659, - "step": 3884 - }, - { - "epoch": 0.5266725411780655, - "grad_norm": 1.946278591349942, - "learning_rate": 9.622431158777568e-07, - "loss": 1.1337, - "step": 3885 - }, - { - "epoch": 0.5268081068257303, - "grad_norm": 1.8151296094721412, - "learning_rate": 9.618042947413228e-07, - "loss": 1.1619, - "step": 3886 - }, - { - "epoch": 0.5269436724733952, - "grad_norm": 3.327973485879826, - "learning_rate": 9.613654809706288e-07, - "loss": 1.1231, - "step": 3887 - }, - { - "epoch": 0.5270792381210602, - "grad_norm": 1.4797915591583486, - "learning_rate": 9.60926674650297e-07, - "loss": 1.1413, - "step": 3888 - }, - { - "epoch": 0.527214803768725, - "grad_norm": 3.3569987720244434, - "learning_rate": 9.604878758649472e-07, - "loss": 1.1397, - "step": 3889 - }, - { - "epoch": 0.5273503694163899, - "grad_norm": 9.16176608546038, - "learning_rate": 9.60049084699199e-07, - "loss": 1.1078, - "step": 3890 - }, - { - "epoch": 0.5274859350640547, - "grad_norm": 2.2274185968694593, - "learning_rate": 9.596103012376695e-07, - "loss": 1.103, - "step": 3891 - }, - { - "epoch": 0.5276215007117196, - "grad_norm": 1.7646551931003238, - "learning_rate": 9.591715255649746e-07, - "loss": 1.1416, - "step": 3892 - }, - { - "epoch": 0.5277570663593846, - "grad_norm": 1.5041364638629717, - "learning_rate": 9.587327577657283e-07, - "loss": 1.1568, - "step": 3893 - }, - { - "epoch": 0.5278926320070494, - "grad_norm": 1.6128350375712481, - "learning_rate": 9.582939979245444e-07, - "loss": 1.172, - "step": 3894 - }, - { - "epoch": 0.5280281976547143, - "grad_norm": 2.4675676920849146, - "learning_rate": 9.578552461260335e-07, - "loss": 1.1397, - "step": 3895 - }, - { - "epoch": 0.5281637633023791, - "grad_norm": 1.8268944070007982, - "learning_rate": 9.57416502454806e-07, - "loss": 1.1611, - "step": 3896 - }, - { - "epoch": 0.5282993289500441, - "grad_norm": 1.6746158142852754, - "learning_rate": 9.569777669954693e-07, - "loss": 1.1563, - "step": 3897 - }, - { - "epoch": 0.528434894597709, - "grad_norm": 2.540946556557655, - "learning_rate": 9.565390398326312e-07, - "loss": 1.1479, - "step": 3898 - }, - { - "epoch": 0.5285704602453738, - "grad_norm": 1.7139158713676466, - "learning_rate": 9.561003210508963e-07, - "loss": 1.1435, - "step": 3899 - }, - { - "epoch": 0.5287060258930387, - "grad_norm": 1.5234907753564322, - "learning_rate": 9.556616107348675e-07, - "loss": 1.1234, - "step": 3900 - }, - { - "epoch": 0.5288415915407035, - "grad_norm": 1.733072083010232, - "learning_rate": 9.552229089691474e-07, - "loss": 1.1328, - "step": 3901 - }, - { - "epoch": 0.5289771571883685, - "grad_norm": 1.8316930578900048, - "learning_rate": 9.547842158383354e-07, - "loss": 1.1348, - "step": 3902 - }, - { - "epoch": 0.5291127228360334, - "grad_norm": 1.6319615679472268, - "learning_rate": 9.54345531427031e-07, - "loss": 1.1271, - "step": 3903 - }, - { - "epoch": 0.5292482884836982, - "grad_norm": 1.7308129970822055, - "learning_rate": 9.539068558198301e-07, - "loss": 1.169, - "step": 3904 - }, - { - "epoch": 0.5293838541313631, - "grad_norm": 1.824660247441993, - "learning_rate": 9.534681891013286e-07, - "loss": 1.1607, - "step": 3905 - }, - { - "epoch": 0.5295194197790279, - "grad_norm": 1.7022690390353408, - "learning_rate": 9.530295313561192e-07, - "loss": 1.151, - "step": 3906 - }, - { - "epoch": 0.5296549854266929, - "grad_norm": 1.6946162079989933, - "learning_rate": 9.525908826687943e-07, - "loss": 1.1746, - "step": 3907 - }, - { - "epoch": 0.5297905510743578, - "grad_norm": 1.7515640178595928, - "learning_rate": 9.521522431239429e-07, - "loss": 1.1191, - "step": 3908 - }, - { - "epoch": 0.5299261167220226, - "grad_norm": 1.4469400175871883, - "learning_rate": 9.517136128061543e-07, - "loss": 1.1173, - "step": 3909 - }, - { - "epoch": 0.5300616823696875, - "grad_norm": 2.1385794986294107, - "learning_rate": 9.51274991800014e-07, - "loss": 1.099, - "step": 3910 - }, - { - "epoch": 0.5301972480173524, - "grad_norm": 1.8758223337425535, - "learning_rate": 9.508363801901069e-07, - "loss": 1.1409, - "step": 3911 - }, - { - "epoch": 0.5303328136650173, - "grad_norm": 1.6057180421808077, - "learning_rate": 9.50397778061016e-07, - "loss": 1.1463, - "step": 3912 - }, - { - "epoch": 0.5304683793126822, - "grad_norm": 1.4747276510247216, - "learning_rate": 9.49959185497322e-07, - "loss": 1.0638, - "step": 3913 - }, - { - "epoch": 0.530603944960347, - "grad_norm": 1.8018572660113148, - "learning_rate": 9.49520602583604e-07, - "loss": 1.1388, - "step": 3914 - }, - { - "epoch": 0.5307395106080119, - "grad_norm": 2.0279776745253306, - "learning_rate": 9.490820294044394e-07, - "loss": 1.1292, - "step": 3915 - }, - { - "epoch": 0.5308750762556768, - "grad_norm": 1.4642492741737183, - "learning_rate": 9.486434660444034e-07, - "loss": 1.1788, - "step": 3916 - }, - { - "epoch": 0.5310106419033417, - "grad_norm": 1.5822183873636648, - "learning_rate": 9.482049125880697e-07, - "loss": 1.1742, - "step": 3917 - }, - { - "epoch": 0.5311462075510066, - "grad_norm": 1.6613854416523042, - "learning_rate": 9.477663691200099e-07, - "loss": 1.1373, - "step": 3918 - }, - { - "epoch": 0.5312817731986714, - "grad_norm": 1.6830385799985845, - "learning_rate": 9.47327835724793e-07, - "loss": 1.1154, - "step": 3919 - }, - { - "epoch": 0.5314173388463364, - "grad_norm": 1.7935667863136269, - "learning_rate": 9.468893124869878e-07, - "loss": 1.145, - "step": 3920 - }, - { - "epoch": 0.5315529044940012, - "grad_norm": 1.5401423126100569, - "learning_rate": 9.464507994911589e-07, - "loss": 1.136, - "step": 3921 - }, - { - "epoch": 0.5316884701416661, - "grad_norm": 5.3408133848874915, - "learning_rate": 9.460122968218711e-07, - "loss": 1.1145, - "step": 3922 - }, - { - "epoch": 0.531824035789331, - "grad_norm": 1.9105087984031632, - "learning_rate": 9.455738045636853e-07, - "loss": 1.1083, - "step": 3923 - }, - { - "epoch": 0.5319596014369958, - "grad_norm": 2.7354195011992917, - "learning_rate": 9.451353228011622e-07, - "loss": 1.1249, - "step": 3924 - }, - { - "epoch": 0.5320951670846608, - "grad_norm": 1.7967146302067714, - "learning_rate": 9.446968516188584e-07, - "loss": 1.1341, - "step": 3925 - }, - { - "epoch": 0.5322307327323256, - "grad_norm": 2.1393089981439015, - "learning_rate": 9.442583911013308e-07, - "loss": 1.1125, - "step": 3926 - }, - { - "epoch": 0.5323662983799905, - "grad_norm": 2.0798940982068768, - "learning_rate": 9.438199413331323e-07, - "loss": 1.1367, - "step": 3927 - }, - { - "epoch": 0.5325018640276554, - "grad_norm": 1.7805568946113108, - "learning_rate": 9.433815023988144e-07, - "loss": 1.153, - "step": 3928 - }, - { - "epoch": 0.5326374296753202, - "grad_norm": 1.8920733733394852, - "learning_rate": 9.429430743829272e-07, - "loss": 1.1012, - "step": 3929 - }, - { - "epoch": 0.5327729953229852, - "grad_norm": 1.5442828320690682, - "learning_rate": 9.425046573700174e-07, - "loss": 1.0842, - "step": 3930 - }, - { - "epoch": 0.5329085609706501, - "grad_norm": 3.769269250318628, - "learning_rate": 9.420662514446309e-07, - "loss": 1.1379, - "step": 3931 - }, - { - "epoch": 0.5330441266183149, - "grad_norm": 1.5437535084785903, - "learning_rate": 9.4162785669131e-07, - "loss": 1.1206, - "step": 3932 - }, - { - "epoch": 0.5331796922659798, - "grad_norm": 1.3756762066882513, - "learning_rate": 9.411894731945968e-07, - "loss": 1.1761, - "step": 3933 - }, - { - "epoch": 0.5333152579136446, - "grad_norm": 1.608099464622897, - "learning_rate": 9.40751101039029e-07, - "loss": 1.1443, - "step": 3934 - }, - { - "epoch": 0.5334508235613096, - "grad_norm": 1.5803458077068513, - "learning_rate": 9.403127403091441e-07, - "loss": 1.1366, - "step": 3935 - }, - { - "epoch": 0.5335863892089745, - "grad_norm": 1.7706034744522712, - "learning_rate": 9.398743910894755e-07, - "loss": 1.1707, - "step": 3936 - }, - { - "epoch": 0.5337219548566393, - "grad_norm": 1.6277929252896688, - "learning_rate": 9.394360534645566e-07, - "loss": 1.1345, - "step": 3937 - }, - { - "epoch": 0.5338575205043042, - "grad_norm": 1.5038275074635077, - "learning_rate": 9.389977275189163e-07, - "loss": 1.1376, - "step": 3938 - }, - { - "epoch": 0.533993086151969, - "grad_norm": 1.7403062547128598, - "learning_rate": 9.38559413337083e-07, - "loss": 1.1575, - "step": 3939 - }, - { - "epoch": 0.534128651799634, - "grad_norm": 1.8891470550503489, - "learning_rate": 9.381211110035819e-07, - "loss": 1.1461, - "step": 3940 - }, - { - "epoch": 0.5342642174472989, - "grad_norm": 2.0991645292708565, - "learning_rate": 9.376828206029358e-07, - "loss": 1.1286, - "step": 3941 - }, - { - "epoch": 0.5343997830949637, - "grad_norm": 2.7633237949695584, - "learning_rate": 9.372445422196662e-07, - "loss": 1.1221, - "step": 3942 - }, - { - "epoch": 0.5345353487426286, - "grad_norm": 1.5269555086304247, - "learning_rate": 9.368062759382908e-07, - "loss": 1.13, - "step": 3943 - }, - { - "epoch": 0.5346709143902935, - "grad_norm": 1.9311033237646111, - "learning_rate": 9.363680218433267e-07, - "loss": 1.1568, - "step": 3944 - }, - { - "epoch": 0.5348064800379584, - "grad_norm": 1.426954834413145, - "learning_rate": 9.359297800192871e-07, - "loss": 1.1546, - "step": 3945 - }, - { - "epoch": 0.5349420456856233, - "grad_norm": 2.8391062486333363, - "learning_rate": 9.354915505506838e-07, - "loss": 1.1552, - "step": 3946 - }, - { - "epoch": 0.5350776113332881, - "grad_norm": 1.8692108903209737, - "learning_rate": 9.350533335220256e-07, - "loss": 1.1529, - "step": 3947 - }, - { - "epoch": 0.535213176980953, - "grad_norm": 2.709043716441755, - "learning_rate": 9.346151290178195e-07, - "loss": 1.136, - "step": 3948 - }, - { - "epoch": 0.5353487426286179, - "grad_norm": 1.7462871120092351, - "learning_rate": 9.341769371225696e-07, - "loss": 1.1385, - "step": 3949 - }, - { - "epoch": 0.5354843082762828, - "grad_norm": 1.6094722767230376, - "learning_rate": 9.337387579207779e-07, - "loss": 1.124, - "step": 3950 - }, - { - "epoch": 0.5356198739239477, - "grad_norm": 1.4382934197572197, - "learning_rate": 9.333005914969434e-07, - "loss": 1.1345, - "step": 3951 - }, - { - "epoch": 0.5357554395716125, - "grad_norm": 1.7369710644799246, - "learning_rate": 9.328624379355639e-07, - "loss": 1.1309, - "step": 3952 - }, - { - "epoch": 0.5358910052192775, - "grad_norm": 1.7591233272958737, - "learning_rate": 9.324242973211326e-07, - "loss": 1.1018, - "step": 3953 - }, - { - "epoch": 0.5360265708669423, - "grad_norm": 1.7008116439505885, - "learning_rate": 9.319861697381427e-07, - "loss": 1.1309, - "step": 3954 - }, - { - "epoch": 0.5361621365146072, - "grad_norm": 1.7275809694711328, - "learning_rate": 9.315480552710832e-07, - "loss": 1.0833, - "step": 3955 - }, - { - "epoch": 0.5362977021622721, - "grad_norm": 1.8745886968108774, - "learning_rate": 9.311099540044402e-07, - "loss": 1.1408, - "step": 3956 - }, - { - "epoch": 0.5364332678099369, - "grad_norm": 3.2018032072797693, - "learning_rate": 9.306718660226996e-07, - "loss": 1.1729, - "step": 3957 - }, - { - "epoch": 0.5365688334576019, - "grad_norm": 1.9196333846058828, - "learning_rate": 9.302337914103416e-07, - "loss": 1.1636, - "step": 3958 - }, - { - "epoch": 0.5367043991052667, - "grad_norm": 1.5997020088413285, - "learning_rate": 9.297957302518469e-07, - "loss": 1.2061, - "step": 3959 - }, - { - "epoch": 0.5368399647529316, - "grad_norm": 3.1950367642490667, - "learning_rate": 9.293576826316909e-07, - "loss": 1.1211, - "step": 3960 - }, - { - "epoch": 0.5369755304005965, - "grad_norm": 1.5864621202101075, - "learning_rate": 9.289196486343487e-07, - "loss": 1.1369, - "step": 3961 - }, - { - "epoch": 0.5371110960482613, - "grad_norm": 1.5965549053563621, - "learning_rate": 9.284816283442907e-07, - "loss": 1.1458, - "step": 3962 - }, - { - "epoch": 0.5372466616959263, - "grad_norm": 1.4167124041054016, - "learning_rate": 9.280436218459866e-07, - "loss": 1.1271, - "step": 3963 - }, - { - "epoch": 0.5373822273435911, - "grad_norm": 1.6834201147055754, - "learning_rate": 9.276056292239016e-07, - "loss": 1.1664, - "step": 3964 - }, - { - "epoch": 0.537517792991256, - "grad_norm": 1.6157824738227264, - "learning_rate": 9.271676505625e-07, - "loss": 1.1269, - "step": 3965 - }, - { - "epoch": 0.5376533586389209, - "grad_norm": 1.9973078806661086, - "learning_rate": 9.267296859462416e-07, - "loss": 1.1337, - "step": 3966 - }, - { - "epoch": 0.5377889242865858, - "grad_norm": 1.4816613786203243, - "learning_rate": 9.262917354595854e-07, - "loss": 1.1697, - "step": 3967 - }, - { - "epoch": 0.5379244899342507, - "grad_norm": 1.4300663710638506, - "learning_rate": 9.258537991869861e-07, - "loss": 1.1311, - "step": 3968 - }, - { - "epoch": 0.5380600555819155, - "grad_norm": 4.197234708252887, - "learning_rate": 9.254158772128961e-07, - "loss": 1.1262, - "step": 3969 - }, - { - "epoch": 0.5381956212295804, - "grad_norm": 1.586103274298997, - "learning_rate": 9.249779696217658e-07, - "loss": 1.1712, - "step": 3970 - }, - { - "epoch": 0.5383311868772453, - "grad_norm": 1.4321070736924264, - "learning_rate": 9.245400764980413e-07, - "loss": 1.0968, - "step": 3971 - }, - { - "epoch": 0.5384667525249102, - "grad_norm": 1.6166836608920132, - "learning_rate": 9.241021979261681e-07, - "loss": 1.1487, - "step": 3972 - }, - { - "epoch": 0.5386023181725751, - "grad_norm": 1.8028273525427696, - "learning_rate": 9.236643339905863e-07, - "loss": 1.1279, - "step": 3973 - }, - { - "epoch": 0.5387378838202399, - "grad_norm": 1.4045469105452582, - "learning_rate": 9.232264847757356e-07, - "loss": 1.1622, - "step": 3974 - }, - { - "epoch": 0.5388734494679048, - "grad_norm": 1.5161538107787003, - "learning_rate": 9.227886503660509e-07, - "loss": 1.1037, - "step": 3975 - }, - { - "epoch": 0.5390090151155698, - "grad_norm": 2.384954535464977, - "learning_rate": 9.223508308459659e-07, - "loss": 1.1402, - "step": 3976 - }, - { - "epoch": 0.5391445807632346, - "grad_norm": 2.0705494141923455, - "learning_rate": 9.219130262999101e-07, - "loss": 1.1319, - "step": 3977 - }, - { - "epoch": 0.5392801464108995, - "grad_norm": 1.6172109674518025, - "learning_rate": 9.214752368123107e-07, - "loss": 1.1565, - "step": 3978 - }, - { - "epoch": 0.5394157120585643, - "grad_norm": 1.528251745885748, - "learning_rate": 9.21037462467592e-07, - "loss": 1.1373, - "step": 3979 - }, - { - "epoch": 0.5395512777062292, - "grad_norm": 1.697461598154436, - "learning_rate": 9.205997033501756e-07, - "loss": 1.1234, - "step": 3980 - }, - { - "epoch": 0.5396868433538942, - "grad_norm": 1.5594737282066502, - "learning_rate": 9.201619595444795e-07, - "loss": 1.1882, - "step": 3981 - }, - { - "epoch": 0.539822409001559, - "grad_norm": 2.0031835670564075, - "learning_rate": 9.197242311349195e-07, - "loss": 1.1535, - "step": 3982 - }, - { - "epoch": 0.5399579746492239, - "grad_norm": 1.720051511864373, - "learning_rate": 9.192865182059077e-07, - "loss": 1.1214, - "step": 3983 - }, - { - "epoch": 0.5400935402968887, - "grad_norm": 1.7846625465967871, - "learning_rate": 9.188488208418538e-07, - "loss": 1.1029, - "step": 3984 - }, - { - "epoch": 0.5402291059445536, - "grad_norm": 1.6655984906894223, - "learning_rate": 9.184111391271642e-07, - "loss": 1.1118, - "step": 3985 - }, - { - "epoch": 0.5403646715922186, - "grad_norm": 1.5668392025615916, - "learning_rate": 9.179734731462423e-07, - "loss": 1.1271, - "step": 3986 - }, - { - "epoch": 0.5405002372398834, - "grad_norm": 1.5530850637675997, - "learning_rate": 9.175358229834888e-07, - "loss": 1.0894, - "step": 3987 - }, - { - "epoch": 0.5406358028875483, - "grad_norm": 1.8115391858283925, - "learning_rate": 9.170981887233007e-07, - "loss": 1.1498, - "step": 3988 - }, - { - "epoch": 0.5407713685352131, - "grad_norm": 1.6326583011463607, - "learning_rate": 9.166605704500728e-07, - "loss": 1.111, - "step": 3989 - }, - { - "epoch": 0.540906934182878, - "grad_norm": 1.5550377536730526, - "learning_rate": 9.162229682481957e-07, - "loss": 1.1291, - "step": 3990 - }, - { - "epoch": 0.541042499830543, - "grad_norm": 1.8418227549409383, - "learning_rate": 9.157853822020582e-07, - "loss": 1.1521, - "step": 3991 - }, - { - "epoch": 0.5411780654782078, - "grad_norm": 1.578555286532687, - "learning_rate": 9.153478123960446e-07, - "loss": 1.1308, - "step": 3992 - }, - { - "epoch": 0.5413136311258727, - "grad_norm": 1.5970968320738483, - "learning_rate": 9.149102589145376e-07, - "loss": 1.1494, - "step": 3993 - }, - { - "epoch": 0.5414491967735375, - "grad_norm": 2.204999633261993, - "learning_rate": 9.144727218419151e-07, - "loss": 1.1749, - "step": 3994 - }, - { - "epoch": 0.5415847624212025, - "grad_norm": 1.598858448240508, - "learning_rate": 9.140352012625536e-07, - "loss": 1.1212, - "step": 3995 - }, - { - "epoch": 0.5417203280688674, - "grad_norm": 2.260286772764513, - "learning_rate": 9.135976972608248e-07, - "loss": 1.1425, - "step": 3996 - }, - { - "epoch": 0.5418558937165322, - "grad_norm": 1.5653492636481954, - "learning_rate": 9.131602099210978e-07, - "loss": 1.1776, - "step": 3997 - }, - { - "epoch": 0.5419914593641971, - "grad_norm": 1.7252343836341248, - "learning_rate": 9.127227393277391e-07, - "loss": 1.1547, - "step": 3998 - }, - { - "epoch": 0.5421270250118619, - "grad_norm": 1.905002121864447, - "learning_rate": 9.12285285565111e-07, - "loss": 1.1254, - "step": 3999 - }, - { - "epoch": 0.5422625906595269, - "grad_norm": 1.8944898898336646, - "learning_rate": 9.118478487175735e-07, - "loss": 1.1509, - "step": 4000 - }, - { - "epoch": 0.5423981563071918, - "grad_norm": 1.5256815678274867, - "learning_rate": 9.114104288694821e-07, - "loss": 1.1508, - "step": 4001 - }, - { - "epoch": 0.5425337219548566, - "grad_norm": 1.9864163950832316, - "learning_rate": 9.109730261051905e-07, - "loss": 1.1548, - "step": 4002 - }, - { - "epoch": 0.5426692876025215, - "grad_norm": 1.5936323097044178, - "learning_rate": 9.105356405090479e-07, - "loss": 1.1256, - "step": 4003 - }, - { - "epoch": 0.5428048532501863, - "grad_norm": 1.840236767391538, - "learning_rate": 9.100982721654011e-07, - "loss": 1.1385, - "step": 4004 - }, - { - "epoch": 0.5429404188978513, - "grad_norm": 1.9954352911855533, - "learning_rate": 9.096609211585926e-07, - "loss": 1.1368, - "step": 4005 - }, - { - "epoch": 0.5430759845455162, - "grad_norm": 1.5589498033981692, - "learning_rate": 9.092235875729627e-07, - "loss": 1.1303, - "step": 4006 - }, - { - "epoch": 0.543211550193181, - "grad_norm": 1.5130481889529435, - "learning_rate": 9.087862714928471e-07, - "loss": 1.1111, - "step": 4007 - }, - { - "epoch": 0.5433471158408459, - "grad_norm": 1.6393815854560134, - "learning_rate": 9.083489730025791e-07, - "loss": 1.144, - "step": 4008 - }, - { - "epoch": 0.5434826814885109, - "grad_norm": 1.9817906874716276, - "learning_rate": 9.079116921864883e-07, - "loss": 1.1393, - "step": 4009 - }, - { - "epoch": 0.5436182471361757, - "grad_norm": 1.4592579201377023, - "learning_rate": 9.074744291289007e-07, - "loss": 1.1237, - "step": 4010 - }, - { - "epoch": 0.5437538127838406, - "grad_norm": 1.6008523021898433, - "learning_rate": 9.070371839141393e-07, - "loss": 1.1033, - "step": 4011 - }, - { - "epoch": 0.5438893784315054, - "grad_norm": 2.4121636770114248, - "learning_rate": 9.065999566265229e-07, - "loss": 1.1397, - "step": 4012 - }, - { - "epoch": 0.5440249440791703, - "grad_norm": 1.7651489668498048, - "learning_rate": 9.061627473503677e-07, - "loss": 1.1795, - "step": 4013 - }, - { - "epoch": 0.5441605097268353, - "grad_norm": 2.6000111174092186, - "learning_rate": 9.057255561699859e-07, - "loss": 1.147, - "step": 4014 - }, - { - "epoch": 0.5442960753745001, - "grad_norm": 1.9657373138870953, - "learning_rate": 9.052883831696865e-07, - "loss": 1.1142, - "step": 4015 - }, - { - "epoch": 0.544431641022165, - "grad_norm": 2.1984837331989326, - "learning_rate": 9.048512284337747e-07, - "loss": 1.141, - "step": 4016 - }, - { - "epoch": 0.5445672066698298, - "grad_norm": 1.5013606336255727, - "learning_rate": 9.044140920465529e-07, - "loss": 1.1625, - "step": 4017 - }, - { - "epoch": 0.5447027723174948, - "grad_norm": 1.4853184594828879, - "learning_rate": 9.039769740923182e-07, - "loss": 1.1279, - "step": 4018 - }, - { - "epoch": 0.5448383379651597, - "grad_norm": 1.5855307159415042, - "learning_rate": 9.035398746553667e-07, - "loss": 1.127, - "step": 4019 - }, - { - "epoch": 0.5449739036128245, - "grad_norm": 1.7144798876393268, - "learning_rate": 9.031027938199884e-07, - "loss": 1.1511, - "step": 4020 - }, - { - "epoch": 0.5451094692604894, - "grad_norm": 1.8752141478407691, - "learning_rate": 9.02665731670472e-07, - "loss": 1.1254, - "step": 4021 - }, - { - "epoch": 0.5452450349081542, - "grad_norm": 1.655521426414369, - "learning_rate": 9.022286882911005e-07, - "loss": 1.1517, - "step": 4022 - }, - { - "epoch": 0.5453806005558192, - "grad_norm": 1.5877432929742874, - "learning_rate": 9.01791663766155e-07, - "loss": 1.1371, - "step": 4023 - }, - { - "epoch": 0.5455161662034841, - "grad_norm": 1.5740448168207701, - "learning_rate": 9.01354658179912e-07, - "loss": 1.1111, - "step": 4024 - }, - { - "epoch": 0.5456517318511489, - "grad_norm": 2.319005522267522, - "learning_rate": 9.009176716166442e-07, - "loss": 1.1273, - "step": 4025 - }, - { - "epoch": 0.5457872974988138, - "grad_norm": 1.4317388427112172, - "learning_rate": 9.004807041606217e-07, - "loss": 1.1312, - "step": 4026 - }, - { - "epoch": 0.5459228631464786, - "grad_norm": 1.51944930691087, - "learning_rate": 9.000437558961094e-07, - "loss": 1.0891, - "step": 4027 - }, - { - "epoch": 0.5460584287941436, - "grad_norm": 1.4334466206834038, - "learning_rate": 8.996068269073701e-07, - "loss": 1.1364, - "step": 4028 - }, - { - "epoch": 0.5461939944418085, - "grad_norm": 1.7291177231224637, - "learning_rate": 8.991699172786614e-07, - "loss": 1.1053, - "step": 4029 - }, - { - "epoch": 0.5463295600894733, - "grad_norm": 1.8521865637257795, - "learning_rate": 8.987330270942388e-07, - "loss": 1.1823, - "step": 4030 - }, - { - "epoch": 0.5464651257371382, - "grad_norm": 1.5103781134402934, - "learning_rate": 8.98296156438352e-07, - "loss": 1.143, - "step": 4031 - }, - { - "epoch": 0.546600691384803, - "grad_norm": 1.9161517374485035, - "learning_rate": 8.978593053952492e-07, - "loss": 1.1192, - "step": 4032 - }, - { - "epoch": 0.546736257032468, - "grad_norm": 1.5987047520662883, - "learning_rate": 8.974224740491725e-07, - "loss": 1.1456, - "step": 4033 - }, - { - "epoch": 0.5468718226801329, - "grad_norm": 1.5835721015942135, - "learning_rate": 8.969856624843625e-07, - "loss": 1.138, - "step": 4034 - }, - { - "epoch": 0.5470073883277977, - "grad_norm": 1.60784914215882, - "learning_rate": 8.965488707850539e-07, - "loss": 1.139, - "step": 4035 - }, - { - "epoch": 0.5471429539754626, - "grad_norm": 1.5528551580397438, - "learning_rate": 8.961120990354794e-07, - "loss": 1.1555, - "step": 4036 - }, - { - "epoch": 0.5472785196231275, - "grad_norm": 1.6469743510735977, - "learning_rate": 8.956753473198662e-07, - "loss": 1.1493, - "step": 4037 - }, - { - "epoch": 0.5474140852707924, - "grad_norm": 2.38839964712956, - "learning_rate": 8.952386157224391e-07, - "loss": 1.1395, - "step": 4038 - }, - { - "epoch": 0.5475496509184573, - "grad_norm": 1.776608236131249, - "learning_rate": 8.948019043274181e-07, - "loss": 1.1326, - "step": 4039 - }, - { - "epoch": 0.5476852165661221, - "grad_norm": 1.839164829703461, - "learning_rate": 8.943652132190189e-07, - "loss": 1.149, - "step": 4040 - }, - { - "epoch": 0.547820782213787, - "grad_norm": 2.387492187113473, - "learning_rate": 8.939285424814551e-07, - "loss": 1.1462, - "step": 4041 - }, - { - "epoch": 0.5479563478614519, - "grad_norm": 1.5765621283512716, - "learning_rate": 8.934918921989341e-07, - "loss": 1.1322, - "step": 4042 - }, - { - "epoch": 0.5480919135091168, - "grad_norm": 1.5783106090653327, - "learning_rate": 8.930552624556615e-07, - "loss": 1.1451, - "step": 4043 - }, - { - "epoch": 0.5482274791567817, - "grad_norm": 1.7076706248871436, - "learning_rate": 8.92618653335837e-07, - "loss": 1.1458, - "step": 4044 - }, - { - "epoch": 0.5483630448044465, - "grad_norm": 1.7964338042543342, - "learning_rate": 8.921820649236576e-07, - "loss": 1.1294, - "step": 4045 - }, - { - "epoch": 0.5484986104521115, - "grad_norm": 2.019596569141138, - "learning_rate": 8.917454973033161e-07, - "loss": 1.1157, - "step": 4046 - }, - { - "epoch": 0.5486341760997763, - "grad_norm": 1.6900258529138212, - "learning_rate": 8.913089505590007e-07, - "loss": 1.1533, - "step": 4047 - }, - { - "epoch": 0.5487697417474412, - "grad_norm": 1.669594833773368, - "learning_rate": 8.908724247748963e-07, - "loss": 1.1414, - "step": 4048 - }, - { - "epoch": 0.5489053073951061, - "grad_norm": 3.0280010844862546, - "learning_rate": 8.904359200351837e-07, - "loss": 1.1753, - "step": 4049 - }, - { - "epoch": 0.5490408730427709, - "grad_norm": 2.927262357133036, - "learning_rate": 8.899994364240385e-07, - "loss": 1.1306, - "step": 4050 - }, - { - "epoch": 0.5491764386904359, - "grad_norm": 1.5866021820075271, - "learning_rate": 8.895629740256343e-07, - "loss": 1.1184, - "step": 4051 - }, - { - "epoch": 0.5493120043381007, - "grad_norm": 1.5358775066203663, - "learning_rate": 8.891265329241387e-07, - "loss": 1.2029, - "step": 4052 - }, - { - "epoch": 0.5494475699857656, - "grad_norm": 1.7952816333831787, - "learning_rate": 8.886901132037155e-07, - "loss": 1.1744, - "step": 4053 - }, - { - "epoch": 0.5495831356334305, - "grad_norm": 1.6802241386413324, - "learning_rate": 8.88253714948526e-07, - "loss": 1.1664, - "step": 4054 - }, - { - "epoch": 0.5497187012810953, - "grad_norm": 1.8013425138862826, - "learning_rate": 8.87817338242725e-07, - "loss": 1.1747, - "step": 4055 - }, - { - "epoch": 0.5498542669287603, - "grad_norm": 1.5769747426029008, - "learning_rate": 8.873809831704652e-07, - "loss": 1.1463, - "step": 4056 - }, - { - "epoch": 0.5499898325764251, - "grad_norm": 1.7103010334023014, - "learning_rate": 8.869446498158935e-07, - "loss": 1.1267, - "step": 4057 - }, - { - "epoch": 0.55012539822409, - "grad_norm": 1.7753447197658734, - "learning_rate": 8.865083382631539e-07, - "loss": 1.1408, - "step": 4058 - }, - { - "epoch": 0.5502609638717549, - "grad_norm": 1.4197755602998843, - "learning_rate": 8.860720485963851e-07, - "loss": 1.1402, - "step": 4059 - }, - { - "epoch": 0.5503965295194198, - "grad_norm": 1.8720053106006902, - "learning_rate": 8.856357808997229e-07, - "loss": 1.1575, - "step": 4060 - }, - { - "epoch": 0.5505320951670847, - "grad_norm": 1.553173999315336, - "learning_rate": 8.851995352572972e-07, - "loss": 1.1548, - "step": 4061 - }, - { - "epoch": 0.5506676608147495, - "grad_norm": 1.6992505710878092, - "learning_rate": 8.847633117532353e-07, - "loss": 1.1412, - "step": 4062 - }, - { - "epoch": 0.5508032264624144, - "grad_norm": 1.6566246197547656, - "learning_rate": 8.843271104716588e-07, - "loss": 1.1824, - "step": 4063 - }, - { - "epoch": 0.5509387921100793, - "grad_norm": 1.536544493617469, - "learning_rate": 8.838909314966863e-07, - "loss": 1.1413, - "step": 4064 - }, - { - "epoch": 0.5510743577577442, - "grad_norm": 2.5137027779332204, - "learning_rate": 8.834547749124307e-07, - "loss": 1.1465, - "step": 4065 - }, - { - "epoch": 0.5512099234054091, - "grad_norm": 1.8263726324470377, - "learning_rate": 8.830186408030023e-07, - "loss": 1.1111, - "step": 4066 - }, - { - "epoch": 0.5513454890530739, - "grad_norm": 1.45775743083772, - "learning_rate": 8.825825292525056e-07, - "loss": 1.1779, - "step": 4067 - }, - { - "epoch": 0.5514810547007388, - "grad_norm": 1.5677916855743508, - "learning_rate": 8.821464403450408e-07, - "loss": 1.1543, - "step": 4068 - }, - { - "epoch": 0.5516166203484038, - "grad_norm": 2.8766285864445043, - "learning_rate": 8.817103741647052e-07, - "loss": 1.1138, - "step": 4069 - }, - { - "epoch": 0.5517521859960686, - "grad_norm": 1.6499324689095587, - "learning_rate": 8.812743307955899e-07, - "loss": 1.1673, - "step": 4070 - }, - { - "epoch": 0.5518877516437335, - "grad_norm": 1.7061261754153114, - "learning_rate": 8.80838310321783e-07, - "loss": 1.119, - "step": 4071 - }, - { - "epoch": 0.5520233172913983, - "grad_norm": 1.6810894644905399, - "learning_rate": 8.80402312827367e-07, - "loss": 1.1243, - "step": 4072 - }, - { - "epoch": 0.5521588829390632, - "grad_norm": 1.5871141876578345, - "learning_rate": 8.799663383964213e-07, - "loss": 1.1108, - "step": 4073 - }, - { - "epoch": 0.5522944485867282, - "grad_norm": 1.6560511097416506, - "learning_rate": 8.795303871130196e-07, - "loss": 1.1434, - "step": 4074 - }, - { - "epoch": 0.552430014234393, - "grad_norm": 1.4789641874559005, - "learning_rate": 8.790944590612318e-07, - "loss": 1.1275, - "step": 4075 - }, - { - "epoch": 0.5525655798820579, - "grad_norm": 1.5551134249759244, - "learning_rate": 8.786585543251232e-07, - "loss": 1.1423, - "step": 4076 - }, - { - "epoch": 0.5527011455297227, - "grad_norm": 1.8267683333062337, - "learning_rate": 8.782226729887546e-07, - "loss": 1.133, - "step": 4077 - }, - { - "epoch": 0.5528367111773876, - "grad_norm": 1.4594944535754646, - "learning_rate": 8.777868151361823e-07, - "loss": 1.1452, - "step": 4078 - }, - { - "epoch": 0.5529722768250526, - "grad_norm": 1.511131069141946, - "learning_rate": 8.773509808514581e-07, - "loss": 1.0993, - "step": 4079 - }, - { - "epoch": 0.5531078424727174, - "grad_norm": 1.5737444562049019, - "learning_rate": 8.769151702186289e-07, - "loss": 1.1499, - "step": 4080 - }, - { - "epoch": 0.5532434081203823, - "grad_norm": 3.1925465794375, - "learning_rate": 8.764793833217377e-07, - "loss": 1.1563, - "step": 4081 - }, - { - "epoch": 0.5533789737680471, - "grad_norm": 3.178844614030592, - "learning_rate": 8.760436202448223e-07, - "loss": 1.0882, - "step": 4082 - }, - { - "epoch": 0.553514539415712, - "grad_norm": 1.5358409438204397, - "learning_rate": 8.756078810719163e-07, - "loss": 1.1407, - "step": 4083 - }, - { - "epoch": 0.553650105063377, - "grad_norm": 1.5228624762339982, - "learning_rate": 8.751721658870488e-07, - "loss": 1.1338, - "step": 4084 - }, - { - "epoch": 0.5537856707110418, - "grad_norm": 1.8512435845546995, - "learning_rate": 8.747364747742433e-07, - "loss": 1.1122, - "step": 4085 - }, - { - "epoch": 0.5539212363587067, - "grad_norm": 1.7855864403529425, - "learning_rate": 8.743008078175202e-07, - "loss": 1.1651, - "step": 4086 - }, - { - "epoch": 0.5540568020063716, - "grad_norm": 4.237322228967542, - "learning_rate": 8.73865165100894e-07, - "loss": 1.1378, - "step": 4087 - }, - { - "epoch": 0.5541923676540365, - "grad_norm": 1.5079255229872401, - "learning_rate": 8.734295467083752e-07, - "loss": 1.1364, - "step": 4088 - }, - { - "epoch": 0.5543279333017014, - "grad_norm": 2.0962834264316443, - "learning_rate": 8.729939527239688e-07, - "loss": 1.1358, - "step": 4089 - }, - { - "epoch": 0.5544634989493662, - "grad_norm": 1.8220625360290157, - "learning_rate": 8.725583832316767e-07, - "loss": 1.1538, - "step": 4090 - }, - { - "epoch": 0.5545990645970311, - "grad_norm": 1.5646060382839637, - "learning_rate": 8.721228383154939e-07, - "loss": 1.1571, - "step": 4091 - }, - { - "epoch": 0.554734630244696, - "grad_norm": 1.8441176509926933, - "learning_rate": 8.716873180594128e-07, - "loss": 1.112, - "step": 4092 - }, - { - "epoch": 0.5548701958923609, - "grad_norm": 1.6309013026839787, - "learning_rate": 8.71251822547419e-07, - "loss": 1.1586, - "step": 4093 - }, - { - "epoch": 0.5550057615400258, - "grad_norm": 1.7096390223376987, - "learning_rate": 8.708163518634956e-07, - "loss": 1.155, - "step": 4094 - }, - { - "epoch": 0.5551413271876906, - "grad_norm": 1.531360610982817, - "learning_rate": 8.703809060916188e-07, - "loss": 1.1289, - "step": 4095 - }, - { - "epoch": 0.5552768928353555, - "grad_norm": 1.69197801219766, - "learning_rate": 8.699454853157608e-07, - "loss": 1.1492, - "step": 4096 - }, - { - "epoch": 0.5554124584830205, - "grad_norm": 1.540872990384378, - "learning_rate": 8.695100896198898e-07, - "loss": 1.1395, - "step": 4097 - }, - { - "epoch": 0.5555480241306853, - "grad_norm": 1.756706166765803, - "learning_rate": 8.690747190879676e-07, - "loss": 1.1524, - "step": 4098 - }, - { - "epoch": 0.5556835897783502, - "grad_norm": 1.5406728588137266, - "learning_rate": 8.686393738039527e-07, - "loss": 1.1243, - "step": 4099 - }, - { - "epoch": 0.555819155426015, - "grad_norm": 1.6981763023563432, - "learning_rate": 8.682040538517973e-07, - "loss": 1.1504, - "step": 4100 - }, - { - "epoch": 0.5559547210736799, - "grad_norm": 2.1741822333265977, - "learning_rate": 8.677687593154503e-07, - "loss": 1.1097, - "step": 4101 - }, - { - "epoch": 0.5560902867213449, - "grad_norm": 1.6945949565923084, - "learning_rate": 8.673334902788536e-07, - "loss": 1.1221, - "step": 4102 - }, - { - "epoch": 0.5562258523690097, - "grad_norm": 3.3362684873111466, - "learning_rate": 8.668982468259467e-07, - "loss": 1.1875, - "step": 4103 - }, - { - "epoch": 0.5563614180166746, - "grad_norm": 1.8061470829881572, - "learning_rate": 8.664630290406618e-07, - "loss": 1.1572, - "step": 4104 - }, - { - "epoch": 0.5564969836643394, - "grad_norm": 1.4574612916406884, - "learning_rate": 8.660278370069281e-07, - "loss": 1.1478, - "step": 4105 - }, - { - "epoch": 0.5566325493120043, - "grad_norm": 1.4337854513912798, - "learning_rate": 8.655926708086684e-07, - "loss": 1.1405, - "step": 4106 - }, - { - "epoch": 0.5567681149596693, - "grad_norm": 1.5151889264026215, - "learning_rate": 8.651575305298011e-07, - "loss": 1.1347, - "step": 4107 - }, - { - "epoch": 0.5569036806073341, - "grad_norm": 1.5882214040450653, - "learning_rate": 8.6472241625424e-07, - "loss": 1.1653, - "step": 4108 - }, - { - "epoch": 0.557039246254999, - "grad_norm": 1.816518684543613, - "learning_rate": 8.642873280658924e-07, - "loss": 1.1484, - "step": 4109 - }, - { - "epoch": 0.5571748119026638, - "grad_norm": 2.3204515837412205, - "learning_rate": 8.63852266048663e-07, - "loss": 1.1246, - "step": 4110 - }, - { - "epoch": 0.5573103775503288, - "grad_norm": 1.6007596147627656, - "learning_rate": 8.634172302864491e-07, - "loss": 1.1281, - "step": 4111 - }, - { - "epoch": 0.5574459431979937, - "grad_norm": 7.411235995537328, - "learning_rate": 8.629822208631442e-07, - "loss": 1.132, - "step": 4112 - }, - { - "epoch": 0.5575815088456585, - "grad_norm": 1.52127921297392, - "learning_rate": 8.625472378626365e-07, - "loss": 1.1268, - "step": 4113 - }, - { - "epoch": 0.5577170744933234, - "grad_norm": 1.5914875958245174, - "learning_rate": 8.62112281368809e-07, - "loss": 1.128, - "step": 4114 - }, - { - "epoch": 0.5578526401409882, - "grad_norm": 1.8158463922004142, - "learning_rate": 8.616773514655395e-07, - "loss": 1.1038, - "step": 4115 - }, - { - "epoch": 0.5579882057886532, - "grad_norm": 1.4161768772189804, - "learning_rate": 8.612424482367014e-07, - "loss": 1.1264, - "step": 4116 - }, - { - "epoch": 0.5581237714363181, - "grad_norm": 1.3852361117271006, - "learning_rate": 8.608075717661611e-07, - "loss": 1.1212, - "step": 4117 - }, - { - "epoch": 0.5582593370839829, - "grad_norm": 2.021346167164917, - "learning_rate": 8.603727221377826e-07, - "loss": 1.1638, - "step": 4118 - }, - { - "epoch": 0.5583949027316478, - "grad_norm": 1.758990582339411, - "learning_rate": 8.599378994354218e-07, - "loss": 1.1775, - "step": 4119 - }, - { - "epoch": 0.5585304683793126, - "grad_norm": 2.484619368477784, - "learning_rate": 8.595031037429321e-07, - "loss": 1.134, - "step": 4120 - }, - { - "epoch": 0.5586660340269776, - "grad_norm": 1.7540577414119942, - "learning_rate": 8.590683351441594e-07, - "loss": 1.1717, - "step": 4121 - }, - { - "epoch": 0.5588015996746425, - "grad_norm": 1.5048262816174063, - "learning_rate": 8.586335937229462e-07, - "loss": 1.1679, - "step": 4122 - }, - { - "epoch": 0.5589371653223073, - "grad_norm": 1.7221170563003156, - "learning_rate": 8.581988795631285e-07, - "loss": 1.1521, - "step": 4123 - }, - { - "epoch": 0.5590727309699722, - "grad_norm": 1.916362032197253, - "learning_rate": 8.577641927485373e-07, - "loss": 1.1292, - "step": 4124 - }, - { - "epoch": 0.559208296617637, - "grad_norm": 2.1496549172633936, - "learning_rate": 8.573295333629991e-07, - "loss": 1.1522, - "step": 4125 - }, - { - "epoch": 0.559343862265302, - "grad_norm": 2.075927915023682, - "learning_rate": 8.568949014903339e-07, - "loss": 1.1397, - "step": 4126 - }, - { - "epoch": 0.5594794279129669, - "grad_norm": 2.5310569930623776, - "learning_rate": 8.564602972143576e-07, - "loss": 1.1689, - "step": 4127 - }, - { - "epoch": 0.5596149935606317, - "grad_norm": 1.658375959252895, - "learning_rate": 8.560257206188797e-07, - "loss": 1.13, - "step": 4128 - }, - { - "epoch": 0.5597505592082966, - "grad_norm": 1.7098611553200727, - "learning_rate": 8.555911717877053e-07, - "loss": 1.1316, - "step": 4129 - }, - { - "epoch": 0.5598861248559615, - "grad_norm": 1.8879600901945481, - "learning_rate": 8.551566508046334e-07, - "loss": 1.1367, - "step": 4130 - }, - { - "epoch": 0.5600216905036264, - "grad_norm": 1.5815500581129023, - "learning_rate": 8.547221577534583e-07, - "loss": 1.1334, - "step": 4131 - }, - { - "epoch": 0.5601572561512913, - "grad_norm": 1.4989039763667082, - "learning_rate": 8.542876927179679e-07, - "loss": 1.1495, - "step": 4132 - }, - { - "epoch": 0.5602928217989561, - "grad_norm": 2.3770048467249536, - "learning_rate": 8.538532557819463e-07, - "loss": 1.113, - "step": 4133 - }, - { - "epoch": 0.560428387446621, - "grad_norm": 1.762354992705088, - "learning_rate": 8.534188470291704e-07, - "loss": 1.1304, - "step": 4134 - }, - { - "epoch": 0.5605639530942859, - "grad_norm": 1.4167257169555996, - "learning_rate": 8.529844665434129e-07, - "loss": 1.1572, - "step": 4135 - }, - { - "epoch": 0.5606995187419508, - "grad_norm": 1.880270593390615, - "learning_rate": 8.525501144084409e-07, - "loss": 1.1326, - "step": 4136 - }, - { - "epoch": 0.5608350843896157, - "grad_norm": 1.5883771049211937, - "learning_rate": 8.521157907080148e-07, - "loss": 1.1226, - "step": 4137 - }, - { - "epoch": 0.5609706500372805, - "grad_norm": 2.1708827611390524, - "learning_rate": 8.516814955258916e-07, - "loss": 1.11, - "step": 4138 - }, - { - "epoch": 0.5611062156849455, - "grad_norm": 1.6982236479696795, - "learning_rate": 8.512472289458208e-07, - "loss": 1.1804, - "step": 4139 - }, - { - "epoch": 0.5612417813326103, - "grad_norm": 3.7235935837303162, - "learning_rate": 8.508129910515482e-07, - "loss": 1.1351, - "step": 4140 - }, - { - "epoch": 0.5613773469802752, - "grad_norm": 1.745562815076997, - "learning_rate": 8.503787819268124e-07, - "loss": 1.1503, - "step": 4141 - }, - { - "epoch": 0.5615129126279401, - "grad_norm": 5.710598146962606, - "learning_rate": 8.499446016553473e-07, - "loss": 1.1433, - "step": 4142 - }, - { - "epoch": 0.5616484782756049, - "grad_norm": 1.557511812111272, - "learning_rate": 8.495104503208816e-07, - "loss": 1.1832, - "step": 4143 - }, - { - "epoch": 0.5617840439232699, - "grad_norm": 1.4614163830609677, - "learning_rate": 8.490763280071375e-07, - "loss": 1.1374, - "step": 4144 - }, - { - "epoch": 0.5619196095709347, - "grad_norm": 1.6929700092828959, - "learning_rate": 8.486422347978323e-07, - "loss": 1.1075, - "step": 4145 - }, - { - "epoch": 0.5620551752185996, - "grad_norm": 1.8496309688548889, - "learning_rate": 8.482081707766775e-07, - "loss": 1.104, - "step": 4146 - }, - { - "epoch": 0.5621907408662645, - "grad_norm": 2.531439762693102, - "learning_rate": 8.477741360273785e-07, - "loss": 1.1571, - "step": 4147 - }, - { - "epoch": 0.5623263065139293, - "grad_norm": 1.5116159642284632, - "learning_rate": 8.47340130633636e-07, - "loss": 1.1174, - "step": 4148 - }, - { - "epoch": 0.5624618721615943, - "grad_norm": 1.5365249262748184, - "learning_rate": 8.46906154679144e-07, - "loss": 1.148, - "step": 4149 - }, - { - "epoch": 0.5625974378092591, - "grad_norm": 1.513199881440175, - "learning_rate": 8.46472208247592e-07, - "loss": 1.1428, - "step": 4150 - }, - { - "epoch": 0.562733003456924, - "grad_norm": 1.686410703702844, - "learning_rate": 8.460382914226628e-07, - "loss": 1.1106, - "step": 4151 - }, - { - "epoch": 0.5628685691045889, - "grad_norm": 1.6509314242219109, - "learning_rate": 8.456044042880333e-07, - "loss": 1.1232, - "step": 4152 - }, - { - "epoch": 0.5630041347522537, - "grad_norm": 1.6649988015885475, - "learning_rate": 8.451705469273763e-07, - "loss": 1.1564, - "step": 4153 - }, - { - "epoch": 0.5631397003999187, - "grad_norm": 1.8351498647132205, - "learning_rate": 8.447367194243567e-07, - "loss": 1.1576, - "step": 4154 - }, - { - "epoch": 0.5632752660475835, - "grad_norm": 2.9326382783255562, - "learning_rate": 8.443029218626355e-07, - "loss": 1.1455, - "step": 4155 - }, - { - "epoch": 0.5634108316952484, - "grad_norm": 1.590875167527014, - "learning_rate": 8.438691543258665e-07, - "loss": 1.145, - "step": 4156 - }, - { - "epoch": 0.5635463973429133, - "grad_norm": 2.128508362687925, - "learning_rate": 8.434354168976989e-07, - "loss": 1.1449, - "step": 4157 - }, - { - "epoch": 0.5636819629905782, - "grad_norm": 2.0109811557312764, - "learning_rate": 8.430017096617751e-07, - "loss": 1.2012, - "step": 4158 - }, - { - "epoch": 0.5638175286382431, - "grad_norm": 1.6446048853103505, - "learning_rate": 8.425680327017326e-07, - "loss": 1.0808, - "step": 4159 - }, - { - "epoch": 0.5639530942859079, - "grad_norm": 1.8175384622989226, - "learning_rate": 8.42134386101202e-07, - "loss": 1.1216, - "step": 4160 - }, - { - "epoch": 0.5640886599335728, - "grad_norm": 1.5832224771485246, - "learning_rate": 8.417007699438093e-07, - "loss": 1.1436, - "step": 4161 - }, - { - "epoch": 0.5642242255812377, - "grad_norm": 2.0285999895458433, - "learning_rate": 8.412671843131731e-07, - "loss": 1.1062, - "step": 4162 - }, - { - "epoch": 0.5643597912289026, - "grad_norm": 8.683779842898055, - "learning_rate": 8.408336292929079e-07, - "loss": 1.114, - "step": 4163 - }, - { - "epoch": 0.5644953568765675, - "grad_norm": 1.843910468956719, - "learning_rate": 8.40400104966621e-07, - "loss": 1.1463, - "step": 4164 - }, - { - "epoch": 0.5646309225242324, - "grad_norm": 1.7587510345669792, - "learning_rate": 8.399666114179136e-07, - "loss": 1.1696, - "step": 4165 - }, - { - "epoch": 0.5647664881718972, - "grad_norm": 1.7402977025816908, - "learning_rate": 8.395331487303823e-07, - "loss": 1.1351, - "step": 4166 - }, - { - "epoch": 0.5649020538195622, - "grad_norm": 1.5026233189203588, - "learning_rate": 8.390997169876161e-07, - "loss": 1.1821, - "step": 4167 - }, - { - "epoch": 0.565037619467227, - "grad_norm": 1.6704764338106353, - "learning_rate": 8.386663162732001e-07, - "loss": 1.155, - "step": 4168 - }, - { - "epoch": 0.5651731851148919, - "grad_norm": 1.4111927132450768, - "learning_rate": 8.38232946670711e-07, - "loss": 1.1062, - "step": 4169 - }, - { - "epoch": 0.5653087507625568, - "grad_norm": 1.720555112228216, - "learning_rate": 8.377996082637215e-07, - "loss": 1.1148, - "step": 4170 - }, - { - "epoch": 0.5654443164102216, - "grad_norm": 1.8592739519489285, - "learning_rate": 8.37366301135797e-07, - "loss": 1.16, - "step": 4171 - }, - { - "epoch": 0.5655798820578866, - "grad_norm": 2.098706668119273, - "learning_rate": 8.369330253704979e-07, - "loss": 1.1736, - "step": 4172 - }, - { - "epoch": 0.5657154477055514, - "grad_norm": 1.7827167182987291, - "learning_rate": 8.364997810513774e-07, - "loss": 1.117, - "step": 4173 - }, - { - "epoch": 0.5658510133532163, - "grad_norm": 1.5536052660128805, - "learning_rate": 8.360665682619837e-07, - "loss": 1.103, - "step": 4174 - }, - { - "epoch": 0.5659865790008812, - "grad_norm": 2.163344098445573, - "learning_rate": 8.356333870858581e-07, - "loss": 1.1776, - "step": 4175 - }, - { - "epoch": 0.566122144648546, - "grad_norm": 1.5011990653505216, - "learning_rate": 8.352002376065364e-07, - "loss": 1.1465, - "step": 4176 - }, - { - "epoch": 0.566257710296211, - "grad_norm": 1.6712060417978678, - "learning_rate": 8.347671199075481e-07, - "loss": 1.0928, - "step": 4177 - }, - { - "epoch": 0.5663932759438758, - "grad_norm": 1.625782910802641, - "learning_rate": 8.343340340724168e-07, - "loss": 1.1443, - "step": 4178 - }, - { - "epoch": 0.5665288415915407, - "grad_norm": 1.7425650368043235, - "learning_rate": 8.339009801846589e-07, - "loss": 1.1129, - "step": 4179 - }, - { - "epoch": 0.5666644072392056, - "grad_norm": 1.5623086467890757, - "learning_rate": 8.334679583277859e-07, - "loss": 1.1029, - "step": 4180 - }, - { - "epoch": 0.5667999728868705, - "grad_norm": 1.669647325571345, - "learning_rate": 8.330349685853027e-07, - "loss": 1.1569, - "step": 4181 - }, - { - "epoch": 0.5669355385345354, - "grad_norm": 1.9363508850863382, - "learning_rate": 8.326020110407079e-07, - "loss": 1.1243, - "step": 4182 - }, - { - "epoch": 0.5670711041822002, - "grad_norm": 1.6614716632060702, - "learning_rate": 8.32169085777494e-07, - "loss": 1.1381, - "step": 4183 - }, - { - "epoch": 0.5672066698298651, - "grad_norm": 1.6305941756939906, - "learning_rate": 8.317361928791467e-07, - "loss": 1.1705, - "step": 4184 - }, - { - "epoch": 0.56734223547753, - "grad_norm": 3.337869544609601, - "learning_rate": 8.313033324291469e-07, - "loss": 1.1815, - "step": 4185 - }, - { - "epoch": 0.5674778011251949, - "grad_norm": 1.6743581746854872, - "learning_rate": 8.308705045109675e-07, - "loss": 1.1718, - "step": 4186 - }, - { - "epoch": 0.5676133667728598, - "grad_norm": 1.4448781247406894, - "learning_rate": 8.304377092080766e-07, - "loss": 1.1437, - "step": 4187 - }, - { - "epoch": 0.5677489324205246, - "grad_norm": 1.746530944790666, - "learning_rate": 8.300049466039346e-07, - "loss": 1.1674, - "step": 4188 - }, - { - "epoch": 0.5678844980681895, - "grad_norm": 1.4344432250346677, - "learning_rate": 8.295722167819973e-07, - "loss": 1.1006, - "step": 4189 - }, - { - "epoch": 0.5680200637158545, - "grad_norm": 1.5648035508012366, - "learning_rate": 8.291395198257122e-07, - "loss": 1.17, - "step": 4190 - }, - { - "epoch": 0.5681556293635193, - "grad_norm": 1.4939719733154513, - "learning_rate": 8.287068558185224e-07, - "loss": 1.1511, - "step": 4191 - }, - { - "epoch": 0.5682911950111842, - "grad_norm": 2.400169251643613, - "learning_rate": 8.282742248438634e-07, - "loss": 1.1088, - "step": 4192 - }, - { - "epoch": 0.568426760658849, - "grad_norm": 2.0472999005281998, - "learning_rate": 8.278416269851643e-07, - "loss": 1.1442, - "step": 4193 - }, - { - "epoch": 0.5685623263065139, - "grad_norm": 1.6461841698630155, - "learning_rate": 8.274090623258489e-07, - "loss": 1.1466, - "step": 4194 - }, - { - "epoch": 0.5686978919541789, - "grad_norm": 1.6796920467250323, - "learning_rate": 8.269765309493328e-07, - "loss": 1.0951, - "step": 4195 - }, - { - "epoch": 0.5688334576018437, - "grad_norm": 2.810966115088102, - "learning_rate": 8.265440329390276e-07, - "loss": 1.1468, - "step": 4196 - }, - { - "epoch": 0.5689690232495086, - "grad_norm": 1.4327547937825558, - "learning_rate": 8.261115683783361e-07, - "loss": 1.1323, - "step": 4197 - }, - { - "epoch": 0.5691045888971734, - "grad_norm": 1.9876818419593691, - "learning_rate": 8.256791373506563e-07, - "loss": 1.0961, - "step": 4198 - }, - { - "epoch": 0.5692401545448383, - "grad_norm": 2.013163599788195, - "learning_rate": 8.252467399393786e-07, - "loss": 1.1274, - "step": 4199 - }, - { - "epoch": 0.5693757201925033, - "grad_norm": 1.5928753457701152, - "learning_rate": 8.248143762278879e-07, - "loss": 1.1363, - "step": 4200 - }, - { - "epoch": 0.5695112858401681, - "grad_norm": 1.773243487553507, - "learning_rate": 8.243820462995617e-07, - "loss": 1.1018, - "step": 4201 - }, - { - "epoch": 0.569646851487833, - "grad_norm": 1.4601234580762357, - "learning_rate": 8.239497502377719e-07, - "loss": 1.1452, - "step": 4202 - }, - { - "epoch": 0.5697824171354978, - "grad_norm": 1.5806721320809127, - "learning_rate": 8.235174881258827e-07, - "loss": 1.1571, - "step": 4203 - }, - { - "epoch": 0.5699179827831627, - "grad_norm": 1.6735299791926972, - "learning_rate": 8.230852600472533e-07, - "loss": 1.153, - "step": 4204 - }, - { - "epoch": 0.5700535484308277, - "grad_norm": 1.7210041460245629, - "learning_rate": 8.226530660852349e-07, - "loss": 1.1267, - "step": 4205 - }, - { - "epoch": 0.5701891140784925, - "grad_norm": 1.5281874384083363, - "learning_rate": 8.222209063231727e-07, - "loss": 1.1164, - "step": 4206 - }, - { - "epoch": 0.5703246797261574, - "grad_norm": 1.654562702534214, - "learning_rate": 8.217887808444056e-07, - "loss": 1.1311, - "step": 4207 - }, - { - "epoch": 0.5704602453738222, - "grad_norm": 2.2514823073567034, - "learning_rate": 8.213566897322651e-07, - "loss": 1.1246, - "step": 4208 - }, - { - "epoch": 0.5705958110214872, - "grad_norm": 1.5410104711355437, - "learning_rate": 8.209246330700772e-07, - "loss": 1.1317, - "step": 4209 - }, - { - "epoch": 0.5707313766691521, - "grad_norm": 1.5038826963086989, - "learning_rate": 8.204926109411601e-07, - "loss": 1.1324, - "step": 4210 - }, - { - "epoch": 0.5708669423168169, - "grad_norm": 1.6181056261096318, - "learning_rate": 8.20060623428826e-07, - "loss": 1.1493, - "step": 4211 - }, - { - "epoch": 0.5710025079644818, - "grad_norm": 1.5867712151457343, - "learning_rate": 8.196286706163804e-07, - "loss": 1.1086, - "step": 4212 - }, - { - "epoch": 0.5711380736121466, - "grad_norm": 7.89937960169837, - "learning_rate": 8.191967525871219e-07, - "loss": 1.1306, - "step": 4213 - }, - { - "epoch": 0.5712736392598116, - "grad_norm": 2.7494990459522626, - "learning_rate": 8.187648694243423e-07, - "loss": 1.1096, - "step": 4214 - }, - { - "epoch": 0.5714092049074765, - "grad_norm": 2.596186279221983, - "learning_rate": 8.183330212113273e-07, - "loss": 1.1424, - "step": 4215 - }, - { - "epoch": 0.5715447705551413, - "grad_norm": 1.6942443075832543, - "learning_rate": 8.179012080313549e-07, - "loss": 1.1007, - "step": 4216 - }, - { - "epoch": 0.5716803362028062, - "grad_norm": 2.114614284549197, - "learning_rate": 8.174694299676974e-07, - "loss": 1.1406, - "step": 4217 - }, - { - "epoch": 0.571815901850471, - "grad_norm": 1.4953138130059591, - "learning_rate": 8.170376871036193e-07, - "loss": 1.1567, - "step": 4218 - }, - { - "epoch": 0.571951467498136, - "grad_norm": 2.2252632151734866, - "learning_rate": 8.166059795223793e-07, - "loss": 1.128, - "step": 4219 - }, - { - "epoch": 0.5720870331458009, - "grad_norm": 1.6702072450320868, - "learning_rate": 8.161743073072286e-07, - "loss": 1.1381, - "step": 4220 - }, - { - "epoch": 0.5722225987934657, - "grad_norm": 1.905048118662072, - "learning_rate": 8.157426705414113e-07, - "loss": 1.1606, - "step": 4221 - }, - { - "epoch": 0.5723581644411306, - "grad_norm": 1.7560752911915052, - "learning_rate": 8.153110693081657e-07, - "loss": 1.1227, - "step": 4222 - }, - { - "epoch": 0.5724937300887954, - "grad_norm": 1.6801407620829023, - "learning_rate": 8.148795036907224e-07, - "loss": 1.1755, - "step": 4223 - }, - { - "epoch": 0.5726292957364604, - "grad_norm": 1.4717622371875947, - "learning_rate": 8.144479737723058e-07, - "loss": 1.1179, - "step": 4224 - }, - { - "epoch": 0.5727648613841253, - "grad_norm": 2.496532982338593, - "learning_rate": 8.140164796361327e-07, - "loss": 1.1824, - "step": 4225 - }, - { - "epoch": 0.5729004270317901, - "grad_norm": 1.9908106237316874, - "learning_rate": 8.135850213654135e-07, - "loss": 1.1219, - "step": 4226 - }, - { - "epoch": 0.573035992679455, - "grad_norm": 2.7403888772431717, - "learning_rate": 8.131535990433513e-07, - "loss": 1.1293, - "step": 4227 - }, - { - "epoch": 0.5731715583271199, - "grad_norm": 1.6356470130812542, - "learning_rate": 8.127222127531429e-07, - "loss": 1.1601, - "step": 4228 - }, - { - "epoch": 0.5733071239747848, - "grad_norm": 1.463159596502155, - "learning_rate": 8.122908625779771e-07, - "loss": 1.1211, - "step": 4229 - }, - { - "epoch": 0.5734426896224497, - "grad_norm": 1.9284702580462807, - "learning_rate": 8.118595486010372e-07, - "loss": 1.1596, - "step": 4230 - }, - { - "epoch": 0.5735782552701145, - "grad_norm": 2.0724159859428926, - "learning_rate": 8.114282709054978e-07, - "loss": 1.138, - "step": 4231 - }, - { - "epoch": 0.5737138209177794, - "grad_norm": 1.6391778383200706, - "learning_rate": 8.109970295745284e-07, - "loss": 1.1556, - "step": 4232 - }, - { - "epoch": 0.5738493865654443, - "grad_norm": 1.5004457825960955, - "learning_rate": 8.105658246912895e-07, - "loss": 1.107, - "step": 4233 - }, - { - "epoch": 0.5739849522131092, - "grad_norm": 2.2948444050508128, - "learning_rate": 8.101346563389363e-07, - "loss": 1.0961, - "step": 4234 - }, - { - "epoch": 0.5741205178607741, - "grad_norm": 1.5472861971138319, - "learning_rate": 8.097035246006161e-07, - "loss": 1.1634, - "step": 4235 - }, - { - "epoch": 0.5742560835084389, - "grad_norm": 1.7251769045441083, - "learning_rate": 8.092724295594685e-07, - "loss": 1.1368, - "step": 4236 - }, - { - "epoch": 0.5743916491561039, - "grad_norm": 1.5063305982521753, - "learning_rate": 8.088413712986279e-07, - "loss": 1.1586, - "step": 4237 - }, - { - "epoch": 0.5745272148037687, - "grad_norm": 2.021975362500354, - "learning_rate": 8.084103499012194e-07, - "loss": 1.1386, - "step": 4238 - }, - { - "epoch": 0.5746627804514336, - "grad_norm": 1.4317890020690693, - "learning_rate": 8.07979365450363e-07, - "loss": 1.1565, - "step": 4239 - }, - { - "epoch": 0.5747983460990985, - "grad_norm": 1.4902287978800997, - "learning_rate": 8.075484180291701e-07, - "loss": 1.141, - "step": 4240 - }, - { - "epoch": 0.5749339117467633, - "grad_norm": 1.4252132564713151, - "learning_rate": 8.071175077207457e-07, - "loss": 1.1601, - "step": 4241 - }, - { - "epoch": 0.5750694773944283, - "grad_norm": 1.6458633630693131, - "learning_rate": 8.066866346081873e-07, - "loss": 1.1294, - "step": 4242 - }, - { - "epoch": 0.5752050430420931, - "grad_norm": 1.6721287224998935, - "learning_rate": 8.062557987745856e-07, - "loss": 1.1416, - "step": 4243 - }, - { - "epoch": 0.575340608689758, - "grad_norm": 2.322995240814629, - "learning_rate": 8.058250003030238e-07, - "loss": 1.2021, - "step": 4244 - }, - { - "epoch": 0.5754761743374229, - "grad_norm": 1.7332403179183402, - "learning_rate": 8.053942392765781e-07, - "loss": 1.1506, - "step": 4245 - }, - { - "epoch": 0.5756117399850877, - "grad_norm": 1.7796671486068016, - "learning_rate": 8.049635157783169e-07, - "loss": 1.1379, - "step": 4246 - }, - { - "epoch": 0.5757473056327527, - "grad_norm": 1.7032840107375538, - "learning_rate": 8.045328298913024e-07, - "loss": 1.1451, - "step": 4247 - }, - { - "epoch": 0.5758828712804176, - "grad_norm": 1.3640802540371904, - "learning_rate": 8.041021816985887e-07, - "loss": 1.1151, - "step": 4248 - }, - { - "epoch": 0.5760184369280824, - "grad_norm": 7.19413828799656, - "learning_rate": 8.03671571283223e-07, - "loss": 1.1518, - "step": 4249 - }, - { - "epoch": 0.5761540025757473, - "grad_norm": 1.620661897286451, - "learning_rate": 8.03240998728245e-07, - "loss": 1.1461, - "step": 4250 - }, - { - "epoch": 0.5762895682234122, - "grad_norm": 1.6139080968962773, - "learning_rate": 8.028104641166871e-07, - "loss": 1.0997, - "step": 4251 - }, - { - "epoch": 0.5764251338710771, - "grad_norm": 2.9019730310763703, - "learning_rate": 8.02379967531575e-07, - "loss": 1.1382, - "step": 4252 - }, - { - "epoch": 0.576560699518742, - "grad_norm": 2.0892329676796115, - "learning_rate": 8.019495090559257e-07, - "loss": 1.1344, - "step": 4253 - }, - { - "epoch": 0.5766962651664068, - "grad_norm": 1.4074684769987518, - "learning_rate": 8.015190887727509e-07, - "loss": 1.1345, - "step": 4254 - }, - { - "epoch": 0.5768318308140717, - "grad_norm": 1.5951014016177483, - "learning_rate": 8.010887067650526e-07, - "loss": 1.1202, - "step": 4255 - }, - { - "epoch": 0.5769673964617366, - "grad_norm": 1.897722770254828, - "learning_rate": 8.006583631158275e-07, - "loss": 1.1493, - "step": 4256 - }, - { - "epoch": 0.5771029621094015, - "grad_norm": 1.6301711918749067, - "learning_rate": 8.002280579080632e-07, - "loss": 1.1269, - "step": 4257 - }, - { - "epoch": 0.5772385277570664, - "grad_norm": 2.7468641364337913, - "learning_rate": 7.997977912247413e-07, - "loss": 1.1407, - "step": 4258 - }, - { - "epoch": 0.5773740934047312, - "grad_norm": 1.8393965981427962, - "learning_rate": 7.993675631488348e-07, - "loss": 1.1069, - "step": 4259 - }, - { - "epoch": 0.5775096590523962, - "grad_norm": 1.763603688185099, - "learning_rate": 7.989373737633103e-07, - "loss": 1.1303, - "step": 4260 - }, - { - "epoch": 0.577645224700061, - "grad_norm": 2.238083483726902, - "learning_rate": 7.985072231511259e-07, - "loss": 1.0923, - "step": 4261 - }, - { - "epoch": 0.5777807903477259, - "grad_norm": 1.6713738548695456, - "learning_rate": 7.980771113952335e-07, - "loss": 1.1364, - "step": 4262 - }, - { - "epoch": 0.5779163559953908, - "grad_norm": 1.4280508893524158, - "learning_rate": 7.976470385785762e-07, - "loss": 1.117, - "step": 4263 - }, - { - "epoch": 0.5780519216430556, - "grad_norm": 2.063406498628777, - "learning_rate": 7.972170047840898e-07, - "loss": 1.1438, - "step": 4264 - }, - { - "epoch": 0.5781874872907206, - "grad_norm": 1.7038284750585204, - "learning_rate": 7.967870100947038e-07, - "loss": 1.1733, - "step": 4265 - }, - { - "epoch": 0.5783230529383854, - "grad_norm": 1.4788911891554104, - "learning_rate": 7.963570545933384e-07, - "loss": 1.1096, - "step": 4266 - }, - { - "epoch": 0.5784586185860503, - "grad_norm": 1.5248524396637237, - "learning_rate": 7.95927138362908e-07, - "loss": 1.14, - "step": 4267 - }, - { - "epoch": 0.5785941842337152, - "grad_norm": 1.44808614981907, - "learning_rate": 7.954972614863177e-07, - "loss": 1.1193, - "step": 4268 - }, - { - "epoch": 0.57872974988138, - "grad_norm": 1.571796570683775, - "learning_rate": 7.950674240464667e-07, - "loss": 1.1617, - "step": 4269 - }, - { - "epoch": 0.578865315529045, - "grad_norm": 1.560266235482675, - "learning_rate": 7.946376261262449e-07, - "loss": 1.1643, - "step": 4270 - }, - { - "epoch": 0.5790008811767098, - "grad_norm": 1.7762953289875087, - "learning_rate": 7.942078678085363e-07, - "loss": 1.1632, - "step": 4271 - }, - { - "epoch": 0.5791364468243747, - "grad_norm": 1.7980997350686991, - "learning_rate": 7.937781491762156e-07, - "loss": 1.1612, - "step": 4272 - }, - { - "epoch": 0.5792720124720396, - "grad_norm": 1.5645631841721752, - "learning_rate": 7.933484703121513e-07, - "loss": 1.123, - "step": 4273 - }, - { - "epoch": 0.5794075781197044, - "grad_norm": 1.5648777726810812, - "learning_rate": 7.929188312992031e-07, - "loss": 1.1403, - "step": 4274 - }, - { - "epoch": 0.5795431437673694, - "grad_norm": 1.695827195206307, - "learning_rate": 7.924892322202236e-07, - "loss": 1.1333, - "step": 4275 - }, - { - "epoch": 0.5796787094150342, - "grad_norm": 1.4096401955599196, - "learning_rate": 7.920596731580582e-07, - "loss": 1.1352, - "step": 4276 - }, - { - "epoch": 0.5798142750626991, - "grad_norm": 1.8199379759691554, - "learning_rate": 7.91630154195543e-07, - "loss": 1.1977, - "step": 4277 - }, - { - "epoch": 0.579949840710364, - "grad_norm": 1.7295429106368632, - "learning_rate": 7.912006754155078e-07, - "loss": 1.1732, - "step": 4278 - }, - { - "epoch": 0.5800854063580289, - "grad_norm": 1.5592435144538888, - "learning_rate": 7.907712369007743e-07, - "loss": 1.1438, - "step": 4279 - }, - { - "epoch": 0.5802209720056938, - "grad_norm": 10.959757451632926, - "learning_rate": 7.903418387341564e-07, - "loss": 1.155, - "step": 4280 - }, - { - "epoch": 0.5803565376533586, - "grad_norm": 1.5203454718613547, - "learning_rate": 7.899124809984595e-07, - "loss": 1.1434, - "step": 4281 - }, - { - "epoch": 0.5804921033010235, - "grad_norm": 3.042771593135852, - "learning_rate": 7.894831637764828e-07, - "loss": 1.144, - "step": 4282 - }, - { - "epoch": 0.5806276689486884, - "grad_norm": 1.6249831308186309, - "learning_rate": 7.890538871510156e-07, - "loss": 1.1369, - "step": 4283 - }, - { - "epoch": 0.5807632345963533, - "grad_norm": 1.6681471294950783, - "learning_rate": 7.886246512048418e-07, - "loss": 1.1226, - "step": 4284 - }, - { - "epoch": 0.5808988002440182, - "grad_norm": 1.9167182135649241, - "learning_rate": 7.88195456020735e-07, - "loss": 1.0976, - "step": 4285 - }, - { - "epoch": 0.581034365891683, - "grad_norm": 1.5133723891841295, - "learning_rate": 7.87766301681463e-07, - "loss": 1.1605, - "step": 4286 - }, - { - "epoch": 0.5811699315393479, - "grad_norm": 1.6405692004700636, - "learning_rate": 7.873371882697841e-07, - "loss": 1.1416, - "step": 4287 - }, - { - "epoch": 0.5813054971870129, - "grad_norm": 1.5984149408446044, - "learning_rate": 7.869081158684503e-07, - "loss": 1.1172, - "step": 4288 - }, - { - "epoch": 0.5814410628346777, - "grad_norm": 1.7098628239447555, - "learning_rate": 7.864790845602038e-07, - "loss": 1.0889, - "step": 4289 - }, - { - "epoch": 0.5815766284823426, - "grad_norm": 2.0623568522022797, - "learning_rate": 7.860500944277809e-07, - "loss": 1.1511, - "step": 4290 - }, - { - "epoch": 0.5817121941300074, - "grad_norm": 2.2907250466284057, - "learning_rate": 7.856211455539084e-07, - "loss": 1.1398, - "step": 4291 - }, - { - "epoch": 0.5818477597776723, - "grad_norm": 2.2533290548071663, - "learning_rate": 7.851922380213053e-07, - "loss": 1.1545, - "step": 4292 - }, - { - "epoch": 0.5819833254253373, - "grad_norm": 1.8949249691713341, - "learning_rate": 7.847633719126839e-07, - "loss": 1.1011, - "step": 4293 - }, - { - "epoch": 0.5821188910730021, - "grad_norm": 1.4896049363347317, - "learning_rate": 7.84334547310747e-07, - "loss": 1.1417, - "step": 4294 - }, - { - "epoch": 0.582254456720667, - "grad_norm": 1.565041425811932, - "learning_rate": 7.839057642981905e-07, - "loss": 1.1219, - "step": 4295 - }, - { - "epoch": 0.5823900223683318, - "grad_norm": 1.4918027463042942, - "learning_rate": 7.834770229577015e-07, - "loss": 1.1652, - "step": 4296 - }, - { - "epoch": 0.5825255880159967, - "grad_norm": 1.7305034445431453, - "learning_rate": 7.830483233719597e-07, - "loss": 1.1184, - "step": 4297 - }, - { - "epoch": 0.5826611536636617, - "grad_norm": 2.5394717980821415, - "learning_rate": 7.826196656236357e-07, - "loss": 1.1215, - "step": 4298 - }, - { - "epoch": 0.5827967193113265, - "grad_norm": 2.035242049058492, - "learning_rate": 7.821910497953939e-07, - "loss": 1.0981, - "step": 4299 - }, - { - "epoch": 0.5829322849589914, - "grad_norm": 1.591090260012412, - "learning_rate": 7.817624759698884e-07, - "loss": 1.087, - "step": 4300 - }, - { - "epoch": 0.5830678506066562, - "grad_norm": 1.6905698274986207, - "learning_rate": 7.813339442297671e-07, - "loss": 1.1427, - "step": 4301 - }, - { - "epoch": 0.5832034162543211, - "grad_norm": 1.7891287815521557, - "learning_rate": 7.809054546576686e-07, - "loss": 1.1777, - "step": 4302 - }, - { - "epoch": 0.5833389819019861, - "grad_norm": 2.1210088268155483, - "learning_rate": 7.804770073362236e-07, - "loss": 1.1312, - "step": 4303 - }, - { - "epoch": 0.5834745475496509, - "grad_norm": 1.6021508024678994, - "learning_rate": 7.800486023480551e-07, - "loss": 1.1681, - "step": 4304 - }, - { - "epoch": 0.5836101131973158, - "grad_norm": 2.695259499524025, - "learning_rate": 7.796202397757771e-07, - "loss": 1.157, - "step": 4305 - }, - { - "epoch": 0.5837456788449806, - "grad_norm": 2.8578066258284798, - "learning_rate": 7.791919197019967e-07, - "loss": 1.1347, - "step": 4306 - }, - { - "epoch": 0.5838812444926456, - "grad_norm": 1.6464290721404722, - "learning_rate": 7.787636422093114e-07, - "loss": 1.157, - "step": 4307 - }, - { - "epoch": 0.5840168101403105, - "grad_norm": 2.0344201628254206, - "learning_rate": 7.783354073803114e-07, - "loss": 1.1787, - "step": 4308 - }, - { - "epoch": 0.5841523757879753, - "grad_norm": 1.6305861142085205, - "learning_rate": 7.779072152975783e-07, - "loss": 1.1397, - "step": 4309 - }, - { - "epoch": 0.5842879414356402, - "grad_norm": 1.524317147137655, - "learning_rate": 7.774790660436857e-07, - "loss": 1.1073, - "step": 4310 - }, - { - "epoch": 0.584423507083305, - "grad_norm": 1.3770611198606157, - "learning_rate": 7.770509597011986e-07, - "loss": 1.0929, - "step": 4311 - }, - { - "epoch": 0.58455907273097, - "grad_norm": 1.973469144958103, - "learning_rate": 7.766228963526744e-07, - "loss": 1.1385, - "step": 4312 - }, - { - "epoch": 0.5846946383786349, - "grad_norm": 1.520234655809796, - "learning_rate": 7.761948760806611e-07, - "loss": 1.1532, - "step": 4313 - }, - { - "epoch": 0.5848302040262997, - "grad_norm": 1.601687456898116, - "learning_rate": 7.757668989676995e-07, - "loss": 1.1051, - "step": 4314 - }, - { - "epoch": 0.5849657696739646, - "grad_norm": 1.9037796229353834, - "learning_rate": 7.753389650963212e-07, - "loss": 1.1151, - "step": 4315 - }, - { - "epoch": 0.5851013353216294, - "grad_norm": 2.672164755295703, - "learning_rate": 7.749110745490505e-07, - "loss": 1.1267, - "step": 4316 - }, - { - "epoch": 0.5852369009692944, - "grad_norm": 2.050725049135422, - "learning_rate": 7.744832274084019e-07, - "loss": 1.1322, - "step": 4317 - }, - { - "epoch": 0.5853724666169593, - "grad_norm": 1.5040309928471614, - "learning_rate": 7.740554237568832e-07, - "loss": 1.113, - "step": 4318 - }, - { - "epoch": 0.5855080322646241, - "grad_norm": 1.75903738912333, - "learning_rate": 7.736276636769925e-07, - "loss": 1.1729, - "step": 4319 - }, - { - "epoch": 0.585643597912289, - "grad_norm": 1.351364069317839, - "learning_rate": 7.731999472512196e-07, - "loss": 1.092, - "step": 4320 - }, - { - "epoch": 0.5857791635599539, - "grad_norm": 1.6309755618727242, - "learning_rate": 7.727722745620471e-07, - "loss": 1.1529, - "step": 4321 - }, - { - "epoch": 0.5859147292076188, - "grad_norm": 1.614764759357974, - "learning_rate": 7.723446456919473e-07, - "loss": 1.1533, - "step": 4322 - }, - { - "epoch": 0.5860502948552837, - "grad_norm": 1.7771194328284223, - "learning_rate": 7.719170607233861e-07, - "loss": 1.1292, - "step": 4323 - }, - { - "epoch": 0.5861858605029485, - "grad_norm": 1.7698688787390124, - "learning_rate": 7.714895197388188e-07, - "loss": 1.1332, - "step": 4324 - }, - { - "epoch": 0.5863214261506134, - "grad_norm": 1.6592989781969671, - "learning_rate": 7.710620228206944e-07, - "loss": 1.1577, - "step": 4325 - }, - { - "epoch": 0.5864569917982784, - "grad_norm": 1.5341375398369674, - "learning_rate": 7.706345700514512e-07, - "loss": 1.1573, - "step": 4326 - }, - { - "epoch": 0.5865925574459432, - "grad_norm": 1.6532436560906063, - "learning_rate": 7.702071615135212e-07, - "loss": 1.0902, - "step": 4327 - }, - { - "epoch": 0.5867281230936081, - "grad_norm": 1.5144982943009389, - "learning_rate": 7.697797972893258e-07, - "loss": 1.1175, - "step": 4328 - }, - { - "epoch": 0.5868636887412729, - "grad_norm": 1.8236322455638587, - "learning_rate": 7.693524774612797e-07, - "loss": 1.131, - "step": 4329 - }, - { - "epoch": 0.5869992543889379, - "grad_norm": 1.5506456064949399, - "learning_rate": 7.689252021117874e-07, - "loss": 1.1377, - "step": 4330 - }, - { - "epoch": 0.5871348200366028, - "grad_norm": 1.6163931298025707, - "learning_rate": 7.684979713232461e-07, - "loss": 1.1076, - "step": 4331 - }, - { - "epoch": 0.5872703856842676, - "grad_norm": 1.4818819329234871, - "learning_rate": 7.680707851780433e-07, - "loss": 1.1365, - "step": 4332 - }, - { - "epoch": 0.5874059513319325, - "grad_norm": 1.533424051460587, - "learning_rate": 7.676436437585593e-07, - "loss": 1.1286, - "step": 4333 - }, - { - "epoch": 0.5875415169795973, - "grad_norm": 2.299075859363382, - "learning_rate": 7.672165471471643e-07, - "loss": 1.151, - "step": 4334 - }, - { - "epoch": 0.5876770826272623, - "grad_norm": 4.251272787675027, - "learning_rate": 7.667894954262205e-07, - "loss": 1.1494, - "step": 4335 - }, - { - "epoch": 0.5878126482749272, - "grad_norm": 1.848765311380358, - "learning_rate": 7.66362488678082e-07, - "loss": 1.1604, - "step": 4336 - }, - { - "epoch": 0.587948213922592, - "grad_norm": 1.6877032713732425, - "learning_rate": 7.659355269850929e-07, - "loss": 1.1418, - "step": 4337 - }, - { - "epoch": 0.5880837795702569, - "grad_norm": 1.6989338325825334, - "learning_rate": 7.655086104295904e-07, - "loss": 1.1388, - "step": 4338 - }, - { - "epoch": 0.5882193452179217, - "grad_norm": 1.7903979148453333, - "learning_rate": 7.65081739093901e-07, - "loss": 1.1389, - "step": 4339 - }, - { - "epoch": 0.5883549108655867, - "grad_norm": 2.045324160508782, - "learning_rate": 7.646549130603439e-07, - "loss": 1.1631, - "step": 4340 - }, - { - "epoch": 0.5884904765132516, - "grad_norm": 1.7958708294981742, - "learning_rate": 7.642281324112292e-07, - "loss": 1.1305, - "step": 4341 - }, - { - "epoch": 0.5886260421609164, - "grad_norm": 1.5537707445583453, - "learning_rate": 7.638013972288581e-07, - "loss": 1.098, - "step": 4342 - }, - { - "epoch": 0.5887616078085813, - "grad_norm": 1.9699645418138658, - "learning_rate": 7.63374707595523e-07, - "loss": 1.1095, - "step": 4343 - }, - { - "epoch": 0.5888971734562461, - "grad_norm": 1.821184115159573, - "learning_rate": 7.629480635935082e-07, - "loss": 1.1399, - "step": 4344 - }, - { - "epoch": 0.5890327391039111, - "grad_norm": 1.7340863992677047, - "learning_rate": 7.625214653050874e-07, - "loss": 1.123, - "step": 4345 - }, - { - "epoch": 0.589168304751576, - "grad_norm": 2.0347253277602677, - "learning_rate": 7.620949128125282e-07, - "loss": 1.1293, - "step": 4346 - }, - { - "epoch": 0.5893038703992408, - "grad_norm": 3.352548394662374, - "learning_rate": 7.616684061980867e-07, - "loss": 1.1446, - "step": 4347 - }, - { - "epoch": 0.5894394360469057, - "grad_norm": 1.5710689847454289, - "learning_rate": 7.612419455440119e-07, - "loss": 1.1527, - "step": 4348 - }, - { - "epoch": 0.5895750016945706, - "grad_norm": 1.6919199296393415, - "learning_rate": 7.608155309325435e-07, - "loss": 1.1438, - "step": 4349 - }, - { - "epoch": 0.5897105673422355, - "grad_norm": 1.5288999874041473, - "learning_rate": 7.603891624459114e-07, - "loss": 1.1181, - "step": 4350 - }, - { - "epoch": 0.5898461329899004, - "grad_norm": 1.4895373443797895, - "learning_rate": 7.599628401663384e-07, - "loss": 1.168, - "step": 4351 - }, - { - "epoch": 0.5899816986375652, - "grad_norm": 1.6001538605322403, - "learning_rate": 7.595365641760367e-07, - "loss": 1.1533, - "step": 4352 - }, - { - "epoch": 0.5901172642852301, - "grad_norm": 1.5849582943377434, - "learning_rate": 7.591103345572109e-07, - "loss": 1.103, - "step": 4353 - }, - { - "epoch": 0.590252829932895, - "grad_norm": 1.613132409144807, - "learning_rate": 7.58684151392055e-07, - "loss": 1.1587, - "step": 4354 - }, - { - "epoch": 0.5903883955805599, - "grad_norm": 1.662308406391821, - "learning_rate": 7.582580147627562e-07, - "loss": 1.163, - "step": 4355 - }, - { - "epoch": 0.5905239612282248, - "grad_norm": 2.5664563299579024, - "learning_rate": 7.578319247514906e-07, - "loss": 1.0964, - "step": 4356 - }, - { - "epoch": 0.5906595268758896, - "grad_norm": 1.4563353973700353, - "learning_rate": 7.574058814404272e-07, - "loss": 1.1526, - "step": 4357 - }, - { - "epoch": 0.5907950925235546, - "grad_norm": 1.82174490414821, - "learning_rate": 7.569798849117241e-07, - "loss": 1.1545, - "step": 4358 - }, - { - "epoch": 0.5909306581712194, - "grad_norm": 1.8345189938798672, - "learning_rate": 7.565539352475325e-07, - "loss": 1.1422, - "step": 4359 - }, - { - "epoch": 0.5910662238188843, - "grad_norm": 1.6173024548892285, - "learning_rate": 7.561280325299924e-07, - "loss": 1.1083, - "step": 4360 - }, - { - "epoch": 0.5912017894665492, - "grad_norm": 1.6459614752831229, - "learning_rate": 7.557021768412366e-07, - "loss": 1.1533, - "step": 4361 - }, - { - "epoch": 0.591337355114214, - "grad_norm": 1.5593394005804868, - "learning_rate": 7.552763682633877e-07, - "loss": 1.1077, - "step": 4362 - }, - { - "epoch": 0.591472920761879, - "grad_norm": 1.9138300348443826, - "learning_rate": 7.548506068785589e-07, - "loss": 1.0824, - "step": 4363 - }, - { - "epoch": 0.5916084864095438, - "grad_norm": 2.1602719300077484, - "learning_rate": 7.544248927688561e-07, - "loss": 1.1568, - "step": 4364 - }, - { - "epoch": 0.5917440520572087, - "grad_norm": 1.7849413613539333, - "learning_rate": 7.539992260163735e-07, - "loss": 1.1214, - "step": 4365 - }, - { - "epoch": 0.5918796177048736, - "grad_norm": 1.8800649555140474, - "learning_rate": 7.535736067031991e-07, - "loss": 1.1737, - "step": 4366 - }, - { - "epoch": 0.5920151833525384, - "grad_norm": 1.6494114195479546, - "learning_rate": 7.531480349114088e-07, - "loss": 1.122, - "step": 4367 - }, - { - "epoch": 0.5921507490002034, - "grad_norm": 1.529138063293265, - "learning_rate": 7.527225107230721e-07, - "loss": 1.1351, - "step": 4368 - }, - { - "epoch": 0.5922863146478682, - "grad_norm": 1.5130405444146295, - "learning_rate": 7.52297034220247e-07, - "loss": 1.1283, - "step": 4369 - }, - { - "epoch": 0.5924218802955331, - "grad_norm": 4.706591519487228, - "learning_rate": 7.518716054849836e-07, - "loss": 1.1528, - "step": 4370 - }, - { - "epoch": 0.592557445943198, - "grad_norm": 1.7455720069843457, - "learning_rate": 7.514462245993225e-07, - "loss": 1.1469, - "step": 4371 - }, - { - "epoch": 0.5926930115908629, - "grad_norm": 1.4701551848668586, - "learning_rate": 7.51020891645295e-07, - "loss": 1.1142, - "step": 4372 - }, - { - "epoch": 0.5928285772385278, - "grad_norm": 2.3700004429718824, - "learning_rate": 7.505956067049232e-07, - "loss": 1.1661, - "step": 4373 - }, - { - "epoch": 0.5929641428861926, - "grad_norm": 1.6729472670703416, - "learning_rate": 7.501703698602202e-07, - "loss": 1.1408, - "step": 4374 - }, - { - "epoch": 0.5930997085338575, - "grad_norm": 1.8872346825549602, - "learning_rate": 7.497451811931891e-07, - "loss": 1.1285, - "step": 4375 - }, - { - "epoch": 0.5932352741815224, - "grad_norm": 1.5298969800969362, - "learning_rate": 7.493200407858245e-07, - "loss": 1.1283, - "step": 4376 - }, - { - "epoch": 0.5933708398291873, - "grad_norm": 2.273177413658046, - "learning_rate": 7.488949487201112e-07, - "loss": 1.1575, - "step": 4377 - }, - { - "epoch": 0.5935064054768522, - "grad_norm": 1.8967857670056962, - "learning_rate": 7.48469905078025e-07, - "loss": 1.1501, - "step": 4378 - }, - { - "epoch": 0.593641971124517, - "grad_norm": 1.5249667169070003, - "learning_rate": 7.480449099415322e-07, - "loss": 1.1399, - "step": 4379 - }, - { - "epoch": 0.5937775367721819, - "grad_norm": 1.8640791305739484, - "learning_rate": 7.476199633925894e-07, - "loss": 1.1326, - "step": 4380 - }, - { - "epoch": 0.5939131024198469, - "grad_norm": 1.5237063717471913, - "learning_rate": 7.471950655131451e-07, - "loss": 1.1193, - "step": 4381 - }, - { - "epoch": 0.5940486680675117, - "grad_norm": 1.5819151628531438, - "learning_rate": 7.467702163851363e-07, - "loss": 1.1204, - "step": 4382 - }, - { - "epoch": 0.5941842337151766, - "grad_norm": 1.6613672978583196, - "learning_rate": 7.463454160904927e-07, - "loss": 1.1269, - "step": 4383 - }, - { - "epoch": 0.5943197993628414, - "grad_norm": 8.20259787349857, - "learning_rate": 7.459206647111331e-07, - "loss": 1.1576, - "step": 4384 - }, - { - "epoch": 0.5944553650105063, - "grad_norm": 1.5810161301637762, - "learning_rate": 7.454959623289682e-07, - "loss": 1.1073, - "step": 4385 - }, - { - "epoch": 0.5945909306581713, - "grad_norm": 1.9423153561301443, - "learning_rate": 7.450713090258976e-07, - "loss": 1.0947, - "step": 4386 - }, - { - "epoch": 0.5947264963058361, - "grad_norm": 6.904488034547616, - "learning_rate": 7.44646704883813e-07, - "loss": 1.1489, - "step": 4387 - }, - { - "epoch": 0.594862061953501, - "grad_norm": 1.7378081900089435, - "learning_rate": 7.442221499845955e-07, - "loss": 1.1161, - "step": 4388 - }, - { - "epoch": 0.5949976276011658, - "grad_norm": 1.5454608770556548, - "learning_rate": 7.437976444101177e-07, - "loss": 1.1377, - "step": 4389 - }, - { - "epoch": 0.5951331932488307, - "grad_norm": 1.8708632468726125, - "learning_rate": 7.433731882422418e-07, - "loss": 1.1085, - "step": 4390 - }, - { - "epoch": 0.5952687588964957, - "grad_norm": 1.5704695990839246, - "learning_rate": 7.429487815628206e-07, - "loss": 1.1148, - "step": 4391 - }, - { - "epoch": 0.5954043245441605, - "grad_norm": 2.820885748485331, - "learning_rate": 7.425244244536981e-07, - "loss": 1.1692, - "step": 4392 - }, - { - "epoch": 0.5955398901918254, - "grad_norm": 1.6911282766156912, - "learning_rate": 7.421001169967076e-07, - "loss": 1.1603, - "step": 4393 - }, - { - "epoch": 0.5956754558394902, - "grad_norm": 1.7945142163729557, - "learning_rate": 7.416758592736742e-07, - "loss": 1.1097, - "step": 4394 - }, - { - "epoch": 0.5958110214871551, - "grad_norm": 1.7856664175357966, - "learning_rate": 7.41251651366412e-07, - "loss": 1.1334, - "step": 4395 - }, - { - "epoch": 0.5959465871348201, - "grad_norm": 2.9979440894555265, - "learning_rate": 7.408274933567267e-07, - "loss": 1.1217, - "step": 4396 - }, - { - "epoch": 0.5960821527824849, - "grad_norm": 1.4965119862095082, - "learning_rate": 7.404033853264131e-07, - "loss": 1.1179, - "step": 4397 - }, - { - "epoch": 0.5962177184301498, - "grad_norm": 1.5682233369074923, - "learning_rate": 7.399793273572578e-07, - "loss": 1.185, - "step": 4398 - }, - { - "epoch": 0.5963532840778146, - "grad_norm": 3.60571628703349, - "learning_rate": 7.395553195310364e-07, - "loss": 1.1406, - "step": 4399 - }, - { - "epoch": 0.5964888497254796, - "grad_norm": 1.532035864699447, - "learning_rate": 7.391313619295163e-07, - "loss": 1.1085, - "step": 4400 - }, - { - "epoch": 0.5966244153731445, - "grad_norm": 1.7899527918693714, - "learning_rate": 7.387074546344536e-07, - "loss": 1.1383, - "step": 4401 - }, - { - "epoch": 0.5967599810208093, - "grad_norm": 1.7814107476557575, - "learning_rate": 7.382835977275959e-07, - "loss": 1.1467, - "step": 4402 - }, - { - "epoch": 0.5968955466684742, - "grad_norm": 1.457756229401108, - "learning_rate": 7.378597912906805e-07, - "loss": 1.1338, - "step": 4403 - }, - { - "epoch": 0.5970311123161391, - "grad_norm": 2.794423734553538, - "learning_rate": 7.374360354054348e-07, - "loss": 1.1066, - "step": 4404 - }, - { - "epoch": 0.597166677963804, - "grad_norm": 1.6780093986153028, - "learning_rate": 7.370123301535777e-07, - "loss": 1.146, - "step": 4405 - }, - { - "epoch": 0.5973022436114689, - "grad_norm": 1.7690678007593121, - "learning_rate": 7.365886756168165e-07, - "loss": 1.1793, - "step": 4406 - }, - { - "epoch": 0.5974378092591337, - "grad_norm": 1.801333359381476, - "learning_rate": 7.3616507187685e-07, - "loss": 1.1563, - "step": 4407 - }, - { - "epoch": 0.5975733749067986, - "grad_norm": 1.604817352990756, - "learning_rate": 7.357415190153666e-07, - "loss": 1.0941, - "step": 4408 - }, - { - "epoch": 0.5977089405544636, - "grad_norm": 1.7223154704450543, - "learning_rate": 7.353180171140455e-07, - "loss": 1.1457, - "step": 4409 - }, - { - "epoch": 0.5978445062021284, - "grad_norm": 3.4203302021784294, - "learning_rate": 7.348945662545556e-07, - "loss": 1.1201, - "step": 4410 - }, - { - "epoch": 0.5979800718497933, - "grad_norm": 1.5902162714736003, - "learning_rate": 7.34471166518556e-07, - "loss": 1.1488, - "step": 4411 - }, - { - "epoch": 0.5981156374974581, - "grad_norm": 2.368560881960591, - "learning_rate": 7.340478179876957e-07, - "loss": 1.1618, - "step": 4412 - }, - { - "epoch": 0.598251203145123, - "grad_norm": 2.349930713769084, - "learning_rate": 7.336245207436147e-07, - "loss": 1.1374, - "step": 4413 - }, - { - "epoch": 0.598386768792788, - "grad_norm": 3.693893642274647, - "learning_rate": 7.332012748679419e-07, - "loss": 1.1097, - "step": 4414 - }, - { - "epoch": 0.5985223344404528, - "grad_norm": 2.123182372374166, - "learning_rate": 7.327780804422977e-07, - "loss": 1.1539, - "step": 4415 - }, - { - "epoch": 0.5986579000881177, - "grad_norm": 3.732657885234626, - "learning_rate": 7.32354937548291e-07, - "loss": 1.1636, - "step": 4416 - }, - { - "epoch": 0.5987934657357825, - "grad_norm": 1.5636966133659724, - "learning_rate": 7.319318462675223e-07, - "loss": 1.1135, - "step": 4417 - }, - { - "epoch": 0.5989290313834474, - "grad_norm": 2.1412063028498087, - "learning_rate": 7.315088066815809e-07, - "loss": 1.1184, - "step": 4418 - }, - { - "epoch": 0.5990645970311124, - "grad_norm": 2.10131816222588, - "learning_rate": 7.310858188720466e-07, - "loss": 1.1204, - "step": 4419 - }, - { - "epoch": 0.5992001626787772, - "grad_norm": 1.4857802694310593, - "learning_rate": 7.306628829204897e-07, - "loss": 1.128, - "step": 4420 - }, - { - "epoch": 0.5993357283264421, - "grad_norm": 1.6812756022268571, - "learning_rate": 7.302399989084695e-07, - "loss": 1.1633, - "step": 4421 - }, - { - "epoch": 0.5994712939741069, - "grad_norm": 2.6414115975868806, - "learning_rate": 7.298171669175365e-07, - "loss": 1.1341, - "step": 4422 - }, - { - "epoch": 0.5996068596217718, - "grad_norm": 1.5279092745997136, - "learning_rate": 7.293943870292299e-07, - "loss": 1.1402, - "step": 4423 - }, - { - "epoch": 0.5997424252694368, - "grad_norm": 1.6596064799672616, - "learning_rate": 7.289716593250798e-07, - "loss": 1.1523, - "step": 4424 - }, - { - "epoch": 0.5998779909171016, - "grad_norm": 1.5881054060334978, - "learning_rate": 7.285489838866057e-07, - "loss": 1.1569, - "step": 4425 - }, - { - "epoch": 0.6000135565647665, - "grad_norm": 1.5051428091503156, - "learning_rate": 7.281263607953177e-07, - "loss": 1.1046, - "step": 4426 - }, - { - "epoch": 0.6001491222124313, - "grad_norm": 1.6633557026924215, - "learning_rate": 7.277037901327145e-07, - "loss": 1.1129, - "step": 4427 - }, - { - "epoch": 0.6002846878600963, - "grad_norm": 1.6845907911493256, - "learning_rate": 7.272812719802865e-07, - "loss": 1.1451, - "step": 4428 - }, - { - "epoch": 0.6004202535077612, - "grad_norm": 1.7637691769981811, - "learning_rate": 7.268588064195122e-07, - "loss": 1.1296, - "step": 4429 - }, - { - "epoch": 0.600555819155426, - "grad_norm": 1.573491249240248, - "learning_rate": 7.264363935318612e-07, - "loss": 1.1393, - "step": 4430 - }, - { - "epoch": 0.6006913848030909, - "grad_norm": 1.6447310297629474, - "learning_rate": 7.260140333987925e-07, - "loss": 1.1392, - "step": 4431 - }, - { - "epoch": 0.6008269504507557, - "grad_norm": 1.5968441985542767, - "learning_rate": 7.255917261017543e-07, - "loss": 1.1326, - "step": 4432 - }, - { - "epoch": 0.6009625160984207, - "grad_norm": 1.9299702959857667, - "learning_rate": 7.25169471722186e-07, - "loss": 1.1295, - "step": 4433 - }, - { - "epoch": 0.6010980817460856, - "grad_norm": 1.8642916467071857, - "learning_rate": 7.247472703415154e-07, - "loss": 1.1499, - "step": 4434 - }, - { - "epoch": 0.6012336473937504, - "grad_norm": 1.5785690160837518, - "learning_rate": 7.243251220411612e-07, - "loss": 1.1971, - "step": 4435 - }, - { - "epoch": 0.6013692130414153, - "grad_norm": 1.4392031166462547, - "learning_rate": 7.23903026902531e-07, - "loss": 1.1415, - "step": 4436 - }, - { - "epoch": 0.6015047786890801, - "grad_norm": 7.716786804473418, - "learning_rate": 7.234809850070231e-07, - "loss": 1.0925, - "step": 4437 - }, - { - "epoch": 0.6016403443367451, - "grad_norm": 1.494721558862639, - "learning_rate": 7.230589964360242e-07, - "loss": 1.122, - "step": 4438 - }, - { - "epoch": 0.60177590998441, - "grad_norm": 1.6122072682965642, - "learning_rate": 7.226370612709119e-07, - "loss": 1.1408, - "step": 4439 - }, - { - "epoch": 0.6019114756320748, - "grad_norm": 1.7543570221656362, - "learning_rate": 7.222151795930528e-07, - "loss": 1.1208, - "step": 4440 - }, - { - "epoch": 0.6020470412797397, - "grad_norm": 1.7173564091678917, - "learning_rate": 7.21793351483804e-07, - "loss": 1.165, - "step": 4441 - }, - { - "epoch": 0.6021826069274046, - "grad_norm": 1.8338264709594905, - "learning_rate": 7.213715770245108e-07, - "loss": 1.1592, - "step": 4442 - }, - { - "epoch": 0.6023181725750695, - "grad_norm": 1.6184793816179404, - "learning_rate": 7.209498562965101e-07, - "loss": 1.1691, - "step": 4443 - }, - { - "epoch": 0.6024537382227344, - "grad_norm": 2.8146334167348135, - "learning_rate": 7.205281893811264e-07, - "loss": 1.1361, - "step": 4444 - }, - { - "epoch": 0.6025893038703992, - "grad_norm": 1.4896496195174955, - "learning_rate": 7.201065763596758e-07, - "loss": 1.1365, - "step": 4445 - }, - { - "epoch": 0.6027248695180641, - "grad_norm": 1.61393152059539, - "learning_rate": 7.196850173134628e-07, - "loss": 1.1163, - "step": 4446 - }, - { - "epoch": 0.602860435165729, - "grad_norm": 3.113226781607572, - "learning_rate": 7.192635123237809e-07, - "loss": 1.1189, - "step": 4447 - }, - { - "epoch": 0.6029960008133939, - "grad_norm": 1.5215118815895317, - "learning_rate": 7.188420614719152e-07, - "loss": 1.157, - "step": 4448 - }, - { - "epoch": 0.6031315664610588, - "grad_norm": 1.7208484890966995, - "learning_rate": 7.184206648391381e-07, - "loss": 1.1488, - "step": 4449 - }, - { - "epoch": 0.6032671321087236, - "grad_norm": 1.955896913348491, - "learning_rate": 7.179993225067136e-07, - "loss": 1.1671, - "step": 4450 - }, - { - "epoch": 0.6034026977563886, - "grad_norm": 2.05372816243167, - "learning_rate": 7.175780345558934e-07, - "loss": 1.1218, - "step": 4451 - }, - { - "epoch": 0.6035382634040534, - "grad_norm": 1.776834741907752, - "learning_rate": 7.171568010679203e-07, - "loss": 1.1305, - "step": 4452 - }, - { - "epoch": 0.6036738290517183, - "grad_norm": 1.837984027707176, - "learning_rate": 7.167356221240251e-07, - "loss": 1.126, - "step": 4453 - }, - { - "epoch": 0.6038093946993832, - "grad_norm": 1.4575844895021117, - "learning_rate": 7.163144978054296e-07, - "loss": 1.1416, - "step": 4454 - }, - { - "epoch": 0.603944960347048, - "grad_norm": 1.7511720658362409, - "learning_rate": 7.158934281933435e-07, - "loss": 1.1108, - "step": 4455 - }, - { - "epoch": 0.604080525994713, - "grad_norm": 1.5939746925819607, - "learning_rate": 7.154724133689676e-07, - "loss": 1.1491, - "step": 4456 - }, - { - "epoch": 0.6042160916423778, - "grad_norm": 1.5395649970225227, - "learning_rate": 7.150514534134905e-07, - "loss": 1.169, - "step": 4457 - }, - { - "epoch": 0.6043516572900427, - "grad_norm": 1.7011008395799496, - "learning_rate": 7.146305484080916e-07, - "loss": 1.1111, - "step": 4458 - }, - { - "epoch": 0.6044872229377076, - "grad_norm": 1.8370073996456635, - "learning_rate": 7.142096984339392e-07, - "loss": 1.1435, - "step": 4459 - }, - { - "epoch": 0.6046227885853724, - "grad_norm": 2.425330745885285, - "learning_rate": 7.137889035721898e-07, - "loss": 1.1135, - "step": 4460 - }, - { - "epoch": 0.6047583542330374, - "grad_norm": 1.8588056605775678, - "learning_rate": 7.133681639039917e-07, - "loss": 1.1276, - "step": 4461 - }, - { - "epoch": 0.6048939198807022, - "grad_norm": 1.6227868404709163, - "learning_rate": 7.129474795104802e-07, - "loss": 1.1399, - "step": 4462 - }, - { - "epoch": 0.6050294855283671, - "grad_norm": 1.6220416176634014, - "learning_rate": 7.12526850472782e-07, - "loss": 1.1435, - "step": 4463 - }, - { - "epoch": 0.605165051176032, - "grad_norm": 2.218505756586712, - "learning_rate": 7.121062768720109e-07, - "loss": 1.0991, - "step": 4464 - }, - { - "epoch": 0.6053006168236968, - "grad_norm": 1.6300872815605245, - "learning_rate": 7.116857587892724e-07, - "loss": 1.0958, - "step": 4465 - }, - { - "epoch": 0.6054361824713618, - "grad_norm": 1.5524200430634658, - "learning_rate": 7.112652963056589e-07, - "loss": 1.0963, - "step": 4466 - }, - { - "epoch": 0.6055717481190266, - "grad_norm": 2.0296785891948548, - "learning_rate": 7.108448895022544e-07, - "loss": 1.1001, - "step": 4467 - }, - { - "epoch": 0.6057073137666915, - "grad_norm": 1.75282875803731, - "learning_rate": 7.104245384601303e-07, - "loss": 1.1103, - "step": 4468 - }, - { - "epoch": 0.6058428794143564, - "grad_norm": 1.9988048030585461, - "learning_rate": 7.100042432603481e-07, - "loss": 1.1247, - "step": 4469 - }, - { - "epoch": 0.6059784450620213, - "grad_norm": 1.6007947113350756, - "learning_rate": 7.095840039839587e-07, - "loss": 1.137, - "step": 4470 - }, - { - "epoch": 0.6061140107096862, - "grad_norm": 1.7048483160488441, - "learning_rate": 7.091638207120015e-07, - "loss": 1.1557, - "step": 4471 - }, - { - "epoch": 0.606249576357351, - "grad_norm": 1.6530385538340406, - "learning_rate": 7.087436935255058e-07, - "loss": 1.1396, - "step": 4472 - }, - { - "epoch": 0.6063851420050159, - "grad_norm": 2.125404356642552, - "learning_rate": 7.083236225054901e-07, - "loss": 1.1581, - "step": 4473 - }, - { - "epoch": 0.6065207076526808, - "grad_norm": 1.8701296270779124, - "learning_rate": 7.079036077329612e-07, - "loss": 1.1368, - "step": 4474 - }, - { - "epoch": 0.6066562733003457, - "grad_norm": 1.3903795147450222, - "learning_rate": 7.074836492889158e-07, - "loss": 1.1501, - "step": 4475 - }, - { - "epoch": 0.6067918389480106, - "grad_norm": 1.7006662160514403, - "learning_rate": 7.070637472543397e-07, - "loss": 1.1423, - "step": 4476 - }, - { - "epoch": 0.6069274045956754, - "grad_norm": 1.5354216727414138, - "learning_rate": 7.066439017102076e-07, - "loss": 1.1383, - "step": 4477 - }, - { - "epoch": 0.6070629702433403, - "grad_norm": 3.238427008717177, - "learning_rate": 7.062241127374838e-07, - "loss": 1.0689, - "step": 4478 - }, - { - "epoch": 0.6071985358910053, - "grad_norm": 1.745015301833533, - "learning_rate": 7.058043804171203e-07, - "loss": 1.1508, - "step": 4479 - }, - { - "epoch": 0.6073341015386701, - "grad_norm": 1.5442179376694685, - "learning_rate": 7.053847048300603e-07, - "loss": 1.0993, - "step": 4480 - }, - { - "epoch": 0.607469667186335, - "grad_norm": 1.6567685812751853, - "learning_rate": 7.04965086057234e-07, - "loss": 1.1397, - "step": 4481 - }, - { - "epoch": 0.6076052328339999, - "grad_norm": 1.5174005411124134, - "learning_rate": 7.045455241795624e-07, - "loss": 1.1245, - "step": 4482 - }, - { - "epoch": 0.6077407984816647, - "grad_norm": 1.5344725671430652, - "learning_rate": 7.041260192779539e-07, - "loss": 1.1494, - "step": 4483 - }, - { - "epoch": 0.6078763641293297, - "grad_norm": 1.5032059523780748, - "learning_rate": 7.037065714333075e-07, - "loss": 1.1414, - "step": 4484 - }, - { - "epoch": 0.6080119297769945, - "grad_norm": 1.7617568134348471, - "learning_rate": 7.032871807265096e-07, - "loss": 1.1451, - "step": 4485 - }, - { - "epoch": 0.6081474954246594, - "grad_norm": 1.8124716188595564, - "learning_rate": 7.028678472384373e-07, - "loss": 1.1362, - "step": 4486 - }, - { - "epoch": 0.6082830610723243, - "grad_norm": 1.50831067470735, - "learning_rate": 7.02448571049955e-07, - "loss": 1.1301, - "step": 4487 - }, - { - "epoch": 0.6084186267199891, - "grad_norm": 1.6425129359874426, - "learning_rate": 7.020293522419168e-07, - "loss": 1.1432, - "step": 4488 - }, - { - "epoch": 0.6085541923676541, - "grad_norm": 1.6645939157235385, - "learning_rate": 7.016101908951663e-07, - "loss": 1.1608, - "step": 4489 - }, - { - "epoch": 0.6086897580153189, - "grad_norm": 1.6265322101399269, - "learning_rate": 7.011910870905349e-07, - "loss": 1.0763, - "step": 4490 - }, - { - "epoch": 0.6088253236629838, - "grad_norm": 2.8859283740483352, - "learning_rate": 7.00772040908844e-07, - "loss": 1.1158, - "step": 4491 - }, - { - "epoch": 0.6089608893106487, - "grad_norm": 1.8900874879735372, - "learning_rate": 7.003530524309025e-07, - "loss": 1.1095, - "step": 4492 - }, - { - "epoch": 0.6090964549583135, - "grad_norm": 1.5605791947669456, - "learning_rate": 6.999341217375103e-07, - "loss": 1.12, - "step": 4493 - }, - { - "epoch": 0.6092320206059785, - "grad_norm": 1.6171384525146804, - "learning_rate": 6.995152489094535e-07, - "loss": 1.1414, - "step": 4494 - }, - { - "epoch": 0.6093675862536433, - "grad_norm": 1.6629559288068192, - "learning_rate": 6.990964340275095e-07, - "loss": 1.1683, - "step": 4495 - }, - { - "epoch": 0.6095031519013082, - "grad_norm": 1.769845179887708, - "learning_rate": 6.986776771724427e-07, - "loss": 1.1658, - "step": 4496 - }, - { - "epoch": 0.6096387175489731, - "grad_norm": 1.4339589726480508, - "learning_rate": 6.982589784250077e-07, - "loss": 1.1551, - "step": 4497 - }, - { - "epoch": 0.609774283196638, - "grad_norm": 1.5568245261308535, - "learning_rate": 6.978403378659466e-07, - "loss": 1.1341, - "step": 4498 - }, - { - "epoch": 0.6099098488443029, - "grad_norm": 1.558099672003262, - "learning_rate": 6.974217555759913e-07, - "loss": 1.1273, - "step": 4499 - }, - { - "epoch": 0.6100454144919677, - "grad_norm": 2.955644305688091, - "learning_rate": 6.970032316358623e-07, - "loss": 1.1484, - "step": 4500 - }, - { - "epoch": 0.6101809801396326, - "grad_norm": 1.5434271191731803, - "learning_rate": 6.965847661262681e-07, - "loss": 1.157, - "step": 4501 - }, - { - "epoch": 0.6103165457872975, - "grad_norm": 2.186985335608288, - "learning_rate": 6.96166359127907e-07, - "loss": 1.1012, - "step": 4502 - }, - { - "epoch": 0.6104521114349624, - "grad_norm": 1.8578194716237213, - "learning_rate": 6.957480107214648e-07, - "loss": 1.1162, - "step": 4503 - }, - { - "epoch": 0.6105876770826273, - "grad_norm": 1.9292318932061505, - "learning_rate": 6.953297209876174e-07, - "loss": 1.1245, - "step": 4504 - }, - { - "epoch": 0.6107232427302921, - "grad_norm": 1.7113940673614543, - "learning_rate": 6.949114900070284e-07, - "loss": 1.0758, - "step": 4505 - }, - { - "epoch": 0.610858808377957, - "grad_norm": 1.5202679227228135, - "learning_rate": 6.944933178603503e-07, - "loss": 1.1344, - "step": 4506 - }, - { - "epoch": 0.610994374025622, - "grad_norm": 19.521904748691686, - "learning_rate": 6.940752046282242e-07, - "loss": 1.1485, - "step": 4507 - }, - { - "epoch": 0.6111299396732868, - "grad_norm": 2.2389932520466154, - "learning_rate": 6.936571503912803e-07, - "loss": 1.1467, - "step": 4508 - }, - { - "epoch": 0.6112655053209517, - "grad_norm": 2.8503555433087597, - "learning_rate": 6.932391552301366e-07, - "loss": 1.1738, - "step": 4509 - }, - { - "epoch": 0.6114010709686165, - "grad_norm": 2.268224624427505, - "learning_rate": 6.928212192254006e-07, - "loss": 1.1586, - "step": 4510 - }, - { - "epoch": 0.6115366366162814, - "grad_norm": 1.8580987853971451, - "learning_rate": 6.924033424576674e-07, - "loss": 1.1565, - "step": 4511 - }, - { - "epoch": 0.6116722022639464, - "grad_norm": 1.5224275384205892, - "learning_rate": 6.91985525007522e-07, - "loss": 1.098, - "step": 4512 - }, - { - "epoch": 0.6118077679116112, - "grad_norm": 1.5491055486086598, - "learning_rate": 6.915677669555363e-07, - "loss": 1.1669, - "step": 4513 - }, - { - "epoch": 0.6119433335592761, - "grad_norm": 1.481067836938021, - "learning_rate": 6.911500683822726e-07, - "loss": 1.1545, - "step": 4514 - }, - { - "epoch": 0.6120788992069409, - "grad_norm": 1.4307051433481928, - "learning_rate": 6.907324293682803e-07, - "loss": 1.1747, - "step": 4515 - }, - { - "epoch": 0.6122144648546058, - "grad_norm": 1.6389109473528913, - "learning_rate": 6.903148499940974e-07, - "loss": 1.1027, - "step": 4516 - }, - { - "epoch": 0.6123500305022708, - "grad_norm": 1.5816612654651598, - "learning_rate": 6.898973303402516e-07, - "loss": 1.1191, - "step": 4517 - }, - { - "epoch": 0.6124855961499356, - "grad_norm": 8.488043305219726, - "learning_rate": 6.894798704872574e-07, - "loss": 1.1243, - "step": 4518 - }, - { - "epoch": 0.6126211617976005, - "grad_norm": 1.5759959150172058, - "learning_rate": 6.890624705156194e-07, - "loss": 1.1314, - "step": 4519 - }, - { - "epoch": 0.6127567274452653, - "grad_norm": 1.7537504176390346, - "learning_rate": 6.886451305058293e-07, - "loss": 1.1226, - "step": 4520 - }, - { - "epoch": 0.6128922930929303, - "grad_norm": 2.948374758370644, - "learning_rate": 6.882278505383685e-07, - "loss": 1.1482, - "step": 4521 - }, - { - "epoch": 0.6130278587405952, - "grad_norm": 1.7591021939875564, - "learning_rate": 6.878106306937053e-07, - "loss": 1.1078, - "step": 4522 - }, - { - "epoch": 0.61316342438826, - "grad_norm": 1.7170267635501548, - "learning_rate": 6.873934710522979e-07, - "loss": 1.1214, - "step": 4523 - }, - { - "epoch": 0.6132989900359249, - "grad_norm": 4.484760436003515, - "learning_rate": 6.86976371694592e-07, - "loss": 1.1618, - "step": 4524 - }, - { - "epoch": 0.6134345556835897, - "grad_norm": 2.1110000417905073, - "learning_rate": 6.865593327010221e-07, - "loss": 1.0979, - "step": 4525 - }, - { - "epoch": 0.6135701213312547, - "grad_norm": 1.917532415879903, - "learning_rate": 6.861423541520104e-07, - "loss": 1.1821, - "step": 4526 - }, - { - "epoch": 0.6137056869789196, - "grad_norm": 2.2558159011680936, - "learning_rate": 6.857254361279688e-07, - "loss": 1.1161, - "step": 4527 - }, - { - "epoch": 0.6138412526265844, - "grad_norm": 1.6605723674120318, - "learning_rate": 6.853085787092956e-07, - "loss": 1.1253, - "step": 4528 - }, - { - "epoch": 0.6139768182742493, - "grad_norm": 4.592016835627234, - "learning_rate": 6.848917819763793e-07, - "loss": 1.1591, - "step": 4529 - }, - { - "epoch": 0.6141123839219141, - "grad_norm": 7.0868054944575585, - "learning_rate": 6.844750460095956e-07, - "loss": 1.1274, - "step": 4530 - }, - { - "epoch": 0.6142479495695791, - "grad_norm": 1.491859944055205, - "learning_rate": 6.840583708893083e-07, - "loss": 1.1541, - "step": 4531 - }, - { - "epoch": 0.614383515217244, - "grad_norm": 2.549133347606657, - "learning_rate": 6.836417566958707e-07, - "loss": 1.2007, - "step": 4532 - }, - { - "epoch": 0.6145190808649088, - "grad_norm": 1.9676764409402996, - "learning_rate": 6.832252035096227e-07, - "loss": 1.1304, - "step": 4533 - }, - { - "epoch": 0.6146546465125737, - "grad_norm": 1.9683209168651759, - "learning_rate": 6.82808711410894e-07, - "loss": 1.1834, - "step": 4534 - }, - { - "epoch": 0.6147902121602385, - "grad_norm": 1.6808770360947067, - "learning_rate": 6.823922804800016e-07, - "loss": 1.1323, - "step": 4535 - }, - { - "epoch": 0.6149257778079035, - "grad_norm": 2.05725443960613, - "learning_rate": 6.819759107972507e-07, - "loss": 1.1917, - "step": 4536 - }, - { - "epoch": 0.6150613434555684, - "grad_norm": 1.86544207070624, - "learning_rate": 6.815596024429351e-07, - "loss": 1.1303, - "step": 4537 - }, - { - "epoch": 0.6151969091032332, - "grad_norm": 2.215596263259654, - "learning_rate": 6.811433554973366e-07, - "loss": 1.1944, - "step": 4538 - }, - { - "epoch": 0.6153324747508981, - "grad_norm": 1.6601286191591305, - "learning_rate": 6.807271700407251e-07, - "loss": 1.1201, - "step": 4539 - }, - { - "epoch": 0.615468040398563, - "grad_norm": 1.6033505218651847, - "learning_rate": 6.803110461533587e-07, - "loss": 1.0886, - "step": 4540 - }, - { - "epoch": 0.6156036060462279, - "grad_norm": 1.8777171018448862, - "learning_rate": 6.798949839154834e-07, - "loss": 1.1467, - "step": 4541 - }, - { - "epoch": 0.6157391716938928, - "grad_norm": 1.8249390345085108, - "learning_rate": 6.79478983407334e-07, - "loss": 1.0962, - "step": 4542 - }, - { - "epoch": 0.6158747373415576, - "grad_norm": 1.683851942392328, - "learning_rate": 6.790630447091325e-07, - "loss": 1.1171, - "step": 4543 - }, - { - "epoch": 0.6160103029892225, - "grad_norm": 1.7769644365678379, - "learning_rate": 6.786471679010895e-07, - "loss": 1.1272, - "step": 4544 - }, - { - "epoch": 0.6161458686368874, - "grad_norm": 1.9699858745664762, - "learning_rate": 6.782313530634036e-07, - "loss": 1.1507, - "step": 4545 - }, - { - "epoch": 0.6162814342845523, - "grad_norm": 1.762647754126047, - "learning_rate": 6.77815600276261e-07, - "loss": 1.1285, - "step": 4546 - }, - { - "epoch": 0.6164169999322172, - "grad_norm": 1.6982032840341865, - "learning_rate": 6.773999096198373e-07, - "loss": 1.1237, - "step": 4547 - }, - { - "epoch": 0.616552565579882, - "grad_norm": 1.9456154018627647, - "learning_rate": 6.769842811742941e-07, - "loss": 1.1068, - "step": 4548 - }, - { - "epoch": 0.616688131227547, - "grad_norm": 1.5991101358985949, - "learning_rate": 6.765687150197827e-07, - "loss": 1.1624, - "step": 4549 - }, - { - "epoch": 0.6168236968752118, - "grad_norm": 1.8063471671469524, - "learning_rate": 6.761532112364414e-07, - "loss": 1.1338, - "step": 4550 - }, - { - "epoch": 0.6169592625228767, - "grad_norm": 1.5972258826500176, - "learning_rate": 6.757377699043976e-07, - "loss": 1.1379, - "step": 4551 - }, - { - "epoch": 0.6170948281705416, - "grad_norm": 1.9268050070278382, - "learning_rate": 6.753223911037646e-07, - "loss": 1.1646, - "step": 4552 - }, - { - "epoch": 0.6172303938182064, - "grad_norm": 1.9475270201040769, - "learning_rate": 6.749070749146461e-07, - "loss": 1.165, - "step": 4553 - }, - { - "epoch": 0.6173659594658714, - "grad_norm": 1.7539586088340586, - "learning_rate": 6.744918214171318e-07, - "loss": 1.139, - "step": 4554 - }, - { - "epoch": 0.6175015251135362, - "grad_norm": 1.5329982001525995, - "learning_rate": 6.740766306913007e-07, - "loss": 1.1336, - "step": 4555 - }, - { - "epoch": 0.6176370907612011, - "grad_norm": 1.513929808896686, - "learning_rate": 6.736615028172183e-07, - "loss": 1.1784, - "step": 4556 - }, - { - "epoch": 0.617772656408866, - "grad_norm": 1.7145403934734202, - "learning_rate": 6.732464378749394e-07, - "loss": 1.1271, - "step": 4557 - }, - { - "epoch": 0.6179082220565308, - "grad_norm": 1.9515474808079747, - "learning_rate": 6.728314359445058e-07, - "loss": 1.2104, - "step": 4558 - }, - { - "epoch": 0.6180437877041958, - "grad_norm": 1.5547261198585394, - "learning_rate": 6.724164971059469e-07, - "loss": 1.1305, - "step": 4559 - }, - { - "epoch": 0.6181793533518606, - "grad_norm": 3.7162217853946755, - "learning_rate": 6.720016214392812e-07, - "loss": 1.1204, - "step": 4560 - }, - { - "epoch": 0.6183149189995255, - "grad_norm": 1.7855180324318014, - "learning_rate": 6.715868090245131e-07, - "loss": 1.1388, - "step": 4561 - }, - { - "epoch": 0.6184504846471904, - "grad_norm": 1.6265696238556115, - "learning_rate": 6.711720599416373e-07, - "loss": 1.0836, - "step": 4562 - }, - { - "epoch": 0.6185860502948552, - "grad_norm": 1.4425001368028796, - "learning_rate": 6.707573742706334e-07, - "loss": 1.1385, - "step": 4563 - }, - { - "epoch": 0.6187216159425202, - "grad_norm": 1.4977853390354534, - "learning_rate": 6.703427520914715e-07, - "loss": 1.0878, - "step": 4564 - }, - { - "epoch": 0.6188571815901851, - "grad_norm": 1.9155385393453983, - "learning_rate": 6.699281934841073e-07, - "loss": 1.1536, - "step": 4565 - }, - { - "epoch": 0.6189927472378499, - "grad_norm": 1.8150698968005594, - "learning_rate": 6.69513698528486e-07, - "loss": 1.144, - "step": 4566 - }, - { - "epoch": 0.6191283128855148, - "grad_norm": 1.4826149373039823, - "learning_rate": 6.69099267304539e-07, - "loss": 1.1377, - "step": 4567 - }, - { - "epoch": 0.6192638785331797, - "grad_norm": 2.343604164951536, - "learning_rate": 6.686848998921864e-07, - "loss": 1.143, - "step": 4568 - }, - { - "epoch": 0.6193994441808446, - "grad_norm": 2.006597385725254, - "learning_rate": 6.682705963713355e-07, - "loss": 1.1504, - "step": 4569 - }, - { - "epoch": 0.6195350098285095, - "grad_norm": 1.457132840918752, - "learning_rate": 6.678563568218816e-07, - "loss": 1.1273, - "step": 4570 - }, - { - "epoch": 0.6196705754761743, - "grad_norm": 2.3102343287120735, - "learning_rate": 6.674421813237079e-07, - "loss": 1.185, - "step": 4571 - }, - { - "epoch": 0.6198061411238392, - "grad_norm": 1.695611581758358, - "learning_rate": 6.670280699566841e-07, - "loss": 1.1274, - "step": 4572 - }, - { - "epoch": 0.6199417067715041, - "grad_norm": 1.8052143776893101, - "learning_rate": 6.666140228006687e-07, - "loss": 1.0977, - "step": 4573 - }, - { - "epoch": 0.620077272419169, - "grad_norm": 1.6759866667264236, - "learning_rate": 6.662000399355075e-07, - "loss": 1.179, - "step": 4574 - }, - { - "epoch": 0.6202128380668339, - "grad_norm": 1.835166891807368, - "learning_rate": 6.657861214410338e-07, - "loss": 1.1354, - "step": 4575 - }, - { - "epoch": 0.6203484037144987, - "grad_norm": 1.8615790234295002, - "learning_rate": 6.653722673970681e-07, - "loss": 1.1423, - "step": 4576 - }, - { - "epoch": 0.6204839693621637, - "grad_norm": 1.9810717438620913, - "learning_rate": 6.649584778834196e-07, - "loss": 1.1087, - "step": 4577 - }, - { - "epoch": 0.6206195350098285, - "grad_norm": 1.5744846927761758, - "learning_rate": 6.645447529798838e-07, - "loss": 1.1291, - "step": 4578 - }, - { - "epoch": 0.6207551006574934, - "grad_norm": 1.8739954347538756, - "learning_rate": 6.641310927662447e-07, - "loss": 1.1196, - "step": 4579 - }, - { - "epoch": 0.6208906663051583, - "grad_norm": 2.2242537124786614, - "learning_rate": 6.637174973222727e-07, - "loss": 1.1314, - "step": 4580 - }, - { - "epoch": 0.6210262319528231, - "grad_norm": 1.579689258828854, - "learning_rate": 6.633039667277274e-07, - "loss": 1.1459, - "step": 4581 - }, - { - "epoch": 0.6211617976004881, - "grad_norm": 1.7703161291220928, - "learning_rate": 6.62890501062354e-07, - "loss": 1.139, - "step": 4582 - }, - { - "epoch": 0.6212973632481529, - "grad_norm": 1.8889693043926918, - "learning_rate": 6.624771004058868e-07, - "loss": 1.146, - "step": 4583 - }, - { - "epoch": 0.6214329288958178, - "grad_norm": 1.7957769411102924, - "learning_rate": 6.620637648380463e-07, - "loss": 1.1509, - "step": 4584 - }, - { - "epoch": 0.6215684945434827, - "grad_norm": 1.6506737581912285, - "learning_rate": 6.616504944385415e-07, - "loss": 1.1092, - "step": 4585 - }, - { - "epoch": 0.6217040601911475, - "grad_norm": 2.175826034115448, - "learning_rate": 6.612372892870681e-07, - "loss": 1.1601, - "step": 4586 - }, - { - "epoch": 0.6218396258388125, - "grad_norm": 1.7160388410887775, - "learning_rate": 6.608241494633092e-07, - "loss": 1.1489, - "step": 4587 - }, - { - "epoch": 0.6219751914864773, - "grad_norm": 1.8788963302019355, - "learning_rate": 6.604110750469358e-07, - "loss": 1.1409, - "step": 4588 - }, - { - "epoch": 0.6221107571341422, - "grad_norm": 1.8648010724030464, - "learning_rate": 6.599980661176059e-07, - "loss": 1.1391, - "step": 4589 - }, - { - "epoch": 0.6222463227818071, - "grad_norm": 2.805344339195097, - "learning_rate": 6.595851227549656e-07, - "loss": 1.1072, - "step": 4590 - }, - { - "epoch": 0.622381888429472, - "grad_norm": 1.6361349441993225, - "learning_rate": 6.591722450386468e-07, - "loss": 1.1272, - "step": 4591 - }, - { - "epoch": 0.6225174540771369, - "grad_norm": 1.7896488313057757, - "learning_rate": 6.587594330482707e-07, - "loss": 1.1919, - "step": 4592 - }, - { - "epoch": 0.6226530197248017, - "grad_norm": 2.35721217153974, - "learning_rate": 6.583466868634437e-07, - "loss": 1.1486, - "step": 4593 - }, - { - "epoch": 0.6227885853724666, - "grad_norm": 1.6036256350064935, - "learning_rate": 6.579340065637619e-07, - "loss": 1.1372, - "step": 4594 - }, - { - "epoch": 0.6229241510201315, - "grad_norm": 1.7433083723020044, - "learning_rate": 6.575213922288064e-07, - "loss": 1.1243, - "step": 4595 - }, - { - "epoch": 0.6230597166677964, - "grad_norm": 1.5592022801139758, - "learning_rate": 6.571088439381475e-07, - "loss": 1.1445, - "step": 4596 - }, - { - "epoch": 0.6231952823154613, - "grad_norm": 1.8056318698046991, - "learning_rate": 6.566963617713412e-07, - "loss": 1.1657, - "step": 4597 - }, - { - "epoch": 0.6233308479631261, - "grad_norm": 1.3731170495690361, - "learning_rate": 6.562839458079315e-07, - "loss": 1.1263, - "step": 4598 - }, - { - "epoch": 0.623466413610791, - "grad_norm": 2.2313794770411373, - "learning_rate": 6.558715961274501e-07, - "loss": 1.1256, - "step": 4599 - }, - { - "epoch": 0.623601979258456, - "grad_norm": 1.6042756249550607, - "learning_rate": 6.554593128094145e-07, - "loss": 1.1344, - "step": 4600 - }, - { - "epoch": 0.6237375449061208, - "grad_norm": 1.6680688658062552, - "learning_rate": 6.550470959333313e-07, - "loss": 1.1216, - "step": 4601 - }, - { - "epoch": 0.6238731105537857, - "grad_norm": 1.4154834557758997, - "learning_rate": 6.546349455786925e-07, - "loss": 1.129, - "step": 4602 - }, - { - "epoch": 0.6240086762014505, - "grad_norm": 1.5947726115204481, - "learning_rate": 6.542228618249784e-07, - "loss": 1.1752, - "step": 4603 - }, - { - "epoch": 0.6241442418491154, - "grad_norm": 1.8500559020546727, - "learning_rate": 6.538108447516557e-07, - "loss": 1.1497, - "step": 4604 - }, - { - "epoch": 0.6242798074967804, - "grad_norm": 1.4434021302904376, - "learning_rate": 6.533988944381792e-07, - "loss": 1.1521, - "step": 4605 - }, - { - "epoch": 0.6244153731444452, - "grad_norm": 2.3252465195067282, - "learning_rate": 6.529870109639899e-07, - "loss": 1.123, - "step": 4606 - }, - { - "epoch": 0.6245509387921101, - "grad_norm": 1.8281085663447667, - "learning_rate": 6.525751944085166e-07, - "loss": 1.1638, - "step": 4607 - }, - { - "epoch": 0.6246865044397749, - "grad_norm": 1.56780936385411, - "learning_rate": 6.521634448511743e-07, - "loss": 1.1461, - "step": 4608 - }, - { - "epoch": 0.6248220700874398, - "grad_norm": 2.0601089584628554, - "learning_rate": 6.517517623713664e-07, - "loss": 1.166, - "step": 4609 - }, - { - "epoch": 0.6249576357351048, - "grad_norm": 5.563411859330603, - "learning_rate": 6.513401470484817e-07, - "loss": 1.1106, - "step": 4610 - }, - { - "epoch": 0.6250932013827696, - "grad_norm": 1.6153040812743757, - "learning_rate": 6.50928598961898e-07, - "loss": 1.1485, - "step": 4611 - }, - { - "epoch": 0.6252287670304345, - "grad_norm": 1.4182788003380116, - "learning_rate": 6.505171181909782e-07, - "loss": 1.1334, - "step": 4612 - }, - { - "epoch": 0.6253643326780993, - "grad_norm": 1.6051666785918448, - "learning_rate": 6.501057048150738e-07, - "loss": 1.1463, - "step": 4613 - }, - { - "epoch": 0.6254998983257642, - "grad_norm": 1.614119426304828, - "learning_rate": 6.496943589135225e-07, - "loss": 1.1381, - "step": 4614 - }, - { - "epoch": 0.6256354639734292, - "grad_norm": 1.8133526089481435, - "learning_rate": 6.492830805656484e-07, - "loss": 1.1612, - "step": 4615 - }, - { - "epoch": 0.625771029621094, - "grad_norm": 1.4997820738639664, - "learning_rate": 6.488718698507643e-07, - "loss": 1.1178, - "step": 4616 - }, - { - "epoch": 0.6259065952687589, - "grad_norm": 1.8020889293543205, - "learning_rate": 6.484607268481681e-07, - "loss": 1.171, - "step": 4617 - }, - { - "epoch": 0.6260421609164237, - "grad_norm": 1.7157573490192808, - "learning_rate": 6.480496516371461e-07, - "loss": 1.1652, - "step": 4618 - }, - { - "epoch": 0.6261777265640887, - "grad_norm": 2.354921352569446, - "learning_rate": 6.476386442969703e-07, - "loss": 1.1259, - "step": 4619 - }, - { - "epoch": 0.6263132922117536, - "grad_norm": 1.9191437554191901, - "learning_rate": 6.472277049069011e-07, - "loss": 1.1101, - "step": 4620 - }, - { - "epoch": 0.6264488578594184, - "grad_norm": 1.6601145572823235, - "learning_rate": 6.468168335461839e-07, - "loss": 1.1522, - "step": 4621 - }, - { - "epoch": 0.6265844235070833, - "grad_norm": 1.6051424023930336, - "learning_rate": 6.464060302940528e-07, - "loss": 1.1131, - "step": 4622 - }, - { - "epoch": 0.6267199891547481, - "grad_norm": 1.767607454114888, - "learning_rate": 6.459952952297274e-07, - "loss": 1.1434, - "step": 4623 - }, - { - "epoch": 0.6268555548024131, - "grad_norm": 1.55958350797291, - "learning_rate": 6.455846284324153e-07, - "loss": 1.1534, - "step": 4624 - }, - { - "epoch": 0.626991120450078, - "grad_norm": 2.492372355041594, - "learning_rate": 6.451740299813097e-07, - "loss": 1.1373, - "step": 4625 - }, - { - "epoch": 0.6271266860977428, - "grad_norm": 1.98182353622362, - "learning_rate": 6.447634999555919e-07, - "loss": 1.1643, - "step": 4626 - }, - { - "epoch": 0.6272622517454077, - "grad_norm": 12.82056275963859, - "learning_rate": 6.443530384344291e-07, - "loss": 1.1725, - "step": 4627 - }, - { - "epoch": 0.6273978173930725, - "grad_norm": 1.7850317125290267, - "learning_rate": 6.439426454969752e-07, - "loss": 1.122, - "step": 4628 - }, - { - "epoch": 0.6275333830407375, - "grad_norm": 1.6106855005822434, - "learning_rate": 6.435323212223718e-07, - "loss": 1.1228, - "step": 4629 - }, - { - "epoch": 0.6276689486884024, - "grad_norm": 1.5305925093216468, - "learning_rate": 6.431220656897463e-07, - "loss": 1.0872, - "step": 4630 - }, - { - "epoch": 0.6278045143360672, - "grad_norm": 1.649052085570948, - "learning_rate": 6.427118789782136e-07, - "loss": 1.1606, - "step": 4631 - }, - { - "epoch": 0.6279400799837321, - "grad_norm": 1.5856030258830829, - "learning_rate": 6.423017611668744e-07, - "loss": 1.1256, - "step": 4632 - }, - { - "epoch": 0.628075645631397, - "grad_norm": 1.5363467683365424, - "learning_rate": 6.418917123348176e-07, - "loss": 1.1662, - "step": 4633 - }, - { - "epoch": 0.6282112112790619, - "grad_norm": 1.4294900637266494, - "learning_rate": 6.41481732561117e-07, - "loss": 1.1323, - "step": 4634 - }, - { - "epoch": 0.6283467769267268, - "grad_norm": 1.6287397354727207, - "learning_rate": 6.410718219248344e-07, - "loss": 1.1529, - "step": 4635 - }, - { - "epoch": 0.6284823425743916, - "grad_norm": 1.6348056530672597, - "learning_rate": 6.406619805050177e-07, - "loss": 1.1205, - "step": 4636 - }, - { - "epoch": 0.6286179082220565, - "grad_norm": 1.5619496923376825, - "learning_rate": 6.402522083807016e-07, - "loss": 1.1634, - "step": 4637 - }, - { - "epoch": 0.6287534738697214, - "grad_norm": 1.802975792221973, - "learning_rate": 6.398425056309073e-07, - "loss": 1.1303, - "step": 4638 - }, - { - "epoch": 0.6288890395173863, - "grad_norm": 1.6690348815072635, - "learning_rate": 6.394328723346433e-07, - "loss": 1.1457, - "step": 4639 - }, - { - "epoch": 0.6290246051650512, - "grad_norm": 1.6706408250776914, - "learning_rate": 6.390233085709034e-07, - "loss": 1.1516, - "step": 4640 - }, - { - "epoch": 0.629160170812716, - "grad_norm": 4.375835609605315, - "learning_rate": 6.386138144186693e-07, - "loss": 1.1406, - "step": 4641 - }, - { - "epoch": 0.629295736460381, - "grad_norm": 1.5263945532607583, - "learning_rate": 6.382043899569083e-07, - "loss": 1.1083, - "step": 4642 - }, - { - "epoch": 0.6294313021080459, - "grad_norm": 1.431262459980951, - "learning_rate": 6.377950352645748e-07, - "loss": 1.1074, - "step": 4643 - }, - { - "epoch": 0.6295668677557107, - "grad_norm": 1.6836188236534182, - "learning_rate": 6.373857504206099e-07, - "loss": 1.1443, - "step": 4644 - }, - { - "epoch": 0.6297024334033756, - "grad_norm": 1.8225218100296694, - "learning_rate": 6.369765355039405e-07, - "loss": 1.1345, - "step": 4645 - }, - { - "epoch": 0.6298379990510404, - "grad_norm": 2.2123773430005027, - "learning_rate": 6.365673905934809e-07, - "loss": 1.0915, - "step": 4646 - }, - { - "epoch": 0.6299735646987054, - "grad_norm": 4.097555054526147, - "learning_rate": 6.361583157681309e-07, - "loss": 1.1149, - "step": 4647 - }, - { - "epoch": 0.6301091303463703, - "grad_norm": 1.5832150835040408, - "learning_rate": 6.357493111067781e-07, - "loss": 1.1689, - "step": 4648 - }, - { - "epoch": 0.6302446959940351, - "grad_norm": 1.6145678294733874, - "learning_rate": 6.353403766882951e-07, - "loss": 1.1633, - "step": 4649 - }, - { - "epoch": 0.6303802616417, - "grad_norm": 1.965163899410995, - "learning_rate": 6.349315125915424e-07, - "loss": 1.1704, - "step": 4650 - }, - { - "epoch": 0.6305158272893648, - "grad_norm": 1.5180820903086434, - "learning_rate": 6.345227188953653e-07, - "loss": 1.1188, - "step": 4651 - }, - { - "epoch": 0.6306513929370298, - "grad_norm": 1.5367566013319123, - "learning_rate": 6.341139956785974e-07, - "loss": 1.1378, - "step": 4652 - }, - { - "epoch": 0.6307869585846947, - "grad_norm": 2.207456740524406, - "learning_rate": 6.337053430200571e-07, - "loss": 1.1632, - "step": 4653 - }, - { - "epoch": 0.6309225242323595, - "grad_norm": 16.20860539415881, - "learning_rate": 6.332967609985502e-07, - "loss": 1.1237, - "step": 4654 - }, - { - "epoch": 0.6310580898800244, - "grad_norm": 1.661158650124365, - "learning_rate": 6.328882496928685e-07, - "loss": 1.0898, - "step": 4655 - }, - { - "epoch": 0.6311936555276892, - "grad_norm": 1.5356206744852985, - "learning_rate": 6.324798091817897e-07, - "loss": 1.1439, - "step": 4656 - }, - { - "epoch": 0.6313292211753542, - "grad_norm": 2.1011570288322714, - "learning_rate": 6.320714395440789e-07, - "loss": 1.1174, - "step": 4657 - }, - { - "epoch": 0.6314647868230191, - "grad_norm": 2.6732494514444216, - "learning_rate": 6.316631408584865e-07, - "loss": 1.1335, - "step": 4658 - }, - { - "epoch": 0.6316003524706839, - "grad_norm": 1.4332297177470865, - "learning_rate": 6.312549132037501e-07, - "loss": 1.095, - "step": 4659 - }, - { - "epoch": 0.6317359181183488, - "grad_norm": 1.6472704030842156, - "learning_rate": 6.308467566585927e-07, - "loss": 1.1242, - "step": 4660 - }, - { - "epoch": 0.6318714837660137, - "grad_norm": 1.6485419576158205, - "learning_rate": 6.304386713017249e-07, - "loss": 1.1428, - "step": 4661 - }, - { - "epoch": 0.6320070494136786, - "grad_norm": 2.042215452039785, - "learning_rate": 6.300306572118417e-07, - "loss": 1.1292, - "step": 4662 - }, - { - "epoch": 0.6321426150613435, - "grad_norm": 20.10658789247313, - "learning_rate": 6.296227144676262e-07, - "loss": 1.1413, - "step": 4663 - }, - { - "epoch": 0.6322781807090083, - "grad_norm": 1.6087040865507323, - "learning_rate": 6.292148431477465e-07, - "loss": 1.1315, - "step": 4664 - }, - { - "epoch": 0.6324137463566732, - "grad_norm": 1.3815119697078648, - "learning_rate": 6.288070433308575e-07, - "loss": 1.1367, - "step": 4665 - }, - { - "epoch": 0.6325493120043381, - "grad_norm": 2.217856334669187, - "learning_rate": 6.283993150956002e-07, - "loss": 1.1402, - "step": 4666 - }, - { - "epoch": 0.632684877652003, - "grad_norm": 1.7701314919303819, - "learning_rate": 6.279916585206018e-07, - "loss": 1.1077, - "step": 4667 - }, - { - "epoch": 0.6328204432996679, - "grad_norm": 1.5174131112388105, - "learning_rate": 6.275840736844754e-07, - "loss": 1.1156, - "step": 4668 - }, - { - "epoch": 0.6329560089473327, - "grad_norm": 1.6118489874443953, - "learning_rate": 6.27176560665821e-07, - "loss": 1.113, - "step": 4669 - }, - { - "epoch": 0.6330915745949977, - "grad_norm": 2.0550219207908658, - "learning_rate": 6.267691195432239e-07, - "loss": 1.1311, - "step": 4670 - }, - { - "epoch": 0.6332271402426625, - "grad_norm": 1.4253436975563871, - "learning_rate": 6.263617503952559e-07, - "loss": 1.1468, - "step": 4671 - }, - { - "epoch": 0.6333627058903274, - "grad_norm": 6.9655790907520885, - "learning_rate": 6.259544533004751e-07, - "loss": 1.1878, - "step": 4672 - }, - { - "epoch": 0.6334982715379923, - "grad_norm": 1.5937455338257902, - "learning_rate": 6.255472283374253e-07, - "loss": 1.1152, - "step": 4673 - }, - { - "epoch": 0.6336338371856571, - "grad_norm": 2.0095629658355803, - "learning_rate": 6.251400755846371e-07, - "loss": 1.1497, - "step": 4674 - }, - { - "epoch": 0.6337694028333221, - "grad_norm": 1.4772086517309146, - "learning_rate": 6.247329951206259e-07, - "loss": 1.1321, - "step": 4675 - }, - { - "epoch": 0.6339049684809869, - "grad_norm": 1.6177941983639816, - "learning_rate": 6.243259870238948e-07, - "loss": 1.1245, - "step": 4676 - }, - { - "epoch": 0.6340405341286518, - "grad_norm": 1.7105982717904655, - "learning_rate": 6.239190513729313e-07, - "loss": 1.1156, - "step": 4677 - }, - { - "epoch": 0.6341760997763167, - "grad_norm": 1.6093805552788716, - "learning_rate": 6.235121882462107e-07, - "loss": 1.1478, - "step": 4678 - }, - { - "epoch": 0.6343116654239815, - "grad_norm": 1.4381519120867787, - "learning_rate": 6.23105397722192e-07, - "loss": 1.1458, - "step": 4679 - }, - { - "epoch": 0.6344472310716465, - "grad_norm": 1.7302899964056457, - "learning_rate": 6.226986798793231e-07, - "loss": 1.1125, - "step": 4680 - }, - { - "epoch": 0.6345827967193113, - "grad_norm": 2.1471012207023707, - "learning_rate": 6.22292034796035e-07, - "loss": 1.1207, - "step": 4681 - }, - { - "epoch": 0.6347183623669762, - "grad_norm": 1.7922990960906722, - "learning_rate": 6.21885462550747e-07, - "loss": 1.1396, - "step": 4682 - }, - { - "epoch": 0.6348539280146411, - "grad_norm": 2.1389509784101652, - "learning_rate": 6.214789632218628e-07, - "loss": 1.177, - "step": 4683 - }, - { - "epoch": 0.634989493662306, - "grad_norm": 2.027334924419264, - "learning_rate": 6.210725368877723e-07, - "loss": 1.0614, - "step": 4684 - }, - { - "epoch": 0.6351250593099709, - "grad_norm": 1.4843095727688944, - "learning_rate": 6.206661836268525e-07, - "loss": 1.1203, - "step": 4685 - }, - { - "epoch": 0.6352606249576357, - "grad_norm": 1.54520779913543, - "learning_rate": 6.202599035174645e-07, - "loss": 1.1026, - "step": 4686 - }, - { - "epoch": 0.6353961906053006, - "grad_norm": 1.6665335044803176, - "learning_rate": 6.19853696637957e-07, - "loss": 1.0938, - "step": 4687 - }, - { - "epoch": 0.6355317562529655, - "grad_norm": 2.213570921451466, - "learning_rate": 6.194475630666629e-07, - "loss": 1.1767, - "step": 4688 - }, - { - "epoch": 0.6356673219006304, - "grad_norm": 1.7015229885607612, - "learning_rate": 6.190415028819029e-07, - "loss": 1.0971, - "step": 4689 - }, - { - "epoch": 0.6358028875482953, - "grad_norm": 1.6044426966140521, - "learning_rate": 6.186355161619814e-07, - "loss": 1.1315, - "step": 4690 - }, - { - "epoch": 0.6359384531959601, - "grad_norm": 1.8635318509353649, - "learning_rate": 6.182296029851908e-07, - "loss": 1.1403, - "step": 4691 - }, - { - "epoch": 0.636074018843625, - "grad_norm": 3.197957759710942, - "learning_rate": 6.178237634298073e-07, - "loss": 1.1529, - "step": 4692 - }, - { - "epoch": 0.63620958449129, - "grad_norm": 2.0791145267831213, - "learning_rate": 6.174179975740949e-07, - "loss": 1.1402, - "step": 4693 - }, - { - "epoch": 0.6363451501389548, - "grad_norm": 1.7128785194045597, - "learning_rate": 6.170123054963012e-07, - "loss": 1.1092, - "step": 4694 - }, - { - "epoch": 0.6364807157866197, - "grad_norm": 1.590221654974441, - "learning_rate": 6.166066872746616e-07, - "loss": 1.1595, - "step": 4695 - }, - { - "epoch": 0.6366162814342845, - "grad_norm": 1.5794912366665248, - "learning_rate": 6.162011429873959e-07, - "loss": 1.1366, - "step": 4696 - }, - { - "epoch": 0.6367518470819494, - "grad_norm": 1.3919189143376318, - "learning_rate": 6.157956727127102e-07, - "loss": 1.1023, - "step": 4697 - }, - { - "epoch": 0.6368874127296144, - "grad_norm": 1.5029417389744626, - "learning_rate": 6.153902765287966e-07, - "loss": 1.1213, - "step": 4698 - }, - { - "epoch": 0.6370229783772792, - "grad_norm": 1.7704918248350094, - "learning_rate": 6.149849545138319e-07, - "loss": 1.0979, - "step": 4699 - }, - { - "epoch": 0.6371585440249441, - "grad_norm": 1.736574976808114, - "learning_rate": 6.145797067459799e-07, - "loss": 1.1276, - "step": 4700 - }, - { - "epoch": 0.6372941096726089, - "grad_norm": 1.5461458964233954, - "learning_rate": 6.141745333033889e-07, - "loss": 1.1086, - "step": 4701 - }, - { - "epoch": 0.6374296753202738, - "grad_norm": 1.7901803042219957, - "learning_rate": 6.137694342641937e-07, - "loss": 1.1261, - "step": 4702 - }, - { - "epoch": 0.6375652409679388, - "grad_norm": 1.778634607121448, - "learning_rate": 6.133644097065143e-07, - "loss": 1.1388, - "step": 4703 - }, - { - "epoch": 0.6377008066156036, - "grad_norm": 1.6734839179700316, - "learning_rate": 6.129594597084567e-07, - "loss": 1.1171, - "step": 4704 - }, - { - "epoch": 0.6378363722632685, - "grad_norm": 1.7462402796992553, - "learning_rate": 6.125545843481119e-07, - "loss": 1.0836, - "step": 4705 - }, - { - "epoch": 0.6379719379109333, - "grad_norm": 1.4530541675334887, - "learning_rate": 6.121497837035576e-07, - "loss": 1.0843, - "step": 4706 - }, - { - "epoch": 0.6381075035585982, - "grad_norm": 1.5630401903611495, - "learning_rate": 6.117450578528556e-07, - "loss": 1.1184, - "step": 4707 - }, - { - "epoch": 0.6382430692062632, - "grad_norm": 4.293753479795108, - "learning_rate": 6.11340406874055e-07, - "loss": 1.1605, - "step": 4708 - }, - { - "epoch": 0.638378634853928, - "grad_norm": 1.545013820740773, - "learning_rate": 6.109358308451885e-07, - "loss": 1.1107, - "step": 4709 - }, - { - "epoch": 0.6385142005015929, - "grad_norm": 1.4591484714184346, - "learning_rate": 6.105313298442764e-07, - "loss": 1.1374, - "step": 4710 - }, - { - "epoch": 0.6386497661492577, - "grad_norm": 1.7091342994456478, - "learning_rate": 6.10126903949323e-07, - "loss": 1.161, - "step": 4711 - }, - { - "epoch": 0.6387853317969227, - "grad_norm": 1.513614848739321, - "learning_rate": 6.097225532383184e-07, - "loss": 1.1042, - "step": 4712 - }, - { - "epoch": 0.6389208974445876, - "grad_norm": 1.6440060978913553, - "learning_rate": 6.093182777892392e-07, - "loss": 1.1379, - "step": 4713 - }, - { - "epoch": 0.6390564630922524, - "grad_norm": 1.5879174377576, - "learning_rate": 6.089140776800456e-07, - "loss": 1.1292, - "step": 4714 - }, - { - "epoch": 0.6391920287399173, - "grad_norm": 1.7243121830493096, - "learning_rate": 6.085099529886857e-07, - "loss": 1.1219, - "step": 4715 - }, - { - "epoch": 0.6393275943875821, - "grad_norm": 3.281395686382957, - "learning_rate": 6.081059037930907e-07, - "loss": 1.143, - "step": 4716 - }, - { - "epoch": 0.6394631600352471, - "grad_norm": 1.6399865390090904, - "learning_rate": 6.07701930171179e-07, - "loss": 1.135, - "step": 4717 - }, - { - "epoch": 0.639598725682912, - "grad_norm": 4.069768460085548, - "learning_rate": 6.072980322008532e-07, - "loss": 1.1121, - "step": 4718 - }, - { - "epoch": 0.6397342913305768, - "grad_norm": 1.7664382249474369, - "learning_rate": 6.068942099600025e-07, - "loss": 1.1206, - "step": 4719 - }, - { - "epoch": 0.6398698569782417, - "grad_norm": 1.9773410202404775, - "learning_rate": 6.064904635264999e-07, - "loss": 1.1282, - "step": 4720 - }, - { - "epoch": 0.6400054226259067, - "grad_norm": 1.5169785420591682, - "learning_rate": 6.060867929782057e-07, - "loss": 1.1125, - "step": 4721 - }, - { - "epoch": 0.6401409882735715, - "grad_norm": 1.7123588170228587, - "learning_rate": 6.056831983929638e-07, - "loss": 1.0986, - "step": 4722 - }, - { - "epoch": 0.6402765539212364, - "grad_norm": 1.373797065466126, - "learning_rate": 6.052796798486049e-07, - "loss": 1.1288, - "step": 4723 - }, - { - "epoch": 0.6404121195689012, - "grad_norm": 1.8121542652861544, - "learning_rate": 6.048762374229435e-07, - "loss": 1.1312, - "step": 4724 - }, - { - "epoch": 0.6405476852165661, - "grad_norm": 1.561042616135415, - "learning_rate": 6.044728711937812e-07, - "loss": 1.1254, - "step": 4725 - }, - { - "epoch": 0.6406832508642311, - "grad_norm": 2.4534404249585133, - "learning_rate": 6.040695812389036e-07, - "loss": 1.1507, - "step": 4726 - }, - { - "epoch": 0.6408188165118959, - "grad_norm": 3.0914819205445676, - "learning_rate": 6.036663676360816e-07, - "loss": 1.1142, - "step": 4727 - }, - { - "epoch": 0.6409543821595608, - "grad_norm": 1.64063592275413, - "learning_rate": 6.032632304630726e-07, - "loss": 1.1491, - "step": 4728 - }, - { - "epoch": 0.6410899478072256, - "grad_norm": 1.851200188184754, - "learning_rate": 6.028601697976175e-07, - "loss": 1.1334, - "step": 4729 - }, - { - "epoch": 0.6412255134548905, - "grad_norm": 1.517280575791147, - "learning_rate": 6.024571857174442e-07, - "loss": 1.1283, - "step": 4730 - }, - { - "epoch": 0.6413610791025555, - "grad_norm": 3.634174145098893, - "learning_rate": 6.020542783002643e-07, - "loss": 1.1651, - "step": 4731 - }, - { - "epoch": 0.6414966447502203, - "grad_norm": 1.3087396030520786, - "learning_rate": 6.01651447623776e-07, - "loss": 1.0855, - "step": 4732 - }, - { - "epoch": 0.6416322103978852, - "grad_norm": 1.5188945487899586, - "learning_rate": 6.012486937656613e-07, - "loss": 1.1287, - "step": 4733 - }, - { - "epoch": 0.64176777604555, - "grad_norm": 1.8050243655585028, - "learning_rate": 6.008460168035887e-07, - "loss": 1.159, - "step": 4734 - }, - { - "epoch": 0.641903341693215, - "grad_norm": 1.6359350392699559, - "learning_rate": 6.004434168152109e-07, - "loss": 1.1154, - "step": 4735 - }, - { - "epoch": 0.6420389073408799, - "grad_norm": 1.456111712271028, - "learning_rate": 6.000408938781665e-07, - "loss": 1.135, - "step": 4736 - }, - { - "epoch": 0.6421744729885447, - "grad_norm": 1.622122018698129, - "learning_rate": 5.996384480700783e-07, - "loss": 1.1304, - "step": 4737 - }, - { - "epoch": 0.6423100386362096, - "grad_norm": 1.9474343005938992, - "learning_rate": 5.992360794685554e-07, - "loss": 1.1186, - "step": 4738 - }, - { - "epoch": 0.6424456042838744, - "grad_norm": 1.6444491076014611, - "learning_rate": 5.988337881511909e-07, - "loss": 1.1348, - "step": 4739 - }, - { - "epoch": 0.6425811699315394, - "grad_norm": 5.260249591569377, - "learning_rate": 5.984315741955639e-07, - "loss": 1.1501, - "step": 4740 - }, - { - "epoch": 0.6427167355792043, - "grad_norm": 2.2533240837166537, - "learning_rate": 5.98029437679238e-07, - "loss": 1.1949, - "step": 4741 - }, - { - "epoch": 0.6428523012268691, - "grad_norm": 1.3841362051466948, - "learning_rate": 5.976273786797619e-07, - "loss": 1.1675, - "step": 4742 - }, - { - "epoch": 0.642987866874534, - "grad_norm": 1.7663484814430361, - "learning_rate": 5.972253972746701e-07, - "loss": 1.1477, - "step": 4743 - }, - { - "epoch": 0.6431234325221988, - "grad_norm": 1.6236263029833253, - "learning_rate": 5.968234935414807e-07, - "loss": 1.1011, - "step": 4744 - }, - { - "epoch": 0.6432589981698638, - "grad_norm": 1.4754618043916852, - "learning_rate": 5.964216675576983e-07, - "loss": 1.089, - "step": 4745 - }, - { - "epoch": 0.6433945638175287, - "grad_norm": 1.4385016088441636, - "learning_rate": 5.960199194008115e-07, - "loss": 1.1391, - "step": 4746 - }, - { - "epoch": 0.6435301294651935, - "grad_norm": 1.7842562045011214, - "learning_rate": 5.956182491482946e-07, - "loss": 1.1381, - "step": 4747 - }, - { - "epoch": 0.6436656951128584, - "grad_norm": 1.5534908489610553, - "learning_rate": 5.952166568776062e-07, - "loss": 1.0876, - "step": 4748 - }, - { - "epoch": 0.6438012607605232, - "grad_norm": 1.456589360284409, - "learning_rate": 5.948151426661904e-07, - "loss": 1.1559, - "step": 4749 - }, - { - "epoch": 0.6439368264081882, - "grad_norm": 1.7936609741484348, - "learning_rate": 5.944137065914759e-07, - "loss": 1.1087, - "step": 4750 - }, - { - "epoch": 0.6440723920558531, - "grad_norm": 1.8449583051647789, - "learning_rate": 5.94012348730877e-07, - "loss": 1.1351, - "step": 4751 - }, - { - "epoch": 0.6442079577035179, - "grad_norm": 1.5290777489414773, - "learning_rate": 5.936110691617915e-07, - "loss": 1.1397, - "step": 4752 - }, - { - "epoch": 0.6443435233511828, - "grad_norm": 1.5649258629166274, - "learning_rate": 5.932098679616038e-07, - "loss": 1.1161, - "step": 4753 - }, - { - "epoch": 0.6444790889988476, - "grad_norm": 1.5618133551614133, - "learning_rate": 5.928087452076821e-07, - "loss": 1.1021, - "step": 4754 - }, - { - "epoch": 0.6446146546465126, - "grad_norm": 2.469582549653953, - "learning_rate": 5.924077009773794e-07, - "loss": 1.143, - "step": 4755 - }, - { - "epoch": 0.6447502202941775, - "grad_norm": 2.0495477986063064, - "learning_rate": 5.920067353480345e-07, - "loss": 1.1024, - "step": 4756 - }, - { - "epoch": 0.6448857859418423, - "grad_norm": 1.840210373717833, - "learning_rate": 5.916058483969698e-07, - "loss": 1.1559, - "step": 4757 - }, - { - "epoch": 0.6450213515895072, - "grad_norm": 5.983948953427069, - "learning_rate": 5.912050402014941e-07, - "loss": 1.1158, - "step": 4758 - }, - { - "epoch": 0.6451569172371721, - "grad_norm": 1.5250035233833985, - "learning_rate": 5.908043108388989e-07, - "loss": 1.1256, - "step": 4759 - }, - { - "epoch": 0.645292482884837, - "grad_norm": 1.8014221589616437, - "learning_rate": 5.90403660386463e-07, - "loss": 1.1402, - "step": 4760 - }, - { - "epoch": 0.6454280485325019, - "grad_norm": 2.178085009790153, - "learning_rate": 5.900030889214476e-07, - "loss": 1.1459, - "step": 4761 - }, - { - "epoch": 0.6455636141801667, - "grad_norm": 1.7672833105832866, - "learning_rate": 5.896025965211005e-07, - "loss": 1.0965, - "step": 4762 - }, - { - "epoch": 0.6456991798278316, - "grad_norm": 1.9827291389676238, - "learning_rate": 5.89202183262653e-07, - "loss": 1.113, - "step": 4763 - }, - { - "epoch": 0.6458347454754965, - "grad_norm": 2.0989667300957597, - "learning_rate": 5.888018492233219e-07, - "loss": 1.1433, - "step": 4764 - }, - { - "epoch": 0.6459703111231614, - "grad_norm": 1.4925023613189676, - "learning_rate": 5.884015944803084e-07, - "loss": 1.1254, - "step": 4765 - }, - { - "epoch": 0.6461058767708263, - "grad_norm": 1.7983804038807079, - "learning_rate": 5.880014191107982e-07, - "loss": 1.1188, - "step": 4766 - }, - { - "epoch": 0.6462414424184911, - "grad_norm": 2.9109856295749967, - "learning_rate": 5.876013231919628e-07, - "loss": 1.1262, - "step": 4767 - }, - { - "epoch": 0.6463770080661561, - "grad_norm": 1.7268479507998833, - "learning_rate": 5.872013068009565e-07, - "loss": 1.1202, - "step": 4768 - }, - { - "epoch": 0.6465125737138209, - "grad_norm": 2.23156694876669, - "learning_rate": 5.868013700149197e-07, - "loss": 1.1006, - "step": 4769 - }, - { - "epoch": 0.6466481393614858, - "grad_norm": 1.4390360160047948, - "learning_rate": 5.864015129109771e-07, - "loss": 1.1427, - "step": 4770 - }, - { - "epoch": 0.6467837050091507, - "grad_norm": 1.7992384844511071, - "learning_rate": 5.860017355662381e-07, - "loss": 1.1763, - "step": 4771 - }, - { - "epoch": 0.6469192706568155, - "grad_norm": 1.394781329931129, - "learning_rate": 5.856020380577964e-07, - "loss": 1.106, - "step": 4772 - }, - { - "epoch": 0.6470548363044805, - "grad_norm": 1.5666544851153166, - "learning_rate": 5.852024204627308e-07, - "loss": 1.1172, - "step": 4773 - }, - { - "epoch": 0.6471904019521453, - "grad_norm": 1.7726511091335129, - "learning_rate": 5.84802882858104e-07, - "loss": 1.1376, - "step": 4774 - }, - { - "epoch": 0.6473259675998102, - "grad_norm": 1.8837075199723314, - "learning_rate": 5.844034253209641e-07, - "loss": 1.1044, - "step": 4775 - }, - { - "epoch": 0.6474615332474751, - "grad_norm": 1.4140192018698483, - "learning_rate": 5.840040479283428e-07, - "loss": 1.1207, - "step": 4776 - }, - { - "epoch": 0.6475970988951399, - "grad_norm": 2.027173616106004, - "learning_rate": 5.836047507572575e-07, - "loss": 1.1225, - "step": 4777 - }, - { - "epoch": 0.6477326645428049, - "grad_norm": 1.560800280468979, - "learning_rate": 5.832055338847089e-07, - "loss": 1.1192, - "step": 4778 - }, - { - "epoch": 0.6478682301904697, - "grad_norm": 1.5163969393808416, - "learning_rate": 5.828063973876833e-07, - "loss": 1.1313, - "step": 4779 - }, - { - "epoch": 0.6480037958381346, - "grad_norm": 1.5937114658688518, - "learning_rate": 5.824073413431507e-07, - "loss": 1.1015, - "step": 4780 - }, - { - "epoch": 0.6481393614857995, - "grad_norm": 1.4884182323335657, - "learning_rate": 5.820083658280661e-07, - "loss": 1.1212, - "step": 4781 - }, - { - "epoch": 0.6482749271334644, - "grad_norm": 1.3985217282443203, - "learning_rate": 5.816094709193688e-07, - "loss": 1.1238, - "step": 4782 - }, - { - "epoch": 0.6484104927811293, - "grad_norm": 1.5505452058544897, - "learning_rate": 5.812106566939824e-07, - "loss": 1.1192, - "step": 4783 - }, - { - "epoch": 0.6485460584287941, - "grad_norm": 1.774965276246935, - "learning_rate": 5.808119232288151e-07, - "loss": 1.1294, - "step": 4784 - }, - { - "epoch": 0.648681624076459, - "grad_norm": 1.9032722818287082, - "learning_rate": 5.804132706007597e-07, - "loss": 1.1548, - "step": 4785 - }, - { - "epoch": 0.6488171897241239, - "grad_norm": 1.751331187769959, - "learning_rate": 5.800146988866927e-07, - "loss": 1.1353, - "step": 4786 - }, - { - "epoch": 0.6489527553717888, - "grad_norm": 1.482689811907791, - "learning_rate": 5.796162081634761e-07, - "loss": 1.1205, - "step": 4787 - }, - { - "epoch": 0.6490883210194537, - "grad_norm": 1.7631365838050608, - "learning_rate": 5.792177985079558e-07, - "loss": 1.0928, - "step": 4788 - }, - { - "epoch": 0.6492238866671185, - "grad_norm": 1.6598386292132472, - "learning_rate": 5.788194699969608e-07, - "loss": 1.1539, - "step": 4789 - }, - { - "epoch": 0.6493594523147834, - "grad_norm": 1.5260592144897889, - "learning_rate": 5.784212227073073e-07, - "loss": 1.1258, - "step": 4790 - }, - { - "epoch": 0.6494950179624484, - "grad_norm": 1.6598695501727219, - "learning_rate": 5.780230567157924e-07, - "loss": 1.1726, - "step": 4791 - }, - { - "epoch": 0.6496305836101132, - "grad_norm": 1.674905777860736, - "learning_rate": 5.776249720992009e-07, - "loss": 1.1137, - "step": 4792 - }, - { - "epoch": 0.6497661492577781, - "grad_norm": 1.645837848639911, - "learning_rate": 5.772269689342988e-07, - "loss": 1.1131, - "step": 4793 - }, - { - "epoch": 0.6499017149054429, - "grad_norm": 1.887975526385798, - "learning_rate": 5.768290472978392e-07, - "loss": 1.0866, - "step": 4794 - }, - { - "epoch": 0.6500372805531078, - "grad_norm": 1.834810884157617, - "learning_rate": 5.764312072665574e-07, - "loss": 1.1586, - "step": 4795 - }, - { - "epoch": 0.6501728462007728, - "grad_norm": 1.6201278155326666, - "learning_rate": 5.760334489171735e-07, - "loss": 1.1236, - "step": 4796 - }, - { - "epoch": 0.6503084118484376, - "grad_norm": 1.5993838670254967, - "learning_rate": 5.756357723263926e-07, - "loss": 1.1498, - "step": 4797 - }, - { - "epoch": 0.6504439774961025, - "grad_norm": 1.6927508746378985, - "learning_rate": 5.752381775709032e-07, - "loss": 1.1423, - "step": 4798 - }, - { - "epoch": 0.6505795431437674, - "grad_norm": 1.3839523761065653, - "learning_rate": 5.748406647273784e-07, - "loss": 1.1216, - "step": 4799 - }, - { - "epoch": 0.6507151087914322, - "grad_norm": 2.0003171969273565, - "learning_rate": 5.744432338724754e-07, - "loss": 1.1294, - "step": 4800 - }, - { - "epoch": 0.6508506744390972, - "grad_norm": 1.5449604132150574, - "learning_rate": 5.740458850828356e-07, - "loss": 1.1168, - "step": 4801 - }, - { - "epoch": 0.650986240086762, - "grad_norm": 1.9171615869878587, - "learning_rate": 5.736486184350846e-07, - "loss": 1.1667, - "step": 4802 - }, - { - "epoch": 0.6511218057344269, - "grad_norm": 1.9458848163108826, - "learning_rate": 5.732514340058321e-07, - "loss": 1.0991, - "step": 4803 - }, - { - "epoch": 0.6512573713820918, - "grad_norm": 1.840157615495904, - "learning_rate": 5.728543318716721e-07, - "loss": 1.1912, - "step": 4804 - }, - { - "epoch": 0.6513929370297566, - "grad_norm": 1.6110570960689108, - "learning_rate": 5.724573121091825e-07, - "loss": 1.1832, - "step": 4805 - }, - { - "epoch": 0.6515285026774216, - "grad_norm": 1.7562075168769882, - "learning_rate": 5.720603747949253e-07, - "loss": 1.1925, - "step": 4806 - }, - { - "epoch": 0.6516640683250864, - "grad_norm": 2.2906608286258017, - "learning_rate": 5.716635200054469e-07, - "loss": 1.1217, - "step": 4807 - }, - { - "epoch": 0.6517996339727513, - "grad_norm": 1.7656550550543244, - "learning_rate": 5.712667478172776e-07, - "loss": 1.1791, - "step": 4808 - }, - { - "epoch": 0.6519351996204162, - "grad_norm": 1.4147632488668616, - "learning_rate": 5.708700583069319e-07, - "loss": 1.1003, - "step": 4809 - }, - { - "epoch": 0.652070765268081, - "grad_norm": 1.6987489598107293, - "learning_rate": 5.704734515509085e-07, - "loss": 1.1472, - "step": 4810 - }, - { - "epoch": 0.652206330915746, - "grad_norm": 1.5394448023122558, - "learning_rate": 5.700769276256886e-07, - "loss": 1.1505, - "step": 4811 - }, - { - "epoch": 0.6523418965634108, - "grad_norm": 3.409492384711982, - "learning_rate": 5.696804866077404e-07, - "loss": 1.159, - "step": 4812 - }, - { - "epoch": 0.6524774622110757, - "grad_norm": 1.385971446420812, - "learning_rate": 5.692841285735128e-07, - "loss": 1.1361, - "step": 4813 - }, - { - "epoch": 0.6526130278587406, - "grad_norm": 1.6786071088656378, - "learning_rate": 5.68887853599442e-07, - "loss": 1.1433, - "step": 4814 - }, - { - "epoch": 0.6527485935064055, - "grad_norm": 1.9365883718795547, - "learning_rate": 5.684916617619453e-07, - "loss": 1.1428, - "step": 4815 - }, - { - "epoch": 0.6528841591540704, - "grad_norm": 1.5444935150035093, - "learning_rate": 5.680955531374255e-07, - "loss": 1.1516, - "step": 4816 - }, - { - "epoch": 0.6530197248017352, - "grad_norm": 1.6008589819816113, - "learning_rate": 5.676995278022688e-07, - "loss": 1.1516, - "step": 4817 - }, - { - "epoch": 0.6531552904494001, - "grad_norm": 1.6389993935419649, - "learning_rate": 5.67303585832846e-07, - "loss": 1.1274, - "step": 4818 - }, - { - "epoch": 0.653290856097065, - "grad_norm": 1.422135320040644, - "learning_rate": 5.669077273055111e-07, - "loss": 1.1394, - "step": 4819 - }, - { - "epoch": 0.6534264217447299, - "grad_norm": 1.578812663583261, - "learning_rate": 5.665119522966024e-07, - "loss": 1.1387, - "step": 4820 - }, - { - "epoch": 0.6535619873923948, - "grad_norm": 1.790432260707421, - "learning_rate": 5.661162608824419e-07, - "loss": 1.1066, - "step": 4821 - }, - { - "epoch": 0.6536975530400596, - "grad_norm": 1.7637080109842824, - "learning_rate": 5.657206531393358e-07, - "loss": 1.1121, - "step": 4822 - }, - { - "epoch": 0.6538331186877245, - "grad_norm": 1.5391488266255702, - "learning_rate": 5.653251291435735e-07, - "loss": 1.1005, - "step": 4823 - }, - { - "epoch": 0.6539686843353895, - "grad_norm": 1.5857314613039635, - "learning_rate": 5.64929688971429e-07, - "loss": 1.1517, - "step": 4824 - }, - { - "epoch": 0.6541042499830543, - "grad_norm": 1.491047819331301, - "learning_rate": 5.645343326991602e-07, - "loss": 1.1052, - "step": 4825 - }, - { - "epoch": 0.6542398156307192, - "grad_norm": 1.9852371405976332, - "learning_rate": 5.641390604030072e-07, - "loss": 1.1229, - "step": 4826 - }, - { - "epoch": 0.654375381278384, - "grad_norm": 1.8605379437968697, - "learning_rate": 5.637438721591967e-07, - "loss": 1.1583, - "step": 4827 - }, - { - "epoch": 0.6545109469260489, - "grad_norm": 1.7932505097639544, - "learning_rate": 5.633487680439361e-07, - "loss": 1.1571, - "step": 4828 - }, - { - "epoch": 0.6546465125737139, - "grad_norm": 2.9109495541394494, - "learning_rate": 5.629537481334195e-07, - "loss": 1.116, - "step": 4829 - }, - { - "epoch": 0.6547820782213787, - "grad_norm": 1.5386671113458654, - "learning_rate": 5.625588125038221e-07, - "loss": 1.1381, - "step": 4830 - }, - { - "epoch": 0.6549176438690436, - "grad_norm": 2.5497132010942174, - "learning_rate": 5.621639612313056e-07, - "loss": 1.1533, - "step": 4831 - }, - { - "epoch": 0.6550532095167084, - "grad_norm": 6.117626161625898, - "learning_rate": 5.617691943920122e-07, - "loss": 1.0929, - "step": 4832 - }, - { - "epoch": 0.6551887751643733, - "grad_norm": 1.4349254725269918, - "learning_rate": 5.613745120620712e-07, - "loss": 1.1402, - "step": 4833 - }, - { - "epoch": 0.6553243408120383, - "grad_norm": 1.840040398916793, - "learning_rate": 5.609799143175927e-07, - "loss": 1.0646, - "step": 4834 - }, - { - "epoch": 0.6554599064597031, - "grad_norm": 1.525388982804501, - "learning_rate": 5.605854012346729e-07, - "loss": 1.097, - "step": 4835 - }, - { - "epoch": 0.655595472107368, - "grad_norm": 1.651461390341418, - "learning_rate": 5.601909728893892e-07, - "loss": 1.0922, - "step": 4836 - }, - { - "epoch": 0.6557310377550328, - "grad_norm": 1.9081693106616995, - "learning_rate": 5.597966293578055e-07, - "loss": 1.156, - "step": 4837 - }, - { - "epoch": 0.6558666034026978, - "grad_norm": 1.4906267108249123, - "learning_rate": 5.594023707159668e-07, - "loss": 1.1506, - "step": 4838 - }, - { - "epoch": 0.6560021690503627, - "grad_norm": 3.0652479519767293, - "learning_rate": 5.590081970399028e-07, - "loss": 1.1207, - "step": 4839 - }, - { - "epoch": 0.6561377346980275, - "grad_norm": 1.4695469643269363, - "learning_rate": 5.586141084056273e-07, - "loss": 1.1546, - "step": 4840 - }, - { - "epoch": 0.6562733003456924, - "grad_norm": 1.525083153588519, - "learning_rate": 5.582201048891367e-07, - "loss": 1.1202, - "step": 4841 - }, - { - "epoch": 0.6564088659933572, - "grad_norm": 1.66487690647815, - "learning_rate": 5.578261865664118e-07, - "loss": 1.1176, - "step": 4842 - }, - { - "epoch": 0.6565444316410222, - "grad_norm": 1.6860839277146589, - "learning_rate": 5.574323535134164e-07, - "loss": 1.1405, - "step": 4843 - }, - { - "epoch": 0.6566799972886871, - "grad_norm": 1.6772305329557835, - "learning_rate": 5.570386058060983e-07, - "loss": 1.0947, - "step": 4844 - }, - { - "epoch": 0.6568155629363519, - "grad_norm": 1.9718045851503474, - "learning_rate": 5.566449435203886e-07, - "loss": 1.1031, - "step": 4845 - }, - { - "epoch": 0.6569511285840168, - "grad_norm": 1.944762874328042, - "learning_rate": 5.562513667322018e-07, - "loss": 1.1137, - "step": 4846 - }, - { - "epoch": 0.6570866942316816, - "grad_norm": 2.2689253463486545, - "learning_rate": 5.558578755174363e-07, - "loss": 1.1394, - "step": 4847 - }, - { - "epoch": 0.6572222598793466, - "grad_norm": 1.4700970496743098, - "learning_rate": 5.554644699519735e-07, - "loss": 1.1488, - "step": 4848 - }, - { - "epoch": 0.6573578255270115, - "grad_norm": 1.4240240327451372, - "learning_rate": 5.550711501116788e-07, - "loss": 1.1765, - "step": 4849 - }, - { - "epoch": 0.6574933911746763, - "grad_norm": 2.8113261174521074, - "learning_rate": 5.546779160724012e-07, - "loss": 1.0962, - "step": 4850 - }, - { - "epoch": 0.6576289568223412, - "grad_norm": 1.773310944450205, - "learning_rate": 5.542847679099715e-07, - "loss": 1.1177, - "step": 4851 - }, - { - "epoch": 0.657764522470006, - "grad_norm": 1.5935818266799504, - "learning_rate": 5.538917057002069e-07, - "loss": 1.1171, - "step": 4852 - }, - { - "epoch": 0.657900088117671, - "grad_norm": 1.6361928036485163, - "learning_rate": 5.534987295189049e-07, - "loss": 1.1315, - "step": 4853 - }, - { - "epoch": 0.6580356537653359, - "grad_norm": 1.8985550571482281, - "learning_rate": 5.531058394418487e-07, - "loss": 1.1232, - "step": 4854 - }, - { - "epoch": 0.6581712194130007, - "grad_norm": 1.4871525864752673, - "learning_rate": 5.527130355448035e-07, - "loss": 1.1453, - "step": 4855 - }, - { - "epoch": 0.6583067850606656, - "grad_norm": 2.0306342238415533, - "learning_rate": 5.523203179035189e-07, - "loss": 1.09, - "step": 4856 - }, - { - "epoch": 0.6584423507083305, - "grad_norm": 1.6225203436305788, - "learning_rate": 5.519276865937272e-07, - "loss": 1.1061, - "step": 4857 - }, - { - "epoch": 0.6585779163559954, - "grad_norm": 1.6151284288386374, - "learning_rate": 5.515351416911442e-07, - "loss": 1.1419, - "step": 4858 - }, - { - "epoch": 0.6587134820036603, - "grad_norm": 1.4982969677734927, - "learning_rate": 5.511426832714694e-07, - "loss": 1.172, - "step": 4859 - }, - { - "epoch": 0.6588490476513251, - "grad_norm": 1.6125121319532338, - "learning_rate": 5.507503114103849e-07, - "loss": 1.204, - "step": 4860 - }, - { - "epoch": 0.65898461329899, - "grad_norm": 1.5093757944427297, - "learning_rate": 5.503580261835566e-07, - "loss": 1.1243, - "step": 4861 - }, - { - "epoch": 0.6591201789466549, - "grad_norm": 1.4065380966713095, - "learning_rate": 5.499658276666338e-07, - "loss": 1.123, - "step": 4862 - }, - { - "epoch": 0.6592557445943198, - "grad_norm": 2.3337697425529362, - "learning_rate": 5.495737159352487e-07, - "loss": 1.0961, - "step": 4863 - }, - { - "epoch": 0.6593913102419847, - "grad_norm": 1.5450043311446877, - "learning_rate": 5.491816910650171e-07, - "loss": 1.1274, - "step": 4864 - }, - { - "epoch": 0.6595268758896495, - "grad_norm": 2.3195342632177205, - "learning_rate": 5.48789753131538e-07, - "loss": 1.1236, - "step": 4865 - }, - { - "epoch": 0.6596624415373145, - "grad_norm": 3.24530023226589, - "learning_rate": 5.483979022103935e-07, - "loss": 1.1242, - "step": 4866 - }, - { - "epoch": 0.6597980071849793, - "grad_norm": 1.39663365273393, - "learning_rate": 5.480061383771481e-07, - "loss": 1.1763, - "step": 4867 - }, - { - "epoch": 0.6599335728326442, - "grad_norm": 1.9855353776043534, - "learning_rate": 5.476144617073519e-07, - "loss": 1.1247, - "step": 4868 - }, - { - "epoch": 0.6600691384803091, - "grad_norm": 1.7559768207005786, - "learning_rate": 5.472228722765351e-07, - "loss": 1.1648, - "step": 4869 - }, - { - "epoch": 0.6602047041279739, - "grad_norm": 2.1316261776080414, - "learning_rate": 5.46831370160214e-07, - "loss": 1.1217, - "step": 4870 - }, - { - "epoch": 0.6603402697756389, - "grad_norm": 1.7414733024030613, - "learning_rate": 5.464399554338856e-07, - "loss": 1.1025, - "step": 4871 - }, - { - "epoch": 0.6604758354233037, - "grad_norm": 1.473699092558706, - "learning_rate": 5.460486281730322e-07, - "loss": 1.1396, - "step": 4872 - }, - { - "epoch": 0.6606114010709686, - "grad_norm": 1.691059413651753, - "learning_rate": 5.456573884531168e-07, - "loss": 1.154, - "step": 4873 - }, - { - "epoch": 0.6607469667186335, - "grad_norm": 1.963653470652611, - "learning_rate": 5.452662363495884e-07, - "loss": 1.1557, - "step": 4874 - }, - { - "epoch": 0.6608825323662983, - "grad_norm": 1.5569111058000522, - "learning_rate": 5.448751719378762e-07, - "loss": 1.1109, - "step": 4875 - }, - { - "epoch": 0.6610180980139633, - "grad_norm": 1.7733760879529628, - "learning_rate": 5.444841952933953e-07, - "loss": 1.1567, - "step": 4876 - }, - { - "epoch": 0.6611536636616281, - "grad_norm": 1.5452544358024716, - "learning_rate": 5.440933064915413e-07, - "loss": 1.1167, - "step": 4877 - }, - { - "epoch": 0.661289229309293, - "grad_norm": 1.5477538676060125, - "learning_rate": 5.437025056076945e-07, - "loss": 1.1577, - "step": 4878 - }, - { - "epoch": 0.6614247949569579, - "grad_norm": 2.182703681354459, - "learning_rate": 5.433117927172176e-07, - "loss": 1.1226, - "step": 4879 - }, - { - "epoch": 0.6615603606046228, - "grad_norm": 1.9689939617306953, - "learning_rate": 5.429211678954566e-07, - "loss": 1.1316, - "step": 4880 - }, - { - "epoch": 0.6616959262522877, - "grad_norm": 1.5725579169508583, - "learning_rate": 5.425306312177404e-07, - "loss": 1.1293, - "step": 4881 - }, - { - "epoch": 0.6618314918999526, - "grad_norm": 1.6614086000587205, - "learning_rate": 5.421401827593812e-07, - "loss": 1.1635, - "step": 4882 - }, - { - "epoch": 0.6619670575476174, - "grad_norm": 1.4894274184387255, - "learning_rate": 5.417498225956734e-07, - "loss": 1.1633, - "step": 4883 - }, - { - "epoch": 0.6621026231952823, - "grad_norm": 2.046543305345037, - "learning_rate": 5.413595508018951e-07, - "loss": 1.1248, - "step": 4884 - }, - { - "epoch": 0.6622381888429472, - "grad_norm": 1.5149373726900681, - "learning_rate": 5.409693674533071e-07, - "loss": 1.163, - "step": 4885 - }, - { - "epoch": 0.6623737544906121, - "grad_norm": 2.0872600392110185, - "learning_rate": 5.405792726251532e-07, - "loss": 1.1295, - "step": 4886 - }, - { - "epoch": 0.662509320138277, - "grad_norm": 1.5495095681961184, - "learning_rate": 5.401892663926606e-07, - "loss": 1.1192, - "step": 4887 - }, - { - "epoch": 0.6626448857859418, - "grad_norm": 1.63120116165608, - "learning_rate": 5.397993488310378e-07, - "loss": 1.1671, - "step": 4888 - }, - { - "epoch": 0.6627804514336068, - "grad_norm": 1.5985680306594543, - "learning_rate": 5.394095200154786e-07, - "loss": 1.0818, - "step": 4889 - }, - { - "epoch": 0.6629160170812716, - "grad_norm": 1.547326956425439, - "learning_rate": 5.39019780021157e-07, - "loss": 1.1516, - "step": 4890 - }, - { - "epoch": 0.6630515827289365, - "grad_norm": 2.7125985141999402, - "learning_rate": 5.386301289232329e-07, - "loss": 1.1143, - "step": 4891 - }, - { - "epoch": 0.6631871483766014, - "grad_norm": 1.5602338878808106, - "learning_rate": 5.382405667968457e-07, - "loss": 1.0793, - "step": 4892 - }, - { - "epoch": 0.6633227140242662, - "grad_norm": 1.5499172130164718, - "learning_rate": 5.378510937171212e-07, - "loss": 1.1304, - "step": 4893 - }, - { - "epoch": 0.6634582796719312, - "grad_norm": 1.578329287390987, - "learning_rate": 5.37461709759165e-07, - "loss": 1.1404, - "step": 4894 - }, - { - "epoch": 0.663593845319596, - "grad_norm": 1.776867286731425, - "learning_rate": 5.370724149980668e-07, - "loss": 1.1079, - "step": 4895 - }, - { - "epoch": 0.6637294109672609, - "grad_norm": 7.07356222666573, - "learning_rate": 5.366832095088994e-07, - "loss": 1.1247, - "step": 4896 - }, - { - "epoch": 0.6638649766149258, - "grad_norm": 1.5821058266035843, - "learning_rate": 5.362940933667177e-07, - "loss": 1.1164, - "step": 4897 - }, - { - "epoch": 0.6640005422625906, - "grad_norm": 2.1923072693357417, - "learning_rate": 5.359050666465599e-07, - "loss": 1.1328, - "step": 4898 - }, - { - "epoch": 0.6641361079102556, - "grad_norm": 1.7945749701620397, - "learning_rate": 5.355161294234465e-07, - "loss": 1.1344, - "step": 4899 - }, - { - "epoch": 0.6642716735579204, - "grad_norm": 1.8130187936298026, - "learning_rate": 5.351272817723813e-07, - "loss": 1.1481, - "step": 4900 - }, - { - "epoch": 0.6644072392055853, - "grad_norm": 1.716975009757803, - "learning_rate": 5.347385237683504e-07, - "loss": 1.1433, - "step": 4901 - }, - { - "epoch": 0.6645428048532502, - "grad_norm": 1.8397952012077903, - "learning_rate": 5.343498554863225e-07, - "loss": 1.1376, - "step": 4902 - }, - { - "epoch": 0.664678370500915, - "grad_norm": 1.5560436072961044, - "learning_rate": 5.339612770012494e-07, - "loss": 1.1604, - "step": 4903 - }, - { - "epoch": 0.66481393614858, - "grad_norm": 1.473404711358916, - "learning_rate": 5.335727883880654e-07, - "loss": 1.1204, - "step": 4904 - }, - { - "epoch": 0.6649495017962448, - "grad_norm": 1.6837577652231523, - "learning_rate": 5.331843897216873e-07, - "loss": 1.1246, - "step": 4905 - }, - { - "epoch": 0.6650850674439097, - "grad_norm": 1.7003248053624211, - "learning_rate": 5.327960810770149e-07, - "loss": 1.0902, - "step": 4906 - }, - { - "epoch": 0.6652206330915746, - "grad_norm": 1.4544231159780867, - "learning_rate": 5.324078625289304e-07, - "loss": 1.1519, - "step": 4907 - }, - { - "epoch": 0.6653561987392395, - "grad_norm": 1.5815478445414703, - "learning_rate": 5.320197341522985e-07, - "loss": 1.1813, - "step": 4908 - }, - { - "epoch": 0.6654917643869044, - "grad_norm": 4.73791278421319, - "learning_rate": 5.316316960219673e-07, - "loss": 1.1214, - "step": 4909 - }, - { - "epoch": 0.6656273300345692, - "grad_norm": 1.5073378586420585, - "learning_rate": 5.312437482127659e-07, - "loss": 1.1605, - "step": 4910 - }, - { - "epoch": 0.6657628956822341, - "grad_norm": 1.4753460925312933, - "learning_rate": 5.30855890799508e-07, - "loss": 1.0966, - "step": 4911 - }, - { - "epoch": 0.665898461329899, - "grad_norm": 1.5866062493114987, - "learning_rate": 5.304681238569877e-07, - "loss": 1.1329, - "step": 4912 - }, - { - "epoch": 0.6660340269775639, - "grad_norm": 2.858366448986451, - "learning_rate": 5.300804474599842e-07, - "loss": 1.1459, - "step": 4913 - }, - { - "epoch": 0.6661695926252288, - "grad_norm": 1.8130524009981435, - "learning_rate": 5.296928616832568e-07, - "loss": 1.1098, - "step": 4914 - }, - { - "epoch": 0.6663051582728936, - "grad_norm": 2.1361444318176317, - "learning_rate": 5.293053666015485e-07, - "loss": 1.1651, - "step": 4915 - }, - { - "epoch": 0.6664407239205585, - "grad_norm": 1.666276228266202, - "learning_rate": 5.28917962289585e-07, - "loss": 1.111, - "step": 4916 - }, - { - "epoch": 0.6665762895682235, - "grad_norm": 1.9111815769145295, - "learning_rate": 5.28530648822074e-07, - "loss": 1.079, - "step": 4917 - }, - { - "epoch": 0.6667118552158883, - "grad_norm": 1.7978023797790923, - "learning_rate": 5.281434262737056e-07, - "loss": 1.159, - "step": 4918 - }, - { - "epoch": 0.6668474208635532, - "grad_norm": 1.7604711589371427, - "learning_rate": 5.277562947191529e-07, - "loss": 1.1245, - "step": 4919 - }, - { - "epoch": 0.666982986511218, - "grad_norm": 2.2046924113091975, - "learning_rate": 5.273692542330713e-07, - "loss": 1.1517, - "step": 4920 - }, - { - "epoch": 0.6671185521588829, - "grad_norm": 1.4505991656566142, - "learning_rate": 5.269823048900981e-07, - "loss": 1.1761, - "step": 4921 - }, - { - "epoch": 0.6672541178065479, - "grad_norm": 1.5723252503022231, - "learning_rate": 5.265954467648539e-07, - "loss": 1.1094, - "step": 4922 - }, - { - "epoch": 0.6673896834542127, - "grad_norm": 1.5739384728577925, - "learning_rate": 5.262086799319405e-07, - "loss": 1.1444, - "step": 4923 - }, - { - "epoch": 0.6675252491018776, - "grad_norm": 1.50472044754599, - "learning_rate": 5.258220044659438e-07, - "loss": 1.1235, - "step": 4924 - }, - { - "epoch": 0.6676608147495424, - "grad_norm": 1.7357482128233432, - "learning_rate": 5.2543542044143e-07, - "loss": 1.1078, - "step": 4925 - }, - { - "epoch": 0.6677963803972073, - "grad_norm": 1.8326018079334034, - "learning_rate": 5.2504892793295e-07, - "loss": 1.132, - "step": 4926 - }, - { - "epoch": 0.6679319460448723, - "grad_norm": 1.5611745200190938, - "learning_rate": 5.246625270150346e-07, - "loss": 1.1204, - "step": 4927 - }, - { - "epoch": 0.6680675116925371, - "grad_norm": 2.5191713115782095, - "learning_rate": 5.242762177621994e-07, - "loss": 1.188, - "step": 4928 - }, - { - "epoch": 0.668203077340202, - "grad_norm": 1.8831786794947099, - "learning_rate": 5.238900002489398e-07, - "loss": 1.1071, - "step": 4929 - }, - { - "epoch": 0.6683386429878668, - "grad_norm": 1.624339115395409, - "learning_rate": 5.235038745497363e-07, - "loss": 1.0947, - "step": 4930 - }, - { - "epoch": 0.6684742086355318, - "grad_norm": 1.5950439673565042, - "learning_rate": 5.231178407390484e-07, - "loss": 1.1275, - "step": 4931 - }, - { - "epoch": 0.6686097742831967, - "grad_norm": 1.8061238904627441, - "learning_rate": 5.227318988913216e-07, - "loss": 1.1267, - "step": 4932 - }, - { - "epoch": 0.6687453399308615, - "grad_norm": 1.480704036521496, - "learning_rate": 5.223460490809799e-07, - "loss": 1.1119, - "step": 4933 - }, - { - "epoch": 0.6688809055785264, - "grad_norm": 1.7133727492552788, - "learning_rate": 5.21960291382433e-07, - "loss": 1.1857, - "step": 4934 - }, - { - "epoch": 0.6690164712261912, - "grad_norm": 1.541130165019227, - "learning_rate": 5.215746258700698e-07, - "loss": 1.1089, - "step": 4935 - }, - { - "epoch": 0.6691520368738562, - "grad_norm": 1.5732731753014844, - "learning_rate": 5.211890526182642e-07, - "loss": 1.122, - "step": 4936 - }, - { - "epoch": 0.6692876025215211, - "grad_norm": 1.4119339177426045, - "learning_rate": 5.208035717013702e-07, - "loss": 1.1287, - "step": 4937 - }, - { - "epoch": 0.6694231681691859, - "grad_norm": 2.0549599581611164, - "learning_rate": 5.204181831937245e-07, - "loss": 1.1744, - "step": 4938 - }, - { - "epoch": 0.6695587338168508, - "grad_norm": 1.8005072196363703, - "learning_rate": 5.200328871696468e-07, - "loss": 1.1711, - "step": 4939 - }, - { - "epoch": 0.6696942994645156, - "grad_norm": 1.632659057878715, - "learning_rate": 5.19647683703438e-07, - "loss": 1.1384, - "step": 4940 - }, - { - "epoch": 0.6698298651121806, - "grad_norm": 2.2267577818323203, - "learning_rate": 5.192625728693819e-07, - "loss": 1.1461, - "step": 4941 - }, - { - "epoch": 0.6699654307598455, - "grad_norm": 1.8371558032101727, - "learning_rate": 5.188775547417439e-07, - "loss": 1.1431, - "step": 4942 - }, - { - "epoch": 0.6701009964075103, - "grad_norm": 2.3756173095374282, - "learning_rate": 5.184926293947716e-07, - "loss": 1.1705, - "step": 4943 - }, - { - "epoch": 0.6702365620551752, - "grad_norm": 2.2876009572906764, - "learning_rate": 5.181077969026951e-07, - "loss": 1.1258, - "step": 4944 - }, - { - "epoch": 0.67037212770284, - "grad_norm": 1.6043297028300725, - "learning_rate": 5.17723057339726e-07, - "loss": 1.1264, - "step": 4945 - }, - { - "epoch": 0.670507693350505, - "grad_norm": 3.0171732411674648, - "learning_rate": 5.173384107800585e-07, - "loss": 1.1354, - "step": 4946 - }, - { - "epoch": 0.6706432589981699, - "grad_norm": 1.5744043188639119, - "learning_rate": 5.169538572978684e-07, - "loss": 1.135, - "step": 4947 - }, - { - "epoch": 0.6707788246458347, - "grad_norm": 1.4175648420777738, - "learning_rate": 5.165693969673142e-07, - "loss": 1.1166, - "step": 4948 - }, - { - "epoch": 0.6709143902934996, - "grad_norm": 2.0736191441389367, - "learning_rate": 5.161850298625362e-07, - "loss": 1.0794, - "step": 4949 - }, - { - "epoch": 0.6710499559411645, - "grad_norm": 1.9750548096126308, - "learning_rate": 5.158007560576557e-07, - "loss": 1.136, - "step": 4950 - }, - { - "epoch": 0.6711855215888294, - "grad_norm": 1.7781073786350896, - "learning_rate": 5.154165756267774e-07, - "loss": 1.1729, - "step": 4951 - }, - { - "epoch": 0.6713210872364943, - "grad_norm": 1.6835518335811073, - "learning_rate": 5.150324886439874e-07, - "loss": 1.135, - "step": 4952 - }, - { - "epoch": 0.6714566528841591, - "grad_norm": 1.8107074866187132, - "learning_rate": 5.14648495183354e-07, - "loss": 1.1366, - "step": 4953 - }, - { - "epoch": 0.671592218531824, - "grad_norm": 1.4952984933271551, - "learning_rate": 5.142645953189271e-07, - "loss": 1.1236, - "step": 4954 - }, - { - "epoch": 0.6717277841794889, - "grad_norm": 1.9371337709341832, - "learning_rate": 5.138807891247388e-07, - "loss": 1.1417, - "step": 4955 - }, - { - "epoch": 0.6718633498271538, - "grad_norm": 5.6258013018276465, - "learning_rate": 5.13497076674803e-07, - "loss": 1.138, - "step": 4956 - }, - { - "epoch": 0.6719989154748187, - "grad_norm": 1.4379189315279797, - "learning_rate": 5.13113458043116e-07, - "loss": 1.1268, - "step": 4957 - }, - { - "epoch": 0.6721344811224835, - "grad_norm": 2.0713250268479495, - "learning_rate": 5.127299333036552e-07, - "loss": 1.1235, - "step": 4958 - }, - { - "epoch": 0.6722700467701485, - "grad_norm": 1.9608845164061142, - "learning_rate": 5.123465025303804e-07, - "loss": 1.1277, - "step": 4959 - }, - { - "epoch": 0.6724056124178134, - "grad_norm": 1.5553016074626629, - "learning_rate": 5.119631657972334e-07, - "loss": 1.0877, - "step": 4960 - }, - { - "epoch": 0.6725411780654782, - "grad_norm": 1.9863658037818084, - "learning_rate": 5.115799231781377e-07, - "loss": 1.1134, - "step": 4961 - }, - { - "epoch": 0.6726767437131431, - "grad_norm": 1.5175915489188927, - "learning_rate": 5.111967747469983e-07, - "loss": 1.1386, - "step": 4962 - }, - { - "epoch": 0.6728123093608079, - "grad_norm": 1.585868289908071, - "learning_rate": 5.108137205777026e-07, - "loss": 1.1365, - "step": 4963 - }, - { - "epoch": 0.6729478750084729, - "grad_norm": 1.8326415030101058, - "learning_rate": 5.104307607441193e-07, - "loss": 1.1182, - "step": 4964 - }, - { - "epoch": 0.6730834406561378, - "grad_norm": 1.4495713023596106, - "learning_rate": 5.100478953200999e-07, - "loss": 1.0752, - "step": 4965 - }, - { - "epoch": 0.6732190063038026, - "grad_norm": 1.9727004799061012, - "learning_rate": 5.096651243794756e-07, - "loss": 1.1024, - "step": 4966 - }, - { - "epoch": 0.6733545719514675, - "grad_norm": 1.6749910471147682, - "learning_rate": 5.092824479960625e-07, - "loss": 1.089, - "step": 4967 - }, - { - "epoch": 0.6734901375991323, - "grad_norm": 1.469564032873871, - "learning_rate": 5.088998662436548e-07, - "loss": 1.0934, - "step": 4968 - }, - { - "epoch": 0.6736257032467973, - "grad_norm": 6.342404211432201, - "learning_rate": 5.085173791960324e-07, - "loss": 1.0755, - "step": 4969 - }, - { - "epoch": 0.6737612688944622, - "grad_norm": 2.0620273549095147, - "learning_rate": 5.081349869269529e-07, - "loss": 1.1134, - "step": 4970 - }, - { - "epoch": 0.673896834542127, - "grad_norm": 1.4867427888242264, - "learning_rate": 5.077526895101596e-07, - "loss": 1.0899, - "step": 4971 - }, - { - "epoch": 0.6740324001897919, - "grad_norm": 4.598567487102877, - "learning_rate": 5.073704870193736e-07, - "loss": 1.0901, - "step": 4972 - }, - { - "epoch": 0.6741679658374568, - "grad_norm": 1.5937081704067992, - "learning_rate": 5.069883795283015e-07, - "loss": 1.1303, - "step": 4973 - }, - { - "epoch": 0.6743035314851217, - "grad_norm": 1.866566661943804, - "learning_rate": 5.066063671106281e-07, - "loss": 1.1403, - "step": 4974 - }, - { - "epoch": 0.6744390971327866, - "grad_norm": 2.0487427252542334, - "learning_rate": 5.062244498400228e-07, - "loss": 1.11, - "step": 4975 - }, - { - "epoch": 0.6745746627804514, - "grad_norm": 1.6636100433612249, - "learning_rate": 5.058426277901344e-07, - "loss": 1.1826, - "step": 4976 - }, - { - "epoch": 0.6747102284281163, - "grad_norm": 1.488384521401093, - "learning_rate": 5.054609010345947e-07, - "loss": 1.133, - "step": 4977 - }, - { - "epoch": 0.6748457940757812, - "grad_norm": 1.8084519697168833, - "learning_rate": 5.050792696470165e-07, - "loss": 1.1727, - "step": 4978 - }, - { - "epoch": 0.6749813597234461, - "grad_norm": 1.6474575645592657, - "learning_rate": 5.046977337009945e-07, - "loss": 1.0928, - "step": 4979 - }, - { - "epoch": 0.675116925371111, - "grad_norm": 2.0371243281747575, - "learning_rate": 5.043162932701048e-07, - "loss": 1.1291, - "step": 4980 - }, - { - "epoch": 0.6752524910187758, - "grad_norm": 1.5694867285406233, - "learning_rate": 5.039349484279053e-07, - "loss": 1.1316, - "step": 4981 - }, - { - "epoch": 0.6753880566664408, - "grad_norm": 1.6258280346986795, - "learning_rate": 5.035536992479352e-07, - "loss": 1.1382, - "step": 4982 - }, - { - "epoch": 0.6755236223141056, - "grad_norm": 1.8222504119619058, - "learning_rate": 5.031725458037157e-07, - "loss": 1.1334, - "step": 4983 - }, - { - "epoch": 0.6756591879617705, - "grad_norm": 5.453570530041879, - "learning_rate": 5.027914881687489e-07, - "loss": 1.1073, - "step": 4984 - }, - { - "epoch": 0.6757947536094354, - "grad_norm": 1.5692625113883847, - "learning_rate": 5.024105264165188e-07, - "loss": 1.1238, - "step": 4985 - }, - { - "epoch": 0.6759303192571002, - "grad_norm": 2.297705205606987, - "learning_rate": 5.020296606204915e-07, - "loss": 1.1403, - "step": 4986 - }, - { - "epoch": 0.6760658849047652, - "grad_norm": 1.700488453724067, - "learning_rate": 5.016488908541125e-07, - "loss": 1.1082, - "step": 4987 - }, - { - "epoch": 0.67620145055243, - "grad_norm": 1.5776329195978689, - "learning_rate": 5.01268217190812e-07, - "loss": 1.1208, - "step": 4988 - }, - { - "epoch": 0.6763370162000949, - "grad_norm": 4.587535454487696, - "learning_rate": 5.008876397039983e-07, - "loss": 1.1549, - "step": 4989 - }, - { - "epoch": 0.6764725818477598, - "grad_norm": 1.6405808091828626, - "learning_rate": 5.005071584670644e-07, - "loss": 1.1318, - "step": 4990 - }, - { - "epoch": 0.6766081474954246, - "grad_norm": 1.989999847994308, - "learning_rate": 5.001267735533811e-07, - "loss": 1.1369, - "step": 4991 - }, - { - "epoch": 0.6767437131430896, - "grad_norm": 1.4788568410602503, - "learning_rate": 4.997464850363049e-07, - "loss": 1.1577, - "step": 4992 - }, - { - "epoch": 0.6768792787907544, - "grad_norm": 1.5467800909427947, - "learning_rate": 4.993662929891698e-07, - "loss": 1.1846, - "step": 4993 - }, - { - "epoch": 0.6770148444384193, - "grad_norm": 1.8774359882506313, - "learning_rate": 4.989861974852934e-07, - "loss": 1.1405, - "step": 4994 - }, - { - "epoch": 0.6771504100860842, - "grad_norm": 2.01882573585862, - "learning_rate": 4.986061985979739e-07, - "loss": 1.1425, - "step": 4995 - }, - { - "epoch": 0.677285975733749, - "grad_norm": 1.648455401956844, - "learning_rate": 4.982262964004913e-07, - "loss": 1.0915, - "step": 4996 - }, - { - "epoch": 0.677421541381414, - "grad_norm": 1.6548294472028942, - "learning_rate": 4.978464909661067e-07, - "loss": 1.1051, - "step": 4997 - }, - { - "epoch": 0.6775571070290788, - "grad_norm": 2.544847698198007, - "learning_rate": 4.974667823680626e-07, - "loss": 1.1694, - "step": 4998 - }, - { - "epoch": 0.6776926726767437, - "grad_norm": 14.242079179833981, - "learning_rate": 4.970871706795827e-07, - "loss": 1.121, - "step": 4999 - }, - { - "epoch": 0.6778282383244086, - "grad_norm": 1.8892950052883828, - "learning_rate": 4.967076559738722e-07, - "loss": 1.1463, - "step": 5000 - }, - { - "epoch": 0.6779638039720735, - "grad_norm": 1.6879212454529837, - "learning_rate": 4.963282383241175e-07, - "loss": 1.1285, - "step": 5001 - }, - { - "epoch": 0.6780993696197384, - "grad_norm": 1.4369508277170684, - "learning_rate": 4.959489178034863e-07, - "loss": 1.1244, - "step": 5002 - }, - { - "epoch": 0.6782349352674032, - "grad_norm": 1.387495776319263, - "learning_rate": 4.955696944851276e-07, - "loss": 1.1152, - "step": 5003 - }, - { - "epoch": 0.6783705009150681, - "grad_norm": 1.5234262046566502, - "learning_rate": 4.951905684421716e-07, - "loss": 1.1578, - "step": 5004 - }, - { - "epoch": 0.678506066562733, - "grad_norm": 2.5900619591174063, - "learning_rate": 4.948115397477296e-07, - "loss": 1.1361, - "step": 5005 - }, - { - "epoch": 0.6786416322103979, - "grad_norm": 2.3919751395177715, - "learning_rate": 4.94432608474895e-07, - "loss": 1.1352, - "step": 5006 - }, - { - "epoch": 0.6787771978580628, - "grad_norm": 1.491447483398711, - "learning_rate": 4.940537746967403e-07, - "loss": 1.1206, - "step": 5007 - }, - { - "epoch": 0.6789127635057276, - "grad_norm": 2.499854131013477, - "learning_rate": 4.936750384863222e-07, - "loss": 1.107, - "step": 5008 - }, - { - "epoch": 0.6790483291533925, - "grad_norm": 1.573972456719156, - "learning_rate": 4.932963999166755e-07, - "loss": 1.146, - "step": 5009 - }, - { - "epoch": 0.6791838948010575, - "grad_norm": 7.060728208549765, - "learning_rate": 4.929178590608191e-07, - "loss": 1.152, - "step": 5010 - }, - { - "epoch": 0.6793194604487223, - "grad_norm": 1.709316596950634, - "learning_rate": 4.925394159917506e-07, - "loss": 1.104, - "step": 5011 - }, - { - "epoch": 0.6794550260963872, - "grad_norm": 2.313662758453731, - "learning_rate": 4.921610707824501e-07, - "loss": 1.1593, - "step": 5012 - }, - { - "epoch": 0.679590591744052, - "grad_norm": 1.7341284306983658, - "learning_rate": 4.917828235058785e-07, - "loss": 1.1525, - "step": 5013 - }, - { - "epoch": 0.6797261573917169, - "grad_norm": 1.6154296548619989, - "learning_rate": 4.914046742349777e-07, - "loss": 1.1105, - "step": 5014 - }, - { - "epoch": 0.6798617230393819, - "grad_norm": 2.2993263415272707, - "learning_rate": 4.910266230426708e-07, - "loss": 1.1239, - "step": 5015 - }, - { - "epoch": 0.6799972886870467, - "grad_norm": 1.5117160004057655, - "learning_rate": 4.906486700018622e-07, - "loss": 1.144, - "step": 5016 - }, - { - "epoch": 0.6801328543347116, - "grad_norm": 1.89462920924638, - "learning_rate": 4.90270815185437e-07, - "loss": 1.1482, - "step": 5017 - }, - { - "epoch": 0.6802684199823764, - "grad_norm": 1.4783794877219154, - "learning_rate": 4.898930586662614e-07, - "loss": 1.1256, - "step": 5018 - }, - { - "epoch": 0.6804039856300413, - "grad_norm": 1.8689579490955446, - "learning_rate": 4.89515400517183e-07, - "loss": 1.1333, - "step": 5019 - }, - { - "epoch": 0.6805395512777063, - "grad_norm": 1.7879082427758457, - "learning_rate": 4.891378408110301e-07, - "loss": 1.117, - "step": 5020 - }, - { - "epoch": 0.6806751169253711, - "grad_norm": 2.1126618076518273, - "learning_rate": 4.887603796206124e-07, - "loss": 1.1304, - "step": 5021 - }, - { - "epoch": 0.680810682573036, - "grad_norm": 1.733785098331935, - "learning_rate": 4.883830170187193e-07, - "loss": 1.1196, - "step": 5022 - }, - { - "epoch": 0.6809462482207008, - "grad_norm": 1.523921206359258, - "learning_rate": 4.880057530781237e-07, - "loss": 1.1375, - "step": 5023 - }, - { - "epoch": 0.6810818138683657, - "grad_norm": 1.7105298581218666, - "learning_rate": 4.876285878715763e-07, - "loss": 1.0992, - "step": 5024 - }, - { - "epoch": 0.6812173795160307, - "grad_norm": 1.927293677911524, - "learning_rate": 4.872515214718123e-07, - "loss": 1.1822, - "step": 5025 - }, - { - "epoch": 0.6813529451636955, - "grad_norm": 1.5267178977235765, - "learning_rate": 4.86874553951544e-07, - "loss": 1.1043, - "step": 5026 - }, - { - "epoch": 0.6814885108113604, - "grad_norm": 1.8709934643226926, - "learning_rate": 4.864976853834684e-07, - "loss": 1.1251, - "step": 5027 - }, - { - "epoch": 0.6816240764590252, - "grad_norm": 1.5878407763071387, - "learning_rate": 4.861209158402601e-07, - "loss": 1.1201, - "step": 5028 - }, - { - "epoch": 0.6817596421066902, - "grad_norm": 1.5137489418216632, - "learning_rate": 4.857442453945779e-07, - "loss": 1.1022, - "step": 5029 - }, - { - "epoch": 0.6818952077543551, - "grad_norm": 1.5158565706772829, - "learning_rate": 4.853676741190576e-07, - "loss": 1.114, - "step": 5030 - }, - { - "epoch": 0.6820307734020199, - "grad_norm": 1.4333889196114566, - "learning_rate": 4.849912020863198e-07, - "loss": 1.1566, - "step": 5031 - }, - { - "epoch": 0.6821663390496848, - "grad_norm": 1.614346978124686, - "learning_rate": 4.846148293689629e-07, - "loss": 1.1195, - "step": 5032 - }, - { - "epoch": 0.6823019046973496, - "grad_norm": 1.9958076213266087, - "learning_rate": 4.842385560395687e-07, - "loss": 1.1226, - "step": 5033 - }, - { - "epoch": 0.6824374703450146, - "grad_norm": 1.7569109844815372, - "learning_rate": 4.838623821706973e-07, - "loss": 1.1166, - "step": 5034 - }, - { - "epoch": 0.6825730359926795, - "grad_norm": 1.8302412138297257, - "learning_rate": 4.834863078348915e-07, - "loss": 1.1698, - "step": 5035 - }, - { - "epoch": 0.6827086016403443, - "grad_norm": 1.7437547676890182, - "learning_rate": 4.831103331046739e-07, - "loss": 1.1335, - "step": 5036 - }, - { - "epoch": 0.6828441672880092, - "grad_norm": 1.6852946364260437, - "learning_rate": 4.827344580525487e-07, - "loss": 1.1036, - "step": 5037 - }, - { - "epoch": 0.6829797329356742, - "grad_norm": 2.3816224138841084, - "learning_rate": 4.82358682751e-07, - "loss": 1.1026, - "step": 5038 - }, - { - "epoch": 0.683115298583339, - "grad_norm": 2.4570264849093713, - "learning_rate": 4.819830072724934e-07, - "loss": 1.1477, - "step": 5039 - }, - { - "epoch": 0.6832508642310039, - "grad_norm": 1.8938029011784432, - "learning_rate": 4.816074316894749e-07, - "loss": 1.1439, - "step": 5040 - }, - { - "epoch": 0.6833864298786687, - "grad_norm": 2.629711618674366, - "learning_rate": 4.812319560743713e-07, - "loss": 1.1714, - "step": 5041 - }, - { - "epoch": 0.6835219955263336, - "grad_norm": 1.7319043354465704, - "learning_rate": 4.8085658049959e-07, - "loss": 1.1069, - "step": 5042 - }, - { - "epoch": 0.6836575611739986, - "grad_norm": 1.7199438437713077, - "learning_rate": 4.804813050375194e-07, - "loss": 1.0981, - "step": 5043 - }, - { - "epoch": 0.6837931268216634, - "grad_norm": 2.3801132509817937, - "learning_rate": 4.801061297605282e-07, - "loss": 1.1271, - "step": 5044 - }, - { - "epoch": 0.6839286924693283, - "grad_norm": 1.4373562282067949, - "learning_rate": 4.797310547409661e-07, - "loss": 1.1011, - "step": 5045 - }, - { - "epoch": 0.6840642581169931, - "grad_norm": 1.4093543606547574, - "learning_rate": 4.793560800511634e-07, - "loss": 1.1363, - "step": 5046 - }, - { - "epoch": 0.684199823764658, - "grad_norm": 2.6787194991569585, - "learning_rate": 4.789812057634308e-07, - "loss": 1.1794, - "step": 5047 - }, - { - "epoch": 0.684335389412323, - "grad_norm": 1.7198131040542501, - "learning_rate": 4.786064319500604e-07, - "loss": 1.1294, - "step": 5048 - }, - { - "epoch": 0.6844709550599878, - "grad_norm": 1.6875094785507658, - "learning_rate": 4.782317586833236e-07, - "loss": 1.1117, - "step": 5049 - }, - { - "epoch": 0.6846065207076527, - "grad_norm": 1.9852535945707153, - "learning_rate": 4.778571860354737e-07, - "loss": 1.1318, - "step": 5050 - }, - { - "epoch": 0.6847420863553175, - "grad_norm": 1.7652776558460812, - "learning_rate": 4.774827140787437e-07, - "loss": 1.1446, - "step": 5051 - }, - { - "epoch": 0.6848776520029825, - "grad_norm": 1.7592656551920434, - "learning_rate": 4.77108342885348e-07, - "loss": 1.1332, - "step": 5052 - }, - { - "epoch": 0.6850132176506474, - "grad_norm": 1.7365095443821854, - "learning_rate": 4.767340725274809e-07, - "loss": 1.1109, - "step": 5053 - }, - { - "epoch": 0.6851487832983122, - "grad_norm": 1.5362023092957744, - "learning_rate": 4.763599030773173e-07, - "loss": 1.1521, - "step": 5054 - }, - { - "epoch": 0.6852843489459771, - "grad_norm": 1.5644655130046778, - "learning_rate": 4.7598583460701324e-07, - "loss": 1.1227, - "step": 5055 - }, - { - "epoch": 0.6854199145936419, - "grad_norm": 1.6267093382810747, - "learning_rate": 4.756118671887046e-07, - "loss": 1.1139, - "step": 5056 - }, - { - "epoch": 0.6855554802413069, - "grad_norm": 1.640278609310388, - "learning_rate": 4.7523800089450804e-07, - "loss": 1.1296, - "step": 5057 - }, - { - "epoch": 0.6856910458889718, - "grad_norm": 2.242040638782064, - "learning_rate": 4.748642357965208e-07, - "loss": 1.1437, - "step": 5058 - }, - { - "epoch": 0.6858266115366366, - "grad_norm": 1.6384593414804838, - "learning_rate": 4.7449057196682063e-07, - "loss": 1.1202, - "step": 5059 - }, - { - "epoch": 0.6859621771843015, - "grad_norm": 1.545152355349733, - "learning_rate": 4.7411700947746534e-07, - "loss": 1.138, - "step": 5060 - }, - { - "epoch": 0.6860977428319663, - "grad_norm": 9.415203748550557, - "learning_rate": 4.737435484004939e-07, - "loss": 1.1512, - "step": 5061 - }, - { - "epoch": 0.6862333084796313, - "grad_norm": 1.7113111348252474, - "learning_rate": 4.7337018880792544e-07, - "loss": 1.1493, - "step": 5062 - }, - { - "epoch": 0.6863688741272962, - "grad_norm": 1.515789461636531, - "learning_rate": 4.729969307717583e-07, - "loss": 1.0947, - "step": 5063 - }, - { - "epoch": 0.686504439774961, - "grad_norm": 1.7697422926944724, - "learning_rate": 4.7262377436397396e-07, - "loss": 1.1674, - "step": 5064 - }, - { - "epoch": 0.6866400054226259, - "grad_norm": 1.87253731164435, - "learning_rate": 4.722507196565311e-07, - "loss": 1.1189, - "step": 5065 - }, - { - "epoch": 0.6867755710702907, - "grad_norm": 1.7232275074282548, - "learning_rate": 4.718777667213719e-07, - "loss": 1.1561, - "step": 5066 - }, - { - "epoch": 0.6869111367179557, - "grad_norm": 1.717831271781981, - "learning_rate": 4.7150491563041597e-07, - "loss": 1.1223, - "step": 5067 - }, - { - "epoch": 0.6870467023656206, - "grad_norm": 1.349248746260784, - "learning_rate": 4.7113216645556606e-07, - "loss": 1.0871, - "step": 5068 - }, - { - "epoch": 0.6871822680132854, - "grad_norm": 1.575165426352292, - "learning_rate": 4.707595192687025e-07, - "loss": 1.1234, - "step": 5069 - }, - { - "epoch": 0.6873178336609503, - "grad_norm": 1.5237737124777995, - "learning_rate": 4.703869741416888e-07, - "loss": 1.1367, - "step": 5070 - }, - { - "epoch": 0.6874533993086152, - "grad_norm": 1.936097523162276, - "learning_rate": 4.700145311463659e-07, - "loss": 1.0725, - "step": 5071 - }, - { - "epoch": 0.6875889649562801, - "grad_norm": 3.019980141195836, - "learning_rate": 4.696421903545579e-07, - "loss": 1.1317, - "step": 5072 - }, - { - "epoch": 0.687724530603945, - "grad_norm": 1.7211989158285992, - "learning_rate": 4.692699518380664e-07, - "loss": 1.1334, - "step": 5073 - }, - { - "epoch": 0.6878600962516098, - "grad_norm": 1.6830052712317567, - "learning_rate": 4.6889781566867617e-07, - "loss": 1.1433, - "step": 5074 - }, - { - "epoch": 0.6879956618992747, - "grad_norm": 1.5471417561457257, - "learning_rate": 4.685257819181494e-07, - "loss": 1.1283, - "step": 5075 - }, - { - "epoch": 0.6881312275469396, - "grad_norm": 1.6789305393272496, - "learning_rate": 4.6815385065823053e-07, - "loss": 1.0979, - "step": 5076 - }, - { - "epoch": 0.6882667931946045, - "grad_norm": 1.6995569448742935, - "learning_rate": 4.677820219606433e-07, - "loss": 1.134, - "step": 5077 - }, - { - "epoch": 0.6884023588422694, - "grad_norm": 1.6147989514347159, - "learning_rate": 4.6741029589709216e-07, - "loss": 1.1362, - "step": 5078 - }, - { - "epoch": 0.6885379244899342, - "grad_norm": 2.276587095093122, - "learning_rate": 4.6703867253926144e-07, - "loss": 1.1033, - "step": 5079 - }, - { - "epoch": 0.6886734901375992, - "grad_norm": 1.670423625015705, - "learning_rate": 4.666671519588158e-07, - "loss": 1.1057, - "step": 5080 - }, - { - "epoch": 0.688809055785264, - "grad_norm": 1.7635757128817013, - "learning_rate": 4.662957342274e-07, - "loss": 1.1459, - "step": 5081 - }, - { - "epoch": 0.6889446214329289, - "grad_norm": 1.8977456402324373, - "learning_rate": 4.6592441941663896e-07, - "loss": 1.1593, - "step": 5082 - }, - { - "epoch": 0.6890801870805938, - "grad_norm": 1.5255802336835635, - "learning_rate": 4.655532075981383e-07, - "loss": 1.1287, - "step": 5083 - }, - { - "epoch": 0.6892157527282586, - "grad_norm": 2.053453475384124, - "learning_rate": 4.6518209884348227e-07, - "loss": 1.1358, - "step": 5084 - }, - { - "epoch": 0.6893513183759236, - "grad_norm": 1.7119566438481733, - "learning_rate": 4.648110932242375e-07, - "loss": 1.1347, - "step": 5085 - }, - { - "epoch": 0.6894868840235884, - "grad_norm": 1.6839460763947565, - "learning_rate": 4.644401908119482e-07, - "loss": 1.1282, - "step": 5086 - }, - { - "epoch": 0.6896224496712533, - "grad_norm": 1.5649669171286584, - "learning_rate": 4.640693916781414e-07, - "loss": 1.1246, - "step": 5087 - }, - { - "epoch": 0.6897580153189182, - "grad_norm": 1.5520216661043789, - "learning_rate": 4.636986958943212e-07, - "loss": 1.0894, - "step": 5088 - }, - { - "epoch": 0.689893580966583, - "grad_norm": 1.480994939720819, - "learning_rate": 4.6332810353197503e-07, - "loss": 1.1349, - "step": 5089 - }, - { - "epoch": 0.690029146614248, - "grad_norm": 1.8363490408408871, - "learning_rate": 4.629576146625674e-07, - "loss": 1.1623, - "step": 5090 - }, - { - "epoch": 0.6901647122619128, - "grad_norm": 2.3686811902379423, - "learning_rate": 4.625872293575448e-07, - "loss": 1.0866, - "step": 5091 - }, - { - "epoch": 0.6903002779095777, - "grad_norm": 1.529245460946844, - "learning_rate": 4.6221694768833276e-07, - "loss": 1.1194, - "step": 5092 - }, - { - "epoch": 0.6904358435572426, - "grad_norm": 1.767152784764307, - "learning_rate": 4.6184676972633753e-07, - "loss": 1.1201, - "step": 5093 - }, - { - "epoch": 0.6905714092049074, - "grad_norm": 1.5088201768766683, - "learning_rate": 4.614766955429447e-07, - "loss": 1.1429, - "step": 5094 - }, - { - "epoch": 0.6907069748525724, - "grad_norm": 3.804781154481856, - "learning_rate": 4.6110672520952033e-07, - "loss": 1.1266, - "step": 5095 - }, - { - "epoch": 0.6908425405002372, - "grad_norm": 1.4937794011732857, - "learning_rate": 4.607368587974102e-07, - "loss": 1.1376, - "step": 5096 - }, - { - "epoch": 0.6909781061479021, - "grad_norm": 1.7991873139240044, - "learning_rate": 4.6036709637794026e-07, - "loss": 1.1405, - "step": 5097 - }, - { - "epoch": 0.691113671795567, - "grad_norm": 1.4341544182929433, - "learning_rate": 4.599974380224161e-07, - "loss": 1.1648, - "step": 5098 - }, - { - "epoch": 0.6912492374432319, - "grad_norm": 1.6617796800786673, - "learning_rate": 4.5962788380212346e-07, - "loss": 1.126, - "step": 5099 - }, - { - "epoch": 0.6913848030908968, - "grad_norm": 1.427249386289172, - "learning_rate": 4.592584337883281e-07, - "loss": 1.1137, - "step": 5100 - }, - { - "epoch": 0.6915203687385616, - "grad_norm": 1.6209354731282715, - "learning_rate": 4.5888908805227536e-07, - "loss": 1.1461, - "step": 5101 - }, - { - "epoch": 0.6916559343862265, - "grad_norm": 1.7531249505814985, - "learning_rate": 4.585198466651907e-07, - "loss": 1.1335, - "step": 5102 - }, - { - "epoch": 0.6917915000338914, - "grad_norm": 2.1047497543628944, - "learning_rate": 4.581507096982794e-07, - "loss": 1.0979, - "step": 5103 - }, - { - "epoch": 0.6919270656815563, - "grad_norm": 1.6224796112440663, - "learning_rate": 4.5778167722272674e-07, - "loss": 1.1059, - "step": 5104 - }, - { - "epoch": 0.6920626313292212, - "grad_norm": 2.5418199582552874, - "learning_rate": 4.57412749309698e-07, - "loss": 1.0953, - "step": 5105 - }, - { - "epoch": 0.692198196976886, - "grad_norm": 6.533709851156899, - "learning_rate": 4.570439260303368e-07, - "loss": 1.1118, - "step": 5106 - }, - { - "epoch": 0.6923337626245509, - "grad_norm": 1.5579263753593178, - "learning_rate": 4.566752074557694e-07, - "loss": 1.1477, - "step": 5107 - }, - { - "epoch": 0.6924693282722159, - "grad_norm": 1.5024366427352795, - "learning_rate": 4.563065936570988e-07, - "loss": 1.1242, - "step": 5108 - }, - { - "epoch": 0.6926048939198807, - "grad_norm": 1.6618188818437418, - "learning_rate": 4.559380847054106e-07, - "loss": 1.1295, - "step": 5109 - }, - { - "epoch": 0.6927404595675456, - "grad_norm": 1.882921368972763, - "learning_rate": 4.555696806717679e-07, - "loss": 1.1504, - "step": 5110 - }, - { - "epoch": 0.6928760252152104, - "grad_norm": 1.724458687739725, - "learning_rate": 4.552013816272148e-07, - "loss": 1.106, - "step": 5111 - }, - { - "epoch": 0.6930115908628753, - "grad_norm": 1.784435397195635, - "learning_rate": 4.548331876427749e-07, - "loss": 1.1161, - "step": 5112 - }, - { - "epoch": 0.6931471565105403, - "grad_norm": 1.5563460942203795, - "learning_rate": 4.544650987894514e-07, - "loss": 1.1186, - "step": 5113 - }, - { - "epoch": 0.6932827221582051, - "grad_norm": 1.5468048580184683, - "learning_rate": 4.5409711513822745e-07, - "loss": 1.1652, - "step": 5114 - }, - { - "epoch": 0.69341828780587, - "grad_norm": 1.5813698509845677, - "learning_rate": 4.537292367600658e-07, - "loss": 1.1338, - "step": 5115 - }, - { - "epoch": 0.6935538534535349, - "grad_norm": 1.489228430410561, - "learning_rate": 4.5336146372590876e-07, - "loss": 1.1116, - "step": 5116 - }, - { - "epoch": 0.6936894191011997, - "grad_norm": 2.0498603993642983, - "learning_rate": 4.5299379610667865e-07, - "loss": 1.1904, - "step": 5117 - }, - { - "epoch": 0.6938249847488647, - "grad_norm": 2.074859003824733, - "learning_rate": 4.5262623397327706e-07, - "loss": 1.1272, - "step": 5118 - }, - { - "epoch": 0.6939605503965295, - "grad_norm": 1.5511335100253298, - "learning_rate": 4.522587773965856e-07, - "loss": 1.171, - "step": 5119 - }, - { - "epoch": 0.6940961160441944, - "grad_norm": 1.691992357473617, - "learning_rate": 4.518914264474657e-07, - "loss": 1.1502, - "step": 5120 - }, - { - "epoch": 0.6942316816918593, - "grad_norm": 1.992792281023209, - "learning_rate": 4.5152418119675684e-07, - "loss": 1.1105, - "step": 5121 - }, - { - "epoch": 0.6943672473395242, - "grad_norm": 1.4639211990523915, - "learning_rate": 4.5115704171528103e-07, - "loss": 1.0933, - "step": 5122 - }, - { - "epoch": 0.6945028129871891, - "grad_norm": 1.6191077508830551, - "learning_rate": 4.507900080738367e-07, - "loss": 1.1674, - "step": 5123 - }, - { - "epoch": 0.6946383786348539, - "grad_norm": 1.4545318571516928, - "learning_rate": 4.5042308034320487e-07, - "loss": 1.1586, - "step": 5124 - }, - { - "epoch": 0.6947739442825188, - "grad_norm": 2.270814683559817, - "learning_rate": 4.500562585941432e-07, - "loss": 1.0901, - "step": 5125 - }, - { - "epoch": 0.6949095099301837, - "grad_norm": 1.569916862722935, - "learning_rate": 4.496895428973917e-07, - "loss": 1.147, - "step": 5126 - }, - { - "epoch": 0.6950450755778486, - "grad_norm": 1.6426784695613386, - "learning_rate": 4.4932293332366733e-07, - "loss": 1.1513, - "step": 5127 - }, - { - "epoch": 0.6951806412255135, - "grad_norm": 1.5483762394791516, - "learning_rate": 4.489564299436691e-07, - "loss": 1.1255, - "step": 5128 - }, - { - "epoch": 0.6953162068731783, - "grad_norm": 3.14891976840678, - "learning_rate": 4.4859003282807305e-07, - "loss": 1.1031, - "step": 5129 - }, - { - "epoch": 0.6954517725208432, - "grad_norm": 1.484878898529198, - "learning_rate": 4.4822374204753734e-07, - "loss": 1.144, - "step": 5130 - }, - { - "epoch": 0.6955873381685082, - "grad_norm": 1.4134385728342282, - "learning_rate": 4.4785755767269675e-07, - "loss": 1.1242, - "step": 5131 - }, - { - "epoch": 0.695722903816173, - "grad_norm": 1.5299152786384362, - "learning_rate": 4.474914797741686e-07, - "loss": 1.1147, - "step": 5132 - }, - { - "epoch": 0.6958584694638379, - "grad_norm": 1.4642357989527712, - "learning_rate": 4.471255084225468e-07, - "loss": 1.114, - "step": 5133 - }, - { - "epoch": 0.6959940351115027, - "grad_norm": 1.8896965438528213, - "learning_rate": 4.467596436884068e-07, - "loss": 1.1296, - "step": 5134 - }, - { - "epoch": 0.6961296007591676, - "grad_norm": 1.7232441953908024, - "learning_rate": 4.463938856423023e-07, - "loss": 1.1357, - "step": 5135 - }, - { - "epoch": 0.6962651664068326, - "grad_norm": 1.5104092022431248, - "learning_rate": 4.4602823435476723e-07, - "loss": 1.1438, - "step": 5136 - }, - { - "epoch": 0.6964007320544974, - "grad_norm": 1.7490588616220661, - "learning_rate": 4.4566268989631427e-07, - "loss": 1.133, - "step": 5137 - }, - { - "epoch": 0.6965362977021623, - "grad_norm": 1.5685346061176184, - "learning_rate": 4.452972523374359e-07, - "loss": 1.1355, - "step": 5138 - }, - { - "epoch": 0.6966718633498271, - "grad_norm": 1.5020041858796862, - "learning_rate": 4.4493192174860394e-07, - "loss": 1.1573, - "step": 5139 - }, - { - "epoch": 0.696807428997492, - "grad_norm": 1.8323229978720112, - "learning_rate": 4.4456669820026935e-07, - "loss": 1.1429, - "step": 5140 - }, - { - "epoch": 0.696942994645157, - "grad_norm": 2.032889927678819, - "learning_rate": 4.442015817628627e-07, - "loss": 1.141, - "step": 5141 - }, - { - "epoch": 0.6970785602928218, - "grad_norm": 1.774093739919427, - "learning_rate": 4.438365725067937e-07, - "loss": 1.1512, - "step": 5142 - }, - { - "epoch": 0.6972141259404867, - "grad_norm": 1.9847054500465258, - "learning_rate": 4.434716705024518e-07, - "loss": 1.1214, - "step": 5143 - }, - { - "epoch": 0.6973496915881515, - "grad_norm": 1.6432638525407721, - "learning_rate": 4.4310687582020524e-07, - "loss": 1.1011, - "step": 5144 - }, - { - "epoch": 0.6974852572358164, - "grad_norm": 1.5559487196291886, - "learning_rate": 4.4274218853040213e-07, - "loss": 1.1156, - "step": 5145 - }, - { - "epoch": 0.6976208228834814, - "grad_norm": 1.5785875176425834, - "learning_rate": 4.4237760870336883e-07, - "loss": 1.1256, - "step": 5146 - }, - { - "epoch": 0.6977563885311462, - "grad_norm": 1.4689482446722029, - "learning_rate": 4.420131364094122e-07, - "loss": 1.0941, - "step": 5147 - }, - { - "epoch": 0.6978919541788111, - "grad_norm": 1.9923430989270816, - "learning_rate": 4.4164877171881765e-07, - "loss": 1.1894, - "step": 5148 - }, - { - "epoch": 0.6980275198264759, - "grad_norm": 2.294579829786927, - "learning_rate": 4.4128451470185013e-07, - "loss": 1.1651, - "step": 5149 - }, - { - "epoch": 0.6981630854741409, - "grad_norm": 1.95373740293365, - "learning_rate": 4.409203654287538e-07, - "loss": 1.1281, - "step": 5150 - }, - { - "epoch": 0.6982986511218058, - "grad_norm": 1.547597427144631, - "learning_rate": 4.4055632396975174e-07, - "loss": 1.1168, - "step": 5151 - }, - { - "epoch": 0.6984342167694706, - "grad_norm": 1.686671344083448, - "learning_rate": 4.4019239039504676e-07, - "loss": 1.1168, - "step": 5152 - }, - { - "epoch": 0.6985697824171355, - "grad_norm": 1.5510773000629237, - "learning_rate": 4.3982856477482034e-07, - "loss": 1.1239, - "step": 5153 - }, - { - "epoch": 0.6987053480648003, - "grad_norm": 3.5525914020915885, - "learning_rate": 4.394648471792335e-07, - "loss": 1.1472, - "step": 5154 - }, - { - "epoch": 0.6988409137124653, - "grad_norm": 2.494593967683602, - "learning_rate": 4.391012376784263e-07, - "loss": 1.1193, - "step": 5155 - }, - { - "epoch": 0.6989764793601302, - "grad_norm": 1.5056410194079155, - "learning_rate": 4.3873773634251796e-07, - "loss": 1.1046, - "step": 5156 - }, - { - "epoch": 0.699112045007795, - "grad_norm": 1.5187471277456848, - "learning_rate": 4.3837434324160684e-07, - "loss": 1.1389, - "step": 5157 - }, - { - "epoch": 0.6992476106554599, - "grad_norm": 1.7087411130894514, - "learning_rate": 4.380110584457705e-07, - "loss": 1.112, - "step": 5158 - }, - { - "epoch": 0.6993831763031247, - "grad_norm": 1.442902424367394, - "learning_rate": 4.376478820250653e-07, - "loss": 1.1115, - "step": 5159 - }, - { - "epoch": 0.6995187419507897, - "grad_norm": 1.4275466980614566, - "learning_rate": 4.3728481404952724e-07, - "loss": 1.1225, - "step": 5160 - }, - { - "epoch": 0.6996543075984546, - "grad_norm": 1.4685065150512506, - "learning_rate": 4.369218545891713e-07, - "loss": 1.1669, - "step": 5161 - }, - { - "epoch": 0.6997898732461194, - "grad_norm": 2.2108542333134733, - "learning_rate": 4.3655900371399025e-07, - "loss": 1.1329, - "step": 5162 - }, - { - "epoch": 0.6999254388937843, - "grad_norm": 3.344905293060164, - "learning_rate": 4.361962614939586e-07, - "loss": 1.1495, - "step": 5163 - }, - { - "epoch": 0.7000610045414492, - "grad_norm": 1.795965568742283, - "learning_rate": 4.358336279990268e-07, - "loss": 1.1217, - "step": 5164 - }, - { - "epoch": 0.7001965701891141, - "grad_norm": 1.5069226137106995, - "learning_rate": 4.354711032991273e-07, - "loss": 1.1108, - "step": 5165 - }, - { - "epoch": 0.700332135836779, - "grad_norm": 1.525596147377813, - "learning_rate": 4.3510868746416875e-07, - "loss": 1.1472, - "step": 5166 - }, - { - "epoch": 0.7004677014844438, - "grad_norm": 2.1280308714805742, - "learning_rate": 4.3474638056404146e-07, - "loss": 1.1299, - "step": 5167 - }, - { - "epoch": 0.7006032671321087, - "grad_norm": 1.5199261069620764, - "learning_rate": 4.343841826686121e-07, - "loss": 1.1285, - "step": 5168 - }, - { - "epoch": 0.7007388327797736, - "grad_norm": 1.80980919640645, - "learning_rate": 4.3402209384772925e-07, - "loss": 1.1455, - "step": 5169 - }, - { - "epoch": 0.7008743984274385, - "grad_norm": 1.7799040358788523, - "learning_rate": 4.336601141712172e-07, - "loss": 1.0864, - "step": 5170 - }, - { - "epoch": 0.7010099640751034, - "grad_norm": 1.7247317629129393, - "learning_rate": 4.332982437088825e-07, - "loss": 1.0857, - "step": 5171 - }, - { - "epoch": 0.7011455297227682, - "grad_norm": 2.366321711237902, - "learning_rate": 4.3293648253050786e-07, - "loss": 1.1197, - "step": 5172 - }, - { - "epoch": 0.7012810953704331, - "grad_norm": 2.041352476523526, - "learning_rate": 4.3257483070585644e-07, - "loss": 1.1545, - "step": 5173 - }, - { - "epoch": 0.701416661018098, - "grad_norm": 1.542656702842601, - "learning_rate": 4.3221328830466996e-07, - "loss": 1.1068, - "step": 5174 - }, - { - "epoch": 0.7015522266657629, - "grad_norm": 2.4787532864342756, - "learning_rate": 4.318518553966689e-07, - "loss": 1.1496, - "step": 5175 - }, - { - "epoch": 0.7016877923134278, - "grad_norm": 1.7336070677634021, - "learning_rate": 4.3149053205155295e-07, - "loss": 1.1471, - "step": 5176 - }, - { - "epoch": 0.7018233579610926, - "grad_norm": 2.0884633642661807, - "learning_rate": 4.3112931833900036e-07, - "loss": 1.1067, - "step": 5177 - }, - { - "epoch": 0.7019589236087576, - "grad_norm": 1.4989075297811785, - "learning_rate": 4.307682143286683e-07, - "loss": 1.1357, - "step": 5178 - }, - { - "epoch": 0.7020944892564224, - "grad_norm": 1.6157098889333126, - "learning_rate": 4.3040722009019284e-07, - "loss": 1.1037, - "step": 5179 - }, - { - "epoch": 0.7022300549040873, - "grad_norm": 1.6479421046060307, - "learning_rate": 4.300463356931888e-07, - "loss": 1.1304, - "step": 5180 - }, - { - "epoch": 0.7023656205517522, - "grad_norm": 2.638010571897554, - "learning_rate": 4.296855612072501e-07, - "loss": 1.0905, - "step": 5181 - }, - { - "epoch": 0.702501186199417, - "grad_norm": 3.003822875960002, - "learning_rate": 4.293248967019495e-07, - "loss": 1.1828, - "step": 5182 - }, - { - "epoch": 0.702636751847082, - "grad_norm": 1.4357844578088785, - "learning_rate": 4.289643422468372e-07, - "loss": 1.1237, - "step": 5183 - }, - { - "epoch": 0.7027723174947468, - "grad_norm": 1.5145957259865335, - "learning_rate": 4.286038979114447e-07, - "loss": 1.1183, - "step": 5184 - }, - { - "epoch": 0.7029078831424117, - "grad_norm": 1.7120519289252663, - "learning_rate": 4.282435637652795e-07, - "loss": 1.1539, - "step": 5185 - }, - { - "epoch": 0.7030434487900766, - "grad_norm": 1.756582006206239, - "learning_rate": 4.278833398778305e-07, - "loss": 1.1263, - "step": 5186 - }, - { - "epoch": 0.7031790144377414, - "grad_norm": 1.9604136553414062, - "learning_rate": 4.2752322631856275e-07, - "loss": 1.136, - "step": 5187 - }, - { - "epoch": 0.7033145800854064, - "grad_norm": 1.6260757702910649, - "learning_rate": 4.2716322315692266e-07, - "loss": 1.124, - "step": 5188 - }, - { - "epoch": 0.7034501457330712, - "grad_norm": 1.6124312898629183, - "learning_rate": 4.2680333046233286e-07, - "loss": 1.1135, - "step": 5189 - }, - { - "epoch": 0.7035857113807361, - "grad_norm": 1.4877343963859904, - "learning_rate": 4.2644354830419627e-07, - "loss": 1.116, - "step": 5190 - }, - { - "epoch": 0.703721277028401, - "grad_norm": 1.3871520063717433, - "learning_rate": 4.2608387675189404e-07, - "loss": 1.1025, - "step": 5191 - }, - { - "epoch": 0.7038568426760659, - "grad_norm": 1.5423904316836008, - "learning_rate": 4.2572431587478594e-07, - "loss": 1.1152, - "step": 5192 - }, - { - "epoch": 0.7039924083237308, - "grad_norm": 2.468598936334706, - "learning_rate": 4.253648657422105e-07, - "loss": 1.1469, - "step": 5193 - }, - { - "epoch": 0.7041279739713956, - "grad_norm": 1.6887204738130872, - "learning_rate": 4.2500552642348475e-07, - "loss": 1.1273, - "step": 5194 - }, - { - "epoch": 0.7042635396190605, - "grad_norm": 1.7092647778240457, - "learning_rate": 4.2464629798790453e-07, - "loss": 1.1223, - "step": 5195 - }, - { - "epoch": 0.7043991052667254, - "grad_norm": 1.9536531382368467, - "learning_rate": 4.242871805047442e-07, - "loss": 1.1514, - "step": 5196 - }, - { - "epoch": 0.7045346709143903, - "grad_norm": 1.7888068094399, - "learning_rate": 4.2392817404325665e-07, - "loss": 1.1368, - "step": 5197 - }, - { - "epoch": 0.7046702365620552, - "grad_norm": 1.9575359611723084, - "learning_rate": 4.2356927867267355e-07, - "loss": 1.1127, - "step": 5198 - }, - { - "epoch": 0.7048058022097201, - "grad_norm": 1.6471327778831848, - "learning_rate": 4.23210494462205e-07, - "loss": 1.1348, - "step": 5199 - }, - { - "epoch": 0.7049413678573849, - "grad_norm": 2.0491976447299853, - "learning_rate": 4.228518214810396e-07, - "loss": 1.1794, - "step": 5200 - }, - { - "epoch": 0.7050769335050499, - "grad_norm": 1.747783443617125, - "learning_rate": 4.2249325979834484e-07, - "loss": 1.1606, - "step": 5201 - }, - { - "epoch": 0.7052124991527147, - "grad_norm": 1.4726412126945436, - "learning_rate": 4.221348094832666e-07, - "loss": 1.1403, - "step": 5202 - }, - { - "epoch": 0.7053480648003796, - "grad_norm": 1.5457224743932823, - "learning_rate": 4.217764706049283e-07, - "loss": 1.1452, - "step": 5203 - }, - { - "epoch": 0.7054836304480445, - "grad_norm": 1.445285695012565, - "learning_rate": 4.2141824323243416e-07, - "loss": 1.1456, - "step": 5204 - }, - { - "epoch": 0.7056191960957093, - "grad_norm": 1.5510119995707496, - "learning_rate": 4.21060127434864e-07, - "loss": 1.083, - "step": 5205 - }, - { - "epoch": 0.7057547617433743, - "grad_norm": 1.5060515451345011, - "learning_rate": 4.207021232812792e-07, - "loss": 1.1071, - "step": 5206 - }, - { - "epoch": 0.7058903273910391, - "grad_norm": 2.345330398500057, - "learning_rate": 4.2034423084071637e-07, - "loss": 1.1186, - "step": 5207 - }, - { - "epoch": 0.706025893038704, - "grad_norm": 1.479765459274507, - "learning_rate": 4.199864501821939e-07, - "loss": 1.0955, - "step": 5208 - }, - { - "epoch": 0.7061614586863689, - "grad_norm": 1.5512866178670361, - "learning_rate": 4.196287813747058e-07, - "loss": 1.1168, - "step": 5209 - }, - { - "epoch": 0.7062970243340337, - "grad_norm": 1.9112686569861843, - "learning_rate": 4.1927122448722597e-07, - "loss": 1.1148, - "step": 5210 - }, - { - "epoch": 0.7064325899816987, - "grad_norm": 2.06121322631287, - "learning_rate": 4.1891377958870657e-07, - "loss": 1.1184, - "step": 5211 - }, - { - "epoch": 0.7065681556293635, - "grad_norm": 4.166803987071641, - "learning_rate": 4.18556446748078e-07, - "loss": 1.1622, - "step": 5212 - }, - { - "epoch": 0.7067037212770284, - "grad_norm": 1.697410459373497, - "learning_rate": 4.1819922603424895e-07, - "loss": 1.0854, - "step": 5213 - }, - { - "epoch": 0.7068392869246933, - "grad_norm": 2.44950345913227, - "learning_rate": 4.1784211751610675e-07, - "loss": 1.1633, - "step": 5214 - }, - { - "epoch": 0.7069748525723581, - "grad_norm": 1.5052845724092532, - "learning_rate": 4.174851212625169e-07, - "loss": 1.1258, - "step": 5215 - }, - { - "epoch": 0.7071104182200231, - "grad_norm": 1.595589654372437, - "learning_rate": 4.171282373423234e-07, - "loss": 1.1547, - "step": 5216 - }, - { - "epoch": 0.7072459838676879, - "grad_norm": 1.704528808987417, - "learning_rate": 4.167714658243486e-07, - "loss": 1.1545, - "step": 5217 - }, - { - "epoch": 0.7073815495153528, - "grad_norm": 1.7017848115767975, - "learning_rate": 4.1641480677739236e-07, - "loss": 1.1534, - "step": 5218 - }, - { - "epoch": 0.7075171151630177, - "grad_norm": 1.6415328387063495, - "learning_rate": 4.160582602702347e-07, - "loss": 1.1105, - "step": 5219 - }, - { - "epoch": 0.7076526808106826, - "grad_norm": 1.5720760624575498, - "learning_rate": 4.1570182637163153e-07, - "loss": 1.1034, - "step": 5220 - }, - { - "epoch": 0.7077882464583475, - "grad_norm": 1.539168852420745, - "learning_rate": 4.153455051503196e-07, - "loss": 1.1291, - "step": 5221 - }, - { - "epoch": 0.7079238121060123, - "grad_norm": 1.730582165648342, - "learning_rate": 4.149892966750114e-07, - "loss": 1.1475, - "step": 5222 - }, - { - "epoch": 0.7080593777536772, - "grad_norm": 1.8487646126120718, - "learning_rate": 4.1463320101440027e-07, - "loss": 1.1623, - "step": 5223 - }, - { - "epoch": 0.7081949434013421, - "grad_norm": 1.477493117274552, - "learning_rate": 4.1427721823715487e-07, - "loss": 1.1262, - "step": 5224 - }, - { - "epoch": 0.708330509049007, - "grad_norm": 1.5991883156065025, - "learning_rate": 4.1392134841192537e-07, - "loss": 1.1622, - "step": 5225 - }, - { - "epoch": 0.7084660746966719, - "grad_norm": 1.6985563623376927, - "learning_rate": 4.135655916073368e-07, - "loss": 1.1416, - "step": 5226 - }, - { - "epoch": 0.7086016403443367, - "grad_norm": 1.8339216229961295, - "learning_rate": 4.132099478919957e-07, - "loss": 1.1293, - "step": 5227 - }, - { - "epoch": 0.7087372059920016, - "grad_norm": 1.7125331897052676, - "learning_rate": 4.1285441733448344e-07, - "loss": 1.1168, - "step": 5228 - }, - { - "epoch": 0.7088727716396666, - "grad_norm": 1.6949293423518235, - "learning_rate": 4.124990000033629e-07, - "loss": 1.1279, - "step": 5229 - }, - { - "epoch": 0.7090083372873314, - "grad_norm": 1.5685737611907498, - "learning_rate": 4.1214369596717244e-07, - "loss": 1.1471, - "step": 5230 - }, - { - "epoch": 0.7091439029349963, - "grad_norm": 2.30986322913246, - "learning_rate": 4.1178850529442996e-07, - "loss": 1.1258, - "step": 5231 - }, - { - "epoch": 0.7092794685826611, - "grad_norm": 1.389786470593863, - "learning_rate": 4.1143342805363123e-07, - "loss": 1.103, - "step": 5232 - }, - { - "epoch": 0.709415034230326, - "grad_norm": 1.5880655493023719, - "learning_rate": 4.1107846431325e-07, - "loss": 1.1321, - "step": 5233 - }, - { - "epoch": 0.709550599877991, - "grad_norm": 1.5595836552473266, - "learning_rate": 4.1072361414173815e-07, - "loss": 1.1324, - "step": 5234 - }, - { - "epoch": 0.7096861655256558, - "grad_norm": 2.0203654320421878, - "learning_rate": 4.10368877607526e-07, - "loss": 1.1156, - "step": 5235 - }, - { - "epoch": 0.7098217311733207, - "grad_norm": 1.451772559225182, - "learning_rate": 4.100142547790214e-07, - "loss": 1.1199, - "step": 5236 - }, - { - "epoch": 0.7099572968209855, - "grad_norm": 1.5244509004737692, - "learning_rate": 4.096597457246108e-07, - "loss": 1.1141, - "step": 5237 - }, - { - "epoch": 0.7100928624686504, - "grad_norm": 1.4880395236746309, - "learning_rate": 4.0930535051265835e-07, - "loss": 1.1291, - "step": 5238 - }, - { - "epoch": 0.7102284281163154, - "grad_norm": 1.4373969986147546, - "learning_rate": 4.0895106921150644e-07, - "loss": 1.1674, - "step": 5239 - }, - { - "epoch": 0.7103639937639802, - "grad_norm": 2.3657876307551775, - "learning_rate": 4.0859690188947525e-07, - "loss": 1.1213, - "step": 5240 - }, - { - "epoch": 0.7104995594116451, - "grad_norm": 1.5824531213489121, - "learning_rate": 4.0824284861486346e-07, - "loss": 1.1642, - "step": 5241 - }, - { - "epoch": 0.7106351250593099, - "grad_norm": 2.2393930669605537, - "learning_rate": 4.0788890945594714e-07, - "loss": 1.1594, - "step": 5242 - }, - { - "epoch": 0.7107706907069749, - "grad_norm": 1.7762242367859529, - "learning_rate": 4.0753508448098085e-07, - "loss": 1.0915, - "step": 5243 - }, - { - "epoch": 0.7109062563546398, - "grad_norm": 1.65059641712582, - "learning_rate": 4.0718137375819717e-07, - "loss": 1.1188, - "step": 5244 - }, - { - "epoch": 0.7110418220023046, - "grad_norm": 4.130383397636464, - "learning_rate": 4.0682777735580586e-07, - "loss": 1.1292, - "step": 5245 - }, - { - "epoch": 0.7111773876499695, - "grad_norm": 1.5144684009256741, - "learning_rate": 4.064742953419954e-07, - "loss": 1.1455, - "step": 5246 - }, - { - "epoch": 0.7113129532976343, - "grad_norm": 1.7490407058242181, - "learning_rate": 4.061209277849321e-07, - "loss": 1.0959, - "step": 5247 - }, - { - "epoch": 0.7114485189452993, - "grad_norm": 1.4328449315089336, - "learning_rate": 4.057676747527601e-07, - "loss": 1.1321, - "step": 5248 - }, - { - "epoch": 0.7115840845929642, - "grad_norm": 1.8868452962395321, - "learning_rate": 4.054145363136013e-07, - "loss": 1.1463, - "step": 5249 - }, - { - "epoch": 0.711719650240629, - "grad_norm": 1.5472538221781234, - "learning_rate": 4.05061512535556e-07, - "loss": 1.1077, - "step": 5250 - }, - { - "epoch": 0.7118552158882939, - "grad_norm": 1.7648109620298167, - "learning_rate": 4.047086034867018e-07, - "loss": 1.106, - "step": 5251 - }, - { - "epoch": 0.7119907815359587, - "grad_norm": 11.78138489146233, - "learning_rate": 4.0435580923509436e-07, - "loss": 1.119, - "step": 5252 - }, - { - "epoch": 0.7121263471836237, - "grad_norm": 1.4804305933257895, - "learning_rate": 4.040031298487675e-07, - "loss": 1.1391, - "step": 5253 - }, - { - "epoch": 0.7122619128312886, - "grad_norm": 1.7336752431692217, - "learning_rate": 4.036505653957325e-07, - "loss": 1.1419, - "step": 5254 - }, - { - "epoch": 0.7123974784789534, - "grad_norm": 1.7013382498300442, - "learning_rate": 4.032981159439787e-07, - "loss": 1.1488, - "step": 5255 - }, - { - "epoch": 0.7125330441266183, - "grad_norm": 2.320387665209212, - "learning_rate": 4.029457815614731e-07, - "loss": 1.1306, - "step": 5256 - }, - { - "epoch": 0.7126686097742831, - "grad_norm": 1.7361153983083748, - "learning_rate": 4.025935623161607e-07, - "loss": 1.1393, - "step": 5257 - }, - { - "epoch": 0.7128041754219481, - "grad_norm": 1.6309816187284483, - "learning_rate": 4.022414582759646e-07, - "loss": 1.1475, - "step": 5258 - }, - { - "epoch": 0.712939741069613, - "grad_norm": 1.9102681715029068, - "learning_rate": 4.01889469508784e-07, - "loss": 1.1292, - "step": 5259 - }, - { - "epoch": 0.7130753067172778, - "grad_norm": 1.831065170140749, - "learning_rate": 4.0153759608249883e-07, - "loss": 1.1373, - "step": 5260 - }, - { - "epoch": 0.7132108723649427, - "grad_norm": 1.590231950569179, - "learning_rate": 4.011858380649634e-07, - "loss": 1.0675, - "step": 5261 - }, - { - "epoch": 0.7133464380126076, - "grad_norm": 1.6990801357412049, - "learning_rate": 4.008341955240132e-07, - "loss": 1.1557, - "step": 5262 - }, - { - "epoch": 0.7134820036602725, - "grad_norm": 1.56336966499735, - "learning_rate": 4.0048266852745815e-07, - "loss": 1.117, - "step": 5263 - }, - { - "epoch": 0.7136175693079374, - "grad_norm": 1.556608420894694, - "learning_rate": 4.0013125714308883e-07, - "loss": 1.1357, - "step": 5264 - }, - { - "epoch": 0.7137531349556022, - "grad_norm": 1.931719427342082, - "learning_rate": 3.9977996143867086e-07, - "loss": 1.1513, - "step": 5265 - }, - { - "epoch": 0.7138887006032671, - "grad_norm": 1.6665022988374465, - "learning_rate": 3.9942878148195015e-07, - "loss": 1.0905, - "step": 5266 - }, - { - "epoch": 0.714024266250932, - "grad_norm": 2.8245528476949686, - "learning_rate": 3.9907771734064756e-07, - "loss": 1.1542, - "step": 5267 - }, - { - "epoch": 0.7141598318985969, - "grad_norm": 1.6667423633274212, - "learning_rate": 3.987267690824646e-07, - "loss": 1.1561, - "step": 5268 - }, - { - "epoch": 0.7142953975462618, - "grad_norm": 1.429718527251034, - "learning_rate": 3.983759367750772e-07, - "loss": 1.104, - "step": 5269 - }, - { - "epoch": 0.7144309631939266, - "grad_norm": 1.6826403959477096, - "learning_rate": 3.980252204861423e-07, - "loss": 1.1263, - "step": 5270 - }, - { - "epoch": 0.7145665288415916, - "grad_norm": 1.6649721589858357, - "learning_rate": 3.9767462028329156e-07, - "loss": 1.1372, - "step": 5271 - }, - { - "epoch": 0.7147020944892564, - "grad_norm": 1.459311204651444, - "learning_rate": 3.973241362341357e-07, - "loss": 1.0844, - "step": 5272 - }, - { - "epoch": 0.7148376601369213, - "grad_norm": 1.5058131953409792, - "learning_rate": 3.9697376840626304e-07, - "loss": 1.1222, - "step": 5273 - }, - { - "epoch": 0.7149732257845862, - "grad_norm": 1.5133304395007847, - "learning_rate": 3.9662351686723914e-07, - "loss": 1.1564, - "step": 5274 - }, - { - "epoch": 0.715108791432251, - "grad_norm": 1.3918228432112048, - "learning_rate": 3.962733816846073e-07, - "loss": 1.0932, - "step": 5275 - }, - { - "epoch": 0.715244357079916, - "grad_norm": 1.7017032303140962, - "learning_rate": 3.9592336292588825e-07, - "loss": 1.0964, - "step": 5276 - }, - { - "epoch": 0.7153799227275809, - "grad_norm": 1.7718491721845813, - "learning_rate": 3.9557346065858034e-07, - "loss": 1.1332, - "step": 5277 - }, - { - "epoch": 0.7155154883752457, - "grad_norm": 1.5917459322224354, - "learning_rate": 3.952236749501594e-07, - "loss": 1.1402, - "step": 5278 - }, - { - "epoch": 0.7156510540229106, - "grad_norm": 1.6577029036195212, - "learning_rate": 3.948740058680791e-07, - "loss": 1.1481, - "step": 5279 - }, - { - "epoch": 0.7157866196705754, - "grad_norm": 1.8992902920006884, - "learning_rate": 3.9452445347977e-07, - "loss": 1.1325, - "step": 5280 - }, - { - "epoch": 0.7159221853182404, - "grad_norm": 1.509212087737686, - "learning_rate": 3.941750178526413e-07, - "loss": 1.1343, - "step": 5281 - }, - { - "epoch": 0.7160577509659053, - "grad_norm": 1.8484722074793531, - "learning_rate": 3.938256990540775e-07, - "loss": 1.149, - "step": 5282 - }, - { - "epoch": 0.7161933166135701, - "grad_norm": 2.215097372451185, - "learning_rate": 3.934764971514434e-07, - "loss": 1.1102, - "step": 5283 - }, - { - "epoch": 0.716328882261235, - "grad_norm": 2.5515179276865294, - "learning_rate": 3.931274122120786e-07, - "loss": 1.1552, - "step": 5284 - }, - { - "epoch": 0.7164644479088998, - "grad_norm": 1.7200462381235366, - "learning_rate": 3.9277844430330277e-07, - "loss": 1.1667, - "step": 5285 - }, - { - "epoch": 0.7166000135565648, - "grad_norm": 1.3937328072542279, - "learning_rate": 3.9242959349241036e-07, - "loss": 1.147, - "step": 5286 - }, - { - "epoch": 0.7167355792042297, - "grad_norm": 1.727350989625206, - "learning_rate": 3.9208085984667507e-07, - "loss": 1.1181, - "step": 5287 - }, - { - "epoch": 0.7168711448518945, - "grad_norm": 1.6230529665815427, - "learning_rate": 3.917322434333472e-07, - "loss": 1.1512, - "step": 5288 - }, - { - "epoch": 0.7170067104995594, - "grad_norm": 3.321688143915773, - "learning_rate": 3.913837443196549e-07, - "loss": 1.1162, - "step": 5289 - }, - { - "epoch": 0.7171422761472243, - "grad_norm": 1.4396734832756524, - "learning_rate": 3.9103536257280343e-07, - "loss": 1.1432, - "step": 5290 - }, - { - "epoch": 0.7172778417948892, - "grad_norm": 1.4511889116798282, - "learning_rate": 3.9068709825997534e-07, - "loss": 1.1474, - "step": 5291 - }, - { - "epoch": 0.7174134074425541, - "grad_norm": 10.073086145103824, - "learning_rate": 3.903389514483308e-07, - "loss": 1.1046, - "step": 5292 - }, - { - "epoch": 0.7175489730902189, - "grad_norm": 1.5696892194670138, - "learning_rate": 3.899909222050071e-07, - "loss": 1.097, - "step": 5293 - }, - { - "epoch": 0.7176845387378838, - "grad_norm": 1.5068312711052936, - "learning_rate": 3.896430105971188e-07, - "loss": 1.1382, - "step": 5294 - }, - { - "epoch": 0.7178201043855487, - "grad_norm": 2.0829325094650697, - "learning_rate": 3.8929521669175813e-07, - "loss": 1.161, - "step": 5295 - }, - { - "epoch": 0.7179556700332136, - "grad_norm": 1.5093566280218527, - "learning_rate": 3.889475405559943e-07, - "loss": 1.1286, - "step": 5296 - }, - { - "epoch": 0.7180912356808785, - "grad_norm": 2.2391027174337426, - "learning_rate": 3.88599982256874e-07, - "loss": 1.1762, - "step": 5297 - }, - { - "epoch": 0.7182268013285433, - "grad_norm": 1.5896870131509178, - "learning_rate": 3.8825254186142097e-07, - "loss": 1.1481, - "step": 5298 - }, - { - "epoch": 0.7183623669762083, - "grad_norm": 1.6447315278921162, - "learning_rate": 3.8790521943663633e-07, - "loss": 1.1166, - "step": 5299 - }, - { - "epoch": 0.7184979326238731, - "grad_norm": 1.7239418100330277, - "learning_rate": 3.875580150494986e-07, - "loss": 1.1328, - "step": 5300 - }, - { - "epoch": 0.718633498271538, - "grad_norm": 1.751061019257844, - "learning_rate": 3.8721092876696373e-07, - "loss": 1.1335, - "step": 5301 - }, - { - "epoch": 0.7187690639192029, - "grad_norm": 1.7450590644460298, - "learning_rate": 3.868639606559635e-07, - "loss": 1.1437, - "step": 5302 - }, - { - "epoch": 0.7189046295668677, - "grad_norm": 1.5232856056244652, - "learning_rate": 3.8651711078340923e-07, - "loss": 1.1325, - "step": 5303 - }, - { - "epoch": 0.7190401952145327, - "grad_norm": 1.6126366693121217, - "learning_rate": 3.86170379216187e-07, - "loss": 1.1042, - "step": 5304 - }, - { - "epoch": 0.7191757608621975, - "grad_norm": 1.5259163279994197, - "learning_rate": 3.8582376602116254e-07, - "loss": 1.1233, - "step": 5305 - }, - { - "epoch": 0.7193113265098624, - "grad_norm": 1.4473163978124917, - "learning_rate": 3.854772712651765e-07, - "loss": 1.1317, - "step": 5306 - }, - { - "epoch": 0.7194468921575273, - "grad_norm": 1.615976562993275, - "learning_rate": 3.8513089501504783e-07, - "loss": 1.114, - "step": 5307 - }, - { - "epoch": 0.7195824578051921, - "grad_norm": 1.4882343163753862, - "learning_rate": 3.847846373375726e-07, - "loss": 1.1016, - "step": 5308 - }, - { - "epoch": 0.7197180234528571, - "grad_norm": 2.582763622244887, - "learning_rate": 3.844384982995239e-07, - "loss": 1.1783, - "step": 5309 - }, - { - "epoch": 0.7198535891005219, - "grad_norm": 1.6540082073029063, - "learning_rate": 3.8409247796765185e-07, - "loss": 1.1486, - "step": 5310 - }, - { - "epoch": 0.7199891547481868, - "grad_norm": 1.601433990779869, - "learning_rate": 3.837465764086837e-07, - "loss": 1.0838, - "step": 5311 - }, - { - "epoch": 0.7201247203958517, - "grad_norm": 1.5069715491230624, - "learning_rate": 3.83400793689324e-07, - "loss": 1.0816, - "step": 5312 - }, - { - "epoch": 0.7202602860435166, - "grad_norm": 3.75770513362962, - "learning_rate": 3.83055129876254e-07, - "loss": 1.1634, - "step": 5313 - }, - { - "epoch": 0.7203958516911815, - "grad_norm": 1.633032408312539, - "learning_rate": 3.8270958503613225e-07, - "loss": 1.1203, - "step": 5314 - }, - { - "epoch": 0.7205314173388463, - "grad_norm": 1.5468424187150256, - "learning_rate": 3.8236415923559463e-07, - "loss": 1.1421, - "step": 5315 - }, - { - "epoch": 0.7206669829865112, - "grad_norm": 4.040558721220551, - "learning_rate": 3.820188525412538e-07, - "loss": 1.1195, - "step": 5316 - }, - { - "epoch": 0.7208025486341761, - "grad_norm": 1.4551998165788553, - "learning_rate": 3.8167366501969855e-07, - "loss": 1.1098, - "step": 5317 - }, - { - "epoch": 0.720938114281841, - "grad_norm": 1.6373174000956847, - "learning_rate": 3.8132859673749685e-07, - "loss": 1.1313, - "step": 5318 - }, - { - "epoch": 0.7210736799295059, - "grad_norm": 1.6588385393599137, - "learning_rate": 3.809836477611912e-07, - "loss": 1.0898, - "step": 5319 - }, - { - "epoch": 0.7212092455771707, - "grad_norm": 1.971090972747685, - "learning_rate": 3.806388181573035e-07, - "loss": 1.1081, - "step": 5320 - }, - { - "epoch": 0.7213448112248356, - "grad_norm": 1.5693387341522138, - "learning_rate": 3.8029410799233006e-07, - "loss": 1.1414, - "step": 5321 - }, - { - "epoch": 0.7214803768725006, - "grad_norm": 1.4543872603596673, - "learning_rate": 3.7994951733274695e-07, - "loss": 1.1339, - "step": 5322 - }, - { - "epoch": 0.7216159425201654, - "grad_norm": 1.6915110995361213, - "learning_rate": 3.7960504624500436e-07, - "loss": 1.1491, - "step": 5323 - }, - { - "epoch": 0.7217515081678303, - "grad_norm": 1.9628740381412688, - "learning_rate": 3.792606947955321e-07, - "loss": 1.1462, - "step": 5324 - }, - { - "epoch": 0.7218870738154951, - "grad_norm": 1.740884404802411, - "learning_rate": 3.7891646305073456e-07, - "loss": 1.1314, - "step": 5325 - }, - { - "epoch": 0.72202263946316, - "grad_norm": 1.8920527548328734, - "learning_rate": 3.78572351076995e-07, - "loss": 1.0977, - "step": 5326 - }, - { - "epoch": 0.722158205110825, - "grad_norm": 2.1901691268956696, - "learning_rate": 3.7822835894067185e-07, - "loss": 1.1042, - "step": 5327 - }, - { - "epoch": 0.7222937707584898, - "grad_norm": 4.8582837711244, - "learning_rate": 3.7788448670810225e-07, - "loss": 1.1256, - "step": 5328 - }, - { - "epoch": 0.7224293364061547, - "grad_norm": 4.2925483522460635, - "learning_rate": 3.775407344455984e-07, - "loss": 1.1539, - "step": 5329 - }, - { - "epoch": 0.7225649020538195, - "grad_norm": 1.7789777923480774, - "learning_rate": 3.7719710221945055e-07, - "loss": 1.151, - "step": 5330 - }, - { - "epoch": 0.7227004677014844, - "grad_norm": 1.6133117067537395, - "learning_rate": 3.768535900959253e-07, - "loss": 1.1012, - "step": 5331 - }, - { - "epoch": 0.7228360333491494, - "grad_norm": 1.6532868934925096, - "learning_rate": 3.765101981412665e-07, - "loss": 1.1533, - "step": 5332 - }, - { - "epoch": 0.7229715989968142, - "grad_norm": 1.7982502645113594, - "learning_rate": 3.7616692642169443e-07, - "loss": 1.1056, - "step": 5333 - }, - { - "epoch": 0.7231071646444791, - "grad_norm": 1.63032509701791, - "learning_rate": 3.7582377500340636e-07, - "loss": 1.1375, - "step": 5334 - }, - { - "epoch": 0.7232427302921439, - "grad_norm": 1.8477589761599433, - "learning_rate": 3.7548074395257634e-07, - "loss": 1.1342, - "step": 5335 - }, - { - "epoch": 0.7233782959398088, - "grad_norm": 1.676501131544696, - "learning_rate": 3.751378333353552e-07, - "loss": 1.1036, - "step": 5336 - }, - { - "epoch": 0.7235138615874738, - "grad_norm": 3.3375513840649615, - "learning_rate": 3.747950432178706e-07, - "loss": 1.133, - "step": 5337 - }, - { - "epoch": 0.7236494272351386, - "grad_norm": 2.0252146592194644, - "learning_rate": 3.744523736662267e-07, - "loss": 1.1347, - "step": 5338 - }, - { - "epoch": 0.7237849928828035, - "grad_norm": 1.6008889218275155, - "learning_rate": 3.7410982474650486e-07, - "loss": 1.1547, - "step": 5339 - }, - { - "epoch": 0.7239205585304683, - "grad_norm": 1.5289742876317323, - "learning_rate": 3.7376739652476287e-07, - "loss": 1.1435, - "step": 5340 - }, - { - "epoch": 0.7240561241781333, - "grad_norm": 1.3548548296898886, - "learning_rate": 3.734250890670352e-07, - "loss": 1.1266, - "step": 5341 - }, - { - "epoch": 0.7241916898257982, - "grad_norm": 1.4501088046025934, - "learning_rate": 3.730829024393333e-07, - "loss": 1.1159, - "step": 5342 - }, - { - "epoch": 0.724327255473463, - "grad_norm": 1.7586006276412973, - "learning_rate": 3.727408367076453e-07, - "loss": 1.1308, - "step": 5343 - }, - { - "epoch": 0.7244628211211279, - "grad_norm": 1.5768717898243372, - "learning_rate": 3.723988919379354e-07, - "loss": 1.1092, - "step": 5344 - }, - { - "epoch": 0.7245983867687927, - "grad_norm": 1.6089322018986985, - "learning_rate": 3.7205706819614527e-07, - "loss": 1.1126, - "step": 5345 - }, - { - "epoch": 0.7247339524164577, - "grad_norm": 1.648889357778547, - "learning_rate": 3.717153655481927e-07, - "loss": 1.147, - "step": 5346 - }, - { - "epoch": 0.7248695180641226, - "grad_norm": 1.4387618832484639, - "learning_rate": 3.7137378405997267e-07, - "loss": 1.1249, - "step": 5347 - }, - { - "epoch": 0.7250050837117874, - "grad_norm": 2.1508829398636142, - "learning_rate": 3.710323237973563e-07, - "loss": 1.1056, - "step": 5348 - }, - { - "epoch": 0.7251406493594523, - "grad_norm": 1.6359677630210596, - "learning_rate": 3.7069098482619145e-07, - "loss": 1.1365, - "step": 5349 - }, - { - "epoch": 0.7252762150071171, - "grad_norm": 1.6683256245325149, - "learning_rate": 3.703497672123026e-07, - "loss": 1.1613, - "step": 5350 - }, - { - "epoch": 0.7254117806547821, - "grad_norm": 1.5574589705677118, - "learning_rate": 3.7000867102149114e-07, - "loss": 1.1153, - "step": 5351 - }, - { - "epoch": 0.725547346302447, - "grad_norm": 1.7093488679766402, - "learning_rate": 3.6966769631953466e-07, - "loss": 1.0803, - "step": 5352 - }, - { - "epoch": 0.7256829119501118, - "grad_norm": 1.9779567844076673, - "learning_rate": 3.693268431721873e-07, - "loss": 1.1722, - "step": 5353 - }, - { - "epoch": 0.7258184775977767, - "grad_norm": 1.7946643803294549, - "learning_rate": 3.6898611164518e-07, - "loss": 1.1685, - "step": 5354 - }, - { - "epoch": 0.7259540432454417, - "grad_norm": 2.5495909084339865, - "learning_rate": 3.6864550180422014e-07, - "loss": 1.1362, - "step": 5355 - }, - { - "epoch": 0.7260896088931065, - "grad_norm": 1.5521536902378574, - "learning_rate": 3.683050137149918e-07, - "loss": 1.1442, - "step": 5356 - }, - { - "epoch": 0.7262251745407714, - "grad_norm": 1.713839041472764, - "learning_rate": 3.6796464744315545e-07, - "loss": 1.1196, - "step": 5357 - }, - { - "epoch": 0.7263607401884362, - "grad_norm": 2.3498739017507213, - "learning_rate": 3.6762440305434726e-07, - "loss": 1.1606, - "step": 5358 - }, - { - "epoch": 0.7264963058361011, - "grad_norm": 1.7127325697308675, - "learning_rate": 3.6728428061418195e-07, - "loss": 1.1088, - "step": 5359 - }, - { - "epoch": 0.7266318714837661, - "grad_norm": 1.5026447938416305, - "learning_rate": 3.66944280188248e-07, - "loss": 1.1482, - "step": 5360 - }, - { - "epoch": 0.7267674371314309, - "grad_norm": 1.900219968070367, - "learning_rate": 3.6660440184211326e-07, - "loss": 1.1287, - "step": 5361 - }, - { - "epoch": 0.7269030027790958, - "grad_norm": 1.5841686170573437, - "learning_rate": 3.662646456413193e-07, - "loss": 1.1629, - "step": 5362 - }, - { - "epoch": 0.7270385684267606, - "grad_norm": 1.5914643060159959, - "learning_rate": 3.6592501165138666e-07, - "loss": 1.1347, - "step": 5363 - }, - { - "epoch": 0.7271741340744255, - "grad_norm": 1.4799565453565167, - "learning_rate": 3.6558549993780985e-07, - "loss": 1.1525, - "step": 5364 - }, - { - "epoch": 0.7273096997220905, - "grad_norm": 1.6565926951218564, - "learning_rate": 3.6524611056606226e-07, - "loss": 1.1421, - "step": 5365 - }, - { - "epoch": 0.7274452653697553, - "grad_norm": 1.4556822576153634, - "learning_rate": 3.6490684360159106e-07, - "loss": 1.1108, - "step": 5366 - }, - { - "epoch": 0.7275808310174202, - "grad_norm": 2.5528233900067914, - "learning_rate": 3.6456769910982264e-07, - "loss": 1.1116, - "step": 5367 - }, - { - "epoch": 0.727716396665085, - "grad_norm": 1.4939420697250805, - "learning_rate": 3.6422867715615703e-07, - "loss": 1.1539, - "step": 5368 - }, - { - "epoch": 0.72785196231275, - "grad_norm": 1.4303025416609347, - "learning_rate": 3.638897778059732e-07, - "loss": 1.1593, - "step": 5369 - }, - { - "epoch": 0.7279875279604149, - "grad_norm": 1.6823315045135818, - "learning_rate": 3.6355100112462425e-07, - "loss": 1.1451, - "step": 5370 - }, - { - "epoch": 0.7281230936080797, - "grad_norm": 1.9308563775271876, - "learning_rate": 3.632123471774409e-07, - "loss": 1.1515, - "step": 5371 - }, - { - "epoch": 0.7282586592557446, - "grad_norm": 1.584642679373204, - "learning_rate": 3.628738160297299e-07, - "loss": 1.0938, - "step": 5372 - }, - { - "epoch": 0.7283942249034094, - "grad_norm": 1.9075297872239034, - "learning_rate": 3.625354077467743e-07, - "loss": 1.1418, - "step": 5373 - }, - { - "epoch": 0.7285297905510744, - "grad_norm": 1.7571528061291155, - "learning_rate": 3.6219712239383336e-07, - "loss": 1.0971, - "step": 5374 - }, - { - "epoch": 0.7286653561987393, - "grad_norm": 1.7138119599225152, - "learning_rate": 3.6185896003614303e-07, - "loss": 1.1258, - "step": 5375 - }, - { - "epoch": 0.7288009218464041, - "grad_norm": 1.591787000841555, - "learning_rate": 3.6152092073891504e-07, - "loss": 1.0777, - "step": 5376 - }, - { - "epoch": 0.728936487494069, - "grad_norm": 1.7388555023862784, - "learning_rate": 3.6118300456733764e-07, - "loss": 1.1951, - "step": 5377 - }, - { - "epoch": 0.7290720531417338, - "grad_norm": 1.4934167498004367, - "learning_rate": 3.6084521158657555e-07, - "loss": 1.0942, - "step": 5378 - }, - { - "epoch": 0.7292076187893988, - "grad_norm": 2.8021124413537755, - "learning_rate": 3.605075418617687e-07, - "loss": 1.1458, - "step": 5379 - }, - { - "epoch": 0.7293431844370637, - "grad_norm": 3.6190118745023088, - "learning_rate": 3.6016999545803504e-07, - "loss": 1.1489, - "step": 5380 - }, - { - "epoch": 0.7294787500847285, - "grad_norm": 1.407547746958454, - "learning_rate": 3.5983257244046674e-07, - "loss": 1.1335, - "step": 5381 - }, - { - "epoch": 0.7296143157323934, - "grad_norm": 1.97139438527592, - "learning_rate": 3.594952728741343e-07, - "loss": 1.1451, - "step": 5382 - }, - { - "epoch": 0.7297498813800583, - "grad_norm": 1.4901025037073317, - "learning_rate": 3.591580968240819e-07, - "loss": 1.1545, - "step": 5383 - }, - { - "epoch": 0.7298854470277232, - "grad_norm": 1.4100062862663427, - "learning_rate": 3.5882104435533276e-07, - "loss": 1.1456, - "step": 5384 - }, - { - "epoch": 0.7300210126753881, - "grad_norm": 1.7424813098664567, - "learning_rate": 3.584841155328837e-07, - "loss": 1.113, - "step": 5385 - }, - { - "epoch": 0.7301565783230529, - "grad_norm": 2.859959380240663, - "learning_rate": 3.581473104217092e-07, - "loss": 1.1337, - "step": 5386 - }, - { - "epoch": 0.7302921439707178, - "grad_norm": 1.459121443394398, - "learning_rate": 3.578106290867593e-07, - "loss": 1.1177, - "step": 5387 - }, - { - "epoch": 0.7304277096183827, - "grad_norm": 2.1778847993213333, - "learning_rate": 3.5747407159296063e-07, - "loss": 1.1188, - "step": 5388 - }, - { - "epoch": 0.7305632752660476, - "grad_norm": 3.389916580355758, - "learning_rate": 3.571376380052152e-07, - "loss": 1.1072, - "step": 5389 - }, - { - "epoch": 0.7306988409137125, - "grad_norm": 2.053505597413966, - "learning_rate": 3.5680132838840205e-07, - "loss": 1.0945, - "step": 5390 - }, - { - "epoch": 0.7308344065613773, - "grad_norm": 1.4669416084417857, - "learning_rate": 3.564651428073755e-07, - "loss": 1.1478, - "step": 5391 - }, - { - "epoch": 0.7309699722090423, - "grad_norm": 2.9409395337407642, - "learning_rate": 3.561290813269665e-07, - "loss": 1.1009, - "step": 5392 - }, - { - "epoch": 0.7311055378567071, - "grad_norm": 2.0139537786396544, - "learning_rate": 3.5579314401198166e-07, - "loss": 1.1601, - "step": 5393 - }, - { - "epoch": 0.731241103504372, - "grad_norm": 1.5668191357023813, - "learning_rate": 3.5545733092720396e-07, - "loss": 1.1251, - "step": 5394 - }, - { - "epoch": 0.7313766691520369, - "grad_norm": 1.5213604467066775, - "learning_rate": 3.551216421373924e-07, - "loss": 1.146, - "step": 5395 - }, - { - "epoch": 0.7315122347997017, - "grad_norm": 8.253879808306294, - "learning_rate": 3.5478607770728164e-07, - "loss": 1.1382, - "step": 5396 - }, - { - "epoch": 0.7316478004473667, - "grad_norm": 2.1856457477416376, - "learning_rate": 3.544506377015829e-07, - "loss": 1.1471, - "step": 5397 - }, - { - "epoch": 0.7317833660950315, - "grad_norm": 1.6628551061935597, - "learning_rate": 3.5411532218498296e-07, - "loss": 1.0925, - "step": 5398 - }, - { - "epoch": 0.7319189317426964, - "grad_norm": 2.906611581536199, - "learning_rate": 3.537801312221448e-07, - "loss": 1.1028, - "step": 5399 - }, - { - "epoch": 0.7320544973903613, - "grad_norm": 1.7158519900854976, - "learning_rate": 3.5344506487770774e-07, - "loss": 1.1147, - "step": 5400 - }, - { - "epoch": 0.7321900630380261, - "grad_norm": 1.6288067123258536, - "learning_rate": 3.5311012321628577e-07, - "loss": 1.1368, - "step": 5401 - }, - { - "epoch": 0.7323256286856911, - "grad_norm": 1.6751899073223848, - "learning_rate": 3.527753063024708e-07, - "loss": 1.1344, - "step": 5402 - }, - { - "epoch": 0.7324611943333559, - "grad_norm": 2.0010202474647434, - "learning_rate": 3.524406142008285e-07, - "loss": 1.1232, - "step": 5403 - }, - { - "epoch": 0.7325967599810208, - "grad_norm": 2.008241393520621, - "learning_rate": 3.5210604697590297e-07, - "loss": 1.1104, - "step": 5404 - }, - { - "epoch": 0.7327323256286857, - "grad_norm": 1.6555350496095416, - "learning_rate": 3.5177160469221176e-07, - "loss": 1.1669, - "step": 5405 - }, - { - "epoch": 0.7328678912763505, - "grad_norm": 1.5572893016974747, - "learning_rate": 3.514372874142497e-07, - "loss": 1.1751, - "step": 5406 - }, - { - "epoch": 0.7330034569240155, - "grad_norm": 1.5216618847835734, - "learning_rate": 3.511030952064874e-07, - "loss": 1.1446, - "step": 5407 - }, - { - "epoch": 0.7331390225716803, - "grad_norm": 2.433042915758862, - "learning_rate": 3.507690281333712e-07, - "loss": 1.1263, - "step": 5408 - }, - { - "epoch": 0.7332745882193452, - "grad_norm": 1.4352852945143089, - "learning_rate": 3.504350862593231e-07, - "loss": 1.1379, - "step": 5409 - }, - { - "epoch": 0.7334101538670101, - "grad_norm": 1.5042558645646191, - "learning_rate": 3.501012696487412e-07, - "loss": 1.1182, - "step": 5410 - }, - { - "epoch": 0.733545719514675, - "grad_norm": 1.8700099941989463, - "learning_rate": 3.497675783659995e-07, - "loss": 1.1316, - "step": 5411 - }, - { - "epoch": 0.7336812851623399, - "grad_norm": 2.046069674109708, - "learning_rate": 3.4943401247544766e-07, - "loss": 1.1412, - "step": 5412 - }, - { - "epoch": 0.7338168508100047, - "grad_norm": 2.0136486857969422, - "learning_rate": 3.491005720414113e-07, - "loss": 1.1091, - "step": 5413 - }, - { - "epoch": 0.7339524164576696, - "grad_norm": 1.6536559680445275, - "learning_rate": 3.487672571281918e-07, - "loss": 1.1083, - "step": 5414 - }, - { - "epoch": 0.7340879821053345, - "grad_norm": 2.4195219944589983, - "learning_rate": 3.4843406780006644e-07, - "loss": 1.1054, - "step": 5415 - }, - { - "epoch": 0.7342235477529994, - "grad_norm": 1.6820929332280206, - "learning_rate": 3.481010041212874e-07, - "loss": 1.1352, - "step": 5416 - }, - { - "epoch": 0.7343591134006643, - "grad_norm": 1.9443004503340873, - "learning_rate": 3.477680661560846e-07, - "loss": 1.1658, - "step": 5417 - }, - { - "epoch": 0.7344946790483291, - "grad_norm": 1.5192109441316397, - "learning_rate": 3.4743525396866114e-07, - "loss": 1.184, - "step": 5418 - }, - { - "epoch": 0.734630244695994, - "grad_norm": 1.5894226642342957, - "learning_rate": 3.471025676231986e-07, - "loss": 1.1256, - "step": 5419 - }, - { - "epoch": 0.734765810343659, - "grad_norm": 1.5362456898170755, - "learning_rate": 3.467700071838515e-07, - "loss": 1.1885, - "step": 5420 - }, - { - "epoch": 0.7349013759913238, - "grad_norm": 1.580365728095864, - "learning_rate": 3.4643757271475293e-07, - "loss": 1.1174, - "step": 5421 - }, - { - "epoch": 0.7350369416389887, - "grad_norm": 1.6461600129635068, - "learning_rate": 3.4610526428000897e-07, - "loss": 1.1348, - "step": 5422 - }, - { - "epoch": 0.7351725072866535, - "grad_norm": 1.4986721513113068, - "learning_rate": 3.457730819437038e-07, - "loss": 1.1457, - "step": 5423 - }, - { - "epoch": 0.7353080729343184, - "grad_norm": 1.638576972704897, - "learning_rate": 3.454410257698951e-07, - "loss": 1.1008, - "step": 5424 - }, - { - "epoch": 0.7354436385819834, - "grad_norm": 1.955956952511495, - "learning_rate": 3.451090958226184e-07, - "loss": 1.1385, - "step": 5425 - }, - { - "epoch": 0.7355792042296482, - "grad_norm": 1.5935517381885116, - "learning_rate": 3.447772921658825e-07, - "loss": 1.1199, - "step": 5426 - }, - { - "epoch": 0.7357147698773131, - "grad_norm": 2.170661477147692, - "learning_rate": 3.444456148636744e-07, - "loss": 1.1426, - "step": 5427 - }, - { - "epoch": 0.7358503355249779, - "grad_norm": 1.892813161112932, - "learning_rate": 3.441140639799546e-07, - "loss": 1.1322, - "step": 5428 - }, - { - "epoch": 0.7359859011726428, - "grad_norm": 1.4624303327186696, - "learning_rate": 3.4378263957866026e-07, - "loss": 1.1017, - "step": 5429 - }, - { - "epoch": 0.7361214668203078, - "grad_norm": 1.8566063392234644, - "learning_rate": 3.4345134172370407e-07, - "loss": 1.0855, - "step": 5430 - }, - { - "epoch": 0.7362570324679726, - "grad_norm": 1.693678681570148, - "learning_rate": 3.431201704789741e-07, - "loss": 1.1495, - "step": 5431 - }, - { - "epoch": 0.7363925981156375, - "grad_norm": 7.553133914609454, - "learning_rate": 3.427891259083342e-07, - "loss": 1.1118, - "step": 5432 - }, - { - "epoch": 0.7365281637633023, - "grad_norm": 1.6968663582405663, - "learning_rate": 3.4245820807562365e-07, - "loss": 1.1088, - "step": 5433 - }, - { - "epoch": 0.7366637294109672, - "grad_norm": 1.444493463599562, - "learning_rate": 3.4212741704465733e-07, - "loss": 1.131, - "step": 5434 - }, - { - "epoch": 0.7367992950586322, - "grad_norm": 1.48865289293312, - "learning_rate": 3.4179675287922573e-07, - "loss": 1.1233, - "step": 5435 - }, - { - "epoch": 0.736934860706297, - "grad_norm": 1.3760182778638879, - "learning_rate": 3.4146621564309476e-07, - "loss": 1.1286, - "step": 5436 - }, - { - "epoch": 0.7370704263539619, - "grad_norm": 2.254641591979723, - "learning_rate": 3.41135805400006e-07, - "loss": 1.1563, - "step": 5437 - }, - { - "epoch": 0.7372059920016268, - "grad_norm": 2.0481365711066237, - "learning_rate": 3.408055222136763e-07, - "loss": 1.1333, - "step": 5438 - }, - { - "epoch": 0.7373415576492917, - "grad_norm": 1.6866803225304787, - "learning_rate": 3.4047536614779837e-07, - "loss": 1.1451, - "step": 5439 - }, - { - "epoch": 0.7374771232969566, - "grad_norm": 1.5932886296946749, - "learning_rate": 3.4014533726604046e-07, - "loss": 1.1272, - "step": 5440 - }, - { - "epoch": 0.7376126889446214, - "grad_norm": 1.5722710480827515, - "learning_rate": 3.398154356320454e-07, - "loss": 1.0952, - "step": 5441 - }, - { - "epoch": 0.7377482545922863, - "grad_norm": 1.405599446950876, - "learning_rate": 3.394856613094322e-07, - "loss": 1.0729, - "step": 5442 - }, - { - "epoch": 0.7378838202399512, - "grad_norm": 1.5161537799119773, - "learning_rate": 3.3915601436179564e-07, - "loss": 1.0958, - "step": 5443 - }, - { - "epoch": 0.7380193858876161, - "grad_norm": 2.2141859200810377, - "learning_rate": 3.388264948527052e-07, - "loss": 1.1238, - "step": 5444 - }, - { - "epoch": 0.738154951535281, - "grad_norm": 1.691664708227661, - "learning_rate": 3.384971028457063e-07, - "loss": 1.1674, - "step": 5445 - }, - { - "epoch": 0.7382905171829458, - "grad_norm": 2.4787790792952458, - "learning_rate": 3.381678384043195e-07, - "loss": 1.1313, - "step": 5446 - }, - { - "epoch": 0.7384260828306107, - "grad_norm": 1.7111807498636238, - "learning_rate": 3.378387015920409e-07, - "loss": 1.1161, - "step": 5447 - }, - { - "epoch": 0.7385616484782757, - "grad_norm": 1.544324011946911, - "learning_rate": 3.3750969247234184e-07, - "loss": 1.1533, - "step": 5448 - }, - { - "epoch": 0.7386972141259405, - "grad_norm": 1.5623764280778365, - "learning_rate": 3.371808111086694e-07, - "loss": 1.1133, - "step": 5449 - }, - { - "epoch": 0.7388327797736054, - "grad_norm": 1.5266805840427737, - "learning_rate": 3.3685205756444534e-07, - "loss": 1.0878, - "step": 5450 - }, - { - "epoch": 0.7389683454212702, - "grad_norm": 1.884331026054299, - "learning_rate": 3.365234319030675e-07, - "loss": 1.1351, - "step": 5451 - }, - { - "epoch": 0.7391039110689351, - "grad_norm": 1.4951264048254902, - "learning_rate": 3.361949341879087e-07, - "loss": 1.1213, - "step": 5452 - }, - { - "epoch": 0.7392394767166001, - "grad_norm": 1.4771081448917478, - "learning_rate": 3.35866564482317e-07, - "loss": 1.1346, - "step": 5453 - }, - { - "epoch": 0.7393750423642649, - "grad_norm": 1.719214690349787, - "learning_rate": 3.3553832284961603e-07, - "loss": 1.1251, - "step": 5454 - }, - { - "epoch": 0.7395106080119298, - "grad_norm": 2.9246437480897503, - "learning_rate": 3.352102093531045e-07, - "loss": 1.1223, - "step": 5455 - }, - { - "epoch": 0.7396461736595946, - "grad_norm": 1.7356744286117596, - "learning_rate": 3.348822240560569e-07, - "loss": 1.1405, - "step": 5456 - }, - { - "epoch": 0.7397817393072595, - "grad_norm": 1.72362424367332, - "learning_rate": 3.345543670217217e-07, - "loss": 1.1217, - "step": 5457 - }, - { - "epoch": 0.7399173049549245, - "grad_norm": 1.640456352964763, - "learning_rate": 3.3422663831332477e-07, - "loss": 1.1193, - "step": 5458 - }, - { - "epoch": 0.7400528706025893, - "grad_norm": 1.6213249922530506, - "learning_rate": 3.338990379940646e-07, - "loss": 1.15, - "step": 5459 - }, - { - "epoch": 0.7401884362502542, - "grad_norm": 1.576244465381312, - "learning_rate": 3.335715661271178e-07, - "loss": 1.1146, - "step": 5460 - }, - { - "epoch": 0.740324001897919, - "grad_norm": 1.5347533552344916, - "learning_rate": 3.3324422277563326e-07, - "loss": 1.1356, - "step": 5461 - }, - { - "epoch": 0.740459567545584, - "grad_norm": 1.4942103187230449, - "learning_rate": 3.32917008002738e-07, - "loss": 1.1171, - "step": 5462 - }, - { - "epoch": 0.7405951331932489, - "grad_norm": 1.42505667346409, - "learning_rate": 3.3258992187153144e-07, - "loss": 1.1193, - "step": 5463 - }, - { - "epoch": 0.7407306988409137, - "grad_norm": 1.5574173274775787, - "learning_rate": 3.322629644450909e-07, - "loss": 1.1401, - "step": 5464 - }, - { - "epoch": 0.7408662644885786, - "grad_norm": 2.3096845814778977, - "learning_rate": 3.319361357864663e-07, - "loss": 1.1283, - "step": 5465 - }, - { - "epoch": 0.7410018301362434, - "grad_norm": 1.6516551431017594, - "learning_rate": 3.316094359586852e-07, - "loss": 1.101, - "step": 5466 - }, - { - "epoch": 0.7411373957839084, - "grad_norm": 1.8869829602288586, - "learning_rate": 3.3128286502474803e-07, - "loss": 1.1396, - "step": 5467 - }, - { - "epoch": 0.7412729614315733, - "grad_norm": 1.567032895238786, - "learning_rate": 3.3095642304763183e-07, - "loss": 1.1446, - "step": 5468 - }, - { - "epoch": 0.7414085270792381, - "grad_norm": 1.4800807474569573, - "learning_rate": 3.306301100902883e-07, - "loss": 1.0695, - "step": 5469 - }, - { - "epoch": 0.741544092726903, - "grad_norm": 1.794952602599827, - "learning_rate": 3.303039262156443e-07, - "loss": 1.0878, - "step": 5470 - }, - { - "epoch": 0.7416796583745678, - "grad_norm": 1.7002187965826974, - "learning_rate": 3.2997787148660195e-07, - "loss": 1.1043, - "step": 5471 - }, - { - "epoch": 0.7418152240222328, - "grad_norm": 1.9690895390758825, - "learning_rate": 3.296519459660383e-07, - "loss": 1.1793, - "step": 5472 - }, - { - "epoch": 0.7419507896698977, - "grad_norm": 1.4636092838014303, - "learning_rate": 3.293261497168054e-07, - "loss": 1.1188, - "step": 5473 - }, - { - "epoch": 0.7420863553175625, - "grad_norm": 2.195885692573736, - "learning_rate": 3.2900048280173055e-07, - "loss": 1.1042, - "step": 5474 - }, - { - "epoch": 0.7422219209652274, - "grad_norm": 2.6767345663038196, - "learning_rate": 3.2867494528361605e-07, - "loss": 1.1524, - "step": 5475 - }, - { - "epoch": 0.7423574866128922, - "grad_norm": 1.7499265386893947, - "learning_rate": 3.2834953722523915e-07, - "loss": 1.1093, - "step": 5476 - }, - { - "epoch": 0.7424930522605572, - "grad_norm": 1.4574293002971066, - "learning_rate": 3.2802425868935277e-07, - "loss": 1.1044, - "step": 5477 - }, - { - "epoch": 0.7426286179082221, - "grad_norm": 4.14219888688796, - "learning_rate": 3.276991097386831e-07, - "loss": 1.1355, - "step": 5478 - }, - { - "epoch": 0.7427641835558869, - "grad_norm": 1.760778820606215, - "learning_rate": 3.27374090435934e-07, - "loss": 1.1559, - "step": 5479 - }, - { - "epoch": 0.7428997492035518, - "grad_norm": 2.0734748228465776, - "learning_rate": 3.270492008437815e-07, - "loss": 1.0908, - "step": 5480 - }, - { - "epoch": 0.7430353148512167, - "grad_norm": 7.563311375904182, - "learning_rate": 3.267244410248794e-07, - "loss": 1.0963, - "step": 5481 - }, - { - "epoch": 0.7431708804988816, - "grad_norm": 1.743421472261632, - "learning_rate": 3.2639981104185355e-07, - "loss": 1.1263, - "step": 5482 - }, - { - "epoch": 0.7433064461465465, - "grad_norm": 2.359873876176534, - "learning_rate": 3.260753109573078e-07, - "loss": 1.1341, - "step": 5483 - }, - { - "epoch": 0.7434420117942113, - "grad_norm": 1.6341039815062548, - "learning_rate": 3.2575094083381837e-07, - "loss": 1.164, - "step": 5484 - }, - { - "epoch": 0.7435775774418762, - "grad_norm": 1.932144684498183, - "learning_rate": 3.2542670073393776e-07, - "loss": 1.0938, - "step": 5485 - }, - { - "epoch": 0.7437131430895411, - "grad_norm": 1.7038590039557413, - "learning_rate": 3.251025907201932e-07, - "loss": 1.1081, - "step": 5486 - }, - { - "epoch": 0.743848708737206, - "grad_norm": 1.7202618527776445, - "learning_rate": 3.247786108550866e-07, - "loss": 1.146, - "step": 5487 - }, - { - "epoch": 0.7439842743848709, - "grad_norm": 2.350135238490276, - "learning_rate": 3.244547612010952e-07, - "loss": 1.1415, - "step": 5488 - }, - { - "epoch": 0.7441198400325357, - "grad_norm": 1.7967400164209104, - "learning_rate": 3.241310418206705e-07, - "loss": 1.1565, - "step": 5489 - }, - { - "epoch": 0.7442554056802007, - "grad_norm": 1.5584964189336472, - "learning_rate": 3.238074527762394e-07, - "loss": 1.1089, - "step": 5490 - }, - { - "epoch": 0.7443909713278655, - "grad_norm": 1.7932036658188204, - "learning_rate": 3.2348399413020365e-07, - "loss": 1.1302, - "step": 5491 - }, - { - "epoch": 0.7445265369755304, - "grad_norm": 2.6328628802066296, - "learning_rate": 3.231606659449394e-07, - "loss": 1.166, - "step": 5492 - }, - { - "epoch": 0.7446621026231953, - "grad_norm": 3.578098917486883, - "learning_rate": 3.228374682827982e-07, - "loss": 1.1112, - "step": 5493 - }, - { - "epoch": 0.7447976682708601, - "grad_norm": 1.5832213671022848, - "learning_rate": 3.2251440120610596e-07, - "loss": 1.1482, - "step": 5494 - }, - { - "epoch": 0.7449332339185251, - "grad_norm": 1.6171499419171973, - "learning_rate": 3.2219146477716376e-07, - "loss": 1.1206, - "step": 5495 - }, - { - "epoch": 0.7450687995661899, - "grad_norm": 1.4703217493180636, - "learning_rate": 3.2186865905824724e-07, - "loss": 1.1065, - "step": 5496 - }, - { - "epoch": 0.7452043652138548, - "grad_norm": 1.5024557964552725, - "learning_rate": 3.215459841116073e-07, - "loss": 1.1073, - "step": 5497 - }, - { - "epoch": 0.7453399308615197, - "grad_norm": 1.9816720477552259, - "learning_rate": 3.212234399994682e-07, - "loss": 1.0972, - "step": 5498 - }, - { - "epoch": 0.7454754965091845, - "grad_norm": 2.3375007335841516, - "learning_rate": 3.209010267840315e-07, - "loss": 1.1611, - "step": 5499 - }, - { - "epoch": 0.7456110621568495, - "grad_norm": 1.9187251494649622, - "learning_rate": 3.205787445274707e-07, - "loss": 1.1659, - "step": 5500 - }, - { - "epoch": 0.7457466278045143, - "grad_norm": 1.6062590154827598, - "learning_rate": 3.2025659329193654e-07, - "loss": 1.1448, - "step": 5501 - }, - { - "epoch": 0.7458821934521792, - "grad_norm": 1.4939746924883037, - "learning_rate": 3.1993457313955217e-07, - "loss": 1.1405, - "step": 5502 - }, - { - "epoch": 0.7460177590998441, - "grad_norm": 1.8736251677387314, - "learning_rate": 3.19612684132418e-07, - "loss": 1.1028, - "step": 5503 - }, - { - "epoch": 0.746153324747509, - "grad_norm": 1.7377266897192405, - "learning_rate": 3.1929092633260667e-07, - "loss": 1.0818, - "step": 5504 - }, - { - "epoch": 0.7462888903951739, - "grad_norm": 2.4415260339332296, - "learning_rate": 3.1896929980216704e-07, - "loss": 1.1247, - "step": 5505 - }, - { - "epoch": 0.7464244560428387, - "grad_norm": 1.6368998999934332, - "learning_rate": 3.186478046031221e-07, - "loss": 1.1217, - "step": 5506 - }, - { - "epoch": 0.7465600216905036, - "grad_norm": 1.9540345543982602, - "learning_rate": 3.1832644079746984e-07, - "loss": 1.1567, - "step": 5507 - }, - { - "epoch": 0.7466955873381685, - "grad_norm": 1.4923984714052274, - "learning_rate": 3.180052084471827e-07, - "loss": 1.1425, - "step": 5508 - }, - { - "epoch": 0.7468311529858334, - "grad_norm": 1.7714277139015153, - "learning_rate": 3.176841076142077e-07, - "loss": 1.1684, - "step": 5509 - }, - { - "epoch": 0.7469667186334983, - "grad_norm": 1.8374084246756608, - "learning_rate": 3.173631383604667e-07, - "loss": 1.1301, - "step": 5510 - }, - { - "epoch": 0.7471022842811631, - "grad_norm": 1.5103891851512357, - "learning_rate": 3.170423007478561e-07, - "loss": 1.0982, - "step": 5511 - }, - { - "epoch": 0.747237849928828, - "grad_norm": 1.7962127606904872, - "learning_rate": 3.167215948382471e-07, - "loss": 1.107, - "step": 5512 - }, - { - "epoch": 0.747373415576493, - "grad_norm": 1.8715169750840128, - "learning_rate": 3.164010206934845e-07, - "loss": 1.1373, - "step": 5513 - }, - { - "epoch": 0.7475089812241578, - "grad_norm": 1.5311105600563946, - "learning_rate": 3.160805783753897e-07, - "loss": 1.1228, - "step": 5514 - }, - { - "epoch": 0.7476445468718227, - "grad_norm": 1.7059777609659832, - "learning_rate": 3.1576026794575615e-07, - "loss": 1.1462, - "step": 5515 - }, - { - "epoch": 0.7477801125194876, - "grad_norm": 1.7333260152947012, - "learning_rate": 3.154400894663546e-07, - "loss": 1.1689, - "step": 5516 - }, - { - "epoch": 0.7479156781671524, - "grad_norm": 1.5309773473111432, - "learning_rate": 3.1512004299892747e-07, - "loss": 1.134, - "step": 5517 - }, - { - "epoch": 0.7480512438148174, - "grad_norm": 1.4780125113362768, - "learning_rate": 3.1480012860519453e-07, - "loss": 1.1287, - "step": 5518 - }, - { - "epoch": 0.7481868094624822, - "grad_norm": 1.7576410215133556, - "learning_rate": 3.1448034634684764e-07, - "loss": 1.1155, - "step": 5519 - }, - { - "epoch": 0.7483223751101471, - "grad_norm": 1.793892781747249, - "learning_rate": 3.141606962855553e-07, - "loss": 1.1396, - "step": 5520 - }, - { - "epoch": 0.748457940757812, - "grad_norm": 1.6766186973303765, - "learning_rate": 3.1384117848295843e-07, - "loss": 1.1123, - "step": 5521 - }, - { - "epoch": 0.7485935064054768, - "grad_norm": 1.5836635428238968, - "learning_rate": 3.135217930006747e-07, - "loss": 1.1599, - "step": 5522 - }, - { - "epoch": 0.7487290720531418, - "grad_norm": 2.002128225237155, - "learning_rate": 3.1320253990029387e-07, - "loss": 1.1767, - "step": 5523 - }, - { - "epoch": 0.7488646377008066, - "grad_norm": 1.7963045424301964, - "learning_rate": 3.128834192433826e-07, - "loss": 1.1042, - "step": 5524 - }, - { - "epoch": 0.7490002033484715, - "grad_norm": 1.6688857983588012, - "learning_rate": 3.125644310914798e-07, - "loss": 1.1179, - "step": 5525 - }, - { - "epoch": 0.7491357689961364, - "grad_norm": 1.6052195466166834, - "learning_rate": 3.122455755061002e-07, - "loss": 1.1428, - "step": 5526 - }, - { - "epoch": 0.7492713346438012, - "grad_norm": 2.4567159738537, - "learning_rate": 3.1192685254873254e-07, - "loss": 1.1006, - "step": 5527 - }, - { - "epoch": 0.7494069002914662, - "grad_norm": 1.469671160722059, - "learning_rate": 3.1160826228084004e-07, - "loss": 1.1075, - "step": 5528 - }, - { - "epoch": 0.749542465939131, - "grad_norm": 1.847860214113767, - "learning_rate": 3.1128980476386035e-07, - "loss": 1.1639, - "step": 5529 - }, - { - "epoch": 0.7496780315867959, - "grad_norm": 1.8037843195191312, - "learning_rate": 3.109714800592055e-07, - "loss": 1.11, - "step": 5530 - }, - { - "epoch": 0.7498135972344608, - "grad_norm": 2.33180803362352, - "learning_rate": 3.106532882282618e-07, - "loss": 1.1144, - "step": 5531 - }, - { - "epoch": 0.7499491628821257, - "grad_norm": 1.816970734271735, - "learning_rate": 3.103352293323901e-07, - "loss": 1.1003, - "step": 5532 - }, - { - "epoch": 0.7500847285297906, - "grad_norm": 1.8820494767397826, - "learning_rate": 3.1001730343292556e-07, - "loss": 1.1249, - "step": 5533 - }, - { - "epoch": 0.7502202941774554, - "grad_norm": 1.788949789074081, - "learning_rate": 3.096995105911776e-07, - "loss": 1.1559, - "step": 5534 - }, - { - "epoch": 0.7503558598251203, - "grad_norm": 3.547484775501951, - "learning_rate": 3.093818508684302e-07, - "loss": 1.1213, - "step": 5535 - }, - { - "epoch": 0.7504914254727852, - "grad_norm": 1.6422240073416166, - "learning_rate": 3.090643243259414e-07, - "loss": 1.1403, - "step": 5536 - }, - { - "epoch": 0.7506269911204501, - "grad_norm": 1.5055879346738459, - "learning_rate": 3.0874693102494374e-07, - "loss": 1.1488, - "step": 5537 - }, - { - "epoch": 0.750762556768115, - "grad_norm": 1.7408642936774008, - "learning_rate": 3.084296710266441e-07, - "loss": 1.0942, - "step": 5538 - }, - { - "epoch": 0.7508981224157798, - "grad_norm": 1.5103707856914317, - "learning_rate": 3.081125443922237e-07, - "loss": 1.1576, - "step": 5539 - }, - { - "epoch": 0.7510336880634447, - "grad_norm": 1.630367856322379, - "learning_rate": 3.077955511828374e-07, - "loss": 1.1281, - "step": 5540 - }, - { - "epoch": 0.7511692537111097, - "grad_norm": 1.8022751393733152, - "learning_rate": 3.074786914596151e-07, - "loss": 1.1541, - "step": 5541 - }, - { - "epoch": 0.7513048193587745, - "grad_norm": 2.2325322477184217, - "learning_rate": 3.071619652836608e-07, - "loss": 1.121, - "step": 5542 - }, - { - "epoch": 0.7514403850064394, - "grad_norm": 1.6983989446516363, - "learning_rate": 3.068453727160525e-07, - "loss": 1.1466, - "step": 5543 - }, - { - "epoch": 0.7515759506541042, - "grad_norm": 1.99312177143774, - "learning_rate": 3.065289138178426e-07, - "loss": 1.097, - "step": 5544 - }, - { - "epoch": 0.7517115163017691, - "grad_norm": 4.040862394817943, - "learning_rate": 3.062125886500578e-07, - "loss": 1.1244, - "step": 5545 - }, - { - "epoch": 0.7518470819494341, - "grad_norm": 2.0421904605845627, - "learning_rate": 3.0589639727369886e-07, - "loss": 1.1341, - "step": 5546 - }, - { - "epoch": 0.7519826475970989, - "grad_norm": 1.6483650457347803, - "learning_rate": 3.0558033974974076e-07, - "loss": 1.1375, - "step": 5547 - }, - { - "epoch": 0.7521182132447638, - "grad_norm": 1.4401385045094528, - "learning_rate": 3.052644161391328e-07, - "loss": 1.1043, - "step": 5548 - }, - { - "epoch": 0.7522537788924286, - "grad_norm": 1.5110498533296846, - "learning_rate": 3.0494862650279816e-07, - "loss": 1.1329, - "step": 5549 - }, - { - "epoch": 0.7523893445400935, - "grad_norm": 1.857247983156151, - "learning_rate": 3.046329709016345e-07, - "loss": 1.1213, - "step": 5550 - }, - { - "epoch": 0.7525249101877585, - "grad_norm": 1.5415842021583062, - "learning_rate": 3.043174493965136e-07, - "loss": 1.1421, - "step": 5551 - }, - { - "epoch": 0.7526604758354233, - "grad_norm": 1.6041590719159964, - "learning_rate": 3.040020620482812e-07, - "loss": 1.1125, - "step": 5552 - }, - { - "epoch": 0.7527960414830882, - "grad_norm": 1.6455157484727976, - "learning_rate": 3.0368680891775755e-07, - "loss": 1.1376, - "step": 5553 - }, - { - "epoch": 0.752931607130753, - "grad_norm": 2.0410842432238767, - "learning_rate": 3.033716900657357e-07, - "loss": 1.1329, - "step": 5554 - }, - { - "epoch": 0.753067172778418, - "grad_norm": 1.7796914465872653, - "learning_rate": 3.0305670555298533e-07, - "loss": 1.1395, - "step": 5555 - }, - { - "epoch": 0.7532027384260829, - "grad_norm": 2.353766170597031, - "learning_rate": 3.027418554402473e-07, - "loss": 1.1058, - "step": 5556 - }, - { - "epoch": 0.7533383040737477, - "grad_norm": 1.648801329880353, - "learning_rate": 3.024271397882393e-07, - "loss": 1.1227, - "step": 5557 - }, - { - "epoch": 0.7534738697214126, - "grad_norm": 1.9133781142300537, - "learning_rate": 3.021125586576504e-07, - "loss": 1.1765, - "step": 5558 - }, - { - "epoch": 0.7536094353690774, - "grad_norm": 1.4686032469849908, - "learning_rate": 3.017981121091464e-07, - "loss": 1.1213, - "step": 5559 - }, - { - "epoch": 0.7537450010167424, - "grad_norm": 1.4542108394339408, - "learning_rate": 3.014838002033645e-07, - "loss": 1.1569, - "step": 5560 - }, - { - "epoch": 0.7538805666644073, - "grad_norm": 1.6358563065856728, - "learning_rate": 3.0116962300091876e-07, - "loss": 1.0795, - "step": 5561 - }, - { - "epoch": 0.7540161323120721, - "grad_norm": 2.2293608805753053, - "learning_rate": 3.0085558056239426e-07, - "loss": 1.1105, - "step": 5562 - }, - { - "epoch": 0.754151697959737, - "grad_norm": 1.696473482453051, - "learning_rate": 3.0054167294835306e-07, - "loss": 1.0829, - "step": 5563 - }, - { - "epoch": 0.7542872636074018, - "grad_norm": 1.550396622193765, - "learning_rate": 3.002279002193283e-07, - "loss": 1.1295, - "step": 5564 - }, - { - "epoch": 0.7544228292550668, - "grad_norm": 4.741270784454152, - "learning_rate": 2.9991426243583005e-07, - "loss": 1.0673, - "step": 5565 - }, - { - "epoch": 0.7545583949027317, - "grad_norm": 1.5387227356937365, - "learning_rate": 2.9960075965833974e-07, - "loss": 1.1619, - "step": 5566 - }, - { - "epoch": 0.7546939605503965, - "grad_norm": 1.5379903708314284, - "learning_rate": 2.9928739194731444e-07, - "loss": 1.1155, - "step": 5567 - }, - { - "epoch": 0.7548295261980614, - "grad_norm": 1.5600155533663091, - "learning_rate": 2.9897415936318436e-07, - "loss": 1.1436, - "step": 5568 - }, - { - "epoch": 0.7549650918457262, - "grad_norm": 1.863179639713212, - "learning_rate": 2.986610619663542e-07, - "loss": 1.0783, - "step": 5569 - }, - { - "epoch": 0.7551006574933912, - "grad_norm": 2.123255181421133, - "learning_rate": 2.983480998172022e-07, - "loss": 1.1309, - "step": 5570 - }, - { - "epoch": 0.7552362231410561, - "grad_norm": 2.2650315829197667, - "learning_rate": 2.980352729760807e-07, - "loss": 1.0829, - "step": 5571 - }, - { - "epoch": 0.7553717887887209, - "grad_norm": 1.8355617406344606, - "learning_rate": 2.9772258150331565e-07, - "loss": 1.1229, - "step": 5572 - }, - { - "epoch": 0.7555073544363858, - "grad_norm": 1.4940820904529237, - "learning_rate": 2.974100254592075e-07, - "loss": 1.1633, - "step": 5573 - }, - { - "epoch": 0.7556429200840507, - "grad_norm": 1.674237846961914, - "learning_rate": 2.970976049040299e-07, - "loss": 1.1323, - "step": 5574 - }, - { - "epoch": 0.7557784857317156, - "grad_norm": 2.1618443569652372, - "learning_rate": 2.967853198980309e-07, - "loss": 1.1154, - "step": 5575 - }, - { - "epoch": 0.7559140513793805, - "grad_norm": 2.229964321499073, - "learning_rate": 2.964731705014324e-07, - "loss": 1.1618, - "step": 5576 - }, - { - "epoch": 0.7560496170270453, - "grad_norm": 1.4518011096624115, - "learning_rate": 2.9616115677442897e-07, - "loss": 1.1322, - "step": 5577 - }, - { - "epoch": 0.7561851826747102, - "grad_norm": 27.813716316483962, - "learning_rate": 2.9584927877719145e-07, - "loss": 1.0867, - "step": 5578 - }, - { - "epoch": 0.7563207483223751, - "grad_norm": 3.6542636438948564, - "learning_rate": 2.9553753656986155e-07, - "loss": 1.1357, - "step": 5579 - }, - { - "epoch": 0.75645631397004, - "grad_norm": 3.0897147234302906, - "learning_rate": 2.952259302125578e-07, - "loss": 1.141, - "step": 5580 - }, - { - "epoch": 0.7565918796177049, - "grad_norm": 2.5318576847913934, - "learning_rate": 2.9491445976536977e-07, - "loss": 1.0947, - "step": 5581 - }, - { - "epoch": 0.7567274452653697, - "grad_norm": 1.6100171197760038, - "learning_rate": 2.9460312528836274e-07, - "loss": 1.0914, - "step": 5582 - }, - { - "epoch": 0.7568630109130347, - "grad_norm": 1.4494342102356041, - "learning_rate": 2.942919268415748e-07, - "loss": 1.0752, - "step": 5583 - }, - { - "epoch": 0.7569985765606995, - "grad_norm": 5.070365382599341, - "learning_rate": 2.9398086448501837e-07, - "loss": 1.1105, - "step": 5584 - }, - { - "epoch": 0.7571341422083644, - "grad_norm": 1.6857971647099654, - "learning_rate": 2.9366993827867913e-07, - "loss": 1.1375, - "step": 5585 - }, - { - "epoch": 0.7572697078560293, - "grad_norm": 1.5886395274692071, - "learning_rate": 2.9335914828251694e-07, - "loss": 1.1187, - "step": 5586 - }, - { - "epoch": 0.7574052735036941, - "grad_norm": 2.3394344435622756, - "learning_rate": 2.9304849455646505e-07, - "loss": 1.1509, - "step": 5587 - }, - { - "epoch": 0.7575408391513591, - "grad_norm": 1.704176868538531, - "learning_rate": 2.9273797716043067e-07, - "loss": 1.1166, - "step": 5588 - }, - { - "epoch": 0.7576764047990239, - "grad_norm": 3.348805901278821, - "learning_rate": 2.9242759615429467e-07, - "loss": 1.1174, - "step": 5589 - }, - { - "epoch": 0.7578119704466888, - "grad_norm": 1.7921985362996256, - "learning_rate": 2.9211735159791153e-07, - "loss": 1.1158, - "step": 5590 - }, - { - "epoch": 0.7579475360943537, - "grad_norm": 1.547289058710695, - "learning_rate": 2.918072435511093e-07, - "loss": 1.1522, - "step": 5591 - }, - { - "epoch": 0.7580831017420185, - "grad_norm": 1.8175384541299482, - "learning_rate": 2.914972720736901e-07, - "loss": 1.1357, - "step": 5592 - }, - { - "epoch": 0.7582186673896835, - "grad_norm": 1.7440840237851118, - "learning_rate": 2.9118743722542937e-07, - "loss": 1.1298, - "step": 5593 - }, - { - "epoch": 0.7583542330373484, - "grad_norm": 1.6703991785900456, - "learning_rate": 2.908777390660765e-07, - "loss": 1.1665, - "step": 5594 - }, - { - "epoch": 0.7584897986850132, - "grad_norm": 2.2444178070394205, - "learning_rate": 2.9056817765535404e-07, - "loss": 1.1238, - "step": 5595 - }, - { - "epoch": 0.7586253643326781, - "grad_norm": 1.9435359463398785, - "learning_rate": 2.9025875305295886e-07, - "loss": 1.1889, - "step": 5596 - }, - { - "epoch": 0.758760929980343, - "grad_norm": 1.5461033830150697, - "learning_rate": 2.8994946531856035e-07, - "loss": 1.1444, - "step": 5597 - }, - { - "epoch": 0.7588964956280079, - "grad_norm": 2.2814083513201395, - "learning_rate": 2.8964031451180316e-07, - "loss": 1.1478, - "step": 5598 - }, - { - "epoch": 0.7590320612756728, - "grad_norm": 2.074788460194466, - "learning_rate": 2.893313006923035e-07, - "loss": 1.1126, - "step": 5599 - }, - { - "epoch": 0.7591676269233376, - "grad_norm": 1.6604660365576887, - "learning_rate": 2.8902242391965335e-07, - "loss": 1.137, - "step": 5600 - }, - { - "epoch": 0.7593031925710025, - "grad_norm": 1.5846811716332339, - "learning_rate": 2.8871368425341634e-07, - "loss": 1.1765, - "step": 5601 - }, - { - "epoch": 0.7594387582186674, - "grad_norm": 2.4118306604915376, - "learning_rate": 2.8840508175313095e-07, - "loss": 1.1738, - "step": 5602 - }, - { - "epoch": 0.7595743238663323, - "grad_norm": 1.8835080628617322, - "learning_rate": 2.880966164783084e-07, - "loss": 1.117, - "step": 5603 - }, - { - "epoch": 0.7597098895139972, - "grad_norm": 1.4896125857720466, - "learning_rate": 2.87788288488434e-07, - "loss": 1.102, - "step": 5604 - }, - { - "epoch": 0.759845455161662, - "grad_norm": 1.6896861462113835, - "learning_rate": 2.8748009784296625e-07, - "loss": 1.1297, - "step": 5605 - }, - { - "epoch": 0.759981020809327, - "grad_norm": 1.492355632963475, - "learning_rate": 2.871720446013374e-07, - "loss": 1.1615, - "step": 5606 - }, - { - "epoch": 0.7601165864569918, - "grad_norm": 1.5984960491360272, - "learning_rate": 2.8686412882295287e-07, - "loss": 1.101, - "step": 5607 - }, - { - "epoch": 0.7602521521046567, - "grad_norm": 1.5063541086492231, - "learning_rate": 2.865563505671921e-07, - "loss": 1.1443, - "step": 5608 - }, - { - "epoch": 0.7603877177523216, - "grad_norm": 1.5717164639175574, - "learning_rate": 2.8624870989340757e-07, - "loss": 1.1126, - "step": 5609 - }, - { - "epoch": 0.7605232833999864, - "grad_norm": 2.9780807050683724, - "learning_rate": 2.8594120686092515e-07, - "loss": 1.1472, - "step": 5610 - }, - { - "epoch": 0.7606588490476514, - "grad_norm": 2.0606361176516867, - "learning_rate": 2.8563384152904503e-07, - "loss": 1.1506, - "step": 5611 - }, - { - "epoch": 0.7607944146953162, - "grad_norm": 1.8204321812960078, - "learning_rate": 2.8532661395703905e-07, - "loss": 1.1269, - "step": 5612 - }, - { - "epoch": 0.7609299803429811, - "grad_norm": 1.762175776467936, - "learning_rate": 2.8501952420415486e-07, - "loss": 1.1587, - "step": 5613 - }, - { - "epoch": 0.761065545990646, - "grad_norm": 1.664388342264442, - "learning_rate": 2.847125723296111e-07, - "loss": 1.1018, - "step": 5614 - }, - { - "epoch": 0.7612011116383108, - "grad_norm": 2.6581111935376995, - "learning_rate": 2.8440575839260227e-07, - "loss": 1.1162, - "step": 5615 - }, - { - "epoch": 0.7613366772859758, - "grad_norm": 1.682087628907367, - "learning_rate": 2.8409908245229374e-07, - "loss": 1.1267, - "step": 5616 - }, - { - "epoch": 0.7614722429336406, - "grad_norm": 3.8676968135518663, - "learning_rate": 2.8379254456782685e-07, - "loss": 1.1328, - "step": 5617 - }, - { - "epoch": 0.7616078085813055, - "grad_norm": 1.7437732954169305, - "learning_rate": 2.8348614479831367e-07, - "loss": 1.0913, - "step": 5618 - }, - { - "epoch": 0.7617433742289704, - "grad_norm": 2.0843382428616986, - "learning_rate": 2.8317988320284223e-07, - "loss": 1.1299, - "step": 5619 - }, - { - "epoch": 0.7618789398766352, - "grad_norm": 2.283955976947826, - "learning_rate": 2.828737598404716e-07, - "loss": 1.12, - "step": 5620 - }, - { - "epoch": 0.7620145055243002, - "grad_norm": 2.1084534241123056, - "learning_rate": 2.8256777477023617e-07, - "loss": 1.1115, - "step": 5621 - }, - { - "epoch": 0.762150071171965, - "grad_norm": 1.800005781123401, - "learning_rate": 2.822619280511418e-07, - "loss": 1.1505, - "step": 5622 - }, - { - "epoch": 0.7622856368196299, - "grad_norm": 2.3842002755511444, - "learning_rate": 2.8195621974216975e-07, - "loss": 1.1398, - "step": 5623 - }, - { - "epoch": 0.7624212024672948, - "grad_norm": 6.714243454529217, - "learning_rate": 2.816506499022725e-07, - "loss": 1.1331, - "step": 5624 - }, - { - "epoch": 0.7625567681149596, - "grad_norm": 1.7609729143708799, - "learning_rate": 2.8134521859037707e-07, - "loss": 1.141, - "step": 5625 - }, - { - "epoch": 0.7626923337626246, - "grad_norm": 1.5239622529603516, - "learning_rate": 2.810399258653836e-07, - "loss": 1.1615, - "step": 5626 - }, - { - "epoch": 0.7628278994102894, - "grad_norm": 3.5631276883319507, - "learning_rate": 2.807347717861653e-07, - "loss": 1.1349, - "step": 5627 - }, - { - "epoch": 0.7629634650579543, - "grad_norm": 1.8553489036147899, - "learning_rate": 2.8042975641156864e-07, - "loss": 1.1316, - "step": 5628 - }, - { - "epoch": 0.7630990307056192, - "grad_norm": 1.4772299209734274, - "learning_rate": 2.8012487980041354e-07, - "loss": 1.1407, - "step": 5629 - }, - { - "epoch": 0.7632345963532841, - "grad_norm": 3.9468636028690747, - "learning_rate": 2.798201420114931e-07, - "loss": 1.1265, - "step": 5630 - }, - { - "epoch": 0.763370162000949, - "grad_norm": 1.7134322414987744, - "learning_rate": 2.795155431035735e-07, - "loss": 1.0689, - "step": 5631 - }, - { - "epoch": 0.7635057276486138, - "grad_norm": 1.940253211228026, - "learning_rate": 2.7921108313539423e-07, - "loss": 1.1822, - "step": 5632 - }, - { - "epoch": 0.7636412932962787, - "grad_norm": 1.8064958921477863, - "learning_rate": 2.78906762165668e-07, - "loss": 1.1436, - "step": 5633 - }, - { - "epoch": 0.7637768589439436, - "grad_norm": 2.5909160911899316, - "learning_rate": 2.786025802530807e-07, - "loss": 1.1518, - "step": 5634 - }, - { - "epoch": 0.7639124245916085, - "grad_norm": 1.4751079906944076, - "learning_rate": 2.782985374562915e-07, - "loss": 1.1314, - "step": 5635 - }, - { - "epoch": 0.7640479902392734, - "grad_norm": 2.1731077109154935, - "learning_rate": 2.779946338339325e-07, - "loss": 1.121, - "step": 5636 - }, - { - "epoch": 0.7641835558869382, - "grad_norm": 2.285638389693649, - "learning_rate": 2.776908694446095e-07, - "loss": 1.1148, - "step": 5637 - }, - { - "epoch": 0.7643191215346031, - "grad_norm": 1.771322601359761, - "learning_rate": 2.773872443469005e-07, - "loss": 1.1027, - "step": 5638 - }, - { - "epoch": 0.7644546871822681, - "grad_norm": 2.500113336908096, - "learning_rate": 2.770837585993575e-07, - "loss": 1.0819, - "step": 5639 - }, - { - "epoch": 0.7645902528299329, - "grad_norm": 1.35215445226337, - "learning_rate": 2.767804122605053e-07, - "loss": 1.1048, - "step": 5640 - }, - { - "epoch": 0.7647258184775978, - "grad_norm": 1.9114997174522634, - "learning_rate": 2.764772053888419e-07, - "loss": 1.1342, - "step": 5641 - }, - { - "epoch": 0.7648613841252626, - "grad_norm": 1.7247238748679388, - "learning_rate": 2.7617413804283815e-07, - "loss": 1.1764, - "step": 5642 - }, - { - "epoch": 0.7649969497729275, - "grad_norm": 1.5345911202673956, - "learning_rate": 2.7587121028093853e-07, - "loss": 1.1229, - "step": 5643 - }, - { - "epoch": 0.7651325154205925, - "grad_norm": 1.676807640477458, - "learning_rate": 2.7556842216155996e-07, - "loss": 1.1299, - "step": 5644 - }, - { - "epoch": 0.7652680810682573, - "grad_norm": 1.4208565885683302, - "learning_rate": 2.752657737430928e-07, - "loss": 1.1024, - "step": 5645 - }, - { - "epoch": 0.7654036467159222, - "grad_norm": 1.5472630259370819, - "learning_rate": 2.749632650839006e-07, - "loss": 1.1518, - "step": 5646 - }, - { - "epoch": 0.765539212363587, - "grad_norm": 1.7392703394890774, - "learning_rate": 2.746608962423196e-07, - "loss": 1.1284, - "step": 5647 - }, - { - "epoch": 0.7656747780112519, - "grad_norm": 1.804932523558963, - "learning_rate": 2.7435866727665924e-07, - "loss": 1.117, - "step": 5648 - }, - { - "epoch": 0.7658103436589169, - "grad_norm": 1.5448921705768355, - "learning_rate": 2.74056578245202e-07, - "loss": 1.0989, - "step": 5649 - }, - { - "epoch": 0.7659459093065817, - "grad_norm": 1.7411987633088435, - "learning_rate": 2.7375462920620354e-07, - "loss": 1.1239, - "step": 5650 - }, - { - "epoch": 0.7660814749542466, - "grad_norm": 1.563886194147513, - "learning_rate": 2.7345282021789204e-07, - "loss": 1.1448, - "step": 5651 - }, - { - "epoch": 0.7662170406019114, - "grad_norm": 1.496557279490038, - "learning_rate": 2.731511513384696e-07, - "loss": 1.088, - "step": 5652 - }, - { - "epoch": 0.7663526062495764, - "grad_norm": 2.826096521012251, - "learning_rate": 2.7284962262610946e-07, - "loss": 1.1463, - "step": 5653 - }, - { - "epoch": 0.7664881718972413, - "grad_norm": 1.5571720066461916, - "learning_rate": 2.7254823413896056e-07, - "loss": 1.1188, - "step": 5654 - }, - { - "epoch": 0.7666237375449061, - "grad_norm": 1.989042467916981, - "learning_rate": 2.7224698593514183e-07, - "loss": 1.1461, - "step": 5655 - }, - { - "epoch": 0.766759303192571, - "grad_norm": 1.6413555754068816, - "learning_rate": 2.7194587807274803e-07, - "loss": 1.1165, - "step": 5656 - }, - { - "epoch": 0.7668948688402358, - "grad_norm": 1.6154400962522693, - "learning_rate": 2.7164491060984417e-07, - "loss": 1.133, - "step": 5657 - }, - { - "epoch": 0.7670304344879008, - "grad_norm": 1.8116939591841072, - "learning_rate": 2.713440836044705e-07, - "loss": 1.1414, - "step": 5658 - }, - { - "epoch": 0.7671660001355657, - "grad_norm": 1.7520666762063646, - "learning_rate": 2.710433971146381e-07, - "loss": 1.1196, - "step": 5659 - }, - { - "epoch": 0.7673015657832305, - "grad_norm": 2.150855315746426, - "learning_rate": 2.7074285119833315e-07, - "loss": 1.1631, - "step": 5660 - }, - { - "epoch": 0.7674371314308954, - "grad_norm": 1.7839445829121041, - "learning_rate": 2.704424459135123e-07, - "loss": 1.1165, - "step": 5661 - }, - { - "epoch": 0.7675726970785602, - "grad_norm": 1.9585759737062995, - "learning_rate": 2.701421813181076e-07, - "loss": 1.0909, - "step": 5662 - }, - { - "epoch": 0.7677082627262252, - "grad_norm": 1.443182086245629, - "learning_rate": 2.6984205747002153e-07, - "loss": 1.0767, - "step": 5663 - }, - { - "epoch": 0.7678438283738901, - "grad_norm": 1.7291665467284376, - "learning_rate": 2.6954207442713174e-07, - "loss": 1.1198, - "step": 5664 - }, - { - "epoch": 0.7679793940215549, - "grad_norm": 2.2361729653968676, - "learning_rate": 2.692422322472866e-07, - "loss": 1.1631, - "step": 5665 - }, - { - "epoch": 0.7681149596692198, - "grad_norm": 1.6686827436776654, - "learning_rate": 2.689425309883089e-07, - "loss": 1.1081, - "step": 5666 - }, - { - "epoch": 0.7682505253168846, - "grad_norm": 1.4675112332964075, - "learning_rate": 2.6864297070799336e-07, - "loss": 1.1015, - "step": 5667 - }, - { - "epoch": 0.7683860909645496, - "grad_norm": 1.3799723468907235, - "learning_rate": 2.6834355146410793e-07, - "loss": 1.1275, - "step": 5668 - }, - { - "epoch": 0.7685216566122145, - "grad_norm": 1.9767399474476277, - "learning_rate": 2.6804427331439327e-07, - "loss": 1.1201, - "step": 5669 - }, - { - "epoch": 0.7686572222598793, - "grad_norm": 2.8349438300724237, - "learning_rate": 2.677451363165628e-07, - "loss": 1.1522, - "step": 5670 - }, - { - "epoch": 0.7687927879075442, - "grad_norm": 2.5870445140764633, - "learning_rate": 2.674461405283027e-07, - "loss": 1.1792, - "step": 5671 - }, - { - "epoch": 0.7689283535552092, - "grad_norm": 1.5494674200035028, - "learning_rate": 2.671472860072721e-07, - "loss": 1.1132, - "step": 5672 - }, - { - "epoch": 0.769063919202874, - "grad_norm": 1.9497797552711735, - "learning_rate": 2.6684857281110286e-07, - "loss": 1.1911, - "step": 5673 - }, - { - "epoch": 0.7691994848505389, - "grad_norm": 2.0315771012224926, - "learning_rate": 2.6655000099739857e-07, - "loss": 1.1097, - "step": 5674 - }, - { - "epoch": 0.7693350504982037, - "grad_norm": 2.128669224633602, - "learning_rate": 2.662515706237376e-07, - "loss": 1.0866, - "step": 5675 - }, - { - "epoch": 0.7694706161458686, - "grad_norm": 2.3521127122017105, - "learning_rate": 2.6595328174766885e-07, - "loss": 1.104, - "step": 5676 - }, - { - "epoch": 0.7696061817935336, - "grad_norm": 1.8074692011542461, - "learning_rate": 2.656551344267162e-07, - "loss": 1.1474, - "step": 5677 - }, - { - "epoch": 0.7697417474411984, - "grad_norm": 1.70120445543075, - "learning_rate": 2.6535712871837357e-07, - "loss": 1.1084, - "step": 5678 - }, - { - "epoch": 0.7698773130888633, - "grad_norm": 1.680435659669913, - "learning_rate": 2.6505926468011044e-07, - "loss": 1.1348, - "step": 5679 - }, - { - "epoch": 0.7700128787365281, - "grad_norm": 1.4640943543878653, - "learning_rate": 2.6476154236936643e-07, - "loss": 1.1059, - "step": 5680 - }, - { - "epoch": 0.770148444384193, - "grad_norm": 1.5252418481508832, - "learning_rate": 2.6446396184355545e-07, - "loss": 1.1096, - "step": 5681 - }, - { - "epoch": 0.770284010031858, - "grad_norm": 1.8475514017426458, - "learning_rate": 2.641665231600634e-07, - "loss": 1.1397, - "step": 5682 - }, - { - "epoch": 0.7704195756795228, - "grad_norm": 1.840725333248945, - "learning_rate": 2.6386922637624906e-07, - "loss": 1.0626, - "step": 5683 - }, - { - "epoch": 0.7705551413271877, - "grad_norm": 1.561312042780428, - "learning_rate": 2.635720715494438e-07, - "loss": 1.1007, - "step": 5684 - }, - { - "epoch": 0.7706907069748525, - "grad_norm": 4.002909948897374, - "learning_rate": 2.6327505873695157e-07, - "loss": 1.1938, - "step": 5685 - }, - { - "epoch": 0.7708262726225175, - "grad_norm": 1.506512654400048, - "learning_rate": 2.629781879960488e-07, - "loss": 1.1125, - "step": 5686 - }, - { - "epoch": 0.7709618382701824, - "grad_norm": 1.84927581254869, - "learning_rate": 2.626814593839848e-07, - "loss": 1.1257, - "step": 5687 - }, - { - "epoch": 0.7710974039178472, - "grad_norm": 1.5106834986264215, - "learning_rate": 2.623848729579813e-07, - "loss": 1.1378, - "step": 5688 - }, - { - "epoch": 0.7712329695655121, - "grad_norm": 1.7846330766219878, - "learning_rate": 2.620884287752327e-07, - "loss": 1.1311, - "step": 5689 - }, - { - "epoch": 0.7713685352131769, - "grad_norm": 2.0817278138654167, - "learning_rate": 2.61792126892906e-07, - "loss": 1.128, - "step": 5690 - }, - { - "epoch": 0.7715041008608419, - "grad_norm": 1.4815320871194724, - "learning_rate": 2.614959673681404e-07, - "loss": 1.1165, - "step": 5691 - }, - { - "epoch": 0.7716396665085068, - "grad_norm": 1.4689885228887125, - "learning_rate": 2.611999502580482e-07, - "loss": 1.1211, - "step": 5692 - }, - { - "epoch": 0.7717752321561716, - "grad_norm": 1.7789227422220348, - "learning_rate": 2.6090407561971405e-07, - "loss": 1.0844, - "step": 5693 - }, - { - "epoch": 0.7719107978038365, - "grad_norm": 1.8759987628352057, - "learning_rate": 2.6060834351019433e-07, - "loss": 1.1269, - "step": 5694 - }, - { - "epoch": 0.7720463634515013, - "grad_norm": 2.029652783773119, - "learning_rate": 2.6031275398651986e-07, - "loss": 1.1544, - "step": 5695 - }, - { - "epoch": 0.7721819290991663, - "grad_norm": 1.4795274179946105, - "learning_rate": 2.6001730710569123e-07, - "loss": 1.1055, - "step": 5696 - }, - { - "epoch": 0.7723174947468312, - "grad_norm": 1.8751556444923918, - "learning_rate": 2.597220029246846e-07, - "loss": 1.1084, - "step": 5697 - }, - { - "epoch": 0.772453060394496, - "grad_norm": 1.5452740812167731, - "learning_rate": 2.594268415004457e-07, - "loss": 1.1588, - "step": 5698 - }, - { - "epoch": 0.7725886260421609, - "grad_norm": 1.753984398407572, - "learning_rate": 2.591318228898953e-07, - "loss": 1.212, - "step": 5699 - }, - { - "epoch": 0.7727241916898258, - "grad_norm": 2.0966738314670668, - "learning_rate": 2.5883694714992446e-07, - "loss": 1.1191, - "step": 5700 - }, - { - "epoch": 0.7728597573374907, - "grad_norm": 1.7720588288256773, - "learning_rate": 2.5854221433739797e-07, - "loss": 1.1519, - "step": 5701 - }, - { - "epoch": 0.7729953229851556, - "grad_norm": 1.7944920889074518, - "learning_rate": 2.582476245091527e-07, - "loss": 1.134, - "step": 5702 - }, - { - "epoch": 0.7731308886328204, - "grad_norm": 1.5100032796687892, - "learning_rate": 2.579531777219981e-07, - "loss": 1.1359, - "step": 5703 - }, - { - "epoch": 0.7732664542804853, - "grad_norm": 4.268247178252124, - "learning_rate": 2.576588740327158e-07, - "loss": 1.1025, - "step": 5704 - }, - { - "epoch": 0.7734020199281502, - "grad_norm": 1.8519197534584118, - "learning_rate": 2.573647134980599e-07, - "loss": 1.1196, - "step": 5705 - }, - { - "epoch": 0.7735375855758151, - "grad_norm": 1.627651099164805, - "learning_rate": 2.57070696174757e-07, - "loss": 1.1296, - "step": 5706 - }, - { - "epoch": 0.77367315122348, - "grad_norm": 1.648874773476019, - "learning_rate": 2.5677682211950604e-07, - "loss": 1.1166, - "step": 5707 - }, - { - "epoch": 0.7738087168711448, - "grad_norm": 1.624688645170778, - "learning_rate": 2.564830913889783e-07, - "loss": 1.1318, - "step": 5708 - }, - { - "epoch": 0.7739442825188098, - "grad_norm": 1.8252867587571606, - "learning_rate": 2.561895040398173e-07, - "loss": 1.112, - "step": 5709 - }, - { - "epoch": 0.7740798481664746, - "grad_norm": 1.9669579390099454, - "learning_rate": 2.5589606012863964e-07, - "loss": 1.129, - "step": 5710 - }, - { - "epoch": 0.7742154138141395, - "grad_norm": 1.5514123688804375, - "learning_rate": 2.556027597120325e-07, - "loss": 1.1353, - "step": 5711 - }, - { - "epoch": 0.7743509794618044, - "grad_norm": 1.4792583635683072, - "learning_rate": 2.553096028465578e-07, - "loss": 1.139, - "step": 5712 - }, - { - "epoch": 0.7744865451094692, - "grad_norm": 1.8677832853060468, - "learning_rate": 2.550165895887474e-07, - "loss": 1.1231, - "step": 5713 - }, - { - "epoch": 0.7746221107571342, - "grad_norm": 1.9840449952975443, - "learning_rate": 2.547237199951078e-07, - "loss": 1.1526, - "step": 5714 - }, - { - "epoch": 0.774757676404799, - "grad_norm": 1.753293586746801, - "learning_rate": 2.5443099412211535e-07, - "loss": 1.1365, - "step": 5715 - }, - { - "epoch": 0.7748932420524639, - "grad_norm": 1.9212116246410464, - "learning_rate": 2.54138412026221e-07, - "loss": 1.1553, - "step": 5716 - }, - { - "epoch": 0.7750288077001288, - "grad_norm": 1.5480432406054123, - "learning_rate": 2.5384597376384596e-07, - "loss": 1.1441, - "step": 5717 - }, - { - "epoch": 0.7751643733477936, - "grad_norm": 1.7774262470210713, - "learning_rate": 2.535536793913856e-07, - "loss": 1.1291, - "step": 5718 - }, - { - "epoch": 0.7752999389954586, - "grad_norm": 1.8670495106498213, - "learning_rate": 2.532615289652055e-07, - "loss": 1.1191, - "step": 5719 - }, - { - "epoch": 0.7754355046431234, - "grad_norm": 1.7477990052021781, - "learning_rate": 2.5296952254164573e-07, - "loss": 1.1097, - "step": 5720 - }, - { - "epoch": 0.7755710702907883, - "grad_norm": 1.379199762722718, - "learning_rate": 2.5267766017701664e-07, - "loss": 1.0891, - "step": 5721 - }, - { - "epoch": 0.7757066359384532, - "grad_norm": 2.0205197727021833, - "learning_rate": 2.5238594192760165e-07, - "loss": 1.1144, - "step": 5722 - }, - { - "epoch": 0.775842201586118, - "grad_norm": 2.215384073068831, - "learning_rate": 2.5209436784965657e-07, - "loss": 1.1222, - "step": 5723 - }, - { - "epoch": 0.775977767233783, - "grad_norm": 2.4827117882649254, - "learning_rate": 2.5180293799940886e-07, - "loss": 1.1059, - "step": 5724 - }, - { - "epoch": 0.7761133328814478, - "grad_norm": 1.7128538698623845, - "learning_rate": 2.5151165243305885e-07, - "loss": 1.1112, - "step": 5725 - }, - { - "epoch": 0.7762488985291127, - "grad_norm": 2.0575261789536765, - "learning_rate": 2.512205112067783e-07, - "loss": 1.1155, - "step": 5726 - }, - { - "epoch": 0.7763844641767776, - "grad_norm": 1.487284105844084, - "learning_rate": 2.5092951437671184e-07, - "loss": 1.1724, - "step": 5727 - }, - { - "epoch": 0.7765200298244425, - "grad_norm": 1.4568735818675593, - "learning_rate": 2.5063866199897556e-07, - "loss": 1.1122, - "step": 5728 - }, - { - "epoch": 0.7766555954721074, - "grad_norm": 1.5588949293132903, - "learning_rate": 2.5034795412965825e-07, - "loss": 1.0837, - "step": 5729 - }, - { - "epoch": 0.7767911611197722, - "grad_norm": 1.46462974501356, - "learning_rate": 2.500573908248207e-07, - "loss": 1.1533, - "step": 5730 - }, - { - "epoch": 0.7769267267674371, - "grad_norm": 1.576298091531953, - "learning_rate": 2.497669721404956e-07, - "loss": 1.1188, - "step": 5731 - }, - { - "epoch": 0.777062292415102, - "grad_norm": 3.251544230442681, - "learning_rate": 2.494766981326878e-07, - "loss": 1.1289, - "step": 5732 - }, - { - "epoch": 0.7771978580627669, - "grad_norm": 1.650595715420313, - "learning_rate": 2.4918656885737465e-07, - "loss": 1.0897, - "step": 5733 - }, - { - "epoch": 0.7773334237104318, - "grad_norm": 1.4876732255281484, - "learning_rate": 2.488965843705051e-07, - "loss": 1.1326, - "step": 5734 - }, - { - "epoch": 0.7774689893580966, - "grad_norm": 2.2302829572228986, - "learning_rate": 2.4860674472800036e-07, - "loss": 1.1242, - "step": 5735 - }, - { - "epoch": 0.7776045550057615, - "grad_norm": 1.6917192193483126, - "learning_rate": 2.483170499857541e-07, - "loss": 1.1694, - "step": 5736 - }, - { - "epoch": 0.7777401206534265, - "grad_norm": 1.5696538148860835, - "learning_rate": 2.48027500199631e-07, - "loss": 1.1318, - "step": 5737 - }, - { - "epoch": 0.7778756863010913, - "grad_norm": 1.9199400699972917, - "learning_rate": 2.477380954254689e-07, - "loss": 1.1123, - "step": 5738 - }, - { - "epoch": 0.7780112519487562, - "grad_norm": 1.7309498228480176, - "learning_rate": 2.4744883571907694e-07, - "loss": 1.1068, - "step": 5739 - }, - { - "epoch": 0.778146817596421, - "grad_norm": 1.628707553694707, - "learning_rate": 2.471597211362367e-07, - "loss": 1.1465, - "step": 5740 - }, - { - "epoch": 0.7782823832440859, - "grad_norm": 1.8569190470983585, - "learning_rate": 2.468707517327019e-07, - "loss": 1.1631, - "step": 5741 - }, - { - "epoch": 0.7784179488917509, - "grad_norm": 2.617797586839627, - "learning_rate": 2.465819275641976e-07, - "loss": 1.1282, - "step": 5742 - }, - { - "epoch": 0.7785535145394157, - "grad_norm": 1.5161008274572638, - "learning_rate": 2.462932486864215e-07, - "loss": 1.1432, - "step": 5743 - }, - { - "epoch": 0.7786890801870806, - "grad_norm": 1.9153637917214947, - "learning_rate": 2.4600471515504293e-07, - "loss": 1.0816, - "step": 5744 - }, - { - "epoch": 0.7788246458347454, - "grad_norm": 1.5500232730392414, - "learning_rate": 2.4571632702570356e-07, - "loss": 1.1131, - "step": 5745 - }, - { - "epoch": 0.7789602114824103, - "grad_norm": 2.4338918571073327, - "learning_rate": 2.454280843540164e-07, - "loss": 1.1392, - "step": 5746 - }, - { - "epoch": 0.7790957771300753, - "grad_norm": 2.0195569483396265, - "learning_rate": 2.4513998719556693e-07, - "loss": 1.1217, - "step": 5747 - }, - { - "epoch": 0.7792313427777401, - "grad_norm": 1.6217204083222712, - "learning_rate": 2.448520356059125e-07, - "loss": 1.1143, - "step": 5748 - }, - { - "epoch": 0.779366908425405, - "grad_norm": 3.0827091168540686, - "learning_rate": 2.4456422964058254e-07, - "loss": 1.1796, - "step": 5749 - }, - { - "epoch": 0.7795024740730698, - "grad_norm": 1.5752895318454925, - "learning_rate": 2.442765693550772e-07, - "loss": 1.1331, - "step": 5750 - }, - { - "epoch": 0.7796380397207348, - "grad_norm": 1.5859472303781381, - "learning_rate": 2.4398905480487073e-07, - "loss": 1.0746, - "step": 5751 - }, - { - "epoch": 0.7797736053683997, - "grad_norm": 1.5240579359040423, - "learning_rate": 2.4370168604540697e-07, - "loss": 1.1063, - "step": 5752 - }, - { - "epoch": 0.7799091710160645, - "grad_norm": 1.985519442423172, - "learning_rate": 2.4341446313210365e-07, - "loss": 1.1251, - "step": 5753 - }, - { - "epoch": 0.7800447366637294, - "grad_norm": 1.7941875747618343, - "learning_rate": 2.4312738612034843e-07, - "loss": 1.1262, - "step": 5754 - }, - { - "epoch": 0.7801803023113943, - "grad_norm": 1.787953134989362, - "learning_rate": 2.428404550655031e-07, - "loss": 1.1036, - "step": 5755 - }, - { - "epoch": 0.7803158679590592, - "grad_norm": 1.628650374980659, - "learning_rate": 2.425536700228986e-07, - "loss": 1.1296, - "step": 5756 - }, - { - "epoch": 0.7804514336067241, - "grad_norm": 1.5212046658863316, - "learning_rate": 2.422670310478406e-07, - "loss": 1.1315, - "step": 5757 - }, - { - "epoch": 0.7805869992543889, - "grad_norm": 2.3081437435452608, - "learning_rate": 2.4198053819560394e-07, - "loss": 1.1148, - "step": 5758 - }, - { - "epoch": 0.7807225649020538, - "grad_norm": 1.512742882172894, - "learning_rate": 2.4169419152143766e-07, - "loss": 1.1501, - "step": 5759 - }, - { - "epoch": 0.7808581305497188, - "grad_norm": 1.672194272131186, - "learning_rate": 2.414079910805601e-07, - "loss": 1.1313, - "step": 5760 - }, - { - "epoch": 0.7809936961973836, - "grad_norm": 1.5796308940057597, - "learning_rate": 2.4112193692816416e-07, - "loss": 1.1389, - "step": 5761 - }, - { - "epoch": 0.7811292618450485, - "grad_norm": 1.5432716383541651, - "learning_rate": 2.4083602911941224e-07, - "loss": 1.1454, - "step": 5762 - }, - { - "epoch": 0.7812648274927133, - "grad_norm": 1.6911481873925136, - "learning_rate": 2.405502677094395e-07, - "loss": 1.1039, - "step": 5763 - }, - { - "epoch": 0.7814003931403782, - "grad_norm": 1.3737472335887604, - "learning_rate": 2.4026465275335306e-07, - "loss": 1.1485, - "step": 5764 - }, - { - "epoch": 0.7815359587880432, - "grad_norm": 1.7058843150854925, - "learning_rate": 2.399791843062312e-07, - "loss": 1.1027, - "step": 5765 - }, - { - "epoch": 0.781671524435708, - "grad_norm": 1.5124896985253808, - "learning_rate": 2.396938624231245e-07, - "loss": 1.1042, - "step": 5766 - }, - { - "epoch": 0.7818070900833729, - "grad_norm": 1.3804768468587438, - "learning_rate": 2.3940868715905495e-07, - "loss": 1.1211, - "step": 5767 - }, - { - "epoch": 0.7819426557310377, - "grad_norm": 1.5493348935887008, - "learning_rate": 2.3912365856901627e-07, - "loss": 1.114, - "step": 5768 - }, - { - "epoch": 0.7820782213787026, - "grad_norm": 1.5666084906540247, - "learning_rate": 2.38838776707974e-07, - "loss": 1.1912, - "step": 5769 - }, - { - "epoch": 0.7822137870263676, - "grad_norm": 1.8368344195400135, - "learning_rate": 2.3855404163086556e-07, - "loss": 1.1027, - "step": 5770 - }, - { - "epoch": 0.7823493526740324, - "grad_norm": 1.6631617411416244, - "learning_rate": 2.3826945339259964e-07, - "loss": 1.1171, - "step": 5771 - }, - { - "epoch": 0.7824849183216973, - "grad_norm": 1.801587855160405, - "learning_rate": 2.379850120480571e-07, - "loss": 1.1797, - "step": 5772 - }, - { - "epoch": 0.7826204839693621, - "grad_norm": 1.4023732103273001, - "learning_rate": 2.3770071765208956e-07, - "loss": 1.1438, - "step": 5773 - }, - { - "epoch": 0.782756049617027, - "grad_norm": 2.121097580159299, - "learning_rate": 2.3741657025952188e-07, - "loss": 1.1524, - "step": 5774 - }, - { - "epoch": 0.782891615264692, - "grad_norm": 1.5601538355660463, - "learning_rate": 2.3713256992514853e-07, - "loss": 1.1668, - "step": 5775 - }, - { - "epoch": 0.7830271809123568, - "grad_norm": 1.6774335909097535, - "learning_rate": 2.3684871670373806e-07, - "loss": 1.1653, - "step": 5776 - }, - { - "epoch": 0.7831627465600217, - "grad_norm": 1.852304796795427, - "learning_rate": 2.365650106500282e-07, - "loss": 1.0917, - "step": 5777 - }, - { - "epoch": 0.7832983122076865, - "grad_norm": 1.6714038507911109, - "learning_rate": 2.3628145181872994e-07, - "loss": 1.1153, - "step": 5778 - }, - { - "epoch": 0.7834338778553515, - "grad_norm": 2.483408934314613, - "learning_rate": 2.359980402645253e-07, - "loss": 1.1694, - "step": 5779 - }, - { - "epoch": 0.7835694435030164, - "grad_norm": 1.6621037752215582, - "learning_rate": 2.3571477604206792e-07, - "loss": 1.1591, - "step": 5780 - }, - { - "epoch": 0.7837050091506812, - "grad_norm": 1.7767525583452555, - "learning_rate": 2.3543165920598308e-07, - "loss": 1.1401, - "step": 5781 - }, - { - "epoch": 0.7838405747983461, - "grad_norm": 1.4915365057069099, - "learning_rate": 2.3514868981086755e-07, - "loss": 1.0891, - "step": 5782 - }, - { - "epoch": 0.7839761404460109, - "grad_norm": 2.6703622455341955, - "learning_rate": 2.3486586791128982e-07, - "loss": 1.1821, - "step": 5783 - }, - { - "epoch": 0.7841117060936759, - "grad_norm": 1.7000377983798203, - "learning_rate": 2.345831935617899e-07, - "loss": 1.1108, - "step": 5784 - }, - { - "epoch": 0.7842472717413408, - "grad_norm": 1.7652439256719816, - "learning_rate": 2.3430066681687932e-07, - "loss": 1.1145, - "step": 5785 - }, - { - "epoch": 0.7843828373890056, - "grad_norm": 4.44678629393062, - "learning_rate": 2.3401828773104103e-07, - "loss": 1.0935, - "step": 5786 - }, - { - "epoch": 0.7845184030366705, - "grad_norm": 1.5774725151948086, - "learning_rate": 2.3373605635872972e-07, - "loss": 1.0985, - "step": 5787 - }, - { - "epoch": 0.7846539686843353, - "grad_norm": 2.324811105978859, - "learning_rate": 2.334539727543713e-07, - "loss": 1.1266, - "step": 5788 - }, - { - "epoch": 0.7847895343320003, - "grad_norm": 2.642141267442177, - "learning_rate": 2.3317203697236353e-07, - "loss": 1.1721, - "step": 5789 - }, - { - "epoch": 0.7849250999796652, - "grad_norm": 1.7969127762212427, - "learning_rate": 2.3289024906707555e-07, - "loss": 1.1484, - "step": 5790 - }, - { - "epoch": 0.78506066562733, - "grad_norm": 1.66294539772085, - "learning_rate": 2.3260860909284773e-07, - "loss": 1.1458, - "step": 5791 - }, - { - "epoch": 0.7851962312749949, - "grad_norm": 1.379065123482126, - "learning_rate": 2.3232711710399255e-07, - "loss": 1.1253, - "step": 5792 - }, - { - "epoch": 0.7853317969226598, - "grad_norm": 2.1729931226247063, - "learning_rate": 2.3204577315479269e-07, - "loss": 1.1468, - "step": 5793 - }, - { - "epoch": 0.7854673625703247, - "grad_norm": 1.693911973020694, - "learning_rate": 2.3176457729950417e-07, - "loss": 1.1089, - "step": 5794 - }, - { - "epoch": 0.7856029282179896, - "grad_norm": 2.029599191386079, - "learning_rate": 2.3148352959235218e-07, - "loss": 1.1337, - "step": 5795 - }, - { - "epoch": 0.7857384938656544, - "grad_norm": 6.1893071613152975, - "learning_rate": 2.3120263008753582e-07, - "loss": 1.1678, - "step": 5796 - }, - { - "epoch": 0.7858740595133193, - "grad_norm": 1.9856636833490904, - "learning_rate": 2.309218788392232e-07, - "loss": 1.1268, - "step": 5797 - }, - { - "epoch": 0.7860096251609842, - "grad_norm": 1.6490316321109244, - "learning_rate": 2.3064127590155603e-07, - "loss": 1.1241, - "step": 5798 - }, - { - "epoch": 0.7861451908086491, - "grad_norm": 1.6221237178949006, - "learning_rate": 2.3036082132864555e-07, - "loss": 1.1315, - "step": 5799 - }, - { - "epoch": 0.786280756456314, - "grad_norm": 2.065970292088523, - "learning_rate": 2.300805151745756e-07, - "loss": 1.1386, - "step": 5800 - }, - { - "epoch": 0.7864163221039788, - "grad_norm": 1.5007402191977883, - "learning_rate": 2.2980035749340088e-07, - "loss": 1.108, - "step": 5801 - }, - { - "epoch": 0.7865518877516438, - "grad_norm": 1.5967098929279577, - "learning_rate": 2.2952034833914757e-07, - "loss": 1.1317, - "step": 5802 - }, - { - "epoch": 0.7866874533993086, - "grad_norm": 2.045866777993252, - "learning_rate": 2.292404877658134e-07, - "loss": 1.1261, - "step": 5803 - }, - { - "epoch": 0.7868230190469735, - "grad_norm": 1.5838485648686977, - "learning_rate": 2.2896077582736705e-07, - "loss": 1.1258, - "step": 5804 - }, - { - "epoch": 0.7869585846946384, - "grad_norm": 1.4960913318219085, - "learning_rate": 2.2868121257774885e-07, - "loss": 1.116, - "step": 5805 - }, - { - "epoch": 0.7870941503423032, - "grad_norm": 1.5870582738861427, - "learning_rate": 2.2840179807087044e-07, - "loss": 1.1417, - "step": 5806 - }, - { - "epoch": 0.7872297159899682, - "grad_norm": 1.5177218730895383, - "learning_rate": 2.2812253236061497e-07, - "loss": 1.0956, - "step": 5807 - }, - { - "epoch": 0.787365281637633, - "grad_norm": 1.5463433476004085, - "learning_rate": 2.2784341550083574e-07, - "loss": 1.1064, - "step": 5808 - }, - { - "epoch": 0.7875008472852979, - "grad_norm": 1.5987714363973962, - "learning_rate": 2.275644475453593e-07, - "loss": 1.1371, - "step": 5809 - }, - { - "epoch": 0.7876364129329628, - "grad_norm": 1.5945550350623803, - "learning_rate": 2.272856285479814e-07, - "loss": 1.1789, - "step": 5810 - }, - { - "epoch": 0.7877719785806276, - "grad_norm": 1.6266187655936322, - "learning_rate": 2.2700695856247122e-07, - "loss": 1.1072, - "step": 5811 - }, - { - "epoch": 0.7879075442282926, - "grad_norm": 1.5349631432881738, - "learning_rate": 2.2672843764256678e-07, - "loss": 1.1081, - "step": 5812 - }, - { - "epoch": 0.7880431098759574, - "grad_norm": 1.438617189153611, - "learning_rate": 2.264500658419799e-07, - "loss": 1.1144, - "step": 5813 - }, - { - "epoch": 0.7881786755236223, - "grad_norm": 1.5935577961760994, - "learning_rate": 2.261718432143912e-07, - "loss": 1.1039, - "step": 5814 - }, - { - "epoch": 0.7883142411712872, - "grad_norm": 1.497581996745133, - "learning_rate": 2.2589376981345487e-07, - "loss": 1.0867, - "step": 5815 - }, - { - "epoch": 0.788449806818952, - "grad_norm": 2.1820441502487116, - "learning_rate": 2.25615845692794e-07, - "loss": 1.1277, - "step": 5816 - }, - { - "epoch": 0.788585372466617, - "grad_norm": 1.680364754583142, - "learning_rate": 2.253380709060053e-07, - "loss": 1.121, - "step": 5817 - }, - { - "epoch": 0.7887209381142818, - "grad_norm": 1.7075044724992658, - "learning_rate": 2.2506044550665438e-07, - "loss": 1.1312, - "step": 5818 - }, - { - "epoch": 0.7888565037619467, - "grad_norm": 1.6595850330958393, - "learning_rate": 2.247829695482799e-07, - "loss": 1.1111, - "step": 5819 - }, - { - "epoch": 0.7889920694096116, - "grad_norm": 1.6561852938020194, - "learning_rate": 2.2450564308439036e-07, - "loss": 1.1521, - "step": 5820 - }, - { - "epoch": 0.7891276350572765, - "grad_norm": 1.703730149412812, - "learning_rate": 2.2422846616846613e-07, - "loss": 1.0982, - "step": 5821 - }, - { - "epoch": 0.7892632007049414, - "grad_norm": 1.455996343973915, - "learning_rate": 2.2395143885395873e-07, - "loss": 1.1326, - "step": 5822 - }, - { - "epoch": 0.7893987663526062, - "grad_norm": 2.445082378018391, - "learning_rate": 2.236745611942905e-07, - "loss": 1.1546, - "step": 5823 - }, - { - "epoch": 0.7895343320002711, - "grad_norm": 1.6335140394546, - "learning_rate": 2.2339783324285523e-07, - "loss": 1.1349, - "step": 5824 - }, - { - "epoch": 0.789669897647936, - "grad_norm": 1.5790870980083669, - "learning_rate": 2.231212550530177e-07, - "loss": 1.1398, - "step": 5825 - }, - { - "epoch": 0.7898054632956009, - "grad_norm": 1.495593796504678, - "learning_rate": 2.2284482667811378e-07, - "loss": 1.1161, - "step": 5826 - }, - { - "epoch": 0.7899410289432658, - "grad_norm": 1.490608977572971, - "learning_rate": 2.2256854817145065e-07, - "loss": 1.1268, - "step": 5827 - }, - { - "epoch": 0.7900765945909306, - "grad_norm": 2.225633679226286, - "learning_rate": 2.2229241958630617e-07, - "loss": 1.1648, - "step": 5828 - }, - { - "epoch": 0.7902121602385955, - "grad_norm": 1.608798382841865, - "learning_rate": 2.2201644097592987e-07, - "loss": 1.1144, - "step": 5829 - }, - { - "epoch": 0.7903477258862605, - "grad_norm": 1.6854126328051005, - "learning_rate": 2.217406123935418e-07, - "loss": 1.147, - "step": 5830 - }, - { - "epoch": 0.7904832915339253, - "grad_norm": 1.5212861814490075, - "learning_rate": 2.2146493389233357e-07, - "loss": 1.0909, - "step": 5831 - }, - { - "epoch": 0.7906188571815902, - "grad_norm": 1.597020492811265, - "learning_rate": 2.211894055254673e-07, - "loss": 1.1394, - "step": 5832 - }, - { - "epoch": 0.7907544228292551, - "grad_norm": 1.383942920243841, - "learning_rate": 2.20914027346077e-07, - "loss": 1.148, - "step": 5833 - }, - { - "epoch": 0.7908899884769199, - "grad_norm": 2.7433711089379615, - "learning_rate": 2.206387994072665e-07, - "loss": 1.0979, - "step": 5834 - }, - { - "epoch": 0.7910255541245849, - "grad_norm": 1.4491503908746541, - "learning_rate": 2.2036372176211148e-07, - "loss": 1.1301, - "step": 5835 - }, - { - "epoch": 0.7911611197722497, - "grad_norm": 1.7447229529194401, - "learning_rate": 2.200887944636588e-07, - "loss": 1.1163, - "step": 5836 - }, - { - "epoch": 0.7912966854199146, - "grad_norm": 1.6152480231431734, - "learning_rate": 2.198140175649259e-07, - "loss": 1.1566, - "step": 5837 - }, - { - "epoch": 0.7914322510675795, - "grad_norm": 1.6331907977895017, - "learning_rate": 2.195393911189012e-07, - "loss": 1.1604, - "step": 5838 - }, - { - "epoch": 0.7915678167152443, - "grad_norm": 1.5008468615445212, - "learning_rate": 2.192649151785444e-07, - "loss": 1.1529, - "step": 5839 - }, - { - "epoch": 0.7917033823629093, - "grad_norm": 1.5324957666718024, - "learning_rate": 2.1899058979678586e-07, - "loss": 1.129, - "step": 5840 - }, - { - "epoch": 0.7918389480105741, - "grad_norm": 1.7896934796825126, - "learning_rate": 2.1871641502652728e-07, - "loss": 1.1239, - "step": 5841 - }, - { - "epoch": 0.791974513658239, - "grad_norm": 2.297925757499211, - "learning_rate": 2.1844239092064088e-07, - "loss": 1.1463, - "step": 5842 - }, - { - "epoch": 0.7921100793059039, - "grad_norm": 1.5063098639936239, - "learning_rate": 2.181685175319702e-07, - "loss": 1.102, - "step": 5843 - }, - { - "epoch": 0.7922456449535688, - "grad_norm": 3.3638544469355756, - "learning_rate": 2.1789479491332953e-07, - "loss": 1.1486, - "step": 5844 - }, - { - "epoch": 0.7923812106012337, - "grad_norm": 1.4557246055837676, - "learning_rate": 2.176212231175041e-07, - "loss": 1.1555, - "step": 5845 - }, - { - "epoch": 0.7925167762488985, - "grad_norm": 1.574502121674619, - "learning_rate": 2.1734780219725e-07, - "loss": 1.1182, - "step": 5846 - }, - { - "epoch": 0.7926523418965634, - "grad_norm": 1.8395205046317087, - "learning_rate": 2.1707453220529448e-07, - "loss": 1.1574, - "step": 5847 - }, - { - "epoch": 0.7927879075442283, - "grad_norm": 1.9893790208651607, - "learning_rate": 2.1680141319433564e-07, - "loss": 1.1256, - "step": 5848 - }, - { - "epoch": 0.7929234731918932, - "grad_norm": 1.6683323326084807, - "learning_rate": 2.165284452170415e-07, - "loss": 1.097, - "step": 5849 - }, - { - "epoch": 0.7930590388395581, - "grad_norm": 1.7409633875722939, - "learning_rate": 2.1625562832605281e-07, - "loss": 1.1431, - "step": 5850 - }, - { - "epoch": 0.7931946044872229, - "grad_norm": 1.6945682453886448, - "learning_rate": 2.159829625739793e-07, - "loss": 1.1549, - "step": 5851 - }, - { - "epoch": 0.7933301701348878, - "grad_norm": 1.7454408425338046, - "learning_rate": 2.157104480134032e-07, - "loss": 1.0971, - "step": 5852 - }, - { - "epoch": 0.7934657357825528, - "grad_norm": 1.6650570060088044, - "learning_rate": 2.1543808469687596e-07, - "loss": 1.0904, - "step": 5853 - }, - { - "epoch": 0.7936013014302176, - "grad_norm": 1.7093116132351402, - "learning_rate": 2.1516587267692165e-07, - "loss": 1.0765, - "step": 5854 - }, - { - "epoch": 0.7937368670778825, - "grad_norm": 1.902201932598715, - "learning_rate": 2.1489381200603307e-07, - "loss": 1.1715, - "step": 5855 - }, - { - "epoch": 0.7938724327255473, - "grad_norm": 1.7218029147146168, - "learning_rate": 2.1462190273667624e-07, - "loss": 1.1173, - "step": 5856 - }, - { - "epoch": 0.7940079983732122, - "grad_norm": 2.064520127862063, - "learning_rate": 2.1435014492128545e-07, - "loss": 1.078, - "step": 5857 - }, - { - "epoch": 0.7941435640208772, - "grad_norm": 1.710476011066988, - "learning_rate": 2.1407853861226833e-07, - "loss": 1.1195, - "step": 5858 - }, - { - "epoch": 0.794279129668542, - "grad_norm": 1.6510636449119345, - "learning_rate": 2.1380708386200075e-07, - "loss": 1.1151, - "step": 5859 - }, - { - "epoch": 0.7944146953162069, - "grad_norm": 1.9567311254020667, - "learning_rate": 2.1353578072283175e-07, - "loss": 1.1351, - "step": 5860 - }, - { - "epoch": 0.7945502609638717, - "grad_norm": 1.5287161173770623, - "learning_rate": 2.1326462924707912e-07, - "loss": 1.1217, - "step": 5861 - }, - { - "epoch": 0.7946858266115366, - "grad_norm": 1.698762325839484, - "learning_rate": 2.129936294870327e-07, - "loss": 1.139, - "step": 5862 - }, - { - "epoch": 0.7948213922592016, - "grad_norm": 1.6031500730598798, - "learning_rate": 2.127227814949526e-07, - "loss": 1.13, - "step": 5863 - }, - { - "epoch": 0.7949569579068664, - "grad_norm": 1.8139888321131024, - "learning_rate": 2.124520853230697e-07, - "loss": 1.1443, - "step": 5864 - }, - { - "epoch": 0.7950925235545313, - "grad_norm": 1.9264489056237715, - "learning_rate": 2.1218154102358554e-07, - "loss": 1.1543, - "step": 5865 - }, - { - "epoch": 0.7952280892021961, - "grad_norm": 2.1082107902066265, - "learning_rate": 2.1191114864867255e-07, - "loss": 1.1242, - "step": 5866 - }, - { - "epoch": 0.795363654849861, - "grad_norm": 3.0672126577495193, - "learning_rate": 2.1164090825047388e-07, - "loss": 1.1164, - "step": 5867 - }, - { - "epoch": 0.795499220497526, - "grad_norm": 1.4661560917481267, - "learning_rate": 2.1137081988110294e-07, - "loss": 1.137, - "step": 5868 - }, - { - "epoch": 0.7956347861451908, - "grad_norm": 1.6459527588650773, - "learning_rate": 2.1110088359264445e-07, - "loss": 1.1485, - "step": 5869 - }, - { - "epoch": 0.7957703517928557, - "grad_norm": 2.7996337293430176, - "learning_rate": 2.108310994371534e-07, - "loss": 1.142, - "step": 5870 - }, - { - "epoch": 0.7959059174405205, - "grad_norm": 2.9636247806762617, - "learning_rate": 2.105614674666556e-07, - "loss": 1.0834, - "step": 5871 - }, - { - "epoch": 0.7960414830881855, - "grad_norm": 2.376165704702495, - "learning_rate": 2.1029198773314693e-07, - "loss": 1.1439, - "step": 5872 - }, - { - "epoch": 0.7961770487358504, - "grad_norm": 3.815284781287962, - "learning_rate": 2.1002266028859539e-07, - "loss": 1.1318, - "step": 5873 - }, - { - "epoch": 0.7963126143835152, - "grad_norm": 1.5437890446851927, - "learning_rate": 2.0975348518493762e-07, - "loss": 1.1282, - "step": 5874 - }, - { - "epoch": 0.7964481800311801, - "grad_norm": 1.4232154067935234, - "learning_rate": 2.094844624740828e-07, - "loss": 1.1275, - "step": 5875 - }, - { - "epoch": 0.7965837456788449, - "grad_norm": 1.675773029907477, - "learning_rate": 2.092155922079093e-07, - "loss": 1.1347, - "step": 5876 - }, - { - "epoch": 0.7967193113265099, - "grad_norm": 4.015224857311656, - "learning_rate": 2.0894687443826675e-07, - "loss": 1.119, - "step": 5877 - }, - { - "epoch": 0.7968548769741748, - "grad_norm": 1.7876379771261166, - "learning_rate": 2.0867830921697527e-07, - "loss": 1.1547, - "step": 5878 - }, - { - "epoch": 0.7969904426218396, - "grad_norm": 1.6837450883213865, - "learning_rate": 2.0840989659582552e-07, - "loss": 1.0903, - "step": 5879 - }, - { - "epoch": 0.7971260082695045, - "grad_norm": 1.6586246175268393, - "learning_rate": 2.081416366265787e-07, - "loss": 1.1217, - "step": 5880 - }, - { - "epoch": 0.7972615739171693, - "grad_norm": 1.535166140827417, - "learning_rate": 2.078735293609668e-07, - "loss": 1.1281, - "step": 5881 - }, - { - "epoch": 0.7973971395648343, - "grad_norm": 1.5442562146302554, - "learning_rate": 2.0760557485069208e-07, - "loss": 1.0794, - "step": 5882 - }, - { - "epoch": 0.7975327052124992, - "grad_norm": 2.57548996881889, - "learning_rate": 2.073377731474275e-07, - "loss": 1.1199, - "step": 5883 - }, - { - "epoch": 0.797668270860164, - "grad_norm": 1.8245495433141452, - "learning_rate": 2.0707012430281646e-07, - "loss": 1.1432, - "step": 5884 - }, - { - "epoch": 0.7978038365078289, - "grad_norm": 1.402715529630314, - "learning_rate": 2.0680262836847294e-07, - "loss": 1.0975, - "step": 5885 - }, - { - "epoch": 0.7979394021554937, - "grad_norm": 2.519447357558393, - "learning_rate": 2.065352853959814e-07, - "loss": 1.1197, - "step": 5886 - }, - { - "epoch": 0.7980749678031587, - "grad_norm": 1.9611231377696992, - "learning_rate": 2.0626809543689682e-07, - "loss": 1.1227, - "step": 5887 - }, - { - "epoch": 0.7982105334508236, - "grad_norm": 2.0485625557210323, - "learning_rate": 2.0600105854274474e-07, - "loss": 1.0923, - "step": 5888 - }, - { - "epoch": 0.7983460990984884, - "grad_norm": 2.079130426944005, - "learning_rate": 2.0573417476502108e-07, - "loss": 1.1334, - "step": 5889 - }, - { - "epoch": 0.7984816647461533, - "grad_norm": 1.6897061609832829, - "learning_rate": 2.0546744415519223e-07, - "loss": 1.0915, - "step": 5890 - }, - { - "epoch": 0.7986172303938182, - "grad_norm": 1.6315960763555541, - "learning_rate": 2.052008667646954e-07, - "loss": 1.1506, - "step": 5891 - }, - { - "epoch": 0.7987527960414831, - "grad_norm": 1.54394959815121, - "learning_rate": 2.049344426449371e-07, - "loss": 1.0845, - "step": 5892 - }, - { - "epoch": 0.798888361689148, - "grad_norm": 1.6450757729192877, - "learning_rate": 2.0466817184729624e-07, - "loss": 1.0853, - "step": 5893 - }, - { - "epoch": 0.7990239273368128, - "grad_norm": 1.590901418774006, - "learning_rate": 2.0440205442311987e-07, - "loss": 1.1226, - "step": 5894 - }, - { - "epoch": 0.7991594929844777, - "grad_norm": 5.383892802791, - "learning_rate": 2.041360904237278e-07, - "loss": 1.1115, - "step": 5895 - }, - { - "epoch": 0.7992950586321426, - "grad_norm": 2.2364235403522956, - "learning_rate": 2.0387027990040827e-07, - "loss": 1.1136, - "step": 5896 - }, - { - "epoch": 0.7994306242798075, - "grad_norm": 1.4483243014809557, - "learning_rate": 2.0360462290442105e-07, - "loss": 1.0921, - "step": 5897 - }, - { - "epoch": 0.7995661899274724, - "grad_norm": 1.7909769512841927, - "learning_rate": 2.033391194869959e-07, - "loss": 1.1413, - "step": 5898 - }, - { - "epoch": 0.7997017555751372, - "grad_norm": 2.265014926645139, - "learning_rate": 2.03073769699333e-07, - "loss": 1.1386, - "step": 5899 - }, - { - "epoch": 0.7998373212228022, - "grad_norm": 1.8109907808204184, - "learning_rate": 2.0280857359260316e-07, - "loss": 1.1165, - "step": 5900 - }, - { - "epoch": 0.799972886870467, - "grad_norm": 1.6316682197190682, - "learning_rate": 2.025435312179472e-07, - "loss": 1.15, - "step": 5901 - }, - { - "epoch": 0.8001084525181319, - "grad_norm": 1.8799022561926828, - "learning_rate": 2.0227864262647664e-07, - "loss": 1.1258, - "step": 5902 - }, - { - "epoch": 0.8002440181657968, - "grad_norm": 1.8097054631037865, - "learning_rate": 2.0201390786927286e-07, - "loss": 1.1226, - "step": 5903 - }, - { - "epoch": 0.8003795838134616, - "grad_norm": 1.6626462382932796, - "learning_rate": 2.017493269973881e-07, - "loss": 1.1129, - "step": 5904 - }, - { - "epoch": 0.8005151494611266, - "grad_norm": 1.4890572749226454, - "learning_rate": 2.014849000618446e-07, - "loss": 1.1183, - "step": 5905 - }, - { - "epoch": 0.8006507151087914, - "grad_norm": 5.595374927760697, - "learning_rate": 2.012206271136353e-07, - "loss": 1.1269, - "step": 5906 - }, - { - "epoch": 0.8007862807564563, - "grad_norm": 2.118717689969066, - "learning_rate": 2.0095650820372234e-07, - "loss": 1.1451, - "step": 5907 - }, - { - "epoch": 0.8009218464041212, - "grad_norm": 1.4636488412353184, - "learning_rate": 2.006925433830401e-07, - "loss": 1.1109, - "step": 5908 - }, - { - "epoch": 0.801057412051786, - "grad_norm": 1.670185209462989, - "learning_rate": 2.0042873270249094e-07, - "loss": 1.1386, - "step": 5909 - }, - { - "epoch": 0.801192977699451, - "grad_norm": 1.5121969333613947, - "learning_rate": 2.0016507621294975e-07, - "loss": 1.104, - "step": 5910 - }, - { - "epoch": 0.8013285433471159, - "grad_norm": 3.1899102411378957, - "learning_rate": 1.9990157396525963e-07, - "loss": 1.1396, - "step": 5911 - }, - { - "epoch": 0.8014641089947807, - "grad_norm": 1.983345166157163, - "learning_rate": 1.9963822601023595e-07, - "loss": 1.125, - "step": 5912 - }, - { - "epoch": 0.8015996746424456, - "grad_norm": 1.4731488756207114, - "learning_rate": 1.9937503239866205e-07, - "loss": 1.1361, - "step": 5913 - }, - { - "epoch": 0.8017352402901105, - "grad_norm": 2.5629145780583995, - "learning_rate": 1.9911199318129403e-07, - "loss": 1.1394, - "step": 5914 - }, - { - "epoch": 0.8018708059377754, - "grad_norm": 1.698767346847256, - "learning_rate": 1.9884910840885571e-07, - "loss": 1.1318, - "step": 5915 - }, - { - "epoch": 0.8020063715854403, - "grad_norm": 2.6247620709556116, - "learning_rate": 1.9858637813204349e-07, - "loss": 1.1282, - "step": 5916 - }, - { - "epoch": 0.8021419372331051, - "grad_norm": 1.5675204088589259, - "learning_rate": 1.983238024015217e-07, - "loss": 1.1253, - "step": 5917 - }, - { - "epoch": 0.80227750288077, - "grad_norm": 1.7225519624920504, - "learning_rate": 1.9806138126792716e-07, - "loss": 1.1497, - "step": 5918 - }, - { - "epoch": 0.8024130685284349, - "grad_norm": 1.5817506981773852, - "learning_rate": 1.9779911478186485e-07, - "loss": 1.1387, - "step": 5919 - }, - { - "epoch": 0.8025486341760998, - "grad_norm": 2.050380438604846, - "learning_rate": 1.9753700299391107e-07, - "loss": 1.1004, - "step": 5920 - }, - { - "epoch": 0.8026841998237647, - "grad_norm": 1.5347275832982614, - "learning_rate": 1.9727504595461198e-07, - "loss": 1.0741, - "step": 5921 - }, - { - "epoch": 0.8028197654714295, - "grad_norm": 1.448949332619657, - "learning_rate": 1.970132437144839e-07, - "loss": 1.135, - "step": 5922 - }, - { - "epoch": 0.8029553311190945, - "grad_norm": 1.500497188838573, - "learning_rate": 1.967515963240135e-07, - "loss": 1.1028, - "step": 5923 - }, - { - "epoch": 0.8030908967667593, - "grad_norm": 1.9002963399426132, - "learning_rate": 1.9649010383365717e-07, - "loss": 1.1306, - "step": 5924 - }, - { - "epoch": 0.8032264624144242, - "grad_norm": 1.737722704996477, - "learning_rate": 1.962287662938419e-07, - "loss": 1.0952, - "step": 5925 - }, - { - "epoch": 0.8033620280620891, - "grad_norm": 1.5314961857640172, - "learning_rate": 1.9596758375496435e-07, - "loss": 1.1094, - "step": 5926 - }, - { - "epoch": 0.8034975937097539, - "grad_norm": 1.7356739357913282, - "learning_rate": 1.9570655626739176e-07, - "loss": 1.1271, - "step": 5927 - }, - { - "epoch": 0.8036331593574189, - "grad_norm": 1.4387784383487527, - "learning_rate": 1.9544568388146098e-07, - "loss": 1.1114, - "step": 5928 - }, - { - "epoch": 0.8037687250050837, - "grad_norm": 1.4790715597794624, - "learning_rate": 1.951849666474793e-07, - "loss": 1.1143, - "step": 5929 - }, - { - "epoch": 0.8039042906527486, - "grad_norm": 1.4890621640432262, - "learning_rate": 1.9492440461572401e-07, - "loss": 1.0994, - "step": 5930 - }, - { - "epoch": 0.8040398563004135, - "grad_norm": 1.9867567259411645, - "learning_rate": 1.9466399783644249e-07, - "loss": 1.1364, - "step": 5931 - }, - { - "epoch": 0.8041754219480783, - "grad_norm": 2.0917836928122, - "learning_rate": 1.9440374635985224e-07, - "loss": 1.1145, - "step": 5932 - }, - { - "epoch": 0.8043109875957433, - "grad_norm": 1.501505201952952, - "learning_rate": 1.941436502361402e-07, - "loss": 1.1464, - "step": 5933 - }, - { - "epoch": 0.8044465532434081, - "grad_norm": 1.696569752390788, - "learning_rate": 1.9388370951546428e-07, - "loss": 1.1357, - "step": 5934 - }, - { - "epoch": 0.804582118891073, - "grad_norm": 2.4447330932008597, - "learning_rate": 1.9362392424795183e-07, - "loss": 1.1319, - "step": 5935 - }, - { - "epoch": 0.8047176845387379, - "grad_norm": 1.646948690225786, - "learning_rate": 1.933642944837004e-07, - "loss": 1.1222, - "step": 5936 - }, - { - "epoch": 0.8048532501864027, - "grad_norm": 2.035494010563723, - "learning_rate": 1.9310482027277763e-07, - "loss": 1.1129, - "step": 5937 - }, - { - "epoch": 0.8049888158340677, - "grad_norm": 1.6951652257494048, - "learning_rate": 1.9284550166522108e-07, - "loss": 1.1208, - "step": 5938 - }, - { - "epoch": 0.8051243814817325, - "grad_norm": 1.8723600824037987, - "learning_rate": 1.9258633871103814e-07, - "loss": 1.137, - "step": 5939 - }, - { - "epoch": 0.8052599471293974, - "grad_norm": 1.5529951698634274, - "learning_rate": 1.923273314602065e-07, - "loss": 1.1222, - "step": 5940 - }, - { - "epoch": 0.8053955127770623, - "grad_norm": 1.7179537666837605, - "learning_rate": 1.920684799626736e-07, - "loss": 1.1076, - "step": 5941 - }, - { - "epoch": 0.8055310784247272, - "grad_norm": 1.3832473941481473, - "learning_rate": 1.9180978426835693e-07, - "loss": 1.1377, - "step": 5942 - }, - { - "epoch": 0.8056666440723921, - "grad_norm": 2.3701027615066326, - "learning_rate": 1.9155124442714387e-07, - "loss": 1.1709, - "step": 5943 - }, - { - "epoch": 0.8058022097200569, - "grad_norm": 1.5756754271746136, - "learning_rate": 1.912928604888918e-07, - "loss": 1.0928, - "step": 5944 - }, - { - "epoch": 0.8059377753677218, - "grad_norm": 2.073521022808377, - "learning_rate": 1.91034632503428e-07, - "loss": 1.1373, - "step": 5945 - }, - { - "epoch": 0.8060733410153867, - "grad_norm": 1.9263968056506517, - "learning_rate": 1.907765605205498e-07, - "loss": 1.1525, - "step": 5946 - }, - { - "epoch": 0.8062089066630516, - "grad_norm": 1.5709850823521951, - "learning_rate": 1.9051864459002454e-07, - "loss": 1.137, - "step": 5947 - }, - { - "epoch": 0.8063444723107165, - "grad_norm": 1.4540116611193388, - "learning_rate": 1.9026088476158851e-07, - "loss": 1.1012, - "step": 5948 - }, - { - "epoch": 0.8064800379583813, - "grad_norm": 1.751530905541245, - "learning_rate": 1.9000328108494967e-07, - "loss": 1.1079, - "step": 5949 - }, - { - "epoch": 0.8066156036060462, - "grad_norm": 1.5041963309521462, - "learning_rate": 1.897458336097838e-07, - "loss": 1.1722, - "step": 5950 - }, - { - "epoch": 0.8067511692537112, - "grad_norm": 1.763235699885478, - "learning_rate": 1.8948854238573874e-07, - "loss": 1.1282, - "step": 5951 - }, - { - "epoch": 0.806886734901376, - "grad_norm": 1.4910932712330793, - "learning_rate": 1.8923140746242994e-07, - "loss": 1.1714, - "step": 5952 - }, - { - "epoch": 0.8070223005490409, - "grad_norm": 1.7811854420489461, - "learning_rate": 1.8897442888944492e-07, - "loss": 1.1743, - "step": 5953 - }, - { - "epoch": 0.8071578661967057, - "grad_norm": 1.4643921808084872, - "learning_rate": 1.8871760671633895e-07, - "loss": 1.056, - "step": 5954 - }, - { - "epoch": 0.8072934318443706, - "grad_norm": 1.526181505132543, - "learning_rate": 1.884609409926391e-07, - "loss": 1.1208, - "step": 5955 - }, - { - "epoch": 0.8074289974920356, - "grad_norm": 1.852073557472295, - "learning_rate": 1.882044317678404e-07, - "loss": 1.1315, - "step": 5956 - }, - { - "epoch": 0.8075645631397004, - "grad_norm": 1.9343205902495728, - "learning_rate": 1.8794807909140963e-07, - "loss": 1.1551, - "step": 5957 - }, - { - "epoch": 0.8077001287873653, - "grad_norm": 1.809520918462569, - "learning_rate": 1.8769188301278126e-07, - "loss": 1.1787, - "step": 5958 - }, - { - "epoch": 0.8078356944350301, - "grad_norm": 2.0683614207771166, - "learning_rate": 1.8743584358136188e-07, - "loss": 1.1337, - "step": 5959 - }, - { - "epoch": 0.807971260082695, - "grad_norm": 4.800317612728052, - "learning_rate": 1.8717996084652587e-07, - "loss": 1.1541, - "step": 5960 - }, - { - "epoch": 0.80810682573036, - "grad_norm": 1.5978512553209485, - "learning_rate": 1.8692423485761833e-07, - "loss": 1.1025, - "step": 5961 - }, - { - "epoch": 0.8082423913780248, - "grad_norm": 1.8507475534815128, - "learning_rate": 1.86668665663954e-07, - "loss": 1.1092, - "step": 5962 - }, - { - "epoch": 0.8083779570256897, - "grad_norm": 1.8454431362164936, - "learning_rate": 1.8641325331481762e-07, - "loss": 1.1341, - "step": 5963 - }, - { - "epoch": 0.8085135226733545, - "grad_norm": 1.8377880324617795, - "learning_rate": 1.861579978594632e-07, - "loss": 1.0993, - "step": 5964 - }, - { - "epoch": 0.8086490883210194, - "grad_norm": 1.9545861166657241, - "learning_rate": 1.859028993471148e-07, - "loss": 1.1031, - "step": 5965 - }, - { - "epoch": 0.8087846539686844, - "grad_norm": 1.749398694057212, - "learning_rate": 1.8564795782696607e-07, - "loss": 1.1182, - "step": 5966 - }, - { - "epoch": 0.8089202196163492, - "grad_norm": 1.6565323386548123, - "learning_rate": 1.8539317334818072e-07, - "loss": 1.1007, - "step": 5967 - }, - { - "epoch": 0.8090557852640141, - "grad_norm": 1.6894258221706542, - "learning_rate": 1.8513854595989198e-07, - "loss": 1.1014, - "step": 5968 - }, - { - "epoch": 0.8091913509116789, - "grad_norm": 1.5956661698275836, - "learning_rate": 1.848840757112019e-07, - "loss": 1.1002, - "step": 5969 - }, - { - "epoch": 0.8093269165593439, - "grad_norm": 2.1483610270450963, - "learning_rate": 1.8462976265118436e-07, - "loss": 1.1371, - "step": 5970 - }, - { - "epoch": 0.8094624822070088, - "grad_norm": 1.419875148172435, - "learning_rate": 1.8437560682888043e-07, - "loss": 1.1209, - "step": 5971 - }, - { - "epoch": 0.8095980478546736, - "grad_norm": 2.0063314386953133, - "learning_rate": 1.8412160829330304e-07, - "loss": 1.1242, - "step": 5972 - }, - { - "epoch": 0.8097336135023385, - "grad_norm": 1.4499072495161034, - "learning_rate": 1.8386776709343278e-07, - "loss": 1.0517, - "step": 5973 - }, - { - "epoch": 0.8098691791500033, - "grad_norm": 1.6036227727168066, - "learning_rate": 1.8361408327822203e-07, - "loss": 1.1155, - "step": 5974 - }, - { - "epoch": 0.8100047447976683, - "grad_norm": 1.6140414943140244, - "learning_rate": 1.8336055689659091e-07, - "loss": 1.1382, - "step": 5975 - }, - { - "epoch": 0.8101403104453332, - "grad_norm": 1.5551060513512636, - "learning_rate": 1.831071879974302e-07, - "loss": 1.1246, - "step": 5976 - }, - { - "epoch": 0.810275876092998, - "grad_norm": 2.08953667574848, - "learning_rate": 1.8285397662960022e-07, - "loss": 1.1347, - "step": 5977 - }, - { - "epoch": 0.8104114417406629, - "grad_norm": 2.1326879707999806, - "learning_rate": 1.8260092284193062e-07, - "loss": 1.1154, - "step": 5978 - }, - { - "epoch": 0.8105470073883277, - "grad_norm": 1.5317621218182431, - "learning_rate": 1.823480266832209e-07, - "loss": 1.1389, - "step": 5979 - }, - { - "epoch": 0.8106825730359927, - "grad_norm": 1.660837399962454, - "learning_rate": 1.8209528820224008e-07, - "loss": 1.1475, - "step": 5980 - }, - { - "epoch": 0.8108181386836576, - "grad_norm": 1.8279320702147228, - "learning_rate": 1.8184270744772678e-07, - "loss": 1.1161, - "step": 5981 - }, - { - "epoch": 0.8109537043313224, - "grad_norm": 1.6996888472212568, - "learning_rate": 1.815902844683892e-07, - "loss": 1.1505, - "step": 5982 - }, - { - "epoch": 0.8110892699789873, - "grad_norm": 1.523338831842278, - "learning_rate": 1.8133801931290516e-07, - "loss": 1.1302, - "step": 5983 - }, - { - "epoch": 0.8112248356266522, - "grad_norm": 1.6959830344405085, - "learning_rate": 1.8108591202992195e-07, - "loss": 1.1414, - "step": 5984 - }, - { - "epoch": 0.8113604012743171, - "grad_norm": 1.5504338169240748, - "learning_rate": 1.808339626680565e-07, - "loss": 1.1243, - "step": 5985 - }, - { - "epoch": 0.811495966921982, - "grad_norm": 1.9464548006397318, - "learning_rate": 1.8058217127589526e-07, - "loss": 1.0959, - "step": 5986 - }, - { - "epoch": 0.8116315325696468, - "grad_norm": 1.63456231908327, - "learning_rate": 1.8033053790199415e-07, - "loss": 1.1594, - "step": 5987 - }, - { - "epoch": 0.8117670982173117, - "grad_norm": 2.1266680270254206, - "learning_rate": 1.8007906259487904e-07, - "loss": 1.1127, - "step": 5988 - }, - { - "epoch": 0.8119026638649767, - "grad_norm": 1.9741367429624257, - "learning_rate": 1.7982774540304402e-07, - "loss": 1.0971, - "step": 5989 - }, - { - "epoch": 0.8120382295126415, - "grad_norm": 2.4779406741131345, - "learning_rate": 1.7957658637495488e-07, - "loss": 1.1254, - "step": 5990 - }, - { - "epoch": 0.8121737951603064, - "grad_norm": 2.3206012117719363, - "learning_rate": 1.7932558555904453e-07, - "loss": 1.0816, - "step": 5991 - }, - { - "epoch": 0.8123093608079712, - "grad_norm": 1.5682214853043726, - "learning_rate": 1.790747430037174e-07, - "loss": 1.1314, - "step": 5992 - }, - { - "epoch": 0.8124449264556362, - "grad_norm": 2.517079855007105, - "learning_rate": 1.7882405875734564e-07, - "loss": 1.1567, - "step": 5993 - }, - { - "epoch": 0.8125804921033011, - "grad_norm": 1.9160382319120954, - "learning_rate": 1.785735328682727e-07, - "loss": 1.1031, - "step": 5994 - }, - { - "epoch": 0.8127160577509659, - "grad_norm": 1.7047405572571654, - "learning_rate": 1.7832316538480973e-07, - "loss": 1.1067, - "step": 5995 - }, - { - "epoch": 0.8128516233986308, - "grad_norm": 1.666998235386702, - "learning_rate": 1.7807295635523845e-07, - "loss": 1.0915, - "step": 5996 - }, - { - "epoch": 0.8129871890462956, - "grad_norm": 1.6917107959326478, - "learning_rate": 1.7782290582780958e-07, - "loss": 1.121, - "step": 5997 - }, - { - "epoch": 0.8131227546939606, - "grad_norm": 1.6576505387683373, - "learning_rate": 1.7757301385074342e-07, - "loss": 1.0803, - "step": 5998 - }, - { - "epoch": 0.8132583203416255, - "grad_norm": 2.4883424236793017, - "learning_rate": 1.7732328047222978e-07, - "loss": 1.1169, - "step": 5999 - }, - { - "epoch": 0.8133938859892903, - "grad_norm": 1.7548715703891027, - "learning_rate": 1.7707370574042769e-07, - "loss": 1.1191, - "step": 6000 - }, - { - "epoch": 0.8135294516369552, - "grad_norm": 1.6901260171636896, - "learning_rate": 1.7682428970346553e-07, - "loss": 1.1964, - "step": 6001 - }, - { - "epoch": 0.81366501728462, - "grad_norm": 1.761254168506445, - "learning_rate": 1.765750324094415e-07, - "loss": 1.141, - "step": 6002 - }, - { - "epoch": 0.813800582932285, - "grad_norm": 1.592841377498472, - "learning_rate": 1.763259339064226e-07, - "loss": 1.1255, - "step": 6003 - }, - { - "epoch": 0.8139361485799499, - "grad_norm": 1.6579102951077316, - "learning_rate": 1.7607699424244582e-07, - "loss": 1.153, - "step": 6004 - }, - { - "epoch": 0.8140717142276147, - "grad_norm": 2.0297871946534265, - "learning_rate": 1.7582821346551711e-07, - "loss": 1.0901, - "step": 6005 - }, - { - "epoch": 0.8142072798752796, - "grad_norm": 1.6026151658907208, - "learning_rate": 1.7557959162361148e-07, - "loss": 1.0782, - "step": 6006 - }, - { - "epoch": 0.8143428455229444, - "grad_norm": 1.8550548425805877, - "learning_rate": 1.753311287646745e-07, - "loss": 1.1788, - "step": 6007 - }, - { - "epoch": 0.8144784111706094, - "grad_norm": 1.8153339282926075, - "learning_rate": 1.7508282493661918e-07, - "loss": 1.1265, - "step": 6008 - }, - { - "epoch": 0.8146139768182743, - "grad_norm": 1.8125603525840703, - "learning_rate": 1.7483468018733017e-07, - "loss": 1.096, - "step": 6009 - }, - { - "epoch": 0.8147495424659391, - "grad_norm": 1.4746235114417292, - "learning_rate": 1.7458669456465914e-07, - "loss": 1.0833, - "step": 6010 - }, - { - "epoch": 0.814885108113604, - "grad_norm": 1.5107256601895256, - "learning_rate": 1.7433886811642916e-07, - "loss": 1.1048, - "step": 6011 - }, - { - "epoch": 0.8150206737612689, - "grad_norm": 1.6320236746013845, - "learning_rate": 1.740912008904305e-07, - "loss": 1.1393, - "step": 6012 - }, - { - "epoch": 0.8151562394089338, - "grad_norm": 1.434841448401946, - "learning_rate": 1.7384369293442501e-07, - "loss": 1.1373, - "step": 6013 - }, - { - "epoch": 0.8152918050565987, - "grad_norm": 1.635841738118453, - "learning_rate": 1.7359634429614145e-07, - "loss": 1.0925, - "step": 6014 - }, - { - "epoch": 0.8154273707042635, - "grad_norm": 1.5629268876785556, - "learning_rate": 1.7334915502328028e-07, - "loss": 1.1725, - "step": 6015 - }, - { - "epoch": 0.8155629363519284, - "grad_norm": 2.2876669362547184, - "learning_rate": 1.7310212516350908e-07, - "loss": 1.1441, - "step": 6016 - }, - { - "epoch": 0.8156985019995933, - "grad_norm": 1.6586244743858076, - "learning_rate": 1.7285525476446594e-07, - "loss": 1.1031, - "step": 6017 - }, - { - "epoch": 0.8158340676472582, - "grad_norm": 1.6571672478022463, - "learning_rate": 1.7260854387375778e-07, - "loss": 1.0911, - "step": 6018 - }, - { - "epoch": 0.8159696332949231, - "grad_norm": 1.703094100183799, - "learning_rate": 1.7236199253896089e-07, - "loss": 1.1311, - "step": 6019 - }, - { - "epoch": 0.8161051989425879, - "grad_norm": 2.1974611167135483, - "learning_rate": 1.7211560080762078e-07, - "loss": 1.1336, - "step": 6020 - }, - { - "epoch": 0.8162407645902529, - "grad_norm": 1.7363988494098888, - "learning_rate": 1.718693687272521e-07, - "loss": 1.1199, - "step": 6021 - }, - { - "epoch": 0.8163763302379177, - "grad_norm": 1.7315927508010474, - "learning_rate": 1.716232963453389e-07, - "loss": 1.16, - "step": 6022 - }, - { - "epoch": 0.8165118958855826, - "grad_norm": 1.8132590054904536, - "learning_rate": 1.7137738370933408e-07, - "loss": 1.1503, - "step": 6023 - }, - { - "epoch": 0.8166474615332475, - "grad_norm": 3.293230632754883, - "learning_rate": 1.7113163086666016e-07, - "loss": 1.1363, - "step": 6024 - }, - { - "epoch": 0.8167830271809123, - "grad_norm": 1.5818730261421958, - "learning_rate": 1.7088603786470845e-07, - "loss": 1.0948, - "step": 6025 - }, - { - "epoch": 0.8169185928285773, - "grad_norm": 1.587556193740831, - "learning_rate": 1.7064060475083975e-07, - "loss": 1.1307, - "step": 6026 - }, - { - "epoch": 0.8170541584762421, - "grad_norm": 1.4772631289257365, - "learning_rate": 1.7039533157238394e-07, - "loss": 1.1006, - "step": 6027 - }, - { - "epoch": 0.817189724123907, - "grad_norm": 1.753988032806564, - "learning_rate": 1.7015021837663979e-07, - "loss": 1.135, - "step": 6028 - }, - { - "epoch": 0.8173252897715719, - "grad_norm": 1.5899368651941173, - "learning_rate": 1.6990526521087567e-07, - "loss": 1.1357, - "step": 6029 - }, - { - "epoch": 0.8174608554192367, - "grad_norm": 1.8142258814073933, - "learning_rate": 1.696604721223288e-07, - "loss": 1.1234, - "step": 6030 - }, - { - "epoch": 0.8175964210669017, - "grad_norm": 1.674232229689965, - "learning_rate": 1.6941583915820578e-07, - "loss": 1.1587, - "step": 6031 - }, - { - "epoch": 0.8177319867145665, - "grad_norm": 1.4548381055913768, - "learning_rate": 1.6917136636568176e-07, - "loss": 1.122, - "step": 6032 - }, - { - "epoch": 0.8178675523622314, - "grad_norm": 1.6535245333858326, - "learning_rate": 1.6892705379190153e-07, - "loss": 1.0946, - "step": 6033 - }, - { - "epoch": 0.8180031180098963, - "grad_norm": 1.638372173850085, - "learning_rate": 1.6868290148397878e-07, - "loss": 1.1256, - "step": 6034 - }, - { - "epoch": 0.8181386836575611, - "grad_norm": 1.727856167991448, - "learning_rate": 1.6843890948899665e-07, - "loss": 1.1859, - "step": 6035 - }, - { - "epoch": 0.8182742493052261, - "grad_norm": 1.8081122270328929, - "learning_rate": 1.6819507785400677e-07, - "loss": 1.1568, - "step": 6036 - }, - { - "epoch": 0.8184098149528909, - "grad_norm": 1.8701119213305, - "learning_rate": 1.6795140662603026e-07, - "loss": 1.1347, - "step": 6037 - }, - { - "epoch": 0.8185453806005558, - "grad_norm": 2.0667241227409763, - "learning_rate": 1.6770789585205725e-07, - "loss": 1.0982, - "step": 6038 - }, - { - "epoch": 0.8186809462482207, - "grad_norm": 1.5141935297004685, - "learning_rate": 1.6746454557904677e-07, - "loss": 1.0652, - "step": 6039 - }, - { - "epoch": 0.8188165118958856, - "grad_norm": 1.4801753030981784, - "learning_rate": 1.6722135585392706e-07, - "loss": 1.1348, - "step": 6040 - }, - { - "epoch": 0.8189520775435505, - "grad_norm": 1.4458901064527596, - "learning_rate": 1.6697832672359525e-07, - "loss": 1.1252, - "step": 6041 - }, - { - "epoch": 0.8190876431912153, - "grad_norm": 1.649733523174716, - "learning_rate": 1.6673545823491774e-07, - "loss": 1.1311, - "step": 6042 - }, - { - "epoch": 0.8192232088388802, - "grad_norm": 1.8204788270225223, - "learning_rate": 1.6649275043472965e-07, - "loss": 1.1288, - "step": 6043 - }, - { - "epoch": 0.8193587744865451, - "grad_norm": 1.4475357958056674, - "learning_rate": 1.6625020336983565e-07, - "loss": 1.0954, - "step": 6044 - }, - { - "epoch": 0.81949434013421, - "grad_norm": 2.030120288504051, - "learning_rate": 1.6600781708700816e-07, - "loss": 1.1111, - "step": 6045 - }, - { - "epoch": 0.8196299057818749, - "grad_norm": 4.856077149277165, - "learning_rate": 1.6576559163299053e-07, - "loss": 1.1066, - "step": 6046 - }, - { - "epoch": 0.8197654714295397, - "grad_norm": 1.6769097955874899, - "learning_rate": 1.6552352705449302e-07, - "loss": 1.1158, - "step": 6047 - }, - { - "epoch": 0.8199010370772046, - "grad_norm": 4.39274403469556, - "learning_rate": 1.6528162339819685e-07, - "loss": 1.1041, - "step": 6048 - }, - { - "epoch": 0.8200366027248696, - "grad_norm": 1.788703436930725, - "learning_rate": 1.6503988071075026e-07, - "loss": 1.128, - "step": 6049 - }, - { - "epoch": 0.8201721683725344, - "grad_norm": 1.989861816449875, - "learning_rate": 1.647982990387724e-07, - "loss": 1.0957, - "step": 6050 - }, - { - "epoch": 0.8203077340201993, - "grad_norm": 1.7866908582179641, - "learning_rate": 1.6455687842884936e-07, - "loss": 1.0872, - "step": 6051 - }, - { - "epoch": 0.8204432996678641, - "grad_norm": 1.5320166535962116, - "learning_rate": 1.643156189275382e-07, - "loss": 1.1649, - "step": 6052 - }, - { - "epoch": 0.820578865315529, - "grad_norm": 1.4448362988513614, - "learning_rate": 1.6407452058136294e-07, - "loss": 1.1048, - "step": 6053 - }, - { - "epoch": 0.820714430963194, - "grad_norm": 1.6930877884647348, - "learning_rate": 1.6383358343681852e-07, - "loss": 1.16, - "step": 6054 - }, - { - "epoch": 0.8208499966108588, - "grad_norm": 1.9943306758951778, - "learning_rate": 1.6359280754036675e-07, - "loss": 1.0995, - "step": 6055 - }, - { - "epoch": 0.8209855622585237, - "grad_norm": 1.4679277413028868, - "learning_rate": 1.6335219293844038e-07, - "loss": 1.1093, - "step": 6056 - }, - { - "epoch": 0.8211211279061885, - "grad_norm": 1.6266838424335046, - "learning_rate": 1.6311173967743918e-07, - "loss": 1.1495, - "step": 6057 - }, - { - "epoch": 0.8212566935538534, - "grad_norm": 1.575516403761223, - "learning_rate": 1.6287144780373308e-07, - "loss": 1.1205, - "step": 6058 - }, - { - "epoch": 0.8213922592015184, - "grad_norm": 1.990377363997174, - "learning_rate": 1.6263131736366032e-07, - "loss": 1.1551, - "step": 6059 - }, - { - "epoch": 0.8215278248491832, - "grad_norm": 2.3818960381999053, - "learning_rate": 1.623913484035282e-07, - "loss": 1.1229, - "step": 6060 - }, - { - "epoch": 0.8216633904968481, - "grad_norm": 2.15637372045643, - "learning_rate": 1.6215154096961292e-07, - "loss": 1.1208, - "step": 6061 - }, - { - "epoch": 0.8217989561445129, - "grad_norm": 3.29782982353072, - "learning_rate": 1.619118951081594e-07, - "loss": 1.1226, - "step": 6062 - }, - { - "epoch": 0.8219345217921779, - "grad_norm": 1.5614304900400975, - "learning_rate": 1.616724108653813e-07, - "loss": 1.1218, - "step": 6063 - }, - { - "epoch": 0.8220700874398428, - "grad_norm": 2.6940536420391563, - "learning_rate": 1.614330882874616e-07, - "loss": 1.1303, - "step": 6064 - }, - { - "epoch": 0.8222056530875076, - "grad_norm": 2.4183661525643565, - "learning_rate": 1.611939274205515e-07, - "loss": 1.1414, - "step": 6065 - }, - { - "epoch": 0.8223412187351725, - "grad_norm": 1.7761563795006645, - "learning_rate": 1.6095492831077128e-07, - "loss": 1.1343, - "step": 6066 - }, - { - "epoch": 0.8224767843828373, - "grad_norm": 1.9231075356095122, - "learning_rate": 1.6071609100421048e-07, - "loss": 1.1259, - "step": 6067 - }, - { - "epoch": 0.8226123500305023, - "grad_norm": 1.4564961137741643, - "learning_rate": 1.6047741554692606e-07, - "loss": 1.1663, - "step": 6068 - }, - { - "epoch": 0.8227479156781672, - "grad_norm": 1.5621797665849535, - "learning_rate": 1.6023890198494584e-07, - "loss": 1.1571, - "step": 6069 - }, - { - "epoch": 0.822883481325832, - "grad_norm": 2.03909193309879, - "learning_rate": 1.6000055036426407e-07, - "loss": 1.1363, - "step": 6070 - }, - { - "epoch": 0.8230190469734969, - "grad_norm": 1.612741838396169, - "learning_rate": 1.5976236073084627e-07, - "loss": 1.1047, - "step": 6071 - }, - { - "epoch": 0.8231546126211619, - "grad_norm": 1.4611043610055217, - "learning_rate": 1.595243331306244e-07, - "loss": 1.149, - "step": 6072 - }, - { - "epoch": 0.8232901782688267, - "grad_norm": 1.748512264762146, - "learning_rate": 1.592864676095006e-07, - "loss": 1.1202, - "step": 6073 - }, - { - "epoch": 0.8234257439164916, - "grad_norm": 2.6827996334696893, - "learning_rate": 1.5904876421334534e-07, - "loss": 1.0798, - "step": 6074 - }, - { - "epoch": 0.8235613095641564, - "grad_norm": 1.6145908786602705, - "learning_rate": 1.5881122298799788e-07, - "loss": 1.1196, - "step": 6075 - }, - { - "epoch": 0.8236968752118213, - "grad_norm": 1.553126695005957, - "learning_rate": 1.585738439792661e-07, - "loss": 1.1356, - "step": 6076 - }, - { - "epoch": 0.8238324408594863, - "grad_norm": 1.7470861464852228, - "learning_rate": 1.5833662723292662e-07, - "loss": 1.12, - "step": 6077 - }, - { - "epoch": 0.8239680065071511, - "grad_norm": 1.7327131769954773, - "learning_rate": 1.5809957279472496e-07, - "loss": 1.1218, - "step": 6078 - }, - { - "epoch": 0.824103572154816, - "grad_norm": 1.4617269190312414, - "learning_rate": 1.578626807103751e-07, - "loss": 1.091, - "step": 6079 - }, - { - "epoch": 0.8242391378024808, - "grad_norm": 1.951678845101915, - "learning_rate": 1.5762595102555987e-07, - "loss": 1.108, - "step": 6080 - }, - { - "epoch": 0.8243747034501457, - "grad_norm": 1.6767657086384855, - "learning_rate": 1.5738938378593068e-07, - "loss": 1.1355, - "step": 6081 - }, - { - "epoch": 0.8245102690978107, - "grad_norm": 1.831663030179059, - "learning_rate": 1.5715297903710767e-07, - "loss": 1.1234, - "step": 6082 - }, - { - "epoch": 0.8246458347454755, - "grad_norm": 1.6789076886322105, - "learning_rate": 1.5691673682467967e-07, - "loss": 1.1298, - "step": 6083 - }, - { - "epoch": 0.8247814003931404, - "grad_norm": 1.498642986284596, - "learning_rate": 1.5668065719420398e-07, - "loss": 1.1588, - "step": 6084 - }, - { - "epoch": 0.8249169660408052, - "grad_norm": 1.748159225193498, - "learning_rate": 1.564447401912069e-07, - "loss": 1.0978, - "step": 6085 - }, - { - "epoch": 0.8250525316884701, - "grad_norm": 2.0560717279926677, - "learning_rate": 1.5620898586118292e-07, - "loss": 1.1257, - "step": 6086 - }, - { - "epoch": 0.8251880973361351, - "grad_norm": 1.8411205061859668, - "learning_rate": 1.5597339424959588e-07, - "loss": 1.1487, - "step": 6087 - }, - { - "epoch": 0.8253236629837999, - "grad_norm": 1.61984623707389, - "learning_rate": 1.557379654018769e-07, - "loss": 1.1323, - "step": 6088 - }, - { - "epoch": 0.8254592286314648, - "grad_norm": 1.7186574116629958, - "learning_rate": 1.555026993634275e-07, - "loss": 1.0973, - "step": 6089 - }, - { - "epoch": 0.8255947942791296, - "grad_norm": 1.5308616452342962, - "learning_rate": 1.5526759617961614e-07, - "loss": 1.1329, - "step": 6090 - }, - { - "epoch": 0.8257303599267946, - "grad_norm": 1.7169702015678046, - "learning_rate": 1.5503265589578128e-07, - "loss": 1.1696, - "step": 6091 - }, - { - "epoch": 0.8258659255744595, - "grad_norm": 1.5664658487613379, - "learning_rate": 1.5479787855722858e-07, - "loss": 1.0776, - "step": 6092 - }, - { - "epoch": 0.8260014912221243, - "grad_norm": 1.5810471357671791, - "learning_rate": 1.5456326420923382e-07, - "loss": 1.1247, - "step": 6093 - }, - { - "epoch": 0.8261370568697892, - "grad_norm": 1.619568704918383, - "learning_rate": 1.543288128970399e-07, - "loss": 1.1258, - "step": 6094 - }, - { - "epoch": 0.826272622517454, - "grad_norm": 2.389858300838128, - "learning_rate": 1.5409452466585903e-07, - "loss": 1.1396, - "step": 6095 - }, - { - "epoch": 0.826408188165119, - "grad_norm": 1.6504527549332588, - "learning_rate": 1.5386039956087194e-07, - "loss": 1.1296, - "step": 6096 - }, - { - "epoch": 0.8265437538127839, - "grad_norm": 1.6787119631197842, - "learning_rate": 1.5362643762722782e-07, - "loss": 1.1127, - "step": 6097 - }, - { - "epoch": 0.8266793194604487, - "grad_norm": 1.6650925507351901, - "learning_rate": 1.5339263891004427e-07, - "loss": 1.1382, - "step": 6098 - }, - { - "epoch": 0.8268148851081136, - "grad_norm": 2.8297276151867132, - "learning_rate": 1.5315900345440757e-07, - "loss": 1.1405, - "step": 6099 - }, - { - "epoch": 0.8269504507557784, - "grad_norm": 1.6427962325166952, - "learning_rate": 1.5292553130537255e-07, - "loss": 1.1021, - "step": 6100 - }, - { - "epoch": 0.8270860164034434, - "grad_norm": 4.02367471204802, - "learning_rate": 1.526922225079623e-07, - "loss": 1.1219, - "step": 6101 - }, - { - "epoch": 0.8272215820511083, - "grad_norm": 1.384677950944082, - "learning_rate": 1.524590771071691e-07, - "loss": 1.1328, - "step": 6102 - }, - { - "epoch": 0.8273571476987731, - "grad_norm": 1.700948190567225, - "learning_rate": 1.5222609514795225e-07, - "loss": 1.1157, - "step": 6103 - }, - { - "epoch": 0.827492713346438, - "grad_norm": 1.7557824455836417, - "learning_rate": 1.5199327667524154e-07, - "loss": 1.1378, - "step": 6104 - }, - { - "epoch": 0.8276282789941029, - "grad_norm": 6.652528948664602, - "learning_rate": 1.5176062173393312e-07, - "loss": 1.1081, - "step": 6105 - }, - { - "epoch": 0.8277638446417678, - "grad_norm": 1.6718092801979756, - "learning_rate": 1.5152813036889378e-07, - "loss": 1.1161, - "step": 6106 - }, - { - "epoch": 0.8278994102894327, - "grad_norm": 1.694300805007026, - "learning_rate": 1.5129580262495656e-07, - "loss": 1.1349, - "step": 6107 - }, - { - "epoch": 0.8280349759370975, - "grad_norm": 2.055905461130772, - "learning_rate": 1.5106363854692493e-07, - "loss": 1.1662, - "step": 6108 - }, - { - "epoch": 0.8281705415847624, - "grad_norm": 2.3554125500808727, - "learning_rate": 1.5083163817956913e-07, - "loss": 1.1205, - "step": 6109 - }, - { - "epoch": 0.8283061072324273, - "grad_norm": 1.5391223333915125, - "learning_rate": 1.5059980156762942e-07, - "loss": 1.1108, - "step": 6110 - }, - { - "epoch": 0.8284416728800922, - "grad_norm": 1.9838122081888856, - "learning_rate": 1.5036812875581274e-07, - "loss": 1.0907, - "step": 6111 - }, - { - "epoch": 0.8285772385277571, - "grad_norm": 1.4513873270097286, - "learning_rate": 1.5013661978879632e-07, - "loss": 1.1379, - "step": 6112 - }, - { - "epoch": 0.8287128041754219, - "grad_norm": 1.6337994129067452, - "learning_rate": 1.4990527471122382e-07, - "loss": 1.1001, - "step": 6113 - }, - { - "epoch": 0.8288483698230869, - "grad_norm": 1.6090167383690979, - "learning_rate": 1.4967409356770945e-07, - "loss": 1.1464, - "step": 6114 - }, - { - "epoch": 0.8289839354707517, - "grad_norm": 1.6598662153672805, - "learning_rate": 1.4944307640283382e-07, - "loss": 1.136, - "step": 6115 - }, - { - "epoch": 0.8291195011184166, - "grad_norm": 1.5111428225540444, - "learning_rate": 1.4921222326114692e-07, - "loss": 1.1426, - "step": 6116 - }, - { - "epoch": 0.8292550667660815, - "grad_norm": 1.4180524658545575, - "learning_rate": 1.4898153418716708e-07, - "loss": 1.1349, - "step": 6117 - }, - { - "epoch": 0.8293906324137463, - "grad_norm": 3.412055824367126, - "learning_rate": 1.4875100922538087e-07, - "loss": 1.0679, - "step": 6118 - }, - { - "epoch": 0.8295261980614113, - "grad_norm": 3.275464642569637, - "learning_rate": 1.4852064842024325e-07, - "loss": 1.1471, - "step": 6119 - }, - { - "epoch": 0.8296617637090761, - "grad_norm": 1.7230558705315062, - "learning_rate": 1.4829045181617727e-07, - "loss": 1.126, - "step": 6120 - }, - { - "epoch": 0.829797329356741, - "grad_norm": 1.803157081634202, - "learning_rate": 1.4806041945757474e-07, - "loss": 1.1628, - "step": 6121 - }, - { - "epoch": 0.8299328950044059, - "grad_norm": 1.718803731001436, - "learning_rate": 1.4783055138879562e-07, - "loss": 1.1717, - "step": 6122 - }, - { - "epoch": 0.8300684606520707, - "grad_norm": 1.7611381752627506, - "learning_rate": 1.476008476541679e-07, - "loss": 1.1849, - "step": 6123 - }, - { - "epoch": 0.8302040262997357, - "grad_norm": 1.684325632412977, - "learning_rate": 1.473713082979884e-07, - "loss": 1.0813, - "step": 6124 - }, - { - "epoch": 0.8303395919474005, - "grad_norm": 2.1483924780135086, - "learning_rate": 1.4714193336452174e-07, - "loss": 1.1131, - "step": 6125 - }, - { - "epoch": 0.8304751575950654, - "grad_norm": 1.5514688313505514, - "learning_rate": 1.4691272289800115e-07, - "loss": 1.111, - "step": 6126 - }, - { - "epoch": 0.8306107232427303, - "grad_norm": 1.6299864975334275, - "learning_rate": 1.4668367694262817e-07, - "loss": 1.1669, - "step": 6127 - }, - { - "epoch": 0.8307462888903951, - "grad_norm": 1.7165829109854134, - "learning_rate": 1.4645479554257267e-07, - "loss": 1.1155, - "step": 6128 - }, - { - "epoch": 0.8308818545380601, - "grad_norm": 1.651023582950642, - "learning_rate": 1.4622607874197214e-07, - "loss": 1.1202, - "step": 6129 - }, - { - "epoch": 0.8310174201857249, - "grad_norm": 1.413985248057129, - "learning_rate": 1.4599752658493304e-07, - "loss": 1.1295, - "step": 6130 - }, - { - "epoch": 0.8311529858333898, - "grad_norm": 1.782063026242228, - "learning_rate": 1.457691391155298e-07, - "loss": 1.1378, - "step": 6131 - }, - { - "epoch": 0.8312885514810547, - "grad_norm": 1.5384858641647392, - "learning_rate": 1.4554091637780518e-07, - "loss": 1.1319, - "step": 6132 - }, - { - "epoch": 0.8314241171287196, - "grad_norm": 1.5754456636573182, - "learning_rate": 1.4531285841577024e-07, - "loss": 1.1186, - "step": 6133 - }, - { - "epoch": 0.8315596827763845, - "grad_norm": 1.5320240876770022, - "learning_rate": 1.4508496527340398e-07, - "loss": 1.0807, - "step": 6134 - }, - { - "epoch": 0.8316952484240493, - "grad_norm": 1.5766773624671824, - "learning_rate": 1.448572369946539e-07, - "loss": 1.1196, - "step": 6135 - }, - { - "epoch": 0.8318308140717142, - "grad_norm": 2.110698734649517, - "learning_rate": 1.446296736234356e-07, - "loss": 1.1094, - "step": 6136 - }, - { - "epoch": 0.8319663797193791, - "grad_norm": 1.6997547408761438, - "learning_rate": 1.444022752036328e-07, - "loss": 1.1544, - "step": 6137 - }, - { - "epoch": 0.832101945367044, - "grad_norm": 1.5323370386113027, - "learning_rate": 1.4417504177909767e-07, - "loss": 1.1304, - "step": 6138 - }, - { - "epoch": 0.8322375110147089, - "grad_norm": 3.133218889509942, - "learning_rate": 1.4394797339365017e-07, - "loss": 1.1233, - "step": 6139 - }, - { - "epoch": 0.8323730766623737, - "grad_norm": 1.886917751452, - "learning_rate": 1.437210700910787e-07, - "loss": 1.1427, - "step": 6140 - }, - { - "epoch": 0.8325086423100386, - "grad_norm": 2.2682986523325654, - "learning_rate": 1.4349433191513994e-07, - "loss": 1.1459, - "step": 6141 - }, - { - "epoch": 0.8326442079577036, - "grad_norm": 1.6824181151020183, - "learning_rate": 1.4326775890955833e-07, - "loss": 1.1148, - "step": 6142 - }, - { - "epoch": 0.8327797736053684, - "grad_norm": 3.450355000946064, - "learning_rate": 1.4304135111802707e-07, - "loss": 1.1519, - "step": 6143 - }, - { - "epoch": 0.8329153392530333, - "grad_norm": 2.006705323580505, - "learning_rate": 1.4281510858420632e-07, - "loss": 1.1305, - "step": 6144 - }, - { - "epoch": 0.8330509049006981, - "grad_norm": 1.5645726092087568, - "learning_rate": 1.4258903135172605e-07, - "loss": 1.1392, - "step": 6145 - }, - { - "epoch": 0.833186470548363, - "grad_norm": 1.798705878832206, - "learning_rate": 1.423631194641828e-07, - "loss": 1.0889, - "step": 6146 - }, - { - "epoch": 0.833322036196028, - "grad_norm": 1.5600977009592263, - "learning_rate": 1.421373729651425e-07, - "loss": 1.1087, - "step": 6147 - }, - { - "epoch": 0.8334576018436928, - "grad_norm": 1.7743718820437313, - "learning_rate": 1.4191179189813796e-07, - "loss": 1.0966, - "step": 6148 - }, - { - "epoch": 0.8335931674913577, - "grad_norm": 1.8328980269411923, - "learning_rate": 1.4168637630667135e-07, - "loss": 1.1468, - "step": 6149 - }, - { - "epoch": 0.8337287331390226, - "grad_norm": 1.735205448540986, - "learning_rate": 1.4146112623421158e-07, - "loss": 1.1413, - "step": 6150 - }, - { - "epoch": 0.8338642987866874, - "grad_norm": 1.8543779820222022, - "learning_rate": 1.4123604172419713e-07, - "loss": 1.1042, - "step": 6151 - }, - { - "epoch": 0.8339998644343524, - "grad_norm": 1.5176632858509251, - "learning_rate": 1.410111228200329e-07, - "loss": 1.1759, - "step": 6152 - }, - { - "epoch": 0.8341354300820172, - "grad_norm": 1.5325031080475353, - "learning_rate": 1.407863695650936e-07, - "loss": 1.1416, - "step": 6153 - }, - { - "epoch": 0.8342709957296821, - "grad_norm": 1.7726809654790447, - "learning_rate": 1.405617820027204e-07, - "loss": 1.0972, - "step": 6154 - }, - { - "epoch": 0.834406561377347, - "grad_norm": 1.398630527000847, - "learning_rate": 1.4033736017622388e-07, - "loss": 1.1533, - "step": 6155 - }, - { - "epoch": 0.8345421270250118, - "grad_norm": 2.9571030107063887, - "learning_rate": 1.4011310412888145e-07, - "loss": 1.1518, - "step": 6156 - }, - { - "epoch": 0.8346776926726768, - "grad_norm": 2.205052938578223, - "learning_rate": 1.398890139039395e-07, - "loss": 1.1182, - "step": 6157 - }, - { - "epoch": 0.8348132583203416, - "grad_norm": 2.200818209860699, - "learning_rate": 1.3966508954461175e-07, - "loss": 1.0926, - "step": 6158 - }, - { - "epoch": 0.8349488239680065, - "grad_norm": 1.7132804423581414, - "learning_rate": 1.3944133109408053e-07, - "loss": 1.1624, - "step": 6159 - }, - { - "epoch": 0.8350843896156714, - "grad_norm": 1.7393363717302641, - "learning_rate": 1.3921773859549569e-07, - "loss": 1.1212, - "step": 6160 - }, - { - "epoch": 0.8352199552633363, - "grad_norm": 1.7546078175134212, - "learning_rate": 1.389943120919753e-07, - "loss": 1.1473, - "step": 6161 - }, - { - "epoch": 0.8353555209110012, - "grad_norm": 1.7912742656603087, - "learning_rate": 1.3877105162660564e-07, - "loss": 1.1125, - "step": 6162 - }, - { - "epoch": 0.835491086558666, - "grad_norm": 1.5202624716927995, - "learning_rate": 1.385479572424404e-07, - "loss": 1.1042, - "step": 6163 - }, - { - "epoch": 0.8356266522063309, - "grad_norm": 4.484749064666905, - "learning_rate": 1.3832502898250174e-07, - "loss": 1.1291, - "step": 6164 - }, - { - "epoch": 0.8357622178539958, - "grad_norm": 1.6526783397173164, - "learning_rate": 1.3810226688977967e-07, - "loss": 1.106, - "step": 6165 - }, - { - "epoch": 0.8358977835016607, - "grad_norm": 2.0526572075526435, - "learning_rate": 1.378796710072322e-07, - "loss": 1.1409, - "step": 6166 - }, - { - "epoch": 0.8360333491493256, - "grad_norm": 1.719264493168516, - "learning_rate": 1.3765724137778456e-07, - "loss": 1.131, - "step": 6167 - }, - { - "epoch": 0.8361689147969904, - "grad_norm": 1.558097538178257, - "learning_rate": 1.3743497804433147e-07, - "loss": 1.117, - "step": 6168 - }, - { - "epoch": 0.8363044804446553, - "grad_norm": 1.5545690325087547, - "learning_rate": 1.3721288104973372e-07, - "loss": 1.1111, - "step": 6169 - }, - { - "epoch": 0.8364400460923203, - "grad_norm": 1.6007552120049815, - "learning_rate": 1.3699095043682184e-07, - "loss": 1.0865, - "step": 6170 - }, - { - "epoch": 0.8365756117399851, - "grad_norm": 1.688400599851003, - "learning_rate": 1.3676918624839285e-07, - "loss": 1.1378, - "step": 6171 - }, - { - "epoch": 0.83671117738765, - "grad_norm": 1.9049372141172483, - "learning_rate": 1.3654758852721226e-07, - "loss": 1.1245, - "step": 6172 - }, - { - "epoch": 0.8368467430353148, - "grad_norm": 3.228393807771819, - "learning_rate": 1.363261573160136e-07, - "loss": 1.176, - "step": 6173 - }, - { - "epoch": 0.8369823086829797, - "grad_norm": 2.213699417561093, - "learning_rate": 1.3610489265749801e-07, - "loss": 1.1369, - "step": 6174 - }, - { - "epoch": 0.8371178743306447, - "grad_norm": 1.6452386654735738, - "learning_rate": 1.3588379459433485e-07, - "loss": 1.0655, - "step": 6175 - }, - { - "epoch": 0.8372534399783095, - "grad_norm": 1.7964541027447067, - "learning_rate": 1.3566286316916087e-07, - "loss": 1.1273, - "step": 6176 - }, - { - "epoch": 0.8373890056259744, - "grad_norm": 2.1710580259191117, - "learning_rate": 1.354420984245811e-07, - "loss": 1.1557, - "step": 6177 - }, - { - "epoch": 0.8375245712736392, - "grad_norm": 1.6005759778563469, - "learning_rate": 1.3522150040316826e-07, - "loss": 1.1327, - "step": 6178 - }, - { - "epoch": 0.8376601369213041, - "grad_norm": 2.2194474453615247, - "learning_rate": 1.350010691474629e-07, - "loss": 1.1404, - "step": 6179 - }, - { - "epoch": 0.8377957025689691, - "grad_norm": 1.5975518507396873, - "learning_rate": 1.3478080469997344e-07, - "loss": 1.1496, - "step": 6180 - }, - { - "epoch": 0.8379312682166339, - "grad_norm": 1.6438693437645342, - "learning_rate": 1.3456070710317624e-07, - "loss": 1.0932, - "step": 6181 - }, - { - "epoch": 0.8380668338642988, - "grad_norm": 1.633943879720907, - "learning_rate": 1.3434077639951525e-07, - "loss": 1.1177, - "step": 6182 - }, - { - "epoch": 0.8382023995119636, - "grad_norm": 1.6290968045572614, - "learning_rate": 1.341210126314024e-07, - "loss": 1.0998, - "step": 6183 - }, - { - "epoch": 0.8383379651596286, - "grad_norm": 2.5255227284641575, - "learning_rate": 1.3390141584121772e-07, - "loss": 1.1246, - "step": 6184 - }, - { - "epoch": 0.8384735308072935, - "grad_norm": 1.631872182427361, - "learning_rate": 1.33681986071308e-07, - "loss": 1.1414, - "step": 6185 - }, - { - "epoch": 0.8386090964549583, - "grad_norm": 1.5296933489929978, - "learning_rate": 1.3346272336398934e-07, - "loss": 1.1276, - "step": 6186 - }, - { - "epoch": 0.8387446621026232, - "grad_norm": 1.4717812743992367, - "learning_rate": 1.3324362776154408e-07, - "loss": 1.1403, - "step": 6187 - }, - { - "epoch": 0.838880227750288, - "grad_norm": 2.125996876433454, - "learning_rate": 1.3302469930622383e-07, - "loss": 1.163, - "step": 6188 - }, - { - "epoch": 0.839015793397953, - "grad_norm": 1.4454256738343119, - "learning_rate": 1.3280593804024642e-07, - "loss": 1.1252, - "step": 6189 - }, - { - "epoch": 0.8391513590456179, - "grad_norm": 19.67582792850445, - "learning_rate": 1.3258734400579908e-07, - "loss": 1.1159, - "step": 6190 - }, - { - "epoch": 0.8392869246932827, - "grad_norm": 1.4276577973301068, - "learning_rate": 1.323689172450353e-07, - "loss": 1.1268, - "step": 6191 - }, - { - "epoch": 0.8394224903409476, - "grad_norm": 1.308378592386755, - "learning_rate": 1.3215065780007718e-07, - "loss": 1.0916, - "step": 6192 - }, - { - "epoch": 0.8395580559886124, - "grad_norm": 2.390103187782756, - "learning_rate": 1.3193256571301426e-07, - "loss": 1.1205, - "step": 6193 - }, - { - "epoch": 0.8396936216362774, - "grad_norm": 1.6898130237681153, - "learning_rate": 1.3171464102590392e-07, - "loss": 1.0833, - "step": 6194 - }, - { - "epoch": 0.8398291872839423, - "grad_norm": 1.4058089475199333, - "learning_rate": 1.3149688378077128e-07, - "loss": 1.1454, - "step": 6195 - }, - { - "epoch": 0.8399647529316071, - "grad_norm": 7.200669098133073, - "learning_rate": 1.3127929401960903e-07, - "loss": 1.1045, - "step": 6196 - }, - { - "epoch": 0.840100318579272, - "grad_norm": 1.4582405872122313, - "learning_rate": 1.3106187178437768e-07, - "loss": 1.1343, - "step": 6197 - }, - { - "epoch": 0.8402358842269368, - "grad_norm": 1.6823753535645727, - "learning_rate": 1.3084461711700544e-07, - "loss": 1.0859, - "step": 6198 - }, - { - "epoch": 0.8403714498746018, - "grad_norm": 2.2710293629645446, - "learning_rate": 1.3062753005938798e-07, - "loss": 1.1199, - "step": 6199 - }, - { - "epoch": 0.8405070155222667, - "grad_norm": 1.532202158369591, - "learning_rate": 1.30410610653389e-07, - "loss": 1.14, - "step": 6200 - }, - { - "epoch": 0.8406425811699315, - "grad_norm": 1.718879209658983, - "learning_rate": 1.3019385894083988e-07, - "loss": 1.1189, - "step": 6201 - }, - { - "epoch": 0.8407781468175964, - "grad_norm": 1.6127396368357971, - "learning_rate": 1.2997727496353872e-07, - "loss": 1.1647, - "step": 6202 - }, - { - "epoch": 0.8409137124652613, - "grad_norm": 1.7460391366960477, - "learning_rate": 1.2976085876325303e-07, - "loss": 1.1411, - "step": 6203 - }, - { - "epoch": 0.8410492781129262, - "grad_norm": 1.4366207469057595, - "learning_rate": 1.2954461038171603e-07, - "loss": 1.0961, - "step": 6204 - }, - { - "epoch": 0.8411848437605911, - "grad_norm": 1.5083874877897854, - "learning_rate": 1.2932852986063046e-07, - "loss": 1.1587, - "step": 6205 - }, - { - "epoch": 0.8413204094082559, - "grad_norm": 1.7730101359987072, - "learning_rate": 1.2911261724166468e-07, - "loss": 1.1232, - "step": 6206 - }, - { - "epoch": 0.8414559750559208, - "grad_norm": 2.456315921922907, - "learning_rate": 1.2889687256645686e-07, - "loss": 1.1316, - "step": 6207 - }, - { - "epoch": 0.8415915407035857, - "grad_norm": 2.0651668089173896, - "learning_rate": 1.286812958766106e-07, - "loss": 1.1061, - "step": 6208 - }, - { - "epoch": 0.8417271063512506, - "grad_norm": 3.1351617565452092, - "learning_rate": 1.284658872136991e-07, - "loss": 1.1248, - "step": 6209 - }, - { - "epoch": 0.8418626719989155, - "grad_norm": 1.4653107949629878, - "learning_rate": 1.2825064661926133e-07, - "loss": 1.1437, - "step": 6210 - }, - { - "epoch": 0.8419982376465803, - "grad_norm": 1.4930767503593168, - "learning_rate": 1.280355741348056e-07, - "loss": 1.1723, - "step": 6211 - }, - { - "epoch": 0.8421338032942453, - "grad_norm": 1.6780531496579427, - "learning_rate": 1.278206698018064e-07, - "loss": 1.1269, - "step": 6212 - }, - { - "epoch": 0.8422693689419101, - "grad_norm": 1.5146909857110231, - "learning_rate": 1.2760593366170635e-07, - "loss": 1.116, - "step": 6213 - }, - { - "epoch": 0.842404934589575, - "grad_norm": 1.5311098680301287, - "learning_rate": 1.273913657559158e-07, - "loss": 1.1321, - "step": 6214 - }, - { - "epoch": 0.8425405002372399, - "grad_norm": 1.7836111166491453, - "learning_rate": 1.271769661258124e-07, - "loss": 1.1557, - "step": 6215 - }, - { - "epoch": 0.8426760658849047, - "grad_norm": 2.2227267123059473, - "learning_rate": 1.2696273481274144e-07, - "loss": 1.1285, - "step": 6216 - }, - { - "epoch": 0.8428116315325697, - "grad_norm": 1.5538795100954164, - "learning_rate": 1.2674867185801575e-07, - "loss": 1.1729, - "step": 6217 - }, - { - "epoch": 0.8429471971802345, - "grad_norm": 2.3077833117726665, - "learning_rate": 1.2653477730291563e-07, - "loss": 1.1098, - "step": 6218 - }, - { - "epoch": 0.8430827628278994, - "grad_norm": 1.8388898197698145, - "learning_rate": 1.2632105118868896e-07, - "loss": 1.1653, - "step": 6219 - }, - { - "epoch": 0.8432183284755643, - "grad_norm": 1.944809804747932, - "learning_rate": 1.2610749355655125e-07, - "loss": 1.1417, - "step": 6220 - }, - { - "epoch": 0.8433538941232291, - "grad_norm": 1.6500142001771374, - "learning_rate": 1.2589410444768522e-07, - "loss": 1.1234, - "step": 6221 - }, - { - "epoch": 0.8434894597708941, - "grad_norm": 2.7833530945077505, - "learning_rate": 1.256808839032415e-07, - "loss": 1.1255, - "step": 6222 - }, - { - "epoch": 0.8436250254185589, - "grad_norm": 1.3688588959249868, - "learning_rate": 1.2546783196433774e-07, - "loss": 1.071, - "step": 6223 - }, - { - "epoch": 0.8437605910662238, - "grad_norm": 1.7930877372133254, - "learning_rate": 1.2525494867205954e-07, - "loss": 1.1672, - "step": 6224 - }, - { - "epoch": 0.8438961567138887, - "grad_norm": 1.476620002331289, - "learning_rate": 1.2504223406745963e-07, - "loss": 1.1235, - "step": 6225 - }, - { - "epoch": 0.8440317223615535, - "grad_norm": 1.4673302599674496, - "learning_rate": 1.2482968819155837e-07, - "loss": 1.0821, - "step": 6226 - }, - { - "epoch": 0.8441672880092185, - "grad_norm": 1.820152050199136, - "learning_rate": 1.2461731108534378e-07, - "loss": 1.1336, - "step": 6227 - }, - { - "epoch": 0.8443028536568834, - "grad_norm": 1.7500932185488502, - "learning_rate": 1.244051027897708e-07, - "loss": 1.1312, - "step": 6228 - }, - { - "epoch": 0.8444384193045482, - "grad_norm": 1.6437164150957306, - "learning_rate": 1.2419306334576207e-07, - "loss": 1.081, - "step": 6229 - }, - { - "epoch": 0.8445739849522131, - "grad_norm": 3.3751704959978452, - "learning_rate": 1.2398119279420793e-07, - "loss": 1.1175, - "step": 6230 - }, - { - "epoch": 0.844709550599878, - "grad_norm": 1.9482431033804928, - "learning_rate": 1.2376949117596592e-07, - "loss": 1.1158, - "step": 6231 - }, - { - "epoch": 0.8448451162475429, - "grad_norm": 1.8754051142320427, - "learning_rate": 1.2355795853186102e-07, - "loss": 1.1059, - "step": 6232 - }, - { - "epoch": 0.8449806818952078, - "grad_norm": 1.6787405419264065, - "learning_rate": 1.233465949026855e-07, - "loss": 1.1288, - "step": 6233 - }, - { - "epoch": 0.8451162475428726, - "grad_norm": 4.924311003146424, - "learning_rate": 1.2313540032919935e-07, - "loss": 1.0824, - "step": 6234 - }, - { - "epoch": 0.8452518131905375, - "grad_norm": 2.2952780100036385, - "learning_rate": 1.2292437485212957e-07, - "loss": 1.1866, - "step": 6235 - }, - { - "epoch": 0.8453873788382024, - "grad_norm": 1.995885728239094, - "learning_rate": 1.2271351851217104e-07, - "loss": 1.0957, - "step": 6236 - }, - { - "epoch": 0.8455229444858673, - "grad_norm": 1.5156092398983478, - "learning_rate": 1.225028313499855e-07, - "loss": 1.1319, - "step": 6237 - }, - { - "epoch": 0.8456585101335322, - "grad_norm": 1.6414860723968474, - "learning_rate": 1.222923134062025e-07, - "loss": 1.1114, - "step": 6238 - }, - { - "epoch": 0.845794075781197, - "grad_norm": 1.6473392465517438, - "learning_rate": 1.220819647214185e-07, - "loss": 1.133, - "step": 6239 - }, - { - "epoch": 0.845929641428862, - "grad_norm": 1.9980226081775585, - "learning_rate": 1.2187178533619803e-07, - "loss": 1.1102, - "step": 6240 - }, - { - "epoch": 0.8460652070765268, - "grad_norm": 1.727032058981367, - "learning_rate": 1.216617752910718e-07, - "loss": 1.1115, - "step": 6241 - }, - { - "epoch": 0.8462007727241917, - "grad_norm": 1.6703729579460385, - "learning_rate": 1.2145193462653946e-07, - "loss": 1.0859, - "step": 6242 - }, - { - "epoch": 0.8463363383718566, - "grad_norm": 1.471639792965376, - "learning_rate": 1.212422633830663e-07, - "loss": 1.1789, - "step": 6243 - }, - { - "epoch": 0.8464719040195214, - "grad_norm": 1.9230926605998588, - "learning_rate": 1.2103276160108656e-07, - "loss": 1.1443, - "step": 6244 - }, - { - "epoch": 0.8466074696671864, - "grad_norm": 1.5813902754180822, - "learning_rate": 1.208234293210002e-07, - "loss": 1.1106, - "step": 6245 - }, - { - "epoch": 0.8467430353148512, - "grad_norm": 1.635859603729063, - "learning_rate": 1.2061426658317608e-07, - "loss": 1.1108, - "step": 6246 - }, - { - "epoch": 0.8468786009625161, - "grad_norm": 5.553105477202321, - "learning_rate": 1.2040527342794872e-07, - "loss": 1.1129, - "step": 6247 - }, - { - "epoch": 0.847014166610181, - "grad_norm": 1.954881241479395, - "learning_rate": 1.2019644989562184e-07, - "loss": 1.1284, - "step": 6248 - }, - { - "epoch": 0.8471497322578458, - "grad_norm": 1.5250089631064803, - "learning_rate": 1.1998779602646436e-07, - "loss": 1.1642, - "step": 6249 - }, - { - "epoch": 0.8472852979055108, - "grad_norm": 1.6253421575404319, - "learning_rate": 1.1977931186071443e-07, - "loss": 1.0662, - "step": 6250 - }, - { - "epoch": 0.8474208635531756, - "grad_norm": 1.6304258891235228, - "learning_rate": 1.1957099743857568e-07, - "loss": 1.1333, - "step": 6251 - }, - { - "epoch": 0.8475564292008405, - "grad_norm": 1.9456158664895915, - "learning_rate": 1.1936285280022096e-07, - "loss": 1.1252, - "step": 6252 - }, - { - "epoch": 0.8476919948485054, - "grad_norm": 2.705241450144285, - "learning_rate": 1.1915487798578816e-07, - "loss": 1.0974, - "step": 6253 - }, - { - "epoch": 0.8478275604961703, - "grad_norm": 2.9232509899137074, - "learning_rate": 1.1894707303538476e-07, - "loss": 1.115, - "step": 6254 - }, - { - "epoch": 0.8479631261438352, - "grad_norm": 1.5132327669603225, - "learning_rate": 1.1873943798908336e-07, - "loss": 1.1298, - "step": 6255 - }, - { - "epoch": 0.8480986917915, - "grad_norm": 1.5966311073941208, - "learning_rate": 1.1853197288692518e-07, - "loss": 1.1504, - "step": 6256 - }, - { - "epoch": 0.8482342574391649, - "grad_norm": 1.4883038586147965, - "learning_rate": 1.183246777689182e-07, - "loss": 1.092, - "step": 6257 - }, - { - "epoch": 0.8483698230868298, - "grad_norm": 1.5230143142774115, - "learning_rate": 1.1811755267503754e-07, - "loss": 1.1346, - "step": 6258 - }, - { - "epoch": 0.8485053887344947, - "grad_norm": 1.6496999718442382, - "learning_rate": 1.179105976452256e-07, - "loss": 1.1216, - "step": 6259 - }, - { - "epoch": 0.8486409543821596, - "grad_norm": 4.161538189691567, - "learning_rate": 1.1770381271939223e-07, - "loss": 1.1594, - "step": 6260 - }, - { - "epoch": 0.8487765200298244, - "grad_norm": 1.5798063574744565, - "learning_rate": 1.1749719793741409e-07, - "loss": 1.1266, - "step": 6261 - }, - { - "epoch": 0.8489120856774893, - "grad_norm": 1.6303827372921604, - "learning_rate": 1.172907533391353e-07, - "loss": 1.1404, - "step": 6262 - }, - { - "epoch": 0.8490476513251543, - "grad_norm": 1.4793187116187414, - "learning_rate": 1.1708447896436724e-07, - "loss": 1.091, - "step": 6263 - }, - { - "epoch": 0.8491832169728191, - "grad_norm": 1.4807350806595518, - "learning_rate": 1.1687837485288766e-07, - "loss": 1.1071, - "step": 6264 - }, - { - "epoch": 0.849318782620484, - "grad_norm": 1.6116553290378244, - "learning_rate": 1.1667244104444308e-07, - "loss": 1.0754, - "step": 6265 - }, - { - "epoch": 0.8494543482681488, - "grad_norm": 1.7157587303171171, - "learning_rate": 1.1646667757874507e-07, - "loss": 1.0826, - "step": 6266 - }, - { - "epoch": 0.8495899139158137, - "grad_norm": 1.6261982109352877, - "learning_rate": 1.1626108449547467e-07, - "loss": 1.1027, - "step": 6267 - }, - { - "epoch": 0.8497254795634787, - "grad_norm": 1.4633396453398662, - "learning_rate": 1.1605566183427807e-07, - "loss": 1.1095, - "step": 6268 - }, - { - "epoch": 0.8498610452111435, - "grad_norm": 2.1021104930554704, - "learning_rate": 1.1585040963476966e-07, - "loss": 1.1313, - "step": 6269 - }, - { - "epoch": 0.8499966108588084, - "grad_norm": 1.6818298299550982, - "learning_rate": 1.156453279365307e-07, - "loss": 1.0945, - "step": 6270 - }, - { - "epoch": 0.8501321765064732, - "grad_norm": 1.6267090927225414, - "learning_rate": 1.1544041677910954e-07, - "loss": 1.0861, - "step": 6271 - }, - { - "epoch": 0.8502677421541381, - "grad_norm": 1.5573856466774798, - "learning_rate": 1.152356762020218e-07, - "loss": 1.1427, - "step": 6272 - }, - { - "epoch": 0.8504033078018031, - "grad_norm": 1.5522228909560154, - "learning_rate": 1.1503110624474987e-07, - "loss": 1.1027, - "step": 6273 - }, - { - "epoch": 0.8505388734494679, - "grad_norm": 1.730029109785682, - "learning_rate": 1.1482670694674367e-07, - "loss": 1.1419, - "step": 6274 - }, - { - "epoch": 0.8506744390971328, - "grad_norm": 1.8419395512126264, - "learning_rate": 1.146224783474199e-07, - "loss": 1.0753, - "step": 6275 - }, - { - "epoch": 0.8508100047447976, - "grad_norm": 1.8328799406042335, - "learning_rate": 1.1441842048616234e-07, - "loss": 1.139, - "step": 6276 - }, - { - "epoch": 0.8509455703924625, - "grad_norm": 1.5614141208822658, - "learning_rate": 1.1421453340232213e-07, - "loss": 1.1118, - "step": 6277 - }, - { - "epoch": 0.8510811360401275, - "grad_norm": 1.4353533590609489, - "learning_rate": 1.140108171352172e-07, - "loss": 1.0995, - "step": 6278 - }, - { - "epoch": 0.8512167016877923, - "grad_norm": 1.6282186323200165, - "learning_rate": 1.1380727172413262e-07, - "loss": 1.1227, - "step": 6279 - }, - { - "epoch": 0.8513522673354572, - "grad_norm": 1.9631033167794292, - "learning_rate": 1.1360389720832042e-07, - "loss": 1.1283, - "step": 6280 - }, - { - "epoch": 0.851487832983122, - "grad_norm": 1.5401099238204563, - "learning_rate": 1.1340069362699988e-07, - "loss": 1.1383, - "step": 6281 - }, - { - "epoch": 0.851623398630787, - "grad_norm": 1.7075518189411314, - "learning_rate": 1.1319766101935724e-07, - "loss": 1.1356, - "step": 6282 - }, - { - "epoch": 0.8517589642784519, - "grad_norm": 2.3812545102224627, - "learning_rate": 1.1299479942454592e-07, - "loss": 1.1026, - "step": 6283 - }, - { - "epoch": 0.8518945299261167, - "grad_norm": 3.1285490193099506, - "learning_rate": 1.1279210888168544e-07, - "loss": 1.1126, - "step": 6284 - }, - { - "epoch": 0.8520300955737816, - "grad_norm": 1.4602836528779617, - "learning_rate": 1.1258958942986396e-07, - "loss": 1.1199, - "step": 6285 - }, - { - "epoch": 0.8521656612214464, - "grad_norm": 1.5619678341692986, - "learning_rate": 1.1238724110813502e-07, - "loss": 1.1536, - "step": 6286 - }, - { - "epoch": 0.8523012268691114, - "grad_norm": 1.6631009351194246, - "learning_rate": 1.1218506395552063e-07, - "loss": 1.0948, - "step": 6287 - }, - { - "epoch": 0.8524367925167763, - "grad_norm": 1.5962277167243162, - "learning_rate": 1.1198305801100827e-07, - "loss": 1.1392, - "step": 6288 - }, - { - "epoch": 0.8525723581644411, - "grad_norm": 2.0209338242252644, - "learning_rate": 1.11781223313554e-07, - "loss": 1.1542, - "step": 6289 - }, - { - "epoch": 0.852707923812106, - "grad_norm": 1.47768686943271, - "learning_rate": 1.1157955990207946e-07, - "loss": 1.0818, - "step": 6290 - }, - { - "epoch": 0.8528434894597708, - "grad_norm": 1.548541071112339, - "learning_rate": 1.1137806781547398e-07, - "loss": 1.1455, - "step": 6291 - }, - { - "epoch": 0.8529790551074358, - "grad_norm": 2.163590396585606, - "learning_rate": 1.1117674709259372e-07, - "loss": 1.1149, - "step": 6292 - }, - { - "epoch": 0.8531146207551007, - "grad_norm": 1.6336239760628883, - "learning_rate": 1.1097559777226196e-07, - "loss": 1.1001, - "step": 6293 - }, - { - "epoch": 0.8532501864027655, - "grad_norm": 2.544820886010292, - "learning_rate": 1.1077461989326864e-07, - "loss": 1.1451, - "step": 6294 - }, - { - "epoch": 0.8533857520504304, - "grad_norm": 1.6318164203877048, - "learning_rate": 1.1057381349437067e-07, - "loss": 1.0872, - "step": 6295 - }, - { - "epoch": 0.8535213176980952, - "grad_norm": 1.567352367273081, - "learning_rate": 1.1037317861429208e-07, - "loss": 1.1337, - "step": 6296 - }, - { - "epoch": 0.8536568833457602, - "grad_norm": 1.541311301467861, - "learning_rate": 1.1017271529172367e-07, - "loss": 1.1157, - "step": 6297 - }, - { - "epoch": 0.8537924489934251, - "grad_norm": 1.6164655522081692, - "learning_rate": 1.0997242356532333e-07, - "loss": 1.1349, - "step": 6298 - }, - { - "epoch": 0.8539280146410899, - "grad_norm": 1.4547456201867044, - "learning_rate": 1.0977230347371568e-07, - "loss": 1.0828, - "step": 6299 - }, - { - "epoch": 0.8540635802887548, - "grad_norm": 1.8223892030508726, - "learning_rate": 1.0957235505549233e-07, - "loss": 1.1244, - "step": 6300 - }, - { - "epoch": 0.8541991459364197, - "grad_norm": 1.9795860897177036, - "learning_rate": 1.0937257834921144e-07, - "loss": 1.1006, - "step": 6301 - }, - { - "epoch": 0.8543347115840846, - "grad_norm": 9.854992727714778, - "learning_rate": 1.0917297339339892e-07, - "loss": 1.1107, - "step": 6302 - }, - { - "epoch": 0.8544702772317495, - "grad_norm": 1.6417999665024943, - "learning_rate": 1.0897354022654648e-07, - "loss": 1.1046, - "step": 6303 - }, - { - "epoch": 0.8546058428794143, - "grad_norm": 1.9499446788634787, - "learning_rate": 1.0877427888711377e-07, - "loss": 1.0928, - "step": 6304 - }, - { - "epoch": 0.8547414085270792, - "grad_norm": 3.7487721970116734, - "learning_rate": 1.0857518941352605e-07, - "loss": 1.1524, - "step": 6305 - }, - { - "epoch": 0.8548769741747442, - "grad_norm": 1.7060023505437065, - "learning_rate": 1.0837627184417697e-07, - "loss": 1.1348, - "step": 6306 - }, - { - "epoch": 0.855012539822409, - "grad_norm": 3.220624508989493, - "learning_rate": 1.0817752621742537e-07, - "loss": 1.1125, - "step": 6307 - }, - { - "epoch": 0.8551481054700739, - "grad_norm": 3.253595108513089, - "learning_rate": 1.0797895257159872e-07, - "loss": 1.1694, - "step": 6308 - }, - { - "epoch": 0.8552836711177387, - "grad_norm": 1.548892536137069, - "learning_rate": 1.077805509449895e-07, - "loss": 1.1768, - "step": 6309 - }, - { - "epoch": 0.8554192367654037, - "grad_norm": 1.5148208002154036, - "learning_rate": 1.0758232137585854e-07, - "loss": 1.1047, - "step": 6310 - }, - { - "epoch": 0.8555548024130686, - "grad_norm": 1.7364217174041925, - "learning_rate": 1.073842639024325e-07, - "loss": 1.1174, - "step": 6311 - }, - { - "epoch": 0.8556903680607334, - "grad_norm": 1.4246133990243268, - "learning_rate": 1.0718637856290525e-07, - "loss": 1.1316, - "step": 6312 - }, - { - "epoch": 0.8558259337083983, - "grad_norm": 1.7146879491636184, - "learning_rate": 1.069886653954375e-07, - "loss": 1.1695, - "step": 6313 - }, - { - "epoch": 0.8559614993560631, - "grad_norm": 1.6907214126553551, - "learning_rate": 1.0679112443815652e-07, - "loss": 1.1251, - "step": 6314 - }, - { - "epoch": 0.8560970650037281, - "grad_norm": 1.5524538029749009, - "learning_rate": 1.0659375572915674e-07, - "loss": 1.1309, - "step": 6315 - }, - { - "epoch": 0.856232630651393, - "grad_norm": 2.191509423515223, - "learning_rate": 1.0639655930649894e-07, - "loss": 1.0771, - "step": 6316 - }, - { - "epoch": 0.8563681962990578, - "grad_norm": 3.090471286684315, - "learning_rate": 1.0619953520821112e-07, - "loss": 1.1625, - "step": 6317 - }, - { - "epoch": 0.8565037619467227, - "grad_norm": 1.8644628284636473, - "learning_rate": 1.0600268347228757e-07, - "loss": 1.1591, - "step": 6318 - }, - { - "epoch": 0.8566393275943875, - "grad_norm": 1.7284056918240733, - "learning_rate": 1.0580600413668983e-07, - "loss": 1.1316, - "step": 6319 - }, - { - "epoch": 0.8567748932420525, - "grad_norm": 1.9409456053437604, - "learning_rate": 1.0560949723934587e-07, - "loss": 1.1674, - "step": 6320 - }, - { - "epoch": 0.8569104588897174, - "grad_norm": 1.592536871114864, - "learning_rate": 1.0541316281815038e-07, - "loss": 1.1542, - "step": 6321 - }, - { - "epoch": 0.8570460245373822, - "grad_norm": 1.6339455814786095, - "learning_rate": 1.0521700091096508e-07, - "loss": 1.1136, - "step": 6322 - }, - { - "epoch": 0.8571815901850471, - "grad_norm": 1.778786289227493, - "learning_rate": 1.0502101155561816e-07, - "loss": 1.1326, - "step": 6323 - }, - { - "epoch": 0.857317155832712, - "grad_norm": 1.8168624935921425, - "learning_rate": 1.0482519478990481e-07, - "loss": 1.133, - "step": 6324 - }, - { - "epoch": 0.8574527214803769, - "grad_norm": 1.741366783806273, - "learning_rate": 1.0462955065158618e-07, - "loss": 1.1063, - "step": 6325 - }, - { - "epoch": 0.8575882871280418, - "grad_norm": 1.7821062835592214, - "learning_rate": 1.0443407917839141e-07, - "loss": 1.1074, - "step": 6326 - }, - { - "epoch": 0.8577238527757066, - "grad_norm": 1.6376777490056627, - "learning_rate": 1.0423878040801514e-07, - "loss": 1.1159, - "step": 6327 - }, - { - "epoch": 0.8578594184233715, - "grad_norm": 2.100545131929293, - "learning_rate": 1.0404365437811946e-07, - "loss": 1.0963, - "step": 6328 - }, - { - "epoch": 0.8579949840710364, - "grad_norm": 1.5965145181704496, - "learning_rate": 1.0384870112633271e-07, - "loss": 1.0871, - "step": 6329 - }, - { - "epoch": 0.8581305497187013, - "grad_norm": 1.638885476088325, - "learning_rate": 1.0365392069025014e-07, - "loss": 1.1306, - "step": 6330 - }, - { - "epoch": 0.8582661153663662, - "grad_norm": 1.4313764862985288, - "learning_rate": 1.034593131074336e-07, - "loss": 1.1079, - "step": 6331 - }, - { - "epoch": 0.858401681014031, - "grad_norm": 1.5111905717629663, - "learning_rate": 1.0326487841541176e-07, - "loss": 1.1328, - "step": 6332 - }, - { - "epoch": 0.858537246661696, - "grad_norm": 1.9422213098277183, - "learning_rate": 1.030706166516796e-07, - "loss": 1.1173, - "step": 6333 - }, - { - "epoch": 0.8586728123093608, - "grad_norm": 1.5494497591115421, - "learning_rate": 1.0287652785369916e-07, - "loss": 1.1783, - "step": 6334 - }, - { - "epoch": 0.8588083779570257, - "grad_norm": 1.572732219467658, - "learning_rate": 1.0268261205889894e-07, - "loss": 1.0715, - "step": 6335 - }, - { - "epoch": 0.8589439436046906, - "grad_norm": 1.464778192334496, - "learning_rate": 1.0248886930467393e-07, - "loss": 1.0864, - "step": 6336 - }, - { - "epoch": 0.8590795092523554, - "grad_norm": 1.5997813333873667, - "learning_rate": 1.022952996283859e-07, - "loss": 1.1138, - "step": 6337 - }, - { - "epoch": 0.8592150749000204, - "grad_norm": 1.735016614391985, - "learning_rate": 1.0210190306736333e-07, - "loss": 1.1045, - "step": 6338 - }, - { - "epoch": 0.8593506405476852, - "grad_norm": 1.5993598904790784, - "learning_rate": 1.0190867965890137e-07, - "loss": 1.1035, - "step": 6339 - }, - { - "epoch": 0.8594862061953501, - "grad_norm": 1.5750612626379088, - "learning_rate": 1.0171562944026102e-07, - "loss": 1.1198, - "step": 6340 - }, - { - "epoch": 0.859621771843015, - "grad_norm": 1.7343389159250835, - "learning_rate": 1.0152275244867137e-07, - "loss": 1.1496, - "step": 6341 - }, - { - "epoch": 0.8597573374906798, - "grad_norm": 1.666547425731249, - "learning_rate": 1.0133004872132623e-07, - "loss": 1.1291, - "step": 6342 - }, - { - "epoch": 0.8598929031383448, - "grad_norm": 1.611280149557868, - "learning_rate": 1.0113751829538808e-07, - "loss": 1.1483, - "step": 6343 - }, - { - "epoch": 0.8600284687860096, - "grad_norm": 2.9872102158826435, - "learning_rate": 1.009451612079838e-07, - "loss": 1.0874, - "step": 6344 - }, - { - "epoch": 0.8601640344336745, - "grad_norm": 1.6508402911678626, - "learning_rate": 1.0075297749620904e-07, - "loss": 1.1239, - "step": 6345 - }, - { - "epoch": 0.8602996000813394, - "grad_norm": 1.7200558705013373, - "learning_rate": 1.0056096719712382e-07, - "loss": 1.0982, - "step": 6346 - }, - { - "epoch": 0.8604351657290042, - "grad_norm": 1.7321216456667257, - "learning_rate": 1.0036913034775673e-07, - "loss": 1.1207, - "step": 6347 - }, - { - "epoch": 0.8605707313766692, - "grad_norm": 1.7213199002412487, - "learning_rate": 1.0017746698510122e-07, - "loss": 1.143, - "step": 6348 - }, - { - "epoch": 0.860706297024334, - "grad_norm": 5.1622274214134825, - "learning_rate": 9.998597714611889e-08, - "loss": 1.1293, - "step": 6349 - }, - { - "epoch": 0.8608418626719989, - "grad_norm": 1.4850919532836222, - "learning_rate": 9.979466086773614e-08, - "loss": 1.1327, - "step": 6350 - }, - { - "epoch": 0.8609774283196638, - "grad_norm": 2.851541919343992, - "learning_rate": 9.960351818684764e-08, - "loss": 1.0862, - "step": 6351 - }, - { - "epoch": 0.8611129939673287, - "grad_norm": 1.6864593373543244, - "learning_rate": 9.941254914031316e-08, - "loss": 1.1411, - "step": 6352 - }, - { - "epoch": 0.8612485596149936, - "grad_norm": 1.477035660514135, - "learning_rate": 9.922175376495979e-08, - "loss": 1.1021, - "step": 6353 - }, - { - "epoch": 0.8613841252626584, - "grad_norm": 1.9015682511360699, - "learning_rate": 9.903113209758096e-08, - "loss": 1.1565, - "step": 6354 - }, - { - "epoch": 0.8615196909103233, - "grad_norm": 1.5761767033644603, - "learning_rate": 9.88406841749364e-08, - "loss": 1.13, - "step": 6355 - }, - { - "epoch": 0.8616552565579882, - "grad_norm": 1.423066099577052, - "learning_rate": 9.865041003375263e-08, - "loss": 1.1318, - "step": 6356 - }, - { - "epoch": 0.8617908222056531, - "grad_norm": 1.5662259490509276, - "learning_rate": 9.846030971072239e-08, - "loss": 1.1275, - "step": 6357 - }, - { - "epoch": 0.861926387853318, - "grad_norm": 1.6547383993270737, - "learning_rate": 9.827038324250514e-08, - "loss": 1.1002, - "step": 6358 - }, - { - "epoch": 0.8620619535009828, - "grad_norm": 1.410535919626774, - "learning_rate": 9.80806306657267e-08, - "loss": 1.1377, - "step": 6359 - }, - { - "epoch": 0.8621975191486477, - "grad_norm": 1.4993788859521822, - "learning_rate": 9.789105201697923e-08, - "loss": 1.0864, - "step": 6360 - }, - { - "epoch": 0.8623330847963127, - "grad_norm": 3.776827733384264, - "learning_rate": 9.77016473328216e-08, - "loss": 1.1146, - "step": 6361 - }, - { - "epoch": 0.8624686504439775, - "grad_norm": 4.210679929612081, - "learning_rate": 9.751241664977927e-08, - "loss": 1.1558, - "step": 6362 - }, - { - "epoch": 0.8626042160916424, - "grad_norm": 2.214873095363268, - "learning_rate": 9.732336000434304e-08, - "loss": 1.149, - "step": 6363 - }, - { - "epoch": 0.8627397817393072, - "grad_norm": 1.5966397522088718, - "learning_rate": 9.713447743297198e-08, - "loss": 1.122, - "step": 6364 - }, - { - "epoch": 0.8628753473869721, - "grad_norm": 1.5767226426941463, - "learning_rate": 9.694576897208984e-08, - "loss": 1.1114, - "step": 6365 - }, - { - "epoch": 0.8630109130346371, - "grad_norm": 1.5075402504563102, - "learning_rate": 9.675723465808827e-08, - "loss": 1.1296, - "step": 6366 - }, - { - "epoch": 0.8631464786823019, - "grad_norm": 1.8732668935597623, - "learning_rate": 9.656887452732399e-08, - "loss": 1.1289, - "step": 6367 - }, - { - "epoch": 0.8632820443299668, - "grad_norm": 1.4234868383476011, - "learning_rate": 9.638068861612091e-08, - "loss": 1.0983, - "step": 6368 - }, - { - "epoch": 0.8634176099776316, - "grad_norm": 1.5193370913932285, - "learning_rate": 9.619267696076938e-08, - "loss": 1.0972, - "step": 6369 - }, - { - "epoch": 0.8635531756252965, - "grad_norm": 1.7949080854439359, - "learning_rate": 9.600483959752592e-08, - "loss": 1.1392, - "step": 6370 - }, - { - "epoch": 0.8636887412729615, - "grad_norm": 1.7846594073301987, - "learning_rate": 9.581717656261335e-08, - "loss": 1.1261, - "step": 6371 - }, - { - "epoch": 0.8638243069206263, - "grad_norm": 1.6292411271951053, - "learning_rate": 9.562968789222114e-08, - "loss": 1.1046, - "step": 6372 - }, - { - "epoch": 0.8639598725682912, - "grad_norm": 1.4561003212296701, - "learning_rate": 9.544237362250495e-08, - "loss": 1.1318, - "step": 6373 - }, - { - "epoch": 0.864095438215956, - "grad_norm": 1.6189768825916528, - "learning_rate": 9.525523378958688e-08, - "loss": 1.0715, - "step": 6374 - }, - { - "epoch": 0.864231003863621, - "grad_norm": 1.830106445433859, - "learning_rate": 9.50682684295554e-08, - "loss": 1.0843, - "step": 6375 - }, - { - "epoch": 0.8643665695112859, - "grad_norm": 1.6338487326753481, - "learning_rate": 9.488147757846521e-08, - "loss": 1.1031, - "step": 6376 - }, - { - "epoch": 0.8645021351589507, - "grad_norm": 1.5221827104693977, - "learning_rate": 9.46948612723375e-08, - "loss": 1.1286, - "step": 6377 - }, - { - "epoch": 0.8646377008066156, - "grad_norm": 1.6781266869043607, - "learning_rate": 9.450841954715971e-08, - "loss": 1.1519, - "step": 6378 - }, - { - "epoch": 0.8647732664542804, - "grad_norm": 1.5841083651714827, - "learning_rate": 9.432215243888575e-08, - "loss": 1.1138, - "step": 6379 - }, - { - "epoch": 0.8649088321019454, - "grad_norm": 1.580540196027783, - "learning_rate": 9.413605998343566e-08, - "loss": 1.1116, - "step": 6380 - }, - { - "epoch": 0.8650443977496103, - "grad_norm": 3.0152128575194985, - "learning_rate": 9.395014221669595e-08, - "loss": 1.1141, - "step": 6381 - }, - { - "epoch": 0.8651799633972751, - "grad_norm": 1.5937687110616348, - "learning_rate": 9.376439917451962e-08, - "loss": 1.1352, - "step": 6382 - }, - { - "epoch": 0.86531552904494, - "grad_norm": 1.96021769372135, - "learning_rate": 9.357883089272512e-08, - "loss": 1.1335, - "step": 6383 - }, - { - "epoch": 0.8654510946926048, - "grad_norm": 1.5463423149407587, - "learning_rate": 9.33934374070986e-08, - "loss": 1.1218, - "step": 6384 - }, - { - "epoch": 0.8655866603402698, - "grad_norm": 1.438391069747751, - "learning_rate": 9.320821875339091e-08, - "loss": 1.1109, - "step": 6385 - }, - { - "epoch": 0.8657222259879347, - "grad_norm": 4.094998112037752, - "learning_rate": 9.302317496732092e-08, - "loss": 1.1054, - "step": 6386 - }, - { - "epoch": 0.8658577916355995, - "grad_norm": 1.6774747804731347, - "learning_rate": 9.283830608457199e-08, - "loss": 1.116, - "step": 6387 - }, - { - "epoch": 0.8659933572832644, - "grad_norm": 1.6783253083081413, - "learning_rate": 9.265361214079548e-08, - "loss": 1.1463, - "step": 6388 - }, - { - "epoch": 0.8661289229309294, - "grad_norm": 1.6682801119119148, - "learning_rate": 9.246909317160744e-08, - "loss": 1.0983, - "step": 6389 - }, - { - "epoch": 0.8662644885785942, - "grad_norm": 1.6756325198676987, - "learning_rate": 9.228474921259121e-08, - "loss": 1.1543, - "step": 6390 - }, - { - "epoch": 0.8664000542262591, - "grad_norm": 1.8692533097077568, - "learning_rate": 9.210058029929602e-08, - "loss": 1.1294, - "step": 6391 - }, - { - "epoch": 0.8665356198739239, - "grad_norm": 1.879706266890187, - "learning_rate": 9.191658646723732e-08, - "loss": 1.1369, - "step": 6392 - }, - { - "epoch": 0.8666711855215888, - "grad_norm": 1.6351263282239437, - "learning_rate": 9.173276775189709e-08, - "loss": 1.1068, - "step": 6393 - }, - { - "epoch": 0.8668067511692538, - "grad_norm": 1.9559564931421691, - "learning_rate": 9.154912418872306e-08, - "loss": 1.1567, - "step": 6394 - }, - { - "epoch": 0.8669423168169186, - "grad_norm": 1.573599554712936, - "learning_rate": 9.136565581312961e-08, - "loss": 1.1184, - "step": 6395 - }, - { - "epoch": 0.8670778824645835, - "grad_norm": 1.5640936371708176, - "learning_rate": 9.118236266049705e-08, - "loss": 1.1109, - "step": 6396 - }, - { - "epoch": 0.8672134481122483, - "grad_norm": 1.514935764416863, - "learning_rate": 9.099924476617216e-08, - "loss": 1.0976, - "step": 6397 - }, - { - "epoch": 0.8673490137599132, - "grad_norm": 1.7377552574703037, - "learning_rate": 9.081630216546766e-08, - "loss": 1.1437, - "step": 6398 - }, - { - "epoch": 0.8674845794075782, - "grad_norm": 1.6797392733200085, - "learning_rate": 9.063353489366287e-08, - "loss": 1.1282, - "step": 6399 - }, - { - "epoch": 0.867620145055243, - "grad_norm": 1.6035725593940091, - "learning_rate": 9.045094298600232e-08, - "loss": 1.1495, - "step": 6400 - }, - { - "epoch": 0.8677557107029079, - "grad_norm": 2.145096031680423, - "learning_rate": 9.026852647769822e-08, - "loss": 1.1174, - "step": 6401 - }, - { - "epoch": 0.8678912763505727, - "grad_norm": 1.750686266794915, - "learning_rate": 9.008628540392749e-08, - "loss": 1.1135, - "step": 6402 - }, - { - "epoch": 0.8680268419982377, - "grad_norm": 1.6447062268026524, - "learning_rate": 8.990421979983465e-08, - "loss": 1.1407, - "step": 6403 - }, - { - "epoch": 0.8681624076459026, - "grad_norm": 2.2314175403362553, - "learning_rate": 8.972232970052873e-08, - "loss": 1.1089, - "step": 6404 - }, - { - "epoch": 0.8682979732935674, - "grad_norm": 1.7549058701411144, - "learning_rate": 8.954061514108657e-08, - "loss": 1.1472, - "step": 6405 - }, - { - "epoch": 0.8684335389412323, - "grad_norm": 1.7862171186578535, - "learning_rate": 8.93590761565497e-08, - "loss": 1.1347, - "step": 6406 - }, - { - "epoch": 0.8685691045888971, - "grad_norm": 1.5521060523500179, - "learning_rate": 8.917771278192709e-08, - "loss": 1.1086, - "step": 6407 - }, - { - "epoch": 0.8687046702365621, - "grad_norm": 2.2313625583718655, - "learning_rate": 8.899652505219279e-08, - "loss": 1.0601, - "step": 6408 - }, - { - "epoch": 0.868840235884227, - "grad_norm": 1.4838439351390538, - "learning_rate": 8.881551300228785e-08, - "loss": 1.1106, - "step": 6409 - }, - { - "epoch": 0.8689758015318918, - "grad_norm": 3.573960044210363, - "learning_rate": 8.863467666711865e-08, - "loss": 1.1148, - "step": 6410 - }, - { - "epoch": 0.8691113671795567, - "grad_norm": 1.5980857017045205, - "learning_rate": 8.845401608155822e-08, - "loss": 1.1292, - "step": 6411 - }, - { - "epoch": 0.8692469328272215, - "grad_norm": 1.5224713914577301, - "learning_rate": 8.827353128044535e-08, - "loss": 1.1382, - "step": 6412 - }, - { - "epoch": 0.8693824984748865, - "grad_norm": 1.629330009654892, - "learning_rate": 8.809322229858529e-08, - "loss": 1.111, - "step": 6413 - }, - { - "epoch": 0.8695180641225514, - "grad_norm": 2.3427064849808494, - "learning_rate": 8.791308917074925e-08, - "loss": 1.1287, - "step": 6414 - }, - { - "epoch": 0.8696536297702162, - "grad_norm": 1.6492587836816042, - "learning_rate": 8.773313193167431e-08, - "loss": 1.0701, - "step": 6415 - }, - { - "epoch": 0.8697891954178811, - "grad_norm": 1.5544271734385113, - "learning_rate": 8.755335061606383e-08, - "loss": 1.0729, - "step": 6416 - }, - { - "epoch": 0.869924761065546, - "grad_norm": 1.4916959341582237, - "learning_rate": 8.737374525858743e-08, - "loss": 1.1275, - "step": 6417 - }, - { - "epoch": 0.8700603267132109, - "grad_norm": 1.4423988466854738, - "learning_rate": 8.719431589388026e-08, - "loss": 1.0688, - "step": 6418 - }, - { - "epoch": 0.8701958923608758, - "grad_norm": 5.710962023922603, - "learning_rate": 8.701506255654411e-08, - "loss": 1.1412, - "step": 6419 - }, - { - "epoch": 0.8703314580085406, - "grad_norm": 1.5918331460336514, - "learning_rate": 8.683598528114644e-08, - "loss": 1.1074, - "step": 6420 - }, - { - "epoch": 0.8704670236562055, - "grad_norm": 2.1056262196579207, - "learning_rate": 8.665708410222095e-08, - "loss": 1.1172, - "step": 6421 - }, - { - "epoch": 0.8706025893038704, - "grad_norm": 2.092574407273337, - "learning_rate": 8.647835905426726e-08, - "loss": 1.1233, - "step": 6422 - }, - { - "epoch": 0.8707381549515353, - "grad_norm": 1.6715835349998467, - "learning_rate": 8.629981017175136e-08, - "loss": 1.1316, - "step": 6423 - }, - { - "epoch": 0.8708737205992002, - "grad_norm": 4.2751696839046405, - "learning_rate": 8.61214374891045e-08, - "loss": 1.1755, - "step": 6424 - }, - { - "epoch": 0.871009286246865, - "grad_norm": 1.918725093381928, - "learning_rate": 8.59432410407248e-08, - "loss": 1.1675, - "step": 6425 - }, - { - "epoch": 0.87114485189453, - "grad_norm": 1.7213970631259639, - "learning_rate": 8.576522086097593e-08, - "loss": 1.1208, - "step": 6426 - }, - { - "epoch": 0.8712804175421948, - "grad_norm": 1.8577495992291138, - "learning_rate": 8.55873769841876e-08, - "loss": 1.103, - "step": 6427 - }, - { - "epoch": 0.8714159831898597, - "grad_norm": 2.076637594476322, - "learning_rate": 8.540970944465575e-08, - "loss": 1.1214, - "step": 6428 - }, - { - "epoch": 0.8715515488375246, - "grad_norm": 1.6873613287343516, - "learning_rate": 8.523221827664206e-08, - "loss": 1.1126, - "step": 6429 - }, - { - "epoch": 0.8716871144851894, - "grad_norm": 1.7031558855161, - "learning_rate": 8.505490351437438e-08, - "loss": 1.1177, - "step": 6430 - }, - { - "epoch": 0.8718226801328544, - "grad_norm": 1.4959242177444296, - "learning_rate": 8.487776519204637e-08, - "loss": 1.0455, - "step": 6431 - }, - { - "epoch": 0.8719582457805192, - "grad_norm": 2.1308277993139657, - "learning_rate": 8.470080334381791e-08, - "loss": 1.1049, - "step": 6432 - }, - { - "epoch": 0.8720938114281841, - "grad_norm": 1.602873683766397, - "learning_rate": 8.452401800381448e-08, - "loss": 1.1347, - "step": 6433 - }, - { - "epoch": 0.872229377075849, - "grad_norm": 1.8421494433433585, - "learning_rate": 8.434740920612792e-08, - "loss": 1.0806, - "step": 6434 - }, - { - "epoch": 0.8723649427235138, - "grad_norm": 1.6191515164072836, - "learning_rate": 8.417097698481568e-08, - "loss": 1.114, - "step": 6435 - }, - { - "epoch": 0.8725005083711788, - "grad_norm": 1.5941849031294164, - "learning_rate": 8.399472137390152e-08, - "loss": 1.1216, - "step": 6436 - }, - { - "epoch": 0.8726360740188436, - "grad_norm": 2.502001203756435, - "learning_rate": 8.38186424073748e-08, - "loss": 1.1147, - "step": 6437 - }, - { - "epoch": 0.8727716396665085, - "grad_norm": 1.915386758991212, - "learning_rate": 8.364274011919114e-08, - "loss": 1.128, - "step": 6438 - }, - { - "epoch": 0.8729072053141734, - "grad_norm": 1.8675064571032283, - "learning_rate": 8.346701454327143e-08, - "loss": 1.1607, - "step": 6439 - }, - { - "epoch": 0.8730427709618382, - "grad_norm": 2.4419980812618816, - "learning_rate": 8.329146571350365e-08, - "loss": 1.1142, - "step": 6440 - }, - { - "epoch": 0.8731783366095032, - "grad_norm": 2.45899924666329, - "learning_rate": 8.311609366374028e-08, - "loss": 1.1517, - "step": 6441 - }, - { - "epoch": 0.873313902257168, - "grad_norm": 1.5104144741157344, - "learning_rate": 8.294089842780117e-08, - "loss": 1.1141, - "step": 6442 - }, - { - "epoch": 0.8734494679048329, - "grad_norm": 2.0206014286794582, - "learning_rate": 8.27658800394706e-08, - "loss": 1.0729, - "step": 6443 - }, - { - "epoch": 0.8735850335524978, - "grad_norm": 1.851641068563042, - "learning_rate": 8.259103853250027e-08, - "loss": 1.105, - "step": 6444 - }, - { - "epoch": 0.8737205992001627, - "grad_norm": 2.0836688494138325, - "learning_rate": 8.241637394060619e-08, - "loss": 1.1083, - "step": 6445 - }, - { - "epoch": 0.8738561648478276, - "grad_norm": 1.5127354912276167, - "learning_rate": 8.224188629747175e-08, - "loss": 1.0714, - "step": 6446 - }, - { - "epoch": 0.8739917304954924, - "grad_norm": 1.4759554596680053, - "learning_rate": 8.206757563674493e-08, - "loss": 1.1325, - "step": 6447 - }, - { - "epoch": 0.8741272961431573, - "grad_norm": 2.055183879963849, - "learning_rate": 8.189344199204073e-08, - "loss": 1.1479, - "step": 6448 - }, - { - "epoch": 0.8742628617908222, - "grad_norm": 1.531415587968917, - "learning_rate": 8.171948539693874e-08, - "loss": 1.1227, - "step": 6449 - }, - { - "epoch": 0.8743984274384871, - "grad_norm": 1.508478933850404, - "learning_rate": 8.154570588498599e-08, - "loss": 1.1192, - "step": 6450 - }, - { - "epoch": 0.874533993086152, - "grad_norm": 1.4352222907172287, - "learning_rate": 8.13721034896938e-08, - "loss": 1.1013, - "step": 6451 - }, - { - "epoch": 0.8746695587338168, - "grad_norm": 1.757337057814878, - "learning_rate": 8.119867824454018e-08, - "loss": 1.141, - "step": 6452 - }, - { - "epoch": 0.8748051243814817, - "grad_norm": 1.6423164595715511, - "learning_rate": 8.102543018296892e-08, - "loss": 1.1298, - "step": 6453 - }, - { - "epoch": 0.8749406900291467, - "grad_norm": 1.4762119510269074, - "learning_rate": 8.085235933838952e-08, - "loss": 1.0799, - "step": 6454 - }, - { - "epoch": 0.8750762556768115, - "grad_norm": 1.784798123628636, - "learning_rate": 8.067946574417739e-08, - "loss": 1.0858, - "step": 6455 - }, - { - "epoch": 0.8752118213244764, - "grad_norm": 2.0299916851958306, - "learning_rate": 8.050674943367352e-08, - "loss": 1.148, - "step": 6456 - }, - { - "epoch": 0.8753473869721412, - "grad_norm": 1.9293749139802183, - "learning_rate": 8.033421044018496e-08, - "loss": 1.1312, - "step": 6457 - }, - { - "epoch": 0.8754829526198061, - "grad_norm": 1.9635102917176244, - "learning_rate": 8.016184879698462e-08, - "loss": 1.1282, - "step": 6458 - }, - { - "epoch": 0.8756185182674711, - "grad_norm": 2.57172511487264, - "learning_rate": 7.998966453731093e-08, - "loss": 1.1295, - "step": 6459 - }, - { - "epoch": 0.8757540839151359, - "grad_norm": 1.7037032192283534, - "learning_rate": 7.981765769436833e-08, - "loss": 1.1244, - "step": 6460 - }, - { - "epoch": 0.8758896495628008, - "grad_norm": 1.7591214450534252, - "learning_rate": 7.964582830132704e-08, - "loss": 1.1157, - "step": 6461 - }, - { - "epoch": 0.8760252152104656, - "grad_norm": 1.8587698934734926, - "learning_rate": 7.94741763913227e-08, - "loss": 1.1586, - "step": 6462 - }, - { - "epoch": 0.8761607808581305, - "grad_norm": 1.7277271583504699, - "learning_rate": 7.930270199745748e-08, - "loss": 1.1252, - "step": 6463 - }, - { - "epoch": 0.8762963465057955, - "grad_norm": 1.7569150908258437, - "learning_rate": 7.913140515279837e-08, - "loss": 1.1128, - "step": 6464 - }, - { - "epoch": 0.8764319121534603, - "grad_norm": 1.7546395359769382, - "learning_rate": 7.896028589037929e-08, - "loss": 1.1359, - "step": 6465 - }, - { - "epoch": 0.8765674778011252, - "grad_norm": 1.9260725947377775, - "learning_rate": 7.87893442431985e-08, - "loss": 1.1409, - "step": 6466 - }, - { - "epoch": 0.8767030434487901, - "grad_norm": 1.5367032416373057, - "learning_rate": 7.86185802442212e-08, - "loss": 1.1171, - "step": 6467 - }, - { - "epoch": 0.876838609096455, - "grad_norm": 1.5652829522011904, - "learning_rate": 7.844799392637769e-08, - "loss": 1.1669, - "step": 6468 - }, - { - "epoch": 0.8769741747441199, - "grad_norm": 1.486704396032805, - "learning_rate": 7.827758532256435e-08, - "loss": 1.1087, - "step": 6469 - }, - { - "epoch": 0.8771097403917847, - "grad_norm": 2.054500893125875, - "learning_rate": 7.810735446564298e-08, - "loss": 1.1527, - "step": 6470 - }, - { - "epoch": 0.8772453060394496, - "grad_norm": 1.541235954778072, - "learning_rate": 7.793730138844134e-08, - "loss": 1.1374, - "step": 6471 - }, - { - "epoch": 0.8773808716871145, - "grad_norm": 2.078400558691309, - "learning_rate": 7.776742612375275e-08, - "loss": 1.1124, - "step": 6472 - }, - { - "epoch": 0.8775164373347794, - "grad_norm": 2.2751521317907795, - "learning_rate": 7.759772870433645e-08, - "loss": 1.1042, - "step": 6473 - }, - { - "epoch": 0.8776520029824443, - "grad_norm": 1.8472985746733446, - "learning_rate": 7.742820916291714e-08, - "loss": 1.1466, - "step": 6474 - }, - { - "epoch": 0.8777875686301091, - "grad_norm": 1.6454572414804935, - "learning_rate": 7.725886753218536e-08, - "loss": 1.1141, - "step": 6475 - }, - { - "epoch": 0.877923134277774, - "grad_norm": 1.487219573205965, - "learning_rate": 7.708970384479729e-08, - "loss": 1.0662, - "step": 6476 - }, - { - "epoch": 0.878058699925439, - "grad_norm": 1.953113331059195, - "learning_rate": 7.692071813337487e-08, - "loss": 1.1624, - "step": 6477 - }, - { - "epoch": 0.8781942655731038, - "grad_norm": 1.9945047284082038, - "learning_rate": 7.675191043050556e-08, - "loss": 1.1258, - "step": 6478 - }, - { - "epoch": 0.8783298312207687, - "grad_norm": 1.8376426812159854, - "learning_rate": 7.658328076874287e-08, - "loss": 1.0785, - "step": 6479 - }, - { - "epoch": 0.8784653968684335, - "grad_norm": 2.1485387245678584, - "learning_rate": 7.641482918060504e-08, - "loss": 1.1757, - "step": 6480 - }, - { - "epoch": 0.8786009625160984, - "grad_norm": 1.7398993968427678, - "learning_rate": 7.624655569857751e-08, - "loss": 1.0804, - "step": 6481 - }, - { - "epoch": 0.8787365281637634, - "grad_norm": 1.7653803715019276, - "learning_rate": 7.607846035510957e-08, - "loss": 1.1424, - "step": 6482 - }, - { - "epoch": 0.8788720938114282, - "grad_norm": 1.7571064217045664, - "learning_rate": 7.591054318261802e-08, - "loss": 1.163, - "step": 6483 - }, - { - "epoch": 0.8790076594590931, - "grad_norm": 3.6065209650070376, - "learning_rate": 7.574280421348356e-08, - "loss": 1.147, - "step": 6484 - }, - { - "epoch": 0.8791432251067579, - "grad_norm": 1.687405615165891, - "learning_rate": 7.557524348005395e-08, - "loss": 1.0979, - "step": 6485 - }, - { - "epoch": 0.8792787907544228, - "grad_norm": 1.8453909985956207, - "learning_rate": 7.540786101464136e-08, - "loss": 1.138, - "step": 6486 - }, - { - "epoch": 0.8794143564020878, - "grad_norm": 1.6353479209270256, - "learning_rate": 7.524065684952475e-08, - "loss": 1.1374, - "step": 6487 - }, - { - "epoch": 0.8795499220497526, - "grad_norm": 1.609596729728245, - "learning_rate": 7.507363101694775e-08, - "loss": 1.1187, - "step": 6488 - }, - { - "epoch": 0.8796854876974175, - "grad_norm": 2.228272856350824, - "learning_rate": 7.490678354912006e-08, - "loss": 1.164, - "step": 6489 - }, - { - "epoch": 0.8798210533450823, - "grad_norm": 2.0278306692436074, - "learning_rate": 7.474011447821704e-08, - "loss": 1.1168, - "step": 6490 - }, - { - "epoch": 0.8799566189927472, - "grad_norm": 4.581796471996039, - "learning_rate": 7.457362383637922e-08, - "loss": 1.0944, - "step": 6491 - }, - { - "epoch": 0.8800921846404122, - "grad_norm": 2.0269814257038687, - "learning_rate": 7.440731165571323e-08, - "loss": 1.12, - "step": 6492 - }, - { - "epoch": 0.880227750288077, - "grad_norm": 1.6052003278999012, - "learning_rate": 7.42411779682911e-08, - "loss": 1.1232, - "step": 6493 - }, - { - "epoch": 0.8803633159357419, - "grad_norm": 1.800211209198717, - "learning_rate": 7.407522280615019e-08, - "loss": 1.1177, - "step": 6494 - }, - { - "epoch": 0.8804988815834067, - "grad_norm": 1.4903145761544596, - "learning_rate": 7.39094462012938e-08, - "loss": 1.177, - "step": 6495 - }, - { - "epoch": 0.8806344472310716, - "grad_norm": 1.5975918659646278, - "learning_rate": 7.374384818569069e-08, - "loss": 1.1341, - "step": 6496 - }, - { - "epoch": 0.8807700128787366, - "grad_norm": 1.697869456508576, - "learning_rate": 7.357842879127474e-08, - "loss": 1.135, - "step": 6497 - }, - { - "epoch": 0.8809055785264014, - "grad_norm": 1.4625922143842016, - "learning_rate": 7.341318804994645e-08, - "loss": 1.1435, - "step": 6498 - }, - { - "epoch": 0.8810411441740663, - "grad_norm": 1.9808707770665517, - "learning_rate": 7.324812599357044e-08, - "loss": 1.0952, - "step": 6499 - }, - { - "epoch": 0.8811767098217311, - "grad_norm": 1.5403613043533388, - "learning_rate": 7.308324265397836e-08, - "loss": 1.1285, - "step": 6500 - }, - { - "epoch": 0.8813122754693961, - "grad_norm": 1.5294566700935053, - "learning_rate": 7.291853806296599e-08, - "loss": 1.1211, - "step": 6501 - }, - { - "epoch": 0.881447841117061, - "grad_norm": 1.7874321136532343, - "learning_rate": 7.275401225229583e-08, - "loss": 1.1163, - "step": 6502 - }, - { - "epoch": 0.8815834067647258, - "grad_norm": 4.6034592336414875, - "learning_rate": 7.258966525369492e-08, - "loss": 1.1388, - "step": 6503 - }, - { - "epoch": 0.8817189724123907, - "grad_norm": 1.5412881404295358, - "learning_rate": 7.242549709885693e-08, - "loss": 1.0738, - "step": 6504 - }, - { - "epoch": 0.8818545380600555, - "grad_norm": 1.570243090303494, - "learning_rate": 7.226150781943963e-08, - "loss": 1.0983, - "step": 6505 - }, - { - "epoch": 0.8819901037077205, - "grad_norm": 1.5524187396503986, - "learning_rate": 7.209769744706772e-08, - "loss": 1.0826, - "step": 6506 - }, - { - "epoch": 0.8821256693553854, - "grad_norm": 7.3066240006435645, - "learning_rate": 7.193406601333018e-08, - "loss": 1.1382, - "step": 6507 - }, - { - "epoch": 0.8822612350030502, - "grad_norm": 1.4795508860470992, - "learning_rate": 7.177061354978242e-08, - "loss": 1.1217, - "step": 6508 - }, - { - "epoch": 0.8823968006507151, - "grad_norm": 1.6846359798952926, - "learning_rate": 7.160734008794489e-08, - "loss": 1.1287, - "step": 6509 - }, - { - "epoch": 0.8825323662983799, - "grad_norm": 2.1559656470099897, - "learning_rate": 7.144424565930341e-08, - "loss": 1.1197, - "step": 6510 - }, - { - "epoch": 0.8826679319460449, - "grad_norm": 1.4599551189147213, - "learning_rate": 7.128133029530969e-08, - "loss": 1.0911, - "step": 6511 - }, - { - "epoch": 0.8828034975937098, - "grad_norm": 1.681446592917456, - "learning_rate": 7.111859402738052e-08, - "loss": 1.1397, - "step": 6512 - }, - { - "epoch": 0.8829390632413746, - "grad_norm": 1.342960545840645, - "learning_rate": 7.095603688689833e-08, - "loss": 1.0966, - "step": 6513 - }, - { - "epoch": 0.8830746288890395, - "grad_norm": 1.6262350766870117, - "learning_rate": 7.079365890521106e-08, - "loss": 1.104, - "step": 6514 - }, - { - "epoch": 0.8832101945367044, - "grad_norm": 1.7038080605770212, - "learning_rate": 7.063146011363186e-08, - "loss": 1.0894, - "step": 6515 - }, - { - "epoch": 0.8833457601843693, - "grad_norm": 2.1229868327639574, - "learning_rate": 7.046944054343961e-08, - "loss": 1.1436, - "step": 6516 - }, - { - "epoch": 0.8834813258320342, - "grad_norm": 1.6878251522268506, - "learning_rate": 7.030760022587856e-08, - "loss": 1.1467, - "step": 6517 - }, - { - "epoch": 0.883616891479699, - "grad_norm": 1.5264837565387395, - "learning_rate": 7.014593919215816e-08, - "loss": 1.1151, - "step": 6518 - }, - { - "epoch": 0.8837524571273639, - "grad_norm": 2.2838925219273913, - "learning_rate": 6.998445747345371e-08, - "loss": 1.1093, - "step": 6519 - }, - { - "epoch": 0.8838880227750288, - "grad_norm": 1.9093989734358912, - "learning_rate": 6.982315510090542e-08, - "loss": 1.1489, - "step": 6520 - }, - { - "epoch": 0.8840235884226937, - "grad_norm": 1.6044414148355972, - "learning_rate": 6.966203210561927e-08, - "loss": 1.0994, - "step": 6521 - }, - { - "epoch": 0.8841591540703586, - "grad_norm": 1.5606526390632829, - "learning_rate": 6.950108851866687e-08, - "loss": 1.1143, - "step": 6522 - }, - { - "epoch": 0.8842947197180234, - "grad_norm": 3.336924579399035, - "learning_rate": 6.934032437108439e-08, - "loss": 1.1371, - "step": 6523 - }, - { - "epoch": 0.8844302853656884, - "grad_norm": 2.774352303578399, - "learning_rate": 6.917973969387424e-08, - "loss": 1.1229, - "step": 6524 - }, - { - "epoch": 0.8845658510133532, - "grad_norm": 1.6234063372099086, - "learning_rate": 6.901933451800379e-08, - "loss": 1.1357, - "step": 6525 - }, - { - "epoch": 0.8847014166610181, - "grad_norm": 1.9393489893581717, - "learning_rate": 6.885910887440593e-08, - "loss": 1.1225, - "step": 6526 - }, - { - "epoch": 0.884836982308683, - "grad_norm": 1.4282132534972312, - "learning_rate": 6.869906279397897e-08, - "loss": 1.13, - "step": 6527 - }, - { - "epoch": 0.8849725479563478, - "grad_norm": 2.1060820606793373, - "learning_rate": 6.853919630758653e-08, - "loss": 1.142, - "step": 6528 - }, - { - "epoch": 0.8851081136040128, - "grad_norm": 1.6377494986904886, - "learning_rate": 6.837950944605763e-08, - "loss": 1.1013, - "step": 6529 - }, - { - "epoch": 0.8852436792516776, - "grad_norm": 1.8053029112290662, - "learning_rate": 6.822000224018653e-08, - "loss": 1.1043, - "step": 6530 - }, - { - "epoch": 0.8853792448993425, - "grad_norm": 2.388735035333081, - "learning_rate": 6.806067472073296e-08, - "loss": 1.1391, - "step": 6531 - }, - { - "epoch": 0.8855148105470074, - "grad_norm": 1.4558845823017883, - "learning_rate": 6.790152691842199e-08, - "loss": 1.1505, - "step": 6532 - }, - { - "epoch": 0.8856503761946722, - "grad_norm": 2.681199560891195, - "learning_rate": 6.774255886394397e-08, - "loss": 1.09, - "step": 6533 - }, - { - "epoch": 0.8857859418423372, - "grad_norm": 1.7982764247684924, - "learning_rate": 6.758377058795473e-08, - "loss": 1.1131, - "step": 6534 - }, - { - "epoch": 0.885921507490002, - "grad_norm": 1.510101997961337, - "learning_rate": 6.742516212107541e-08, - "loss": 1.1486, - "step": 6535 - }, - { - "epoch": 0.8860570731376669, - "grad_norm": 1.639166930198824, - "learning_rate": 6.726673349389201e-08, - "loss": 1.1461, - "step": 6536 - }, - { - "epoch": 0.8861926387853318, - "grad_norm": 4.790695385523879, - "learning_rate": 6.710848473695674e-08, - "loss": 1.1212, - "step": 6537 - }, - { - "epoch": 0.8863282044329966, - "grad_norm": 1.9082686754712395, - "learning_rate": 6.69504158807862e-08, - "loss": 1.1172, - "step": 6538 - }, - { - "epoch": 0.8864637700806616, - "grad_norm": 1.507042864579074, - "learning_rate": 6.679252695586312e-08, - "loss": 1.1326, - "step": 6539 - }, - { - "epoch": 0.8865993357283264, - "grad_norm": 1.510492195528489, - "learning_rate": 6.663481799263471e-08, - "loss": 1.0594, - "step": 6540 - }, - { - "epoch": 0.8867349013759913, - "grad_norm": 1.979015304287678, - "learning_rate": 6.647728902151428e-08, - "loss": 1.1141, - "step": 6541 - }, - { - "epoch": 0.8868704670236562, - "grad_norm": 1.5034532346119918, - "learning_rate": 6.631994007287966e-08, - "loss": 1.1256, - "step": 6542 - }, - { - "epoch": 0.887006032671321, - "grad_norm": 1.5552629058632617, - "learning_rate": 6.616277117707492e-08, - "loss": 1.1213, - "step": 6543 - }, - { - "epoch": 0.887141598318986, - "grad_norm": 1.8636455410051425, - "learning_rate": 6.600578236440812e-08, - "loss": 1.2007, - "step": 6544 - }, - { - "epoch": 0.8872771639666509, - "grad_norm": 1.824396659317219, - "learning_rate": 6.584897366515407e-08, - "loss": 1.1178, - "step": 6545 - }, - { - "epoch": 0.8874127296143157, - "grad_norm": 1.6705718443506907, - "learning_rate": 6.569234510955135e-08, - "loss": 1.1462, - "step": 6546 - }, - { - "epoch": 0.8875482952619806, - "grad_norm": 2.854156675551722, - "learning_rate": 6.553589672780524e-08, - "loss": 1.1232, - "step": 6547 - }, - { - "epoch": 0.8876838609096455, - "grad_norm": 1.782727800709161, - "learning_rate": 6.537962855008483e-08, - "loss": 1.1157, - "step": 6548 - }, - { - "epoch": 0.8878194265573104, - "grad_norm": 1.599482694358705, - "learning_rate": 6.522354060652602e-08, - "loss": 1.1315, - "step": 6549 - }, - { - "epoch": 0.8879549922049753, - "grad_norm": 1.5411346861761548, - "learning_rate": 6.50676329272285e-08, - "loss": 1.0893, - "step": 6550 - }, - { - "epoch": 0.8880905578526401, - "grad_norm": 1.885904126821212, - "learning_rate": 6.491190554225811e-08, - "loss": 1.0705, - "step": 6551 - }, - { - "epoch": 0.888226123500305, - "grad_norm": 1.5956734551968133, - "learning_rate": 6.475635848164562e-08, - "loss": 1.1128, - "step": 6552 - }, - { - "epoch": 0.8883616891479699, - "grad_norm": 1.9333745970363465, - "learning_rate": 6.460099177538703e-08, - "loss": 1.1318, - "step": 6553 - }, - { - "epoch": 0.8884972547956348, - "grad_norm": 1.5150821855116756, - "learning_rate": 6.444580545344358e-08, - "loss": 1.1119, - "step": 6554 - }, - { - "epoch": 0.8886328204432997, - "grad_norm": 2.2451059033610665, - "learning_rate": 6.429079954574168e-08, - "loss": 1.1169, - "step": 6555 - }, - { - "epoch": 0.8887683860909645, - "grad_norm": 1.8674666684941905, - "learning_rate": 6.413597408217309e-08, - "loss": 1.1223, - "step": 6556 - }, - { - "epoch": 0.8889039517386295, - "grad_norm": 1.7477754815929119, - "learning_rate": 6.398132909259457e-08, - "loss": 1.0745, - "step": 6557 - }, - { - "epoch": 0.8890395173862943, - "grad_norm": 1.606673370223641, - "learning_rate": 6.382686460682851e-08, - "loss": 1.1229, - "step": 6558 - }, - { - "epoch": 0.8891750830339592, - "grad_norm": 1.8129028718790536, - "learning_rate": 6.367258065466152e-08, - "loss": 1.121, - "step": 6559 - }, - { - "epoch": 0.8893106486816241, - "grad_norm": 1.6807824816579051, - "learning_rate": 6.35184772658468e-08, - "loss": 1.0931, - "step": 6560 - }, - { - "epoch": 0.8894462143292889, - "grad_norm": 4.200766277821165, - "learning_rate": 6.336455447010126e-08, - "loss": 1.1437, - "step": 6561 - }, - { - "epoch": 0.8895817799769539, - "grad_norm": 2.5245472176127493, - "learning_rate": 6.321081229710834e-08, - "loss": 1.1237, - "step": 6562 - }, - { - "epoch": 0.8897173456246187, - "grad_norm": 1.5748112443076114, - "learning_rate": 6.305725077651558e-08, - "loss": 1.1316, - "step": 6563 - }, - { - "epoch": 0.8898529112722836, - "grad_norm": 2.229825331387252, - "learning_rate": 6.290386993793617e-08, - "loss": 1.1055, - "step": 6564 - }, - { - "epoch": 0.8899884769199485, - "grad_norm": 12.754560782925731, - "learning_rate": 6.275066981094857e-08, - "loss": 1.1146, - "step": 6565 - }, - { - "epoch": 0.8901240425676133, - "grad_norm": 2.2862690206419094, - "learning_rate": 6.259765042509602e-08, - "loss": 1.1116, - "step": 6566 - }, - { - "epoch": 0.8902596082152783, - "grad_norm": 2.2029974214789467, - "learning_rate": 6.244481180988714e-08, - "loss": 1.108, - "step": 6567 - }, - { - "epoch": 0.8903951738629431, - "grad_norm": 1.630833353797896, - "learning_rate": 6.229215399479582e-08, - "loss": 1.139, - "step": 6568 - }, - { - "epoch": 0.890530739510608, - "grad_norm": 2.5594386025708165, - "learning_rate": 6.213967700926071e-08, - "loss": 1.1161, - "step": 6569 - }, - { - "epoch": 0.8906663051582729, - "grad_norm": 1.4683875385139797, - "learning_rate": 6.198738088268585e-08, - "loss": 1.1269, - "step": 6570 - }, - { - "epoch": 0.8908018708059378, - "grad_norm": 1.5196703561213696, - "learning_rate": 6.183526564444042e-08, - "loss": 1.1335, - "step": 6571 - }, - { - "epoch": 0.8909374364536027, - "grad_norm": 1.5780416222417675, - "learning_rate": 6.16833313238585e-08, - "loss": 1.1077, - "step": 6572 - }, - { - "epoch": 0.8910730021012675, - "grad_norm": 1.5593170715861027, - "learning_rate": 6.153157795023956e-08, - "loss": 1.1556, - "step": 6573 - }, - { - "epoch": 0.8912085677489324, - "grad_norm": 1.532784394030106, - "learning_rate": 6.138000555284806e-08, - "loss": 1.1073, - "step": 6574 - }, - { - "epoch": 0.8913441333965973, - "grad_norm": 2.0973810335118603, - "learning_rate": 6.12286141609134e-08, - "loss": 1.1264, - "step": 6575 - }, - { - "epoch": 0.8914796990442622, - "grad_norm": 4.358925305721833, - "learning_rate": 6.107740380363036e-08, - "loss": 1.1231, - "step": 6576 - }, - { - "epoch": 0.8916152646919271, - "grad_norm": 1.6006614276930178, - "learning_rate": 6.092637451015847e-08, - "loss": 1.1994, - "step": 6577 - }, - { - "epoch": 0.8917508303395919, - "grad_norm": 2.6028573593495827, - "learning_rate": 6.07755263096229e-08, - "loss": 1.1511, - "step": 6578 - }, - { - "epoch": 0.8918863959872568, - "grad_norm": 1.5880062223442666, - "learning_rate": 6.062485923111293e-08, - "loss": 1.1177, - "step": 6579 - }, - { - "epoch": 0.8920219616349218, - "grad_norm": 1.795462523303609, - "learning_rate": 6.047437330368421e-08, - "loss": 1.1418, - "step": 6580 - }, - { - "epoch": 0.8921575272825866, - "grad_norm": 1.9618468612272715, - "learning_rate": 6.032406855635619e-08, - "loss": 1.1787, - "step": 6581 - }, - { - "epoch": 0.8922930929302515, - "grad_norm": 2.5865581260675925, - "learning_rate": 6.017394501811445e-08, - "loss": 1.1669, - "step": 6582 - }, - { - "epoch": 0.8924286585779163, - "grad_norm": 1.7745883093409707, - "learning_rate": 6.002400271790864e-08, - "loss": 1.0519, - "step": 6583 - }, - { - "epoch": 0.8925642242255812, - "grad_norm": 2.017692585737126, - "learning_rate": 5.987424168465439e-08, - "loss": 1.1417, - "step": 6584 - }, - { - "epoch": 0.8926997898732462, - "grad_norm": 1.4640505395053558, - "learning_rate": 5.972466194723159e-08, - "loss": 1.1379, - "step": 6585 - }, - { - "epoch": 0.892835355520911, - "grad_norm": 1.3944689527040175, - "learning_rate": 5.957526353448572e-08, - "loss": 1.1472, - "step": 6586 - }, - { - "epoch": 0.8929709211685759, - "grad_norm": 1.8858666555645642, - "learning_rate": 5.9426046475226975e-08, - "loss": 1.1333, - "step": 6587 - }, - { - "epoch": 0.8931064868162407, - "grad_norm": 1.4607455102674543, - "learning_rate": 5.9277010798230666e-08, - "loss": 1.1289, - "step": 6588 - }, - { - "epoch": 0.8932420524639056, - "grad_norm": 1.55332974930977, - "learning_rate": 5.912815653223724e-08, - "loss": 1.068, - "step": 6589 - }, - { - "epoch": 0.8933776181115706, - "grad_norm": 1.6272422671388427, - "learning_rate": 5.897948370595207e-08, - "loss": 1.0957, - "step": 6590 - }, - { - "epoch": 0.8935131837592354, - "grad_norm": 1.7528428018353854, - "learning_rate": 5.8830992348045563e-08, - "loss": 1.1149, - "step": 6591 - }, - { - "epoch": 0.8936487494069003, - "grad_norm": 2.357288681272348, - "learning_rate": 5.8682682487152915e-08, - "loss": 1.0887, - "step": 6592 - }, - { - "epoch": 0.8937843150545651, - "grad_norm": 2.000239537972991, - "learning_rate": 5.8534554151874805e-08, - "loss": 1.1268, - "step": 6593 - }, - { - "epoch": 0.89391988070223, - "grad_norm": 1.682855820285893, - "learning_rate": 5.8386607370776274e-08, - "loss": 1.1056, - "step": 6594 - }, - { - "epoch": 0.894055446349895, - "grad_norm": 1.5236714134912441, - "learning_rate": 5.823884217238817e-08, - "loss": 1.1013, - "step": 6595 - }, - { - "epoch": 0.8941910119975598, - "grad_norm": 1.7235570801720563, - "learning_rate": 5.809125858520514e-08, - "loss": 1.106, - "step": 6596 - }, - { - "epoch": 0.8943265776452247, - "grad_norm": 1.952667024462507, - "learning_rate": 5.794385663768819e-08, - "loss": 1.1251, - "step": 6597 - }, - { - "epoch": 0.8944621432928895, - "grad_norm": 2.6003588343652675, - "learning_rate": 5.7796636358262155e-08, - "loss": 1.1193, - "step": 6598 - }, - { - "epoch": 0.8945977089405545, - "grad_norm": 3.240102439614192, - "learning_rate": 5.764959777531775e-08, - "loss": 1.1333, - "step": 6599 - }, - { - "epoch": 0.8947332745882194, - "grad_norm": 1.7217869921175282, - "learning_rate": 5.750274091720964e-08, - "loss": 1.1251, - "step": 6600 - }, - { - "epoch": 0.8948688402358842, - "grad_norm": 1.5449719470270684, - "learning_rate": 5.7356065812258604e-08, - "loss": 1.1077, - "step": 6601 - }, - { - "epoch": 0.8950044058835491, - "grad_norm": 1.8286130688132023, - "learning_rate": 5.720957248874925e-08, - "loss": 1.1133, - "step": 6602 - }, - { - "epoch": 0.8951399715312139, - "grad_norm": 1.5196358266934906, - "learning_rate": 5.706326097493219e-08, - "loss": 1.0904, - "step": 6603 - }, - { - "epoch": 0.8952755371788789, - "grad_norm": 1.7011250642183293, - "learning_rate": 5.691713129902187e-08, - "loss": 1.1234, - "step": 6604 - }, - { - "epoch": 0.8954111028265438, - "grad_norm": 2.5680575605405527, - "learning_rate": 5.677118348919874e-08, - "loss": 1.1181, - "step": 6605 - }, - { - "epoch": 0.8955466684742086, - "grad_norm": 1.9997922004416384, - "learning_rate": 5.662541757360739e-08, - "loss": 1.1375, - "step": 6606 - }, - { - "epoch": 0.8956822341218735, - "grad_norm": 2.0296472063616746, - "learning_rate": 5.6479833580357796e-08, - "loss": 1.1225, - "step": 6607 - }, - { - "epoch": 0.8958177997695383, - "grad_norm": 1.5975487436160027, - "learning_rate": 5.633443153752448e-08, - "loss": 1.0894, - "step": 6608 - }, - { - "epoch": 0.8959533654172033, - "grad_norm": 2.3213587414460366, - "learning_rate": 5.6189211473147256e-08, - "loss": 1.1155, - "step": 6609 - }, - { - "epoch": 0.8960889310648682, - "grad_norm": 1.6806542080930005, - "learning_rate": 5.60441734152306e-08, - "loss": 1.1376, - "step": 6610 - }, - { - "epoch": 0.896224496712533, - "grad_norm": 4.065279305333201, - "learning_rate": 5.5899317391744025e-08, - "loss": 1.1317, - "step": 6611 - }, - { - "epoch": 0.8963600623601979, - "grad_norm": 1.7509581061930606, - "learning_rate": 5.575464343062175e-08, - "loss": 1.1415, - "step": 6612 - }, - { - "epoch": 0.8964956280078628, - "grad_norm": 1.7699001989000014, - "learning_rate": 5.561015155976312e-08, - "loss": 1.1395, - "step": 6613 - }, - { - "epoch": 0.8966311936555277, - "grad_norm": 1.5073314441943841, - "learning_rate": 5.546584180703207e-08, - "loss": 1.1398, - "step": 6614 - }, - { - "epoch": 0.8967667593031926, - "grad_norm": 1.6073959306120074, - "learning_rate": 5.5321714200257884e-08, - "loss": 1.1091, - "step": 6615 - }, - { - "epoch": 0.8969023249508574, - "grad_norm": 1.6619968410178436, - "learning_rate": 5.5177768767234236e-08, - "loss": 1.1056, - "step": 6616 - }, - { - "epoch": 0.8970378905985223, - "grad_norm": 3.060407950129733, - "learning_rate": 5.50340055357198e-08, - "loss": 1.1299, - "step": 6617 - }, - { - "epoch": 0.8971734562461872, - "grad_norm": 1.5210213737044191, - "learning_rate": 5.4890424533438394e-08, - "loss": 1.1248, - "step": 6618 - }, - { - "epoch": 0.8973090218938521, - "grad_norm": 2.0061969673217717, - "learning_rate": 5.4747025788078546e-08, - "loss": 1.1616, - "step": 6619 - }, - { - "epoch": 0.897444587541517, - "grad_norm": 2.305878664406554, - "learning_rate": 5.460380932729303e-08, - "loss": 1.123, - "step": 6620 - }, - { - "epoch": 0.8975801531891818, - "grad_norm": 1.538660258877756, - "learning_rate": 5.4460775178700736e-08, - "loss": 1.0979, - "step": 6621 - }, - { - "epoch": 0.8977157188368468, - "grad_norm": 2.3266916872285894, - "learning_rate": 5.431792336988417e-08, - "loss": 1.1129, - "step": 6622 - }, - { - "epoch": 0.8978512844845117, - "grad_norm": 1.7931062896523229, - "learning_rate": 5.417525392839129e-08, - "loss": 1.1472, - "step": 6623 - }, - { - "epoch": 0.8979868501321765, - "grad_norm": 1.6612948725086316, - "learning_rate": 5.4032766881734745e-08, - "loss": 1.1031, - "step": 6624 - }, - { - "epoch": 0.8981224157798414, - "grad_norm": 1.902199405123414, - "learning_rate": 5.3890462257392246e-08, - "loss": 1.1298, - "step": 6625 - }, - { - "epoch": 0.8982579814275062, - "grad_norm": 1.8510286467002033, - "learning_rate": 5.3748340082805824e-08, - "loss": 1.1135, - "step": 6626 - }, - { - "epoch": 0.8983935470751712, - "grad_norm": 1.811136280052085, - "learning_rate": 5.360640038538278e-08, - "loss": 1.1118, - "step": 6627 - }, - { - "epoch": 0.8985291127228361, - "grad_norm": 1.6679526248469438, - "learning_rate": 5.3464643192495104e-08, - "loss": 1.1376, - "step": 6628 - }, - { - "epoch": 0.8986646783705009, - "grad_norm": 1.5006984794960647, - "learning_rate": 5.33230685314795e-08, - "loss": 1.1071, - "step": 6629 - }, - { - "epoch": 0.8988002440181658, - "grad_norm": 1.524877619884794, - "learning_rate": 5.3181676429637447e-08, - "loss": 1.0911, - "step": 6630 - }, - { - "epoch": 0.8989358096658306, - "grad_norm": 1.7831547786981703, - "learning_rate": 5.304046691423536e-08, - "loss": 1.1231, - "step": 6631 - }, - { - "epoch": 0.8990713753134956, - "grad_norm": 1.5159582753641183, - "learning_rate": 5.289944001250446e-08, - "loss": 1.1431, - "step": 6632 - }, - { - "epoch": 0.8992069409611605, - "grad_norm": 1.7800743892638564, - "learning_rate": 5.275859575164054e-08, - "loss": 1.1689, - "step": 6633 - }, - { - "epoch": 0.8993425066088253, - "grad_norm": 1.456790767864359, - "learning_rate": 5.2617934158804557e-08, - "loss": 1.1414, - "step": 6634 - }, - { - "epoch": 0.8994780722564902, - "grad_norm": 1.4960953037722031, - "learning_rate": 5.247745526112146e-08, - "loss": 1.1271, - "step": 6635 - }, - { - "epoch": 0.899613637904155, - "grad_norm": 2.1940513804178186, - "learning_rate": 5.233715908568215e-08, - "loss": 1.103, - "step": 6636 - }, - { - "epoch": 0.89974920355182, - "grad_norm": 1.5886660397575265, - "learning_rate": 5.219704565954097e-08, - "loss": 1.1026, - "step": 6637 - }, - { - "epoch": 0.8998847691994849, - "grad_norm": 1.5578701011822265, - "learning_rate": 5.2057115009718434e-08, - "loss": 1.1401, - "step": 6638 - }, - { - "epoch": 0.9000203348471497, - "grad_norm": 1.7148906444994907, - "learning_rate": 5.191736716319828e-08, - "loss": 1.1452, - "step": 6639 - }, - { - "epoch": 0.9001559004948146, - "grad_norm": 1.6014404782470708, - "learning_rate": 5.17778021469305e-08, - "loss": 1.1362, - "step": 6640 - }, - { - "epoch": 0.9002914661424795, - "grad_norm": 1.5479957352003657, - "learning_rate": 5.1638419987828365e-08, - "loss": 1.1384, - "step": 6641 - }, - { - "epoch": 0.9004270317901444, - "grad_norm": 2.2775526069232135, - "learning_rate": 5.149922071277146e-08, - "loss": 1.0796, - "step": 6642 - }, - { - "epoch": 0.9005625974378093, - "grad_norm": 1.6060022307579547, - "learning_rate": 5.136020434860244e-08, - "loss": 1.1016, - "step": 6643 - }, - { - "epoch": 0.9006981630854741, - "grad_norm": 1.4532670355378896, - "learning_rate": 5.122137092213019e-08, - "loss": 1.1185, - "step": 6644 - }, - { - "epoch": 0.900833728733139, - "grad_norm": 1.7822637550434186, - "learning_rate": 5.108272046012718e-08, - "loss": 1.1701, - "step": 6645 - }, - { - "epoch": 0.9009692943808039, - "grad_norm": 1.530284647918784, - "learning_rate": 5.094425298933136e-08, - "loss": 1.1434, - "step": 6646 - }, - { - "epoch": 0.9011048600284688, - "grad_norm": 1.7431303458661551, - "learning_rate": 5.080596853644492e-08, - "loss": 1.0955, - "step": 6647 - }, - { - "epoch": 0.9012404256761337, - "grad_norm": 1.5803202785204071, - "learning_rate": 5.066786712813498e-08, - "loss": 1.1587, - "step": 6648 - }, - { - "epoch": 0.9013759913237985, - "grad_norm": 1.386352113674172, - "learning_rate": 5.052994879103323e-08, - "loss": 1.1231, - "step": 6649 - }, - { - "epoch": 0.9015115569714635, - "grad_norm": 1.5567348013329179, - "learning_rate": 5.0392213551736176e-08, - "loss": 1.1231, - "step": 6650 - }, - { - "epoch": 0.9016471226191283, - "grad_norm": 1.5444360797583259, - "learning_rate": 5.0254661436805015e-08, - "loss": 1.1255, - "step": 6651 - }, - { - "epoch": 0.9017826882667932, - "grad_norm": 2.0903465670004513, - "learning_rate": 5.0117292472765635e-08, - "loss": 1.1167, - "step": 6652 - }, - { - "epoch": 0.9019182539144581, - "grad_norm": 1.9227015500862508, - "learning_rate": 4.9980106686108416e-08, - "loss": 1.1351, - "step": 6653 - }, - { - "epoch": 0.9020538195621229, - "grad_norm": 1.8182804879239725, - "learning_rate": 4.9843104103288625e-08, - "loss": 1.1097, - "step": 6654 - }, - { - "epoch": 0.9021893852097879, - "grad_norm": 1.8976278076365194, - "learning_rate": 4.9706284750726135e-08, - "loss": 1.1231, - "step": 6655 - }, - { - "epoch": 0.9023249508574527, - "grad_norm": 1.7457852220421306, - "learning_rate": 4.956964865480551e-08, - "loss": 1.1159, - "step": 6656 - }, - { - "epoch": 0.9024605165051176, - "grad_norm": 1.897936942282887, - "learning_rate": 4.9433195841875995e-08, - "loss": 1.1064, - "step": 6657 - }, - { - "epoch": 0.9025960821527825, - "grad_norm": 1.6608822132725811, - "learning_rate": 4.9296926338251e-08, - "loss": 1.082, - "step": 6658 - }, - { - "epoch": 0.9027316478004473, - "grad_norm": 2.0757131756801726, - "learning_rate": 4.916084017020972e-08, - "loss": 1.1053, - "step": 6659 - }, - { - "epoch": 0.9028672134481123, - "grad_norm": 1.56117956905002, - "learning_rate": 4.9024937363994714e-08, - "loss": 1.1733, - "step": 6660 - }, - { - "epoch": 0.9030027790957771, - "grad_norm": 2.1015768213541097, - "learning_rate": 4.888921794581424e-08, - "loss": 1.115, - "step": 6661 - }, - { - "epoch": 0.903138344743442, - "grad_norm": 1.5234965693332603, - "learning_rate": 4.875368194184026e-08, - "loss": 1.1115, - "step": 6662 - }, - { - "epoch": 0.9032739103911069, - "grad_norm": 1.6342897665718141, - "learning_rate": 4.8618329378210085e-08, - "loss": 1.1514, - "step": 6663 - }, - { - "epoch": 0.9034094760387718, - "grad_norm": 1.413906405943974, - "learning_rate": 4.848316028102539e-08, - "loss": 1.1137, - "step": 6664 - }, - { - "epoch": 0.9035450416864367, - "grad_norm": 2.6508059638420405, - "learning_rate": 4.834817467635233e-08, - "loss": 1.1449, - "step": 6665 - }, - { - "epoch": 0.9036806073341015, - "grad_norm": 5.016390010568439, - "learning_rate": 4.821337259022196e-08, - "loss": 1.0717, - "step": 6666 - }, - { - "epoch": 0.9038161729817664, - "grad_norm": 2.0720699804373455, - "learning_rate": 4.807875404862971e-08, - "loss": 1.1325, - "step": 6667 - }, - { - "epoch": 0.9039517386294313, - "grad_norm": 1.492958027759036, - "learning_rate": 4.794431907753571e-08, - "loss": 1.1052, - "step": 6668 - }, - { - "epoch": 0.9040873042770962, - "grad_norm": 1.7249110693672132, - "learning_rate": 4.781006770286478e-08, - "loss": 1.1398, - "step": 6669 - }, - { - "epoch": 0.9042228699247611, - "grad_norm": 2.060991366187916, - "learning_rate": 4.767599995050609e-08, - "loss": 1.1335, - "step": 6670 - }, - { - "epoch": 0.9043584355724259, - "grad_norm": 1.462603687870178, - "learning_rate": 4.7542115846313734e-08, - "loss": 1.098, - "step": 6671 - }, - { - "epoch": 0.9044940012200908, - "grad_norm": 1.6080861802235715, - "learning_rate": 4.740841541610596e-08, - "loss": 1.1165, - "step": 6672 - }, - { - "epoch": 0.9046295668677558, - "grad_norm": 1.6922030707148257, - "learning_rate": 4.727489868566603e-08, - "loss": 1.1301, - "step": 6673 - }, - { - "epoch": 0.9047651325154206, - "grad_norm": 1.5257125012030721, - "learning_rate": 4.714156568074157e-08, - "loss": 1.1202, - "step": 6674 - }, - { - "epoch": 0.9049006981630855, - "grad_norm": 1.7683038824532942, - "learning_rate": 4.700841642704478e-08, - "loss": 1.1116, - "step": 6675 - }, - { - "epoch": 0.9050362638107503, - "grad_norm": 1.4949725367256426, - "learning_rate": 4.687545095025225e-08, - "loss": 1.09, - "step": 6676 - }, - { - "epoch": 0.9051718294584152, - "grad_norm": 1.7149431658195196, - "learning_rate": 4.6742669276005786e-08, - "loss": 1.151, - "step": 6677 - }, - { - "epoch": 0.9053073951060802, - "grad_norm": 1.7856179239230516, - "learning_rate": 4.661007142991069e-08, - "loss": 1.1053, - "step": 6678 - }, - { - "epoch": 0.905442960753745, - "grad_norm": 1.7028035542955315, - "learning_rate": 4.6477657437537953e-08, - "loss": 1.089, - "step": 6679 - }, - { - "epoch": 0.9055785264014099, - "grad_norm": 2.289038129754105, - "learning_rate": 4.634542732442204e-08, - "loss": 1.1179, - "step": 6680 - }, - { - "epoch": 0.9057140920490747, - "grad_norm": 1.876258421438715, - "learning_rate": 4.62133811160631e-08, - "loss": 1.1063, - "step": 6681 - }, - { - "epoch": 0.9058496576967396, - "grad_norm": 1.4521907970302164, - "learning_rate": 4.608151883792466e-08, - "loss": 1.0833, - "step": 6682 - }, - { - "epoch": 0.9059852233444046, - "grad_norm": 1.470499112431176, - "learning_rate": 4.5949840515435715e-08, - "loss": 1.1551, - "step": 6683 - }, - { - "epoch": 0.9061207889920694, - "grad_norm": 1.9445477257198676, - "learning_rate": 4.581834617398916e-08, - "loss": 1.1042, - "step": 6684 - }, - { - "epoch": 0.9062563546397343, - "grad_norm": 3.6037135350854816, - "learning_rate": 4.568703583894262e-08, - "loss": 1.1003, - "step": 6685 - }, - { - "epoch": 0.9063919202873991, - "grad_norm": 2.020784928636139, - "learning_rate": 4.555590953561839e-08, - "loss": 1.1244, - "step": 6686 - }, - { - "epoch": 0.906527485935064, - "grad_norm": 1.6698337548714017, - "learning_rate": 4.542496728930301e-08, - "loss": 1.1125, - "step": 6687 - }, - { - "epoch": 0.906663051582729, - "grad_norm": 1.5326615595450963, - "learning_rate": 4.529420912524773e-08, - "loss": 1.0959, - "step": 6688 - }, - { - "epoch": 0.9067986172303938, - "grad_norm": 1.5172170760756922, - "learning_rate": 4.516363506866827e-08, - "loss": 1.0954, - "step": 6689 - }, - { - "epoch": 0.9069341828780587, - "grad_norm": 2.1908048598902936, - "learning_rate": 4.503324514474483e-08, - "loss": 1.1453, - "step": 6690 - }, - { - "epoch": 0.9070697485257235, - "grad_norm": 1.8507059242482646, - "learning_rate": 4.4903039378621945e-08, - "loss": 1.0883, - "step": 6691 - }, - { - "epoch": 0.9072053141733885, - "grad_norm": 1.5651735805164195, - "learning_rate": 4.477301779540887e-08, - "loss": 1.1299, - "step": 6692 - }, - { - "epoch": 0.9073408798210534, - "grad_norm": 1.5290292405470145, - "learning_rate": 4.4643180420179113e-08, - "loss": 1.1104, - "step": 6693 - }, - { - "epoch": 0.9074764454687182, - "grad_norm": 1.9330034554907178, - "learning_rate": 4.451352727797109e-08, - "loss": 1.0955, - "step": 6694 - }, - { - "epoch": 0.9076120111163831, - "grad_norm": 1.7245337868867912, - "learning_rate": 4.4384058393786895e-08, - "loss": 1.1443, - "step": 6695 - }, - { - "epoch": 0.9077475767640479, - "grad_norm": 1.6684575726657702, - "learning_rate": 4.425477379259424e-08, - "loss": 1.1144, - "step": 6696 - }, - { - "epoch": 0.9078831424117129, - "grad_norm": 1.752329573293536, - "learning_rate": 4.412567349932384e-08, - "loss": 1.1322, - "step": 6697 - }, - { - "epoch": 0.9080187080593778, - "grad_norm": 1.680912760156265, - "learning_rate": 4.399675753887244e-08, - "loss": 1.1251, - "step": 6698 - }, - { - "epoch": 0.9081542737070426, - "grad_norm": 1.9628110884394931, - "learning_rate": 4.386802593609984e-08, - "loss": 1.1361, - "step": 6699 - }, - { - "epoch": 0.9082898393547075, - "grad_norm": 1.971743044464529, - "learning_rate": 4.37394787158315e-08, - "loss": 1.1054, - "step": 6700 - }, - { - "epoch": 0.9084254050023723, - "grad_norm": 1.800632167910115, - "learning_rate": 4.3611115902856044e-08, - "loss": 1.1226, - "step": 6701 - }, - { - "epoch": 0.9085609706500373, - "grad_norm": 2.7047473475228334, - "learning_rate": 4.3482937521928e-08, - "loss": 1.1452, - "step": 6702 - }, - { - "epoch": 0.9086965362977022, - "grad_norm": 1.974768351493521, - "learning_rate": 4.335494359776493e-08, - "loss": 1.1381, - "step": 6703 - }, - { - "epoch": 0.908832101945367, - "grad_norm": 1.562607231411647, - "learning_rate": 4.322713415504975e-08, - "loss": 1.1037, - "step": 6704 - }, - { - "epoch": 0.9089676675930319, - "grad_norm": 1.9991811837218214, - "learning_rate": 4.3099509218429416e-08, - "loss": 1.0871, - "step": 6705 - }, - { - "epoch": 0.9091032332406969, - "grad_norm": 2.138281737846231, - "learning_rate": 4.297206881251547e-08, - "loss": 1.1217, - "step": 6706 - }, - { - "epoch": 0.9092387988883617, - "grad_norm": 1.6882284021645007, - "learning_rate": 4.284481296188369e-08, - "loss": 1.1379, - "step": 6707 - }, - { - "epoch": 0.9093743645360266, - "grad_norm": 1.6610950977317862, - "learning_rate": 4.271774169107445e-08, - "loss": 1.1086, - "step": 6708 - }, - { - "epoch": 0.9095099301836914, - "grad_norm": 1.6532040100652365, - "learning_rate": 4.259085502459236e-08, - "loss": 1.1369, - "step": 6709 - }, - { - "epoch": 0.9096454958313563, - "grad_norm": 1.6184984377051455, - "learning_rate": 4.246415298690653e-08, - "loss": 1.1221, - "step": 6710 - }, - { - "epoch": 0.9097810614790213, - "grad_norm": 5.9158141817397905, - "learning_rate": 4.2337635602450514e-08, - "loss": 1.0892, - "step": 6711 - }, - { - "epoch": 0.9099166271266861, - "grad_norm": 2.116888207803189, - "learning_rate": 4.2211302895622136e-08, - "loss": 1.123, - "step": 6712 - }, - { - "epoch": 0.910052192774351, - "grad_norm": 2.0291051413326193, - "learning_rate": 4.208515489078368e-08, - "loss": 1.1111, - "step": 6713 - }, - { - "epoch": 0.9101877584220158, - "grad_norm": 1.8600805813884354, - "learning_rate": 4.19591916122618e-08, - "loss": 1.1307, - "step": 6714 - }, - { - "epoch": 0.9103233240696808, - "grad_norm": 1.5303046370928024, - "learning_rate": 4.18334130843474e-08, - "loss": 1.0995, - "step": 6715 - }, - { - "epoch": 0.9104588897173457, - "grad_norm": 1.9234574774213533, - "learning_rate": 4.1707819331296076e-08, - "loss": 1.1495, - "step": 6716 - }, - { - "epoch": 0.9105944553650105, - "grad_norm": 3.740454162414902, - "learning_rate": 4.158241037732746e-08, - "loss": 1.1308, - "step": 6717 - }, - { - "epoch": 0.9107300210126754, - "grad_norm": 1.6030890673262763, - "learning_rate": 4.1457186246625863e-08, - "loss": 1.1221, - "step": 6718 - }, - { - "epoch": 0.9108655866603402, - "grad_norm": 1.5610624890784406, - "learning_rate": 4.133214696333942e-08, - "loss": 1.111, - "step": 6719 - }, - { - "epoch": 0.9110011523080052, - "grad_norm": 1.5318089766167327, - "learning_rate": 4.1207292551581284e-08, - "loss": 1.1166, - "step": 6720 - }, - { - "epoch": 0.9111367179556701, - "grad_norm": 1.5032125742660187, - "learning_rate": 4.1082623035428424e-08, - "loss": 1.1309, - "step": 6721 - }, - { - "epoch": 0.9112722836033349, - "grad_norm": 2.121962295313918, - "learning_rate": 4.095813843892259e-08, - "loss": 1.1084, - "step": 6722 - }, - { - "epoch": 0.9114078492509998, - "grad_norm": 2.0517419754756236, - "learning_rate": 4.08338387860695e-08, - "loss": 1.1345, - "step": 6723 - }, - { - "epoch": 0.9115434148986646, - "grad_norm": 1.7458090046833024, - "learning_rate": 4.0709724100839395e-08, - "loss": 1.1421, - "step": 6724 - }, - { - "epoch": 0.9116789805463296, - "grad_norm": 1.4331189853348463, - "learning_rate": 4.058579440716681e-08, - "loss": 1.1046, - "step": 6725 - }, - { - "epoch": 0.9118145461939945, - "grad_norm": 1.5257597427052438, - "learning_rate": 4.046204972895062e-08, - "loss": 1.135, - "step": 6726 - }, - { - "epoch": 0.9119501118416593, - "grad_norm": 2.228172151358896, - "learning_rate": 4.0338490090053966e-08, - "loss": 1.142, - "step": 6727 - }, - { - "epoch": 0.9120856774893242, - "grad_norm": 1.5574099420773155, - "learning_rate": 4.0215115514304456e-08, - "loss": 1.1405, - "step": 6728 - }, - { - "epoch": 0.912221243136989, - "grad_norm": 2.5602018393970223, - "learning_rate": 4.009192602549383e-08, - "loss": 1.138, - "step": 6729 - }, - { - "epoch": 0.912356808784654, - "grad_norm": 1.5824692749654645, - "learning_rate": 3.996892164737819e-08, - "loss": 1.1308, - "step": 6730 - }, - { - "epoch": 0.9124923744323189, - "grad_norm": 1.5090393442238643, - "learning_rate": 3.9846102403678027e-08, - "loss": 1.1131, - "step": 6731 - }, - { - "epoch": 0.9126279400799837, - "grad_norm": 2.1587481939478876, - "learning_rate": 3.972346831807793e-08, - "loss": 1.1723, - "step": 6732 - }, - { - "epoch": 0.9127635057276486, - "grad_norm": 1.84573197649397, - "learning_rate": 3.960101941422711e-08, - "loss": 1.1097, - "step": 6733 - }, - { - "epoch": 0.9128990713753135, - "grad_norm": 2.5887825311551302, - "learning_rate": 3.947875571573867e-08, - "loss": 1.1287, - "step": 6734 - }, - { - "epoch": 0.9130346370229784, - "grad_norm": 11.19250842158323, - "learning_rate": 3.93566772461904e-08, - "loss": 1.138, - "step": 6735 - }, - { - "epoch": 0.9131702026706433, - "grad_norm": 2.0790792653436423, - "learning_rate": 3.923478402912395e-08, - "loss": 1.1234, - "step": 6736 - }, - { - "epoch": 0.9133057683183081, - "grad_norm": 1.9683439079102498, - "learning_rate": 3.911307608804582e-08, - "loss": 1.0886, - "step": 6737 - }, - { - "epoch": 0.913441333965973, - "grad_norm": 2.717191193340322, - "learning_rate": 3.899155344642579e-08, - "loss": 1.1544, - "step": 6738 - }, - { - "epoch": 0.9135768996136379, - "grad_norm": 1.562624762112161, - "learning_rate": 3.887021612769936e-08, - "loss": 1.1224, - "step": 6739 - }, - { - "epoch": 0.9137124652613028, - "grad_norm": 2.165243127956083, - "learning_rate": 3.8749064155264685e-08, - "loss": 1.1087, - "step": 6740 - }, - { - "epoch": 0.9138480309089677, - "grad_norm": 2.034439418082233, - "learning_rate": 3.862809755248564e-08, - "loss": 1.128, - "step": 6741 - }, - { - "epoch": 0.9139835965566325, - "grad_norm": 1.9175213142748964, - "learning_rate": 3.850731634268911e-08, - "loss": 1.1051, - "step": 6742 - }, - { - "epoch": 0.9141191622042975, - "grad_norm": 1.9945286876189583, - "learning_rate": 3.838672054916725e-08, - "loss": 1.1407, - "step": 6743 - }, - { - "epoch": 0.9142547278519623, - "grad_norm": 1.7840672338425272, - "learning_rate": 3.826631019517568e-08, - "loss": 1.123, - "step": 6744 - }, - { - "epoch": 0.9143902934996272, - "grad_norm": 1.4609813868808128, - "learning_rate": 3.814608530393493e-08, - "loss": 1.1612, - "step": 6745 - }, - { - "epoch": 0.9145258591472921, - "grad_norm": 1.5340116197360563, - "learning_rate": 3.802604589862912e-08, - "loss": 1.1243, - "step": 6746 - }, - { - "epoch": 0.9146614247949569, - "grad_norm": 2.416001947618484, - "learning_rate": 3.790619200240697e-08, - "loss": 1.0812, - "step": 6747 - }, - { - "epoch": 0.9147969904426219, - "grad_norm": 1.4254261331293017, - "learning_rate": 3.7786523638381306e-08, - "loss": 1.1256, - "step": 6748 - }, - { - "epoch": 0.9149325560902867, - "grad_norm": 1.9716278196204002, - "learning_rate": 3.766704082962935e-08, - "loss": 1.1404, - "step": 6749 - }, - { - "epoch": 0.9150681217379516, - "grad_norm": 2.094521490049005, - "learning_rate": 3.754774359919244e-08, - "loss": 1.1515, - "step": 6750 - }, - { - "epoch": 0.9152036873856165, - "grad_norm": 1.558915646150576, - "learning_rate": 3.7428631970076065e-08, - "loss": 1.1473, - "step": 6751 - }, - { - "epoch": 0.9153392530332813, - "grad_norm": 1.5799693612173444, - "learning_rate": 3.730970596524985e-08, - "loss": 1.1157, - "step": 6752 - }, - { - "epoch": 0.9154748186809463, - "grad_norm": 2.194035223954067, - "learning_rate": 3.719096560764778e-08, - "loss": 1.0943, - "step": 6753 - }, - { - "epoch": 0.9156103843286111, - "grad_norm": 2.7299841919253627, - "learning_rate": 3.707241092016811e-08, - "loss": 1.1022, - "step": 6754 - }, - { - "epoch": 0.915745949976276, - "grad_norm": 1.4091904918431264, - "learning_rate": 3.69540419256732e-08, - "loss": 1.1554, - "step": 6755 - }, - { - "epoch": 0.9158815156239409, - "grad_norm": 1.6322601315230896, - "learning_rate": 3.683585864698946e-08, - "loss": 1.1531, - "step": 6756 - }, - { - "epoch": 0.9160170812716057, - "grad_norm": 1.4232384340422002, - "learning_rate": 3.6717861106907447e-08, - "loss": 1.1345, - "step": 6757 - }, - { - "epoch": 0.9161526469192707, - "grad_norm": 1.8531879047673163, - "learning_rate": 3.66000493281825e-08, - "loss": 1.0897, - "step": 6758 - }, - { - "epoch": 0.9162882125669355, - "grad_norm": 1.3953291605460312, - "learning_rate": 3.648242333353324e-08, - "loss": 1.1274, - "step": 6759 - }, - { - "epoch": 0.9164237782146004, - "grad_norm": 1.603409078962768, - "learning_rate": 3.6364983145643066e-08, - "loss": 1.1261, - "step": 6760 - }, - { - "epoch": 0.9165593438622653, - "grad_norm": 1.60890010857303, - "learning_rate": 3.624772878715954e-08, - "loss": 1.1184, - "step": 6761 - }, - { - "epoch": 0.9166949095099302, - "grad_norm": 1.682646441050896, - "learning_rate": 3.6130660280694005e-08, - "loss": 1.0704, - "step": 6762 - }, - { - "epoch": 0.9168304751575951, - "grad_norm": 1.5162503563823384, - "learning_rate": 3.6013777648822406e-08, - "loss": 1.0832, - "step": 6763 - }, - { - "epoch": 0.9169660408052599, - "grad_norm": 1.4596356350941322, - "learning_rate": 3.58970809140845e-08, - "loss": 1.129, - "step": 6764 - }, - { - "epoch": 0.9171016064529248, - "grad_norm": 1.932958168020106, - "learning_rate": 3.5780570098984273e-08, - "loss": 1.1319, - "step": 6765 - }, - { - "epoch": 0.9172371721005897, - "grad_norm": 2.0230214571770717, - "learning_rate": 3.5664245225990206e-08, - "loss": 1.1302, - "step": 6766 - }, - { - "epoch": 0.9173727377482546, - "grad_norm": 1.9827032968015892, - "learning_rate": 3.554810631753436e-08, - "loss": 1.1326, - "step": 6767 - }, - { - "epoch": 0.9175083033959195, - "grad_norm": 1.5951241793424613, - "learning_rate": 3.543215339601324e-08, - "loss": 1.1144, - "step": 6768 - }, - { - "epoch": 0.9176438690435843, - "grad_norm": 1.8770225814610406, - "learning_rate": 3.531638648378754e-08, - "loss": 1.0899, - "step": 6769 - }, - { - "epoch": 0.9177794346912492, - "grad_norm": 1.6157239779198629, - "learning_rate": 3.520080560318195e-08, - "loss": 1.1272, - "step": 6770 - }, - { - "epoch": 0.9179150003389142, - "grad_norm": 1.400026819480723, - "learning_rate": 3.508541077648541e-08, - "loss": 1.1713, - "step": 6771 - }, - { - "epoch": 0.918050565986579, - "grad_norm": 1.6138588854978515, - "learning_rate": 3.497020202595069e-08, - "loss": 1.1089, - "step": 6772 - }, - { - "epoch": 0.9181861316342439, - "grad_norm": 1.4210278852239016, - "learning_rate": 3.485517937379512e-08, - "loss": 1.1037, - "step": 6773 - }, - { - "epoch": 0.9183216972819087, - "grad_norm": 1.4710527517830612, - "learning_rate": 3.474034284219995e-08, - "loss": 1.1078, - "step": 6774 - }, - { - "epoch": 0.9184572629295736, - "grad_norm": 1.5194034262817644, - "learning_rate": 3.462569245331004e-08, - "loss": 1.1188, - "step": 6775 - }, - { - "epoch": 0.9185928285772386, - "grad_norm": 3.080599072232844, - "learning_rate": 3.451122822923547e-08, - "loss": 1.0935, - "step": 6776 - }, - { - "epoch": 0.9187283942249034, - "grad_norm": 2.3948632454021963, - "learning_rate": 3.4396950192049134e-08, - "loss": 1.0742, - "step": 6777 - }, - { - "epoch": 0.9188639598725683, - "grad_norm": 1.6677411665105937, - "learning_rate": 3.4282858363789194e-08, - "loss": 1.0985, - "step": 6778 - }, - { - "epoch": 0.9189995255202331, - "grad_norm": 1.8477414586683605, - "learning_rate": 3.4168952766456924e-08, - "loss": 1.1136, - "step": 6779 - }, - { - "epoch": 0.919135091167898, - "grad_norm": 1.5962607257626056, - "learning_rate": 3.405523342201855e-08, - "loss": 1.1059, - "step": 6780 - }, - { - "epoch": 0.919270656815563, - "grad_norm": 2.5660861461346585, - "learning_rate": 3.39417003524034e-08, - "loss": 1.1438, - "step": 6781 - }, - { - "epoch": 0.9194062224632278, - "grad_norm": 2.0075774509609157, - "learning_rate": 3.3828353579505975e-08, - "loss": 1.108, - "step": 6782 - }, - { - "epoch": 0.9195417881108927, - "grad_norm": 1.5729161408815533, - "learning_rate": 3.3715193125184005e-08, - "loss": 1.143, - "step": 6783 - }, - { - "epoch": 0.9196773537585576, - "grad_norm": 1.4682429376135833, - "learning_rate": 3.3602219011259595e-08, - "loss": 1.1074, - "step": 6784 - }, - { - "epoch": 0.9198129194062225, - "grad_norm": 3.390775636880124, - "learning_rate": 3.3489431259518975e-08, - "loss": 1.1153, - "step": 6785 - }, - { - "epoch": 0.9199484850538874, - "grad_norm": 1.741232804569642, - "learning_rate": 3.337682989171242e-08, - "loss": 1.1601, - "step": 6786 - }, - { - "epoch": 0.9200840507015522, - "grad_norm": 1.6820492376623872, - "learning_rate": 3.326441492955412e-08, - "loss": 1.0889, - "step": 6787 - }, - { - "epoch": 0.9202196163492171, - "grad_norm": 1.6799722808875552, - "learning_rate": 3.3152186394722506e-08, - "loss": 1.1226, - "step": 6788 - }, - { - "epoch": 0.920355181996882, - "grad_norm": 3.4961298688858453, - "learning_rate": 3.304014430885982e-08, - "loss": 1.143, - "step": 6789 - }, - { - "epoch": 0.9204907476445469, - "grad_norm": 2.144515505832102, - "learning_rate": 3.292828869357267e-08, - "loss": 1.1581, - "step": 6790 - }, - { - "epoch": 0.9206263132922118, - "grad_norm": 1.6128295019047643, - "learning_rate": 3.281661957043147e-08, - "loss": 1.1427, - "step": 6791 - }, - { - "epoch": 0.9207618789398766, - "grad_norm": 1.5161136403181574, - "learning_rate": 3.270513696097055e-08, - "loss": 1.1322, - "step": 6792 - }, - { - "epoch": 0.9208974445875415, - "grad_norm": 1.58184905370839, - "learning_rate": 3.2593840886688815e-08, - "loss": 1.1153, - "step": 6793 - }, - { - "epoch": 0.9210330102352065, - "grad_norm": 1.6405503786461217, - "learning_rate": 3.248273136904844e-08, - "loss": 1.1066, - "step": 6794 - }, - { - "epoch": 0.9211685758828713, - "grad_norm": 1.5565647045485356, - "learning_rate": 3.23718084294764e-08, - "loss": 1.129, - "step": 6795 - }, - { - "epoch": 0.9213041415305362, - "grad_norm": 1.5677495352039321, - "learning_rate": 3.226107208936279e-08, - "loss": 1.09, - "step": 6796 - }, - { - "epoch": 0.921439707178201, - "grad_norm": 2.6749829568042713, - "learning_rate": 3.2150522370062886e-08, - "loss": 1.0966, - "step": 6797 - }, - { - "epoch": 0.9215752728258659, - "grad_norm": 2.7098094494613245, - "learning_rate": 3.204015929289483e-08, - "loss": 1.1283, - "step": 6798 - }, - { - "epoch": 0.9217108384735309, - "grad_norm": 1.5895928781437476, - "learning_rate": 3.1929982879141613e-08, - "loss": 1.1278, - "step": 6799 - }, - { - "epoch": 0.9218464041211957, - "grad_norm": 1.4545884396137985, - "learning_rate": 3.181999315004946e-08, - "loss": 1.0909, - "step": 6800 - }, - { - "epoch": 0.9219819697688606, - "grad_norm": 1.5624784181603009, - "learning_rate": 3.171019012682952e-08, - "loss": 1.1426, - "step": 6801 - }, - { - "epoch": 0.9221175354165254, - "grad_norm": 2.148027098447075, - "learning_rate": 3.160057383065606e-08, - "loss": 1.1017, - "step": 6802 - }, - { - "epoch": 0.9222531010641903, - "grad_norm": 1.680919178352029, - "learning_rate": 3.149114428266786e-08, - "loss": 1.1381, - "step": 6803 - }, - { - "epoch": 0.9223886667118553, - "grad_norm": 3.2689270501863383, - "learning_rate": 3.138190150396758e-08, - "loss": 1.1152, - "step": 6804 - }, - { - "epoch": 0.9225242323595201, - "grad_norm": 1.7159018500201586, - "learning_rate": 3.1272845515621816e-08, - "loss": 1.1626, - "step": 6805 - }, - { - "epoch": 0.922659798007185, - "grad_norm": 1.4978633830360233, - "learning_rate": 3.116397633866108e-08, - "loss": 1.1306, - "step": 6806 - }, - { - "epoch": 0.9227953636548498, - "grad_norm": 2.641323732681733, - "learning_rate": 3.1055293994080024e-08, - "loss": 1.1408, - "step": 6807 - }, - { - "epoch": 0.9229309293025147, - "grad_norm": 1.9875510445856372, - "learning_rate": 3.09467985028371e-08, - "loss": 1.1744, - "step": 6808 - }, - { - "epoch": 0.9230664949501797, - "grad_norm": 1.6533342296447722, - "learning_rate": 3.08384898858548e-08, - "loss": 1.1296, - "step": 6809 - }, - { - "epoch": 0.9232020605978445, - "grad_norm": 1.849303513555864, - "learning_rate": 3.073036816401975e-08, - "loss": 1.1224, - "step": 6810 - }, - { - "epoch": 0.9233376262455094, - "grad_norm": 1.4655070424663963, - "learning_rate": 3.062243335818215e-08, - "loss": 1.1015, - "step": 6811 - }, - { - "epoch": 0.9234731918931742, - "grad_norm": 1.634029525598839, - "learning_rate": 3.051468548915648e-08, - "loss": 1.1259, - "step": 6812 - }, - { - "epoch": 0.9236087575408392, - "grad_norm": 1.477225045719399, - "learning_rate": 3.04071245777211e-08, - "loss": 1.1211, - "step": 6813 - }, - { - "epoch": 0.9237443231885041, - "grad_norm": 1.4288388723345116, - "learning_rate": 3.0299750644618205e-08, - "loss": 1.093, - "step": 6814 - }, - { - "epoch": 0.9238798888361689, - "grad_norm": 1.5997482608252016, - "learning_rate": 3.019256371055423e-08, - "loss": 1.1282, - "step": 6815 - }, - { - "epoch": 0.9240154544838338, - "grad_norm": 1.8060546247898834, - "learning_rate": 3.0085563796198866e-08, - "loss": 1.1571, - "step": 6816 - }, - { - "epoch": 0.9241510201314986, - "grad_norm": 1.6961291626064592, - "learning_rate": 2.997875092218671e-08, - "loss": 1.1118, - "step": 6817 - }, - { - "epoch": 0.9242865857791636, - "grad_norm": 1.636482620045243, - "learning_rate": 2.987212510911541e-08, - "loss": 1.1144, - "step": 6818 - }, - { - "epoch": 0.9244221514268285, - "grad_norm": 1.5105339516960778, - "learning_rate": 2.976568637754717e-08, - "loss": 1.112, - "step": 6819 - }, - { - "epoch": 0.9245577170744933, - "grad_norm": 1.6565107763605547, - "learning_rate": 2.9659434748007696e-08, - "loss": 1.0509, - "step": 6820 - }, - { - "epoch": 0.9246932827221582, - "grad_norm": 1.4157411514046387, - "learning_rate": 2.9553370240986808e-08, - "loss": 1.0991, - "step": 6821 - }, - { - "epoch": 0.924828848369823, - "grad_norm": 1.745320662701447, - "learning_rate": 2.944749287693815e-08, - "loss": 1.1286, - "step": 6822 - }, - { - "epoch": 0.924964414017488, - "grad_norm": 1.4368208619665475, - "learning_rate": 2.9341802676279505e-08, - "loss": 1.1242, - "step": 6823 - }, - { - "epoch": 0.9250999796651529, - "grad_norm": 1.5232438748581478, - "learning_rate": 2.923629965939234e-08, - "loss": 1.1082, - "step": 6824 - }, - { - "epoch": 0.9252355453128177, - "grad_norm": 1.5583921286989542, - "learning_rate": 2.913098384662205e-08, - "loss": 1.0806, - "step": 6825 - }, - { - "epoch": 0.9253711109604826, - "grad_norm": 1.8432630004862265, - "learning_rate": 2.902585525827783e-08, - "loss": 1.1239, - "step": 6826 - }, - { - "epoch": 0.9255066766081474, - "grad_norm": 1.6177331432956472, - "learning_rate": 2.8920913914633138e-08, - "loss": 1.118, - "step": 6827 - }, - { - "epoch": 0.9256422422558124, - "grad_norm": 1.60434720254282, - "learning_rate": 2.881615983592489e-08, - "loss": 1.1316, - "step": 6828 - }, - { - "epoch": 0.9257778079034773, - "grad_norm": 1.8222456556668976, - "learning_rate": 2.8711593042354154e-08, - "loss": 1.1048, - "step": 6829 - }, - { - "epoch": 0.9259133735511421, - "grad_norm": 1.5994195524589325, - "learning_rate": 2.8607213554086018e-08, - "loss": 1.1157, - "step": 6830 - }, - { - "epoch": 0.926048939198807, - "grad_norm": 1.638413625477002, - "learning_rate": 2.8503021391248718e-08, - "loss": 1.1348, - "step": 6831 - }, - { - "epoch": 0.9261845048464719, - "grad_norm": 1.6321007229891846, - "learning_rate": 2.839901657393551e-08, - "loss": 1.1535, - "step": 6832 - }, - { - "epoch": 0.9263200704941368, - "grad_norm": 1.4286735246647642, - "learning_rate": 2.829519912220235e-08, - "loss": 1.1073, - "step": 6833 - }, - { - "epoch": 0.9264556361418017, - "grad_norm": 1.6355812828921477, - "learning_rate": 2.819156905607012e-08, - "loss": 1.1676, - "step": 6834 - }, - { - "epoch": 0.9265912017894665, - "grad_norm": 1.689521513639891, - "learning_rate": 2.8088126395522495e-08, - "loss": 1.114, - "step": 6835 - }, - { - "epoch": 0.9267267674371314, - "grad_norm": 2.1147291582176746, - "learning_rate": 2.7984871160508185e-08, - "loss": 1.0799, - "step": 6836 - }, - { - "epoch": 0.9268623330847963, - "grad_norm": 1.7594243840013313, - "learning_rate": 2.7881803370938595e-08, - "loss": 1.1069, - "step": 6837 - }, - { - "epoch": 0.9269978987324612, - "grad_norm": 2.036067408633303, - "learning_rate": 2.777892304669005e-08, - "loss": 1.1386, - "step": 6838 - }, - { - "epoch": 0.9271334643801261, - "grad_norm": 1.7587187651146927, - "learning_rate": 2.7676230207601793e-08, - "loss": 1.1702, - "step": 6839 - }, - { - "epoch": 0.9272690300277909, - "grad_norm": 7.328543404608752, - "learning_rate": 2.757372487347753e-08, - "loss": 1.1124, - "step": 6840 - }, - { - "epoch": 0.9274045956754559, - "grad_norm": 1.682606575809816, - "learning_rate": 2.747140706408446e-08, - "loss": 1.1414, - "step": 6841 - }, - { - "epoch": 0.9275401613231207, - "grad_norm": 2.774167565569425, - "learning_rate": 2.7369276799154017e-08, - "loss": 1.1374, - "step": 6842 - }, - { - "epoch": 0.9276757269707856, - "grad_norm": 1.5950815517922945, - "learning_rate": 2.7267334098381e-08, - "loss": 1.1141, - "step": 6843 - }, - { - "epoch": 0.9278112926184505, - "grad_norm": 1.6738585479040335, - "learning_rate": 2.7165578981424354e-08, - "loss": 1.148, - "step": 6844 - }, - { - "epoch": 0.9279468582661153, - "grad_norm": 1.8693509903456538, - "learning_rate": 2.70640114679066e-08, - "loss": 1.1221, - "step": 6845 - }, - { - "epoch": 0.9280824239137803, - "grad_norm": 2.886684785742913, - "learning_rate": 2.696263157741441e-08, - "loss": 1.1559, - "step": 6846 - }, - { - "epoch": 0.9282179895614451, - "grad_norm": 1.5470843795694595, - "learning_rate": 2.6861439329498026e-08, - "loss": 1.1327, - "step": 6847 - }, - { - "epoch": 0.92835355520911, - "grad_norm": 1.447405442538674, - "learning_rate": 2.6760434743671623e-08, - "loss": 1.1196, - "step": 6848 - }, - { - "epoch": 0.9284891208567749, - "grad_norm": 2.430337625797498, - "learning_rate": 2.665961783941306e-08, - "loss": 1.1377, - "step": 6849 - }, - { - "epoch": 0.9286246865044397, - "grad_norm": 1.5863078858033046, - "learning_rate": 2.6558988636164127e-08, - "loss": 1.1268, - "step": 6850 - }, - { - "epoch": 0.9287602521521047, - "grad_norm": 1.6888681514019686, - "learning_rate": 2.645854715333029e-08, - "loss": 1.1449, - "step": 6851 - }, - { - "epoch": 0.9288958177997695, - "grad_norm": 1.5652525715059276, - "learning_rate": 2.6358293410281062e-08, - "loss": 1.1231, - "step": 6852 - }, - { - "epoch": 0.9290313834474344, - "grad_norm": 1.3425517431882683, - "learning_rate": 2.6258227426349533e-08, - "loss": 1.1221, - "step": 6853 - }, - { - "epoch": 0.9291669490950993, - "grad_norm": 1.5413801893099683, - "learning_rate": 2.6158349220832375e-08, - "loss": 1.1158, - "step": 6854 - }, - { - "epoch": 0.9293025147427642, - "grad_norm": 1.608722259612879, - "learning_rate": 2.605865881299074e-08, - "loss": 1.0877, - "step": 6855 - }, - { - "epoch": 0.9294380803904291, - "grad_norm": 2.288984903035039, - "learning_rate": 2.5959156222048805e-08, - "loss": 1.1438, - "step": 6856 - }, - { - "epoch": 0.9295736460380939, - "grad_norm": 2.1255699451864762, - "learning_rate": 2.585984146719511e-08, - "loss": 1.1132, - "step": 6857 - }, - { - "epoch": 0.9297092116857588, - "grad_norm": 2.307623303092103, - "learning_rate": 2.5760714567581554e-08, - "loss": 1.147, - "step": 6858 - }, - { - "epoch": 0.9298447773334237, - "grad_norm": 1.621981968585142, - "learning_rate": 2.566177554232396e-08, - "loss": 1.0997, - "step": 6859 - }, - { - "epoch": 0.9299803429810886, - "grad_norm": 1.8398464172735132, - "learning_rate": 2.5563024410501954e-08, - "loss": 1.1323, - "step": 6860 - }, - { - "epoch": 0.9301159086287535, - "grad_norm": 2.020775364850191, - "learning_rate": 2.546446119115908e-08, - "loss": 1.1548, - "step": 6861 - }, - { - "epoch": 0.9302514742764184, - "grad_norm": 8.45707872653921, - "learning_rate": 2.5366085903302247e-08, - "loss": 1.1592, - "step": 6862 - }, - { - "epoch": 0.9303870399240832, - "grad_norm": 1.4773130824265868, - "learning_rate": 2.5267898565902503e-08, - "loss": 1.0809, - "step": 6863 - }, - { - "epoch": 0.9305226055717482, - "grad_norm": 1.522975134675905, - "learning_rate": 2.5169899197894363e-08, - "loss": 1.1032, - "step": 6864 - }, - { - "epoch": 0.930658171219413, - "grad_norm": 1.423814298483085, - "learning_rate": 2.507208781817638e-08, - "loss": 1.135, - "step": 6865 - }, - { - "epoch": 0.9307937368670779, - "grad_norm": 1.8082580080641621, - "learning_rate": 2.4974464445610688e-08, - "loss": 1.1338, - "step": 6866 - }, - { - "epoch": 0.9309293025147428, - "grad_norm": 1.5391555850519485, - "learning_rate": 2.4877029099023116e-08, - "loss": 1.1127, - "step": 6867 - }, - { - "epoch": 0.9310648681624076, - "grad_norm": 1.8537119215160822, - "learning_rate": 2.4779781797203303e-08, - "loss": 1.1228, - "step": 6868 - }, - { - "epoch": 0.9312004338100726, - "grad_norm": 1.4158912025470152, - "learning_rate": 2.468272255890469e-08, - "loss": 1.0864, - "step": 6869 - }, - { - "epoch": 0.9313359994577374, - "grad_norm": 1.6358665742437404, - "learning_rate": 2.4585851402844305e-08, - "loss": 1.1055, - "step": 6870 - }, - { - "epoch": 0.9314715651054023, - "grad_norm": 1.8459475676037305, - "learning_rate": 2.4489168347703093e-08, - "loss": 1.1356, - "step": 6871 - }, - { - "epoch": 0.9316071307530672, - "grad_norm": 2.005949064511714, - "learning_rate": 2.4392673412125476e-08, - "loss": 1.1204, - "step": 6872 - }, - { - "epoch": 0.931742696400732, - "grad_norm": 1.7113416633950895, - "learning_rate": 2.429636661472001e-08, - "loss": 1.1178, - "step": 6873 - }, - { - "epoch": 0.931878262048397, - "grad_norm": 1.600431983673069, - "learning_rate": 2.4200247974058175e-08, - "loss": 1.1163, - "step": 6874 - }, - { - "epoch": 0.9320138276960618, - "grad_norm": 1.4908099862583746, - "learning_rate": 2.4104317508676363e-08, - "loss": 1.1376, - "step": 6875 - }, - { - "epoch": 0.9321493933437267, - "grad_norm": 1.695193115508233, - "learning_rate": 2.4008575237073335e-08, - "loss": 1.1364, - "step": 6876 - }, - { - "epoch": 0.9322849589913916, - "grad_norm": 2.1605756427016853, - "learning_rate": 2.3913021177712876e-08, - "loss": 1.1329, - "step": 6877 - }, - { - "epoch": 0.9324205246390564, - "grad_norm": 2.1616886567957994, - "learning_rate": 2.3817655349021247e-08, - "loss": 1.1315, - "step": 6878 - }, - { - "epoch": 0.9325560902867214, - "grad_norm": 1.8165654892241727, - "learning_rate": 2.3722477769389515e-08, - "loss": 1.114, - "step": 6879 - }, - { - "epoch": 0.9326916559343862, - "grad_norm": 1.646726270540223, - "learning_rate": 2.362748845717155e-08, - "loss": 1.1305, - "step": 6880 - }, - { - "epoch": 0.9328272215820511, - "grad_norm": 2.359973408379461, - "learning_rate": 2.3532687430685373e-08, - "loss": 1.091, - "step": 6881 - }, - { - "epoch": 0.932962787229716, - "grad_norm": 2.2144609703574, - "learning_rate": 2.3438074708212795e-08, - "loss": 1.1284, - "step": 6882 - }, - { - "epoch": 0.9330983528773809, - "grad_norm": 1.4799906001551226, - "learning_rate": 2.3343650307998896e-08, - "loss": 1.0994, - "step": 6883 - }, - { - "epoch": 0.9332339185250458, - "grad_norm": 1.7118231466801366, - "learning_rate": 2.3249414248252775e-08, - "loss": 1.165, - "step": 6884 - }, - { - "epoch": 0.9333694841727106, - "grad_norm": 2.5494578071929563, - "learning_rate": 2.3155366547147115e-08, - "loss": 1.1343, - "step": 6885 - }, - { - "epoch": 0.9335050498203755, - "grad_norm": 1.512774227766172, - "learning_rate": 2.30615072228183e-08, - "loss": 1.1041, - "step": 6886 - }, - { - "epoch": 0.9336406154680404, - "grad_norm": 1.6596160337896253, - "learning_rate": 2.2967836293366405e-08, - "loss": 1.0703, - "step": 6887 - }, - { - "epoch": 0.9337761811157053, - "grad_norm": 1.6712557712755436, - "learning_rate": 2.287435377685498e-08, - "loss": 1.13, - "step": 6888 - }, - { - "epoch": 0.9339117467633702, - "grad_norm": 2.9449259168928275, - "learning_rate": 2.2781059691311498e-08, - "loss": 1.1441, - "step": 6889 - }, - { - "epoch": 0.934047312411035, - "grad_norm": 1.7447365669804742, - "learning_rate": 2.268795405472701e-08, - "loss": 1.1434, - "step": 6890 - }, - { - "epoch": 0.9341828780586999, - "grad_norm": 2.0289538258635775, - "learning_rate": 2.259503688505593e-08, - "loss": 1.1486, - "step": 6891 - }, - { - "epoch": 0.9343184437063649, - "grad_norm": 1.7106501023448455, - "learning_rate": 2.2502308200217037e-08, - "loss": 1.1031, - "step": 6892 - }, - { - "epoch": 0.9344540093540297, - "grad_norm": 1.5278767753801876, - "learning_rate": 2.2409768018092024e-08, - "loss": 1.1064, - "step": 6893 - }, - { - "epoch": 0.9345895750016946, - "grad_norm": 1.603858760241135, - "learning_rate": 2.231741635652673e-08, - "loss": 1.0992, - "step": 6894 - }, - { - "epoch": 0.9347251406493594, - "grad_norm": 1.5649204003960822, - "learning_rate": 2.222525323333013e-08, - "loss": 1.0969, - "step": 6895 - }, - { - "epoch": 0.9348607062970243, - "grad_norm": 3.884934791868871, - "learning_rate": 2.2133278666275567e-08, - "loss": 1.1311, - "step": 6896 - }, - { - "epoch": 0.9349962719446893, - "grad_norm": 1.9786188918362608, - "learning_rate": 2.2041492673099182e-08, - "loss": 1.1389, - "step": 6897 - }, - { - "epoch": 0.9351318375923541, - "grad_norm": 1.7392238911593925, - "learning_rate": 2.1949895271501596e-08, - "loss": 1.0899, - "step": 6898 - }, - { - "epoch": 0.935267403240019, - "grad_norm": 1.5506541021135933, - "learning_rate": 2.1858486479146344e-08, - "loss": 1.1179, - "step": 6899 - }, - { - "epoch": 0.9354029688876838, - "grad_norm": 1.9640767209978547, - "learning_rate": 2.1767266313661102e-08, - "loss": 1.1342, - "step": 6900 - }, - { - "epoch": 0.9355385345353487, - "grad_norm": 1.7957256088385278, - "learning_rate": 2.1676234792636693e-08, - "loss": 1.1289, - "step": 6901 - }, - { - "epoch": 0.9356741001830137, - "grad_norm": 1.4143884645959615, - "learning_rate": 2.1585391933628073e-08, - "loss": 1.093, - "step": 6902 - }, - { - "epoch": 0.9358096658306785, - "grad_norm": 1.9548100079438269, - "learning_rate": 2.1494737754153558e-08, - "loss": 1.1602, - "step": 6903 - }, - { - "epoch": 0.9359452314783434, - "grad_norm": 1.5401514396780907, - "learning_rate": 2.1404272271694945e-08, - "loss": 1.1401, - "step": 6904 - }, - { - "epoch": 0.9360807971260082, - "grad_norm": 1.700248843676753, - "learning_rate": 2.1313995503697833e-08, - "loss": 1.1304, - "step": 6905 - }, - { - "epoch": 0.9362163627736731, - "grad_norm": 1.3904635692365201, - "learning_rate": 2.122390746757141e-08, - "loss": 1.1307, - "step": 6906 - }, - { - "epoch": 0.9363519284213381, - "grad_norm": 1.8917772963462942, - "learning_rate": 2.1134008180688445e-08, - "loss": 1.1148, - "step": 6907 - }, - { - "epoch": 0.9364874940690029, - "grad_norm": 2.0561125405443206, - "learning_rate": 2.1044297660385292e-08, - "loss": 1.1402, - "step": 6908 - }, - { - "epoch": 0.9366230597166678, - "grad_norm": 1.4956986766305207, - "learning_rate": 2.0954775923961997e-08, - "loss": 1.1191, - "step": 6909 - }, - { - "epoch": 0.9367586253643326, - "grad_norm": 1.6868920760843293, - "learning_rate": 2.086544298868198e-08, - "loss": 1.1484, - "step": 6910 - }, - { - "epoch": 0.9368941910119976, - "grad_norm": 1.939613091674005, - "learning_rate": 2.077629887177257e-08, - "loss": 1.1569, - "step": 6911 - }, - { - "epoch": 0.9370297566596625, - "grad_norm": 1.5096455755822666, - "learning_rate": 2.0687343590424232e-08, - "loss": 1.0928, - "step": 6912 - }, - { - "epoch": 0.9371653223073273, - "grad_norm": 1.6314963218453522, - "learning_rate": 2.0598577161791587e-08, - "loss": 1.1127, - "step": 6913 - }, - { - "epoch": 0.9373008879549922, - "grad_norm": 2.7060184102772964, - "learning_rate": 2.050999960299249e-08, - "loss": 1.1593, - "step": 6914 - }, - { - "epoch": 0.937436453602657, - "grad_norm": 1.797217227441614, - "learning_rate": 2.0421610931108168e-08, - "loss": 1.1814, - "step": 6915 - }, - { - "epoch": 0.937572019250322, - "grad_norm": 1.6543921223683837, - "learning_rate": 2.033341116318399e-08, - "loss": 1.1058, - "step": 6916 - }, - { - "epoch": 0.9377075848979869, - "grad_norm": 1.8599213769151823, - "learning_rate": 2.0245400316228344e-08, - "loss": 1.1363, - "step": 6917 - }, - { - "epoch": 0.9378431505456517, - "grad_norm": 1.6185017304008251, - "learning_rate": 2.015757840721366e-08, - "loss": 1.1175, - "step": 6918 - }, - { - "epoch": 0.9379787161933166, - "grad_norm": 4.115561270786498, - "learning_rate": 2.006994545307539e-08, - "loss": 1.105, - "step": 6919 - }, - { - "epoch": 0.9381142818409814, - "grad_norm": 2.449940974897843, - "learning_rate": 1.998250147071323e-08, - "loss": 1.1078, - "step": 6920 - }, - { - "epoch": 0.9382498474886464, - "grad_norm": 2.5762316343508314, - "learning_rate": 1.9895246476989703e-08, - "loss": 1.1293, - "step": 6921 - }, - { - "epoch": 0.9383854131363113, - "grad_norm": 1.5649930771073488, - "learning_rate": 1.9808180488731564e-08, - "loss": 1.1367, - "step": 6922 - }, - { - "epoch": 0.9385209787839761, - "grad_norm": 2.0288761922376595, - "learning_rate": 1.9721303522728605e-08, - "loss": 1.1469, - "step": 6923 - }, - { - "epoch": 0.938656544431641, - "grad_norm": 1.5312586474589398, - "learning_rate": 1.9634615595734316e-08, - "loss": 1.1309, - "step": 6924 - }, - { - "epoch": 0.9387921100793059, - "grad_norm": 1.4396189867975817, - "learning_rate": 1.954811672446599e-08, - "loss": 1.1139, - "step": 6925 - }, - { - "epoch": 0.9389276757269708, - "grad_norm": 2.781162420781618, - "learning_rate": 1.9461806925604064e-08, - "loss": 1.1456, - "step": 6926 - }, - { - "epoch": 0.9390632413746357, - "grad_norm": 1.7084542387278123, - "learning_rate": 1.9375686215792886e-08, - "loss": 1.089, - "step": 6927 - }, - { - "epoch": 0.9391988070223005, - "grad_norm": 1.6301217211637664, - "learning_rate": 1.9289754611639954e-08, - "loss": 1.1247, - "step": 6928 - }, - { - "epoch": 0.9393343726699654, - "grad_norm": 2.1881348703900625, - "learning_rate": 1.9204012129716672e-08, - "loss": 1.1086, - "step": 6929 - }, - { - "epoch": 0.9394699383176303, - "grad_norm": 2.0637641662419264, - "learning_rate": 1.911845878655749e-08, - "loss": 1.1188, - "step": 6930 - }, - { - "epoch": 0.9396055039652952, - "grad_norm": 1.3888443120808518, - "learning_rate": 1.9033094598661204e-08, - "loss": 1.0902, - "step": 6931 - }, - { - "epoch": 0.9397410696129601, - "grad_norm": 1.7014554261257817, - "learning_rate": 1.89479195824892e-08, - "loss": 1.1303, - "step": 6932 - }, - { - "epoch": 0.9398766352606249, - "grad_norm": 2.7352697108917043, - "learning_rate": 1.8862933754467013e-08, - "loss": 1.1665, - "step": 6933 - }, - { - "epoch": 0.9400122009082899, - "grad_norm": 1.5174995413619772, - "learning_rate": 1.8778137130983307e-08, - "loss": 1.1348, - "step": 6934 - }, - { - "epoch": 0.9401477665559547, - "grad_norm": 2.2347833559009027, - "learning_rate": 1.8693529728390667e-08, - "loss": 1.1635, - "step": 6935 - }, - { - "epoch": 0.9402833322036196, - "grad_norm": 4.080928535063441, - "learning_rate": 1.860911156300482e-08, - "loss": 1.0835, - "step": 6936 - }, - { - "epoch": 0.9404188978512845, - "grad_norm": 4.595009910780769, - "learning_rate": 1.8524882651105188e-08, - "loss": 1.0638, - "step": 6937 - }, - { - "epoch": 0.9405544634989493, - "grad_norm": 1.6484230087947616, - "learning_rate": 1.844084300893456e-08, - "loss": 1.0847, - "step": 6938 - }, - { - "epoch": 0.9406900291466143, - "grad_norm": 1.891204761805782, - "learning_rate": 1.835699265269963e-08, - "loss": 1.1291, - "step": 6939 - }, - { - "epoch": 0.9408255947942791, - "grad_norm": 2.1444987737733614, - "learning_rate": 1.827333159856981e-08, - "loss": 1.1543, - "step": 6940 - }, - { - "epoch": 0.940961160441944, - "grad_norm": 1.9801879201372212, - "learning_rate": 1.8189859862678848e-08, - "loss": 1.1511, - "step": 6941 - }, - { - "epoch": 0.9410967260896089, - "grad_norm": 1.5178660464089382, - "learning_rate": 1.8106577461123428e-08, - "loss": 1.0982, - "step": 6942 - }, - { - "epoch": 0.9412322917372737, - "grad_norm": 3.0225626309540576, - "learning_rate": 1.802348440996393e-08, - "loss": 1.1179, - "step": 6943 - }, - { - "epoch": 0.9413678573849387, - "grad_norm": 2.4083727305932485, - "learning_rate": 1.794058072522431e-08, - "loss": 1.1289, - "step": 6944 - }, - { - "epoch": 0.9415034230326036, - "grad_norm": 1.6656442020042423, - "learning_rate": 1.7857866422891665e-08, - "loss": 1.1493, - "step": 6945 - }, - { - "epoch": 0.9416389886802684, - "grad_norm": 1.4908323405848853, - "learning_rate": 1.777534151891702e-08, - "loss": 1.1445, - "step": 6946 - }, - { - "epoch": 0.9417745543279333, - "grad_norm": 1.5463777066632447, - "learning_rate": 1.7693006029214418e-08, - "loss": 1.119, - "step": 6947 - }, - { - "epoch": 0.9419101199755981, - "grad_norm": 1.533462198788721, - "learning_rate": 1.7610859969661827e-08, - "loss": 1.099, - "step": 6948 - }, - { - "epoch": 0.9420456856232631, - "grad_norm": 2.8519730681738906, - "learning_rate": 1.7528903356100466e-08, - "loss": 1.1045, - "step": 6949 - }, - { - "epoch": 0.942181251270928, - "grad_norm": 1.608257416663336, - "learning_rate": 1.74471362043348e-08, - "loss": 1.1208, - "step": 6950 - }, - { - "epoch": 0.9423168169185928, - "grad_norm": 1.901796756834136, - "learning_rate": 1.7365558530133218e-08, - "loss": 1.1041, - "step": 6951 - }, - { - "epoch": 0.9424523825662577, - "grad_norm": 1.4729875696270804, - "learning_rate": 1.7284170349227246e-08, - "loss": 1.1117, - "step": 6952 - }, - { - "epoch": 0.9425879482139226, - "grad_norm": 1.5625233190109538, - "learning_rate": 1.7202971677311774e-08, - "loss": 1.072, - "step": 6953 - }, - { - "epoch": 0.9427235138615875, - "grad_norm": 1.6385066947084346, - "learning_rate": 1.712196253004572e-08, - "loss": 1.1512, - "step": 6954 - }, - { - "epoch": 0.9428590795092524, - "grad_norm": 2.1042447321625466, - "learning_rate": 1.704114292305059e-08, - "loss": 1.1363, - "step": 6955 - }, - { - "epoch": 0.9429946451569172, - "grad_norm": 1.4335103444210129, - "learning_rate": 1.6960512871912246e-08, - "loss": 1.1006, - "step": 6956 - }, - { - "epoch": 0.9431302108045821, - "grad_norm": 1.949361592202849, - "learning_rate": 1.6880072392179146e-08, - "loss": 1.1136, - "step": 6957 - }, - { - "epoch": 0.943265776452247, - "grad_norm": 1.8686321771259207, - "learning_rate": 1.6799821499363987e-08, - "loss": 1.1293, - "step": 6958 - }, - { - "epoch": 0.9434013420999119, - "grad_norm": 1.5042476473841653, - "learning_rate": 1.671976020894228e-08, - "loss": 1.1266, - "step": 6959 - }, - { - "epoch": 0.9435369077475768, - "grad_norm": 1.4545882622975386, - "learning_rate": 1.663988853635323e-08, - "loss": 1.1134, - "step": 6960 - }, - { - "epoch": 0.9436724733952416, - "grad_norm": 1.6075648476738564, - "learning_rate": 1.6560206496999517e-08, - "loss": 1.1516, - "step": 6961 - }, - { - "epoch": 0.9438080390429066, - "grad_norm": 2.4707797772849758, - "learning_rate": 1.6480714106247186e-08, - "loss": 1.1035, - "step": 6962 - }, - { - "epoch": 0.9439436046905714, - "grad_norm": 1.5349305209211468, - "learning_rate": 1.6401411379425746e-08, - "loss": 1.1482, - "step": 6963 - }, - { - "epoch": 0.9440791703382363, - "grad_norm": 3.5192745565454544, - "learning_rate": 1.6322298331827967e-08, - "loss": 1.0724, - "step": 6964 - }, - { - "epoch": 0.9442147359859012, - "grad_norm": 1.6381455956064097, - "learning_rate": 1.624337497871042e-08, - "loss": 1.165, - "step": 6965 - }, - { - "epoch": 0.944350301633566, - "grad_norm": 1.8208935821051806, - "learning_rate": 1.6164641335292606e-08, - "loss": 1.0733, - "step": 6966 - }, - { - "epoch": 0.944485867281231, - "grad_norm": 1.7882265840482923, - "learning_rate": 1.6086097416757816e-08, - "loss": 1.1647, - "step": 6967 - }, - { - "epoch": 0.9446214329288958, - "grad_norm": 3.9461249045427995, - "learning_rate": 1.60077432382526e-08, - "loss": 1.0858, - "step": 6968 - }, - { - "epoch": 0.9447569985765607, - "grad_norm": 1.5157451771493287, - "learning_rate": 1.5929578814886878e-08, - "loss": 1.0931, - "step": 6969 - }, - { - "epoch": 0.9448925642242256, - "grad_norm": 1.418917649438975, - "learning_rate": 1.5851604161734256e-08, - "loss": 1.129, - "step": 6970 - }, - { - "epoch": 0.9450281298718904, - "grad_norm": 1.7160193541228337, - "learning_rate": 1.5773819293831148e-08, - "loss": 1.1086, - "step": 6971 - }, - { - "epoch": 0.9451636955195554, - "grad_norm": 2.598892821167392, - "learning_rate": 1.5696224226178224e-08, - "loss": 1.1071, - "step": 6972 - }, - { - "epoch": 0.9452992611672202, - "grad_norm": 4.10186290263118, - "learning_rate": 1.5618818973738625e-08, - "loss": 1.1671, - "step": 6973 - }, - { - "epoch": 0.9454348268148851, - "grad_norm": 1.9416681296511424, - "learning_rate": 1.554160355143974e-08, - "loss": 1.0822, - "step": 6974 - }, - { - "epoch": 0.94557039246255, - "grad_norm": 1.9706453159419404, - "learning_rate": 1.5464577974171554e-08, - "loss": 1.1002, - "step": 6975 - }, - { - "epoch": 0.9457059581102149, - "grad_norm": 1.4920111976838892, - "learning_rate": 1.5387742256788294e-08, - "loss": 1.1616, - "step": 6976 - }, - { - "epoch": 0.9458415237578798, - "grad_norm": 1.4232597052134524, - "learning_rate": 1.531109641410666e-08, - "loss": 1.1262, - "step": 6977 - }, - { - "epoch": 0.9459770894055446, - "grad_norm": 1.520322017523513, - "learning_rate": 1.523464046090761e-08, - "loss": 1.1008, - "step": 6978 - }, - { - "epoch": 0.9461126550532095, - "grad_norm": 1.40208941147488, - "learning_rate": 1.5158374411934793e-08, - "loss": 1.1034, - "step": 6979 - }, - { - "epoch": 0.9462482207008744, - "grad_norm": 1.4798192546542372, - "learning_rate": 1.5082298281895666e-08, - "loss": 1.0933, - "step": 6980 - }, - { - "epoch": 0.9463837863485393, - "grad_norm": 1.5488945366744495, - "learning_rate": 1.500641208546072e-08, - "loss": 1.0924, - "step": 6981 - }, - { - "epoch": 0.9465193519962042, - "grad_norm": 2.5382371368791476, - "learning_rate": 1.493071583726424e-08, - "loss": 1.1301, - "step": 6982 - }, - { - "epoch": 0.946654917643869, - "grad_norm": 1.9912244011108433, - "learning_rate": 1.4855209551903559e-08, - "loss": 1.1357, - "step": 6983 - }, - { - "epoch": 0.9467904832915339, - "grad_norm": 2.419854062804998, - "learning_rate": 1.4779893243939356e-08, - "loss": 1.1251, - "step": 6984 - }, - { - "epoch": 0.9469260489391989, - "grad_norm": 1.8277314370351745, - "learning_rate": 1.4704766927895907e-08, - "loss": 1.1586, - "step": 6985 - }, - { - "epoch": 0.9470616145868637, - "grad_norm": 1.550716254316847, - "learning_rate": 1.462983061826084e-08, - "loss": 1.0853, - "step": 6986 - }, - { - "epoch": 0.9471971802345286, - "grad_norm": 1.4670054580694132, - "learning_rate": 1.4555084329484713e-08, - "loss": 1.1384, - "step": 6987 - }, - { - "epoch": 0.9473327458821934, - "grad_norm": 1.8156367834146792, - "learning_rate": 1.4480528075982102e-08, - "loss": 1.1317, - "step": 6988 - }, - { - "epoch": 0.9474683115298583, - "grad_norm": 1.565948072820113, - "learning_rate": 1.4406161872130396e-08, - "loss": 1.1455, - "step": 6989 - }, - { - "epoch": 0.9476038771775233, - "grad_norm": 1.93530425929698, - "learning_rate": 1.4331985732270457e-08, - "loss": 1.1336, - "step": 6990 - }, - { - "epoch": 0.9477394428251881, - "grad_norm": 1.7384357313483854, - "learning_rate": 1.4257999670706844e-08, - "loss": 1.1466, - "step": 6991 - }, - { - "epoch": 0.947875008472853, - "grad_norm": 1.4668070126874586, - "learning_rate": 1.418420370170681e-08, - "loss": 1.0818, - "step": 6992 - }, - { - "epoch": 0.9480105741205178, - "grad_norm": 1.4765431492607977, - "learning_rate": 1.4110597839501748e-08, - "loss": 1.1269, - "step": 6993 - }, - { - "epoch": 0.9481461397681827, - "grad_norm": 1.9986061847297891, - "learning_rate": 1.4037182098285639e-08, - "loss": 1.1334, - "step": 6994 - }, - { - "epoch": 0.9482817054158477, - "grad_norm": 1.9552551526949897, - "learning_rate": 1.3963956492216377e-08, - "loss": 1.1559, - "step": 6995 - }, - { - "epoch": 0.9484172710635125, - "grad_norm": 1.6178021943041743, - "learning_rate": 1.389092103541456e-08, - "loss": 1.1132, - "step": 6996 - }, - { - "epoch": 0.9485528367111774, - "grad_norm": 1.719740347235922, - "learning_rate": 1.3818075741965029e-08, - "loss": 1.1453, - "step": 6997 - }, - { - "epoch": 0.9486884023588422, - "grad_norm": 1.4432484057948856, - "learning_rate": 1.3745420625914995e-08, - "loss": 1.1107, - "step": 6998 - }, - { - "epoch": 0.9488239680065071, - "grad_norm": 1.685466209092293, - "learning_rate": 1.3672955701275579e-08, - "loss": 1.1244, - "step": 6999 - }, - { - "epoch": 0.9489595336541721, - "grad_norm": 1.4737824858413833, - "learning_rate": 1.360068098202105e-08, - "loss": 1.0703, - "step": 7000 - }, - { - "epoch": 0.9490950993018369, - "grad_norm": 1.682379012966888, - "learning_rate": 1.3528596482089039e-08, - "loss": 1.0797, - "step": 7001 - }, - { - "epoch": 0.9492306649495018, - "grad_norm": 1.409310652916507, - "learning_rate": 1.3456702215380534e-08, - "loss": 1.0984, - "step": 7002 - }, - { - "epoch": 0.9493662305971666, - "grad_norm": 1.6126173864908036, - "learning_rate": 1.3384998195759667e-08, - "loss": 1.122, - "step": 7003 - }, - { - "epoch": 0.9495017962448316, - "grad_norm": 1.6882387965699783, - "learning_rate": 1.3313484437053935e-08, - "loss": 1.1561, - "step": 7004 - }, - { - "epoch": 0.9496373618924965, - "grad_norm": 1.4902724635447342, - "learning_rate": 1.3242160953054415e-08, - "loss": 1.1106, - "step": 7005 - }, - { - "epoch": 0.9497729275401613, - "grad_norm": 1.8774091094312704, - "learning_rate": 1.3171027757515107e-08, - "loss": 1.1282, - "step": 7006 - }, - { - "epoch": 0.9499084931878262, - "grad_norm": 1.8240364686125745, - "learning_rate": 1.3100084864153593e-08, - "loss": 1.1566, - "step": 7007 - }, - { - "epoch": 0.950044058835491, - "grad_norm": 1.4623987849287614, - "learning_rate": 1.3029332286650596e-08, - "loss": 1.1101, - "step": 7008 - }, - { - "epoch": 0.950179624483156, - "grad_norm": 1.5623855965835967, - "learning_rate": 1.295877003865009e-08, - "loss": 1.0816, - "step": 7009 - }, - { - "epoch": 0.9503151901308209, - "grad_norm": 1.9690247623136985, - "learning_rate": 1.2888398133759637e-08, - "loss": 1.1423, - "step": 7010 - }, - { - "epoch": 0.9504507557784857, - "grad_norm": 4.264784535213333, - "learning_rate": 1.2818216585549824e-08, - "loss": 1.0882, - "step": 7011 - }, - { - "epoch": 0.9505863214261506, - "grad_norm": 1.5774333995034693, - "learning_rate": 1.2748225407554603e-08, - "loss": 1.1355, - "step": 7012 - }, - { - "epoch": 0.9507218870738154, - "grad_norm": 2.2253130436577773, - "learning_rate": 1.2678424613271288e-08, - "loss": 1.1422, - "step": 7013 - }, - { - "epoch": 0.9508574527214804, - "grad_norm": 1.5217462288509849, - "learning_rate": 1.2608814216160223e-08, - "loss": 1.1199, - "step": 7014 - }, - { - "epoch": 0.9509930183691453, - "grad_norm": 1.7429813997846948, - "learning_rate": 1.253939422964545e-08, - "loss": 1.0967, - "step": 7015 - }, - { - "epoch": 0.9511285840168101, - "grad_norm": 1.4754466714386876, - "learning_rate": 1.2470164667113926e-08, - "loss": 1.1104, - "step": 7016 - }, - { - "epoch": 0.951264149664475, - "grad_norm": 1.59168026806483, - "learning_rate": 1.2401125541915968e-08, - "loss": 1.121, - "step": 7017 - }, - { - "epoch": 0.9513997153121398, - "grad_norm": 1.4194686319424232, - "learning_rate": 1.2332276867365377e-08, - "loss": 1.0976, - "step": 7018 - }, - { - "epoch": 0.9515352809598048, - "grad_norm": 1.9961022998179265, - "learning_rate": 1.2263618656739083e-08, - "loss": 1.1202, - "step": 7019 - }, - { - "epoch": 0.9516708466074697, - "grad_norm": 1.9676001929465468, - "learning_rate": 1.2195150923277054e-08, - "loss": 1.1055, - "step": 7020 - }, - { - "epoch": 0.9518064122551345, - "grad_norm": 1.539056033471364, - "learning_rate": 1.2126873680183058e-08, - "loss": 1.1202, - "step": 7021 - }, - { - "epoch": 0.9519419779027994, - "grad_norm": 1.940692829828634, - "learning_rate": 1.2058786940623678e-08, - "loss": 1.0999, - "step": 7022 - }, - { - "epoch": 0.9520775435504644, - "grad_norm": 1.561046281358634, - "learning_rate": 1.1990890717728852e-08, - "loss": 1.1289, - "step": 7023 - }, - { - "epoch": 0.9522131091981292, - "grad_norm": 1.6463275957616834, - "learning_rate": 1.1923185024591775e-08, - "loss": 1.1157, - "step": 7024 - }, - { - "epoch": 0.9523486748457941, - "grad_norm": 1.6088985186574614, - "learning_rate": 1.1855669874269225e-08, - "loss": 1.1177, - "step": 7025 - }, - { - "epoch": 0.9524842404934589, - "grad_norm": 2.9850588562226403, - "learning_rate": 1.1788345279780786e-08, - "loss": 1.1137, - "step": 7026 - }, - { - "epoch": 0.9526198061411238, - "grad_norm": 1.6013367272812562, - "learning_rate": 1.1721211254109408e-08, - "loss": 1.1292, - "step": 7027 - }, - { - "epoch": 0.9527553717887888, - "grad_norm": 1.692867673013077, - "learning_rate": 1.1654267810201512e-08, - "loss": 1.1381, - "step": 7028 - }, - { - "epoch": 0.9528909374364536, - "grad_norm": 1.7143692753878064, - "learning_rate": 1.1587514960966437e-08, - "loss": 1.1621, - "step": 7029 - }, - { - "epoch": 0.9530265030841185, - "grad_norm": 1.4412971579451728, - "learning_rate": 1.1520952719277222e-08, - "loss": 1.1649, - "step": 7030 - }, - { - "epoch": 0.9531620687317833, - "grad_norm": 1.5685835808405408, - "learning_rate": 1.1454581097969595e-08, - "loss": 1.1351, - "step": 7031 - }, - { - "epoch": 0.9532976343794483, - "grad_norm": 2.542030850939974, - "learning_rate": 1.1388400109842878e-08, - "loss": 1.106, - "step": 7032 - }, - { - "epoch": 0.9534332000271132, - "grad_norm": 2.060472190017656, - "learning_rate": 1.1322409767659525e-08, - "loss": 1.1542, - "step": 7033 - }, - { - "epoch": 0.953568765674778, - "grad_norm": 1.5997284460101098, - "learning_rate": 1.1256610084145468e-08, - "loss": 1.0845, - "step": 7034 - }, - { - "epoch": 0.9537043313224429, - "grad_norm": 1.6596142918303498, - "learning_rate": 1.1191001071989336e-08, - "loss": 1.1301, - "step": 7035 - }, - { - "epoch": 0.9538398969701077, - "grad_norm": 1.6565750681081113, - "learning_rate": 1.1125582743843564e-08, - "loss": 1.0916, - "step": 7036 - }, - { - "epoch": 0.9539754626177727, - "grad_norm": 1.8110036112163295, - "learning_rate": 1.1060355112323395e-08, - "loss": 1.184, - "step": 7037 - }, - { - "epoch": 0.9541110282654376, - "grad_norm": 1.667465905542004, - "learning_rate": 1.0995318190007652e-08, - "loss": 1.1164, - "step": 7038 - }, - { - "epoch": 0.9542465939131024, - "grad_norm": 1.697623659427002, - "learning_rate": 1.0930471989437862e-08, - "loss": 1.0817, - "step": 7039 - }, - { - "epoch": 0.9543821595607673, - "grad_norm": 3.935573748679422, - "learning_rate": 1.0865816523119464e-08, - "loss": 1.1434, - "step": 7040 - }, - { - "epoch": 0.9545177252084321, - "grad_norm": 1.5734379893153523, - "learning_rate": 1.0801351803520598e-08, - "loss": 1.1055, - "step": 7041 - }, - { - "epoch": 0.9546532908560971, - "grad_norm": 1.4948716022079365, - "learning_rate": 1.0737077843072762e-08, - "loss": 1.1059, - "step": 7042 - }, - { - "epoch": 0.954788856503762, - "grad_norm": 1.824357927718509, - "learning_rate": 1.0672994654170598e-08, - "loss": 1.1228, - "step": 7043 - }, - { - "epoch": 0.9549244221514268, - "grad_norm": 2.4757347757008885, - "learning_rate": 1.060910224917222e-08, - "loss": 1.1123, - "step": 7044 - }, - { - "epoch": 0.9550599877990917, - "grad_norm": 1.5901272981483545, - "learning_rate": 1.054540064039866e-08, - "loss": 1.1083, - "step": 7045 - }, - { - "epoch": 0.9551955534467566, - "grad_norm": 1.7479814841875148, - "learning_rate": 1.0481889840134428e-08, - "loss": 1.1084, - "step": 7046 - }, - { - "epoch": 0.9553311190944215, - "grad_norm": 1.8322311490789407, - "learning_rate": 1.0418569860626836e-08, - "loss": 1.1152, - "step": 7047 - }, - { - "epoch": 0.9554666847420864, - "grad_norm": 1.6550568489772073, - "learning_rate": 1.0355440714086782e-08, - "loss": 1.1354, - "step": 7048 - }, - { - "epoch": 0.9556022503897512, - "grad_norm": 1.7036847840594365, - "learning_rate": 1.0292502412688198e-08, - "loss": 1.116, - "step": 7049 - }, - { - "epoch": 0.9557378160374161, - "grad_norm": 1.5906833250912993, - "learning_rate": 1.0229754968568261e-08, - "loss": 1.1184, - "step": 7050 - }, - { - "epoch": 0.955873381685081, - "grad_norm": 1.783708674905589, - "learning_rate": 1.0167198393827403e-08, - "loss": 1.1177, - "step": 7051 - }, - { - "epoch": 0.9560089473327459, - "grad_norm": 1.764198613565205, - "learning_rate": 1.0104832700528975e-08, - "loss": 1.11, - "step": 7052 - }, - { - "epoch": 0.9561445129804108, - "grad_norm": 1.6596475034240916, - "learning_rate": 1.0042657900699803e-08, - "loss": 1.1342, - "step": 7053 - }, - { - "epoch": 0.9562800786280756, - "grad_norm": 1.6295800351997574, - "learning_rate": 9.980674006329848e-09, - "loss": 1.0919, - "step": 7054 - }, - { - "epoch": 0.9564156442757406, - "grad_norm": 2.8827260203681675, - "learning_rate": 9.918881029372106e-09, - "loss": 1.0943, - "step": 7055 - }, - { - "epoch": 0.9565512099234054, - "grad_norm": 2.587169892135705, - "learning_rate": 9.857278981742934e-09, - "loss": 1.1564, - "step": 7056 - }, - { - "epoch": 0.9566867755710703, - "grad_norm": 1.473161758738789, - "learning_rate": 9.795867875321829e-09, - "loss": 1.1241, - "step": 7057 - }, - { - "epoch": 0.9568223412187352, - "grad_norm": 1.615064137202894, - "learning_rate": 9.734647721951427e-09, - "loss": 1.1261, - "step": 7058 - }, - { - "epoch": 0.9569579068664, - "grad_norm": 1.7201019796528774, - "learning_rate": 9.673618533437511e-09, - "loss": 1.1389, - "step": 7059 - }, - { - "epoch": 0.957093472514065, - "grad_norm": 1.9422316613605863, - "learning_rate": 9.612780321549108e-09, - "loss": 1.1384, - "step": 7060 - }, - { - "epoch": 0.9572290381617298, - "grad_norm": 1.9306842905993578, - "learning_rate": 9.552133098018389e-09, - "loss": 1.1345, - "step": 7061 - }, - { - "epoch": 0.9573646038093947, - "grad_norm": 1.4638964129825902, - "learning_rate": 9.491676874540666e-09, - "loss": 1.0874, - "step": 7062 - }, - { - "epoch": 0.9575001694570596, - "grad_norm": 1.9466127588448845, - "learning_rate": 9.431411662774502e-09, - "loss": 1.1508, - "step": 7063 - }, - { - "epoch": 0.9576357351047244, - "grad_norm": 2.012846626331102, - "learning_rate": 9.37133747434149e-09, - "loss": 1.1348, - "step": 7064 - }, - { - "epoch": 0.9577713007523894, - "grad_norm": 1.5161946064349907, - "learning_rate": 9.311454320826473e-09, - "loss": 1.1384, - "step": 7065 - }, - { - "epoch": 0.9579068664000542, - "grad_norm": 2.141844283193209, - "learning_rate": 9.251762213777437e-09, - "loss": 1.137, - "step": 7066 - }, - { - "epoch": 0.9580424320477191, - "grad_norm": 1.6704077726058348, - "learning_rate": 9.192261164705617e-09, - "loss": 1.1371, - "step": 7067 - }, - { - "epoch": 0.958177997695384, - "grad_norm": 3.13955657952051, - "learning_rate": 9.132951185085281e-09, - "loss": 1.116, - "step": 7068 - }, - { - "epoch": 0.9583135633430488, - "grad_norm": 1.6198257434733616, - "learning_rate": 9.073832286353944e-09, - "loss": 1.1515, - "step": 7069 - }, - { - "epoch": 0.9584491289907138, - "grad_norm": 1.6574897957367236, - "learning_rate": 9.014904479912044e-09, - "loss": 1.1392, - "step": 7070 - }, - { - "epoch": 0.9585846946383786, - "grad_norm": 1.581170948353582, - "learning_rate": 8.956167777123602e-09, - "loss": 1.101, - "step": 7071 - }, - { - "epoch": 0.9587202602860435, - "grad_norm": 1.4310285073229456, - "learning_rate": 8.897622189315224e-09, - "loss": 1.1309, - "step": 7072 - }, - { - "epoch": 0.9588558259337084, - "grad_norm": 1.6116000467483946, - "learning_rate": 8.839267727777211e-09, - "loss": 1.1603, - "step": 7073 - }, - { - "epoch": 0.9589913915813733, - "grad_norm": 2.6239475966839567, - "learning_rate": 8.781104403762563e-09, - "loss": 1.0826, - "step": 7074 - }, - { - "epoch": 0.9591269572290382, - "grad_norm": 1.8855744227272089, - "learning_rate": 8.723132228487861e-09, - "loss": 1.1513, - "step": 7075 - }, - { - "epoch": 0.959262522876703, - "grad_norm": 2.3400778616605713, - "learning_rate": 8.665351213132278e-09, - "loss": 1.119, - "step": 7076 - }, - { - "epoch": 0.9593980885243679, - "grad_norm": 1.4148151668098479, - "learning_rate": 8.607761368838785e-09, - "loss": 1.1057, - "step": 7077 - }, - { - "epoch": 0.9595336541720328, - "grad_norm": 1.534889372354019, - "learning_rate": 8.550362706712832e-09, - "loss": 1.1322, - "step": 7078 - }, - { - "epoch": 0.9596692198196977, - "grad_norm": 2.6689759528307864, - "learning_rate": 8.493155237823347e-09, - "loss": 1.1548, - "step": 7079 - }, - { - "epoch": 0.9598047854673626, - "grad_norm": 2.7413091670938754, - "learning_rate": 8.4361389732025e-09, - "loss": 1.111, - "step": 7080 - }, - { - "epoch": 0.9599403511150274, - "grad_norm": 4.442462107929643, - "learning_rate": 8.379313923845277e-09, - "loss": 1.1088, - "step": 7081 - }, - { - "epoch": 0.9600759167626923, - "grad_norm": 2.834748788569348, - "learning_rate": 8.322680100710022e-09, - "loss": 1.0987, - "step": 7082 - }, - { - "epoch": 0.9602114824103573, - "grad_norm": 2.959571665425855, - "learning_rate": 8.266237514718e-09, - "loss": 1.1446, - "step": 7083 - }, - { - "epoch": 0.9603470480580221, - "grad_norm": 1.7645127785183479, - "learning_rate": 8.209986176753948e-09, - "loss": 1.0843, - "step": 7084 - }, - { - "epoch": 0.960482613705687, - "grad_norm": 1.6267555542610739, - "learning_rate": 8.153926097665186e-09, - "loss": 1.1277, - "step": 7085 - }, - { - "epoch": 0.9606181793533518, - "grad_norm": 4.284502447108667, - "learning_rate": 8.098057288262738e-09, - "loss": 1.1249, - "step": 7086 - }, - { - "epoch": 0.9607537450010167, - "grad_norm": 1.7273229768671976, - "learning_rate": 8.042379759320317e-09, - "loss": 1.1452, - "step": 7087 - }, - { - "epoch": 0.9608893106486817, - "grad_norm": 3.8945717788169163, - "learning_rate": 7.986893521574888e-09, - "loss": 1.1356, - "step": 7088 - }, - { - "epoch": 0.9610248762963465, - "grad_norm": 1.5994513965964476, - "learning_rate": 7.931598585726562e-09, - "loss": 1.1225, - "step": 7089 - }, - { - "epoch": 0.9611604419440114, - "grad_norm": 1.7687039554065647, - "learning_rate": 7.876494962438585e-09, - "loss": 1.1499, - "step": 7090 - }, - { - "epoch": 0.9612960075916762, - "grad_norm": 5.193396830910458, - "learning_rate": 7.821582662337123e-09, - "loss": 1.1165, - "step": 7091 - }, - { - "epoch": 0.9614315732393411, - "grad_norm": 2.1931260975269296, - "learning_rate": 7.766861696011816e-09, - "loss": 1.1086, - "step": 7092 - }, - { - "epoch": 0.9615671388870061, - "grad_norm": 2.20352639195792, - "learning_rate": 7.712332074014893e-09, - "loss": 1.1049, - "step": 7093 - }, - { - "epoch": 0.9617027045346709, - "grad_norm": 1.5174326014010209, - "learning_rate": 7.657993806862162e-09, - "loss": 1.1432, - "step": 7094 - }, - { - "epoch": 0.9618382701823358, - "grad_norm": 1.8046047865699633, - "learning_rate": 7.603846905032129e-09, - "loss": 1.1142, - "step": 7095 - }, - { - "epoch": 0.9619738358300006, - "grad_norm": 2.597758872012904, - "learning_rate": 7.549891378966888e-09, - "loss": 1.1371, - "step": 7096 - }, - { - "epoch": 0.9621094014776655, - "grad_norm": 1.8319171107002798, - "learning_rate": 7.496127239071003e-09, - "loss": 1.1382, - "step": 7097 - }, - { - "epoch": 0.9622449671253305, - "grad_norm": 1.6932803774634821, - "learning_rate": 7.442554495712738e-09, - "loss": 1.1157, - "step": 7098 - }, - { - "epoch": 0.9623805327729953, - "grad_norm": 1.3767774244766573, - "learning_rate": 7.3891731592230496e-09, - "loss": 1.0886, - "step": 7099 - }, - { - "epoch": 0.9625160984206602, - "grad_norm": 2.039447888089875, - "learning_rate": 7.335983239896148e-09, - "loss": 1.1239, - "step": 7100 - }, - { - "epoch": 0.9626516640683251, - "grad_norm": 1.4081469058227525, - "learning_rate": 7.282984747989163e-09, - "loss": 1.124, - "step": 7101 - }, - { - "epoch": 0.96278722971599, - "grad_norm": 3.252523134627172, - "learning_rate": 7.230177693722583e-09, - "loss": 1.1134, - "step": 7102 - }, - { - "epoch": 0.9629227953636549, - "grad_norm": 1.5921518814130362, - "learning_rate": 7.17756208727982e-09, - "loss": 1.1242, - "step": 7103 - }, - { - "epoch": 0.9630583610113197, - "grad_norm": 1.3698675871082773, - "learning_rate": 7.125137938807424e-09, - "loss": 1.105, - "step": 7104 - }, - { - "epoch": 0.9631939266589846, - "grad_norm": 2.4243470379992877, - "learning_rate": 7.072905258414752e-09, - "loss": 1.1147, - "step": 7105 - }, - { - "epoch": 0.9633294923066495, - "grad_norm": 5.523284199982062, - "learning_rate": 7.020864056174635e-09, - "loss": 1.085, - "step": 7106 - }, - { - "epoch": 0.9634650579543144, - "grad_norm": 1.5856766888613834, - "learning_rate": 6.969014342122825e-09, - "loss": 1.1012, - "step": 7107 - }, - { - "epoch": 0.9636006236019793, - "grad_norm": 1.6116208092663489, - "learning_rate": 6.9173561262581e-09, - "loss": 1.1546, - "step": 7108 - }, - { - "epoch": 0.9637361892496441, - "grad_norm": 1.833366073579942, - "learning_rate": 6.86588941854227e-09, - "loss": 1.1238, - "step": 7109 - }, - { - "epoch": 0.963871754897309, - "grad_norm": 1.5748170894715938, - "learning_rate": 6.814614228900506e-09, - "loss": 1.1208, - "step": 7110 - }, - { - "epoch": 0.964007320544974, - "grad_norm": 1.63186729891517, - "learning_rate": 6.763530567220455e-09, - "loss": 1.1544, - "step": 7111 - }, - { - "epoch": 0.9641428861926388, - "grad_norm": 1.648992437110244, - "learning_rate": 6.712638443353569e-09, - "loss": 1.1239, - "step": 7112 - }, - { - "epoch": 0.9642784518403037, - "grad_norm": 1.8048419655246692, - "learning_rate": 6.661937867113665e-09, - "loss": 1.1468, - "step": 7113 - }, - { - "epoch": 0.9644140174879685, - "grad_norm": 4.884367236613135, - "learning_rate": 6.611428848278256e-09, - "loss": 1.1271, - "step": 7114 - }, - { - "epoch": 0.9645495831356334, - "grad_norm": 1.7555428768901487, - "learning_rate": 6.5611113965873265e-09, - "loss": 1.1821, - "step": 7115 - }, - { - "epoch": 0.9646851487832984, - "grad_norm": 1.8133530777846618, - "learning_rate": 6.51098552174445e-09, - "loss": 1.1765, - "step": 7116 - }, - { - "epoch": 0.9648207144309632, - "grad_norm": 1.9610990748474797, - "learning_rate": 6.461051233415782e-09, - "loss": 1.1183, - "step": 7117 - }, - { - "epoch": 0.9649562800786281, - "grad_norm": 1.7801324177139817, - "learning_rate": 6.4113085412309535e-09, - "loss": 1.1273, - "step": 7118 - }, - { - "epoch": 0.9650918457262929, - "grad_norm": 2.758625678388622, - "learning_rate": 6.361757454782291e-09, - "loss": 1.1114, - "step": 7119 - }, - { - "epoch": 0.9652274113739578, - "grad_norm": 1.4349348408163982, - "learning_rate": 6.312397983625483e-09, - "loss": 1.1397, - "step": 7120 - }, - { - "epoch": 0.9653629770216228, - "grad_norm": 1.4629378043246057, - "learning_rate": 6.2632301372789185e-09, - "loss": 1.1284, - "step": 7121 - }, - { - "epoch": 0.9654985426692876, - "grad_norm": 1.4445981636057337, - "learning_rate": 6.214253925224455e-09, - "loss": 1.1088, - "step": 7122 - }, - { - "epoch": 0.9656341083169525, - "grad_norm": 2.432048643503999, - "learning_rate": 6.165469356906539e-09, - "loss": 1.1242, - "step": 7123 - }, - { - "epoch": 0.9657696739646173, - "grad_norm": 1.5410841880490678, - "learning_rate": 6.116876441733087e-09, - "loss": 1.1482, - "step": 7124 - }, - { - "epoch": 0.9659052396122823, - "grad_norm": 2.2142902560410387, - "learning_rate": 6.068475189074829e-09, - "loss": 1.1287, - "step": 7125 - }, - { - "epoch": 0.9660408052599472, - "grad_norm": 1.5111854228730028, - "learning_rate": 6.020265608265407e-09, - "loss": 1.1232, - "step": 7126 - }, - { - "epoch": 0.966176370907612, - "grad_norm": 1.6558155383163164, - "learning_rate": 5.97224770860183e-09, - "loss": 1.1807, - "step": 7127 - }, - { - "epoch": 0.9663119365552769, - "grad_norm": 1.615425679779229, - "learning_rate": 5.924421499343801e-09, - "loss": 1.1129, - "step": 7128 - }, - { - "epoch": 0.9664475022029417, - "grad_norm": 2.127723412201215, - "learning_rate": 5.8767869897145e-09, - "loss": 1.1624, - "step": 7129 - }, - { - "epoch": 0.9665830678506067, - "grad_norm": 2.8623555409848, - "learning_rate": 5.8293441888994655e-09, - "loss": 1.0965, - "step": 7130 - }, - { - "epoch": 0.9667186334982716, - "grad_norm": 1.6066327093785773, - "learning_rate": 5.7820931060481585e-09, - "loss": 1.1039, - "step": 7131 - }, - { - "epoch": 0.9668541991459364, - "grad_norm": 1.7174693726204402, - "learning_rate": 5.735033750272067e-09, - "loss": 1.1019, - "step": 7132 - }, - { - "epoch": 0.9669897647936013, - "grad_norm": 1.6579901600833538, - "learning_rate": 5.68816613064671e-09, - "loss": 1.1466, - "step": 7133 - }, - { - "epoch": 0.9671253304412661, - "grad_norm": 1.5610044045742024, - "learning_rate": 5.6414902562096356e-09, - "loss": 1.1381, - "step": 7134 - }, - { - "epoch": 0.9672608960889311, - "grad_norm": 1.746035875751545, - "learning_rate": 5.595006135962421e-09, - "loss": 1.1312, - "step": 7135 - }, - { - "epoch": 0.967396461736596, - "grad_norm": 1.7734437018590852, - "learning_rate": 5.548713778868786e-09, - "loss": 1.1387, - "step": 7136 - }, - { - "epoch": 0.9675320273842608, - "grad_norm": 1.771007087823352, - "learning_rate": 5.502613193856031e-09, - "loss": 1.1048, - "step": 7137 - }, - { - "epoch": 0.9676675930319257, - "grad_norm": 2.1095793839090997, - "learning_rate": 5.45670438981416e-09, - "loss": 1.1298, - "step": 7138 - }, - { - "epoch": 0.9678031586795905, - "grad_norm": 1.6212194984101325, - "learning_rate": 5.4109873755964205e-09, - "loss": 1.1277, - "step": 7139 - }, - { - "epoch": 0.9679387243272555, - "grad_norm": 1.8277827736060042, - "learning_rate": 5.365462160018985e-09, - "loss": 1.1247, - "step": 7140 - }, - { - "epoch": 0.9680742899749204, - "grad_norm": 1.932440151472279, - "learning_rate": 5.3201287518610525e-09, - "loss": 1.1012, - "step": 7141 - }, - { - "epoch": 0.9682098556225852, - "grad_norm": 1.605497132533525, - "learning_rate": 5.274987159864741e-09, - "loss": 1.1269, - "step": 7142 - }, - { - "epoch": 0.9683454212702501, - "grad_norm": 1.642131176520872, - "learning_rate": 5.2300373927351984e-09, - "loss": 1.1767, - "step": 7143 - }, - { - "epoch": 0.968480986917915, - "grad_norm": 2.129711126272144, - "learning_rate": 5.185279459140823e-09, - "loss": 1.1486, - "step": 7144 - }, - { - "epoch": 0.9686165525655799, - "grad_norm": 1.3656983531698006, - "learning_rate": 5.140713367712601e-09, - "loss": 1.1265, - "step": 7145 - }, - { - "epoch": 0.9687521182132448, - "grad_norm": 1.694345572492879, - "learning_rate": 5.09633912704488e-09, - "loss": 1.1495, - "step": 7146 - }, - { - "epoch": 0.9688876838609096, - "grad_norm": 3.6898666568615197, - "learning_rate": 5.052156745694924e-09, - "loss": 1.1192, - "step": 7147 - }, - { - "epoch": 0.9690232495085745, - "grad_norm": 1.7756295428082591, - "learning_rate": 5.00816623218292e-09, - "loss": 1.1683, - "step": 7148 - }, - { - "epoch": 0.9691588151562394, - "grad_norm": 1.6327934167760025, - "learning_rate": 4.964367594991969e-09, - "loss": 1.117, - "step": 7149 - }, - { - "epoch": 0.9692943808039043, - "grad_norm": 1.530117193891137, - "learning_rate": 4.920760842568539e-09, - "loss": 1.1185, - "step": 7150 - }, - { - "epoch": 0.9694299464515692, - "grad_norm": 1.7007134929227408, - "learning_rate": 4.877345983321568e-09, - "loss": 1.1529, - "step": 7151 - }, - { - "epoch": 0.969565512099234, - "grad_norm": 1.394109594785006, - "learning_rate": 4.834123025623471e-09, - "loss": 1.1608, - "step": 7152 - }, - { - "epoch": 0.969701077746899, - "grad_norm": 1.4413727284940347, - "learning_rate": 4.791091977809358e-09, - "loss": 1.1141, - "step": 7153 - }, - { - "epoch": 0.9698366433945638, - "grad_norm": 1.7147988336135565, - "learning_rate": 4.7482528481774805e-09, - "loss": 1.063, - "step": 7154 - }, - { - "epoch": 0.9699722090422287, - "grad_norm": 1.5485004614149296, - "learning_rate": 4.705605644988897e-09, - "loss": 1.14, - "step": 7155 - }, - { - "epoch": 0.9701077746898936, - "grad_norm": 1.5881198562929004, - "learning_rate": 4.663150376468028e-09, - "loss": 1.1091, - "step": 7156 - }, - { - "epoch": 0.9702433403375584, - "grad_norm": 8.21563052108238, - "learning_rate": 4.62088705080177e-09, - "loss": 1.1738, - "step": 7157 - }, - { - "epoch": 0.9703789059852234, - "grad_norm": 1.5318017690288654, - "learning_rate": 4.5788156761404906e-09, - "loss": 1.1361, - "step": 7158 - }, - { - "epoch": 0.9705144716328882, - "grad_norm": 1.6666308376379215, - "learning_rate": 4.536936260597257e-09, - "loss": 1.1286, - "step": 7159 - }, - { - "epoch": 0.9706500372805531, - "grad_norm": 1.4569601376523809, - "learning_rate": 4.495248812248054e-09, - "loss": 1.1168, - "step": 7160 - }, - { - "epoch": 0.970785602928218, - "grad_norm": 1.8935464034122065, - "learning_rate": 4.453753339132116e-09, - "loss": 1.1085, - "step": 7161 - }, - { - "epoch": 0.9709211685758828, - "grad_norm": 2.1801083624592277, - "learning_rate": 4.412449849251598e-09, - "loss": 1.11, - "step": 7162 - }, - { - "epoch": 0.9710567342235478, - "grad_norm": 1.765009061042792, - "learning_rate": 4.371338350571352e-09, - "loss": 1.1234, - "step": 7163 - }, - { - "epoch": 0.9711922998712126, - "grad_norm": 5.025516634475902, - "learning_rate": 4.3304188510194795e-09, - "loss": 1.1265, - "step": 7164 - }, - { - "epoch": 0.9713278655188775, - "grad_norm": 1.6272756298438467, - "learning_rate": 4.289691358486891e-09, - "loss": 1.1677, - "step": 7165 - }, - { - "epoch": 0.9714634311665424, - "grad_norm": 1.750756976530659, - "learning_rate": 4.249155880827859e-09, - "loss": 1.1239, - "step": 7166 - }, - { - "epoch": 0.9715989968142072, - "grad_norm": 1.7505323691800598, - "learning_rate": 4.2088124258590205e-09, - "loss": 1.128, - "step": 7167 - }, - { - "epoch": 0.9717345624618722, - "grad_norm": 1.6963514678904743, - "learning_rate": 4.168661001360485e-09, - "loss": 1.1118, - "step": 7168 - }, - { - "epoch": 0.971870128109537, - "grad_norm": 2.4482028367342936, - "learning_rate": 4.128701615074947e-09, - "loss": 1.1329, - "step": 7169 - }, - { - "epoch": 0.9720056937572019, - "grad_norm": 1.9912879939316792, - "learning_rate": 4.088934274708466e-09, - "loss": 1.1141, - "step": 7170 - }, - { - "epoch": 0.9721412594048668, - "grad_norm": 1.6844963241004025, - "learning_rate": 4.049358987929685e-09, - "loss": 1.1172, - "step": 7171 - }, - { - "epoch": 0.9722768250525317, - "grad_norm": 1.9139355370590987, - "learning_rate": 4.00997576237061e-09, - "loss": 1.1377, - "step": 7172 - }, - { - "epoch": 0.9724123907001966, - "grad_norm": 1.5628172468659727, - "learning_rate": 3.970784605625721e-09, - "loss": 1.1064, - "step": 7173 - }, - { - "epoch": 0.9725479563478614, - "grad_norm": 1.6457380420326853, - "learning_rate": 3.931785525252862e-09, - "loss": 1.0957, - "step": 7174 - }, - { - "epoch": 0.9726835219955263, - "grad_norm": 1.4870128775466216, - "learning_rate": 3.892978528772684e-09, - "loss": 1.1079, - "step": 7175 - }, - { - "epoch": 0.9728190876431912, - "grad_norm": 1.8757609147809848, - "learning_rate": 3.854363623668866e-09, - "loss": 1.1187, - "step": 7176 - }, - { - "epoch": 0.9729546532908561, - "grad_norm": 1.43305722069973, - "learning_rate": 3.815940817387786e-09, - "loss": 1.13, - "step": 7177 - }, - { - "epoch": 0.973090218938521, - "grad_norm": 1.7813322059908911, - "learning_rate": 3.777710117339183e-09, - "loss": 1.0977, - "step": 7178 - }, - { - "epoch": 0.9732257845861859, - "grad_norm": 1.9816928543233685, - "learning_rate": 3.739671530895605e-09, - "loss": 1.1264, - "step": 7179 - }, - { - "epoch": 0.9733613502338507, - "grad_norm": 2.443577714971343, - "learning_rate": 3.7018250653921834e-09, - "loss": 1.1205, - "step": 7180 - }, - { - "epoch": 0.9734969158815157, - "grad_norm": 1.698637249444351, - "learning_rate": 3.6641707281276357e-09, - "loss": 1.1235, - "step": 7181 - }, - { - "epoch": 0.9736324815291805, - "grad_norm": 5.880252266730391, - "learning_rate": 3.6267085263631537e-09, - "loss": 1.1034, - "step": 7182 - }, - { - "epoch": 0.9737680471768454, - "grad_norm": 4.669589598593262, - "learning_rate": 3.589438467322958e-09, - "loss": 1.1317, - "step": 7183 - }, - { - "epoch": 0.9739036128245103, - "grad_norm": 1.788161112332895, - "learning_rate": 3.5523605581944115e-09, - "loss": 1.137, - "step": 7184 - }, - { - "epoch": 0.9740391784721751, - "grad_norm": 1.5488471794422265, - "learning_rate": 3.5154748061276828e-09, - "loss": 1.1335, - "step": 7185 - }, - { - "epoch": 0.9741747441198401, - "grad_norm": 2.239395786147528, - "learning_rate": 3.47878121823586e-09, - "loss": 1.1334, - "step": 7186 - }, - { - "epoch": 0.9743103097675049, - "grad_norm": 1.456058777517878, - "learning_rate": 3.4422798015949496e-09, - "loss": 1.1102, - "step": 7187 - }, - { - "epoch": 0.9744458754151698, - "grad_norm": 1.6045105197740614, - "learning_rate": 3.405970563244098e-09, - "loss": 1.1207, - "step": 7188 - }, - { - "epoch": 0.9745814410628347, - "grad_norm": 1.7228629535159992, - "learning_rate": 3.36985351018515e-09, - "loss": 1.0564, - "step": 7189 - }, - { - "epoch": 0.9747170067104995, - "grad_norm": 1.662236536188821, - "learning_rate": 3.3339286493830886e-09, - "loss": 1.1506, - "step": 7190 - }, - { - "epoch": 0.9748525723581645, - "grad_norm": 1.4321641722132308, - "learning_rate": 3.2981959877657063e-09, - "loss": 1.1117, - "step": 7191 - }, - { - "epoch": 0.9749881380058293, - "grad_norm": 1.556961504461836, - "learning_rate": 3.2626555322236014e-09, - "loss": 1.1292, - "step": 7192 - }, - { - "epoch": 0.9751237036534942, - "grad_norm": 1.6177580413065933, - "learning_rate": 3.227307289610737e-09, - "loss": 1.0952, - "step": 7193 - }, - { - "epoch": 0.9752592693011591, - "grad_norm": 2.1008089168008084, - "learning_rate": 3.192151266743548e-09, - "loss": 1.1521, - "step": 7194 - }, - { - "epoch": 0.975394834948824, - "grad_norm": 1.625322566779412, - "learning_rate": 3.157187470401723e-09, - "loss": 1.1471, - "step": 7195 - }, - { - "epoch": 0.9755304005964889, - "grad_norm": 1.5960617355888707, - "learning_rate": 3.122415907327647e-09, - "loss": 1.1465, - "step": 7196 - }, - { - "epoch": 0.9756659662441537, - "grad_norm": 1.6375029793787959, - "learning_rate": 3.0878365842268437e-09, - "loss": 1.1421, - "step": 7197 - }, - { - "epoch": 0.9758015318918186, - "grad_norm": 1.7855815021636277, - "learning_rate": 3.053449507767536e-09, - "loss": 1.1312, - "step": 7198 - }, - { - "epoch": 0.9759370975394835, - "grad_norm": 1.712386526470286, - "learning_rate": 3.019254684581085e-09, - "loss": 1.1495, - "step": 7199 - }, - { - "epoch": 0.9760726631871484, - "grad_norm": 1.5076055258157215, - "learning_rate": 2.985252121261661e-09, - "loss": 1.1272, - "step": 7200 - }, - { - "epoch": 0.9762082288348133, - "grad_norm": 1.500264248540446, - "learning_rate": 2.951441824366463e-09, - "loss": 1.1059, - "step": 7201 - }, - { - "epoch": 0.9763437944824781, - "grad_norm": 2.151528438319856, - "learning_rate": 2.9178238004154975e-09, - "loss": 1.1089, - "step": 7202 - }, - { - "epoch": 0.976479360130143, - "grad_norm": 1.5893647377933937, - "learning_rate": 2.88439805589169e-09, - "loss": 1.1106, - "step": 7203 - }, - { - "epoch": 0.976614925777808, - "grad_norm": 1.4593149579414892, - "learning_rate": 2.851164597240996e-09, - "loss": 1.1258, - "step": 7204 - }, - { - "epoch": 0.9767504914254728, - "grad_norm": 1.4193178224789618, - "learning_rate": 2.8181234308721767e-09, - "loss": 1.0864, - "step": 7205 - }, - { - "epoch": 0.9768860570731377, - "grad_norm": 1.5094774160928792, - "learning_rate": 2.7852745631570253e-09, - "loss": 1.1078, - "step": 7206 - }, - { - "epoch": 0.9770216227208025, - "grad_norm": 1.5768918731611272, - "learning_rate": 2.7526180004300294e-09, - "loss": 1.1299, - "step": 7207 - }, - { - "epoch": 0.9771571883684674, - "grad_norm": 2.1060637361563193, - "learning_rate": 2.720153748988929e-09, - "loss": 1.1225, - "step": 7208 - }, - { - "epoch": 0.9772927540161324, - "grad_norm": 1.5345220052402073, - "learning_rate": 2.6878818150941616e-09, - "loss": 1.1418, - "step": 7209 - }, - { - "epoch": 0.9774283196637972, - "grad_norm": 1.866843451961597, - "learning_rate": 2.655802204968971e-09, - "loss": 1.14, - "step": 7210 - }, - { - "epoch": 0.9775638853114621, - "grad_norm": 3.1083991196602057, - "learning_rate": 2.6239149247999635e-09, - "loss": 1.1099, - "step": 7211 - }, - { - "epoch": 0.9776994509591269, - "grad_norm": 1.821442876582516, - "learning_rate": 2.592219980735999e-09, - "loss": 1.1172, - "step": 7212 - }, - { - "epoch": 0.9778350166067918, - "grad_norm": 1.5928813131612303, - "learning_rate": 2.5607173788894097e-09, - "loss": 1.1307, - "step": 7213 - }, - { - "epoch": 0.9779705822544568, - "grad_norm": 1.8277344017183261, - "learning_rate": 2.5294071253351146e-09, - "loss": 1.1187, - "step": 7214 - }, - { - "epoch": 0.9781061479021216, - "grad_norm": 2.7217508647669266, - "learning_rate": 2.498289226111061e-09, - "loss": 1.1008, - "step": 7215 - }, - { - "epoch": 0.9782417135497865, - "grad_norm": 1.51131839204187, - "learning_rate": 2.467363687218227e-09, - "loss": 1.1568, - "step": 7216 - }, - { - "epoch": 0.9783772791974513, - "grad_norm": 1.7286727654199492, - "learning_rate": 2.436630514620286e-09, - "loss": 1.1038, - "step": 7217 - }, - { - "epoch": 0.9785128448451162, - "grad_norm": 2.5352817004383734, - "learning_rate": 2.4060897142438308e-09, - "loss": 1.1291, - "step": 7218 - }, - { - "epoch": 0.9786484104927812, - "grad_norm": 4.092071001533522, - "learning_rate": 2.3757412919783725e-09, - "loss": 1.1472, - "step": 7219 - }, - { - "epoch": 0.978783976140446, - "grad_norm": 1.6961996712544052, - "learning_rate": 2.345585253676452e-09, - "loss": 1.1348, - "step": 7220 - }, - { - "epoch": 0.9789195417881109, - "grad_norm": 1.4335976369217418, - "learning_rate": 2.3156216051535284e-09, - "loss": 1.0832, - "step": 7221 - }, - { - "epoch": 0.9790551074357757, - "grad_norm": 1.524707655639051, - "learning_rate": 2.285850352187646e-09, - "loss": 1.1088, - "step": 7222 - }, - { - "epoch": 0.9791906730834407, - "grad_norm": 1.713964926537705, - "learning_rate": 2.2562715005201016e-09, - "loss": 1.1282, - "step": 7223 - }, - { - "epoch": 0.9793262387311056, - "grad_norm": 2.334572717755796, - "learning_rate": 2.226885055854777e-09, - "loss": 1.1664, - "step": 7224 - }, - { - "epoch": 0.9794618043787704, - "grad_norm": 1.703105475714722, - "learning_rate": 2.1976910238588055e-09, - "loss": 1.1466, - "step": 7225 - }, - { - "epoch": 0.9795973700264353, - "grad_norm": 1.4889680688647378, - "learning_rate": 2.168689410162017e-09, - "loss": 1.0979, - "step": 7226 - }, - { - "epoch": 0.9797329356741001, - "grad_norm": 2.092883462174913, - "learning_rate": 2.1398802203569375e-09, - "loss": 1.137, - "step": 7227 - }, - { - "epoch": 0.9798685013217651, - "grad_norm": 2.163030028758034, - "learning_rate": 2.111263459999457e-09, - "loss": 1.1038, - "step": 7228 - }, - { - "epoch": 0.98000406696943, - "grad_norm": 1.4846331439874394, - "learning_rate": 2.0828391346078277e-09, - "loss": 1.1189, - "step": 7229 - }, - { - "epoch": 0.9801396326170948, - "grad_norm": 1.7607602515102412, - "learning_rate": 2.054607249663665e-09, - "loss": 1.137, - "step": 7230 - }, - { - "epoch": 0.9802751982647597, - "grad_norm": 1.4728729316444726, - "learning_rate": 2.0265678106111685e-09, - "loss": 1.1491, - "step": 7231 - }, - { - "epoch": 0.9804107639124245, - "grad_norm": 3.402884054796431, - "learning_rate": 1.9987208228575693e-09, - "loss": 1.1125, - "step": 7232 - }, - { - "epoch": 0.9805463295600895, - "grad_norm": 1.8106424077540244, - "learning_rate": 1.971066291772905e-09, - "loss": 1.1457, - "step": 7233 - }, - { - "epoch": 0.9806818952077544, - "grad_norm": 2.1111346591818116, - "learning_rate": 1.9436042226901315e-09, - "loss": 1.1426, - "step": 7234 - }, - { - "epoch": 0.9808174608554192, - "grad_norm": 1.9638507957486508, - "learning_rate": 1.9163346209051246e-09, - "loss": 1.105, - "step": 7235 - }, - { - "epoch": 0.9809530265030841, - "grad_norm": 1.5324211078352512, - "learning_rate": 1.889257491676677e-09, - "loss": 1.0901, - "step": 7236 - }, - { - "epoch": 0.981088592150749, - "grad_norm": 1.4832408769560632, - "learning_rate": 1.8623728402261674e-09, - "loss": 1.0991, - "step": 7237 - }, - { - "epoch": 0.9812241577984139, - "grad_norm": 1.7422072779860218, - "learning_rate": 1.8356806717383377e-09, - "loss": 1.145, - "step": 7238 - }, - { - "epoch": 0.9813597234460788, - "grad_norm": 2.048130029994945, - "learning_rate": 1.809180991360404e-09, - "loss": 1.1252, - "step": 7239 - }, - { - "epoch": 0.9814952890937436, - "grad_norm": 1.9863326046610281, - "learning_rate": 1.7828738042027225e-09, - "loss": 1.1343, - "step": 7240 - }, - { - "epoch": 0.9816308547414085, - "grad_norm": 1.683067046540622, - "learning_rate": 1.7567591153383466e-09, - "loss": 1.1396, - "step": 7241 - }, - { - "epoch": 0.9817664203890734, - "grad_norm": 2.02405019811939, - "learning_rate": 1.7308369298033587e-09, - "loss": 1.1409, - "step": 7242 - }, - { - "epoch": 0.9819019860367383, - "grad_norm": 1.8052261782129018, - "learning_rate": 1.7051072525965382e-09, - "loss": 1.1325, - "step": 7243 - }, - { - "epoch": 0.9820375516844032, - "grad_norm": 1.8465800247652366, - "learning_rate": 1.6795700886798049e-09, - "loss": 1.1197, - "step": 7244 - }, - { - "epoch": 0.982173117332068, - "grad_norm": 2.877977040972751, - "learning_rate": 1.6542254429776636e-09, - "loss": 1.057, - "step": 7245 - }, - { - "epoch": 0.982308682979733, - "grad_norm": 2.2625360932983147, - "learning_rate": 1.6290733203776497e-09, - "loss": 1.1551, - "step": 7246 - }, - { - "epoch": 0.9824442486273978, - "grad_norm": 1.4464863003873267, - "learning_rate": 1.6041137257303272e-09, - "loss": 1.1121, - "step": 7247 - }, - { - "epoch": 0.9825798142750627, - "grad_norm": 1.6434167946249036, - "learning_rate": 1.5793466638486242e-09, - "loss": 1.1344, - "step": 7248 - }, - { - "epoch": 0.9827153799227276, - "grad_norm": 1.5923898962341945, - "learning_rate": 1.554772139509053e-09, - "loss": 1.1072, - "step": 7249 - }, - { - "epoch": 0.9828509455703924, - "grad_norm": 1.4401962007480966, - "learning_rate": 1.5303901574502675e-09, - "loss": 1.0999, - "step": 7250 - }, - { - "epoch": 0.9829865112180574, - "grad_norm": 1.5171494943306998, - "learning_rate": 1.5062007223743956e-09, - "loss": 1.1439, - "step": 7251 - }, - { - "epoch": 0.9831220768657222, - "grad_norm": 1.59426220009692, - "learning_rate": 1.482203838946039e-09, - "loss": 1.1106, - "step": 7252 - }, - { - "epoch": 0.9832576425133871, - "grad_norm": 2.3907140402653617, - "learning_rate": 1.4583995117929404e-09, - "loss": 1.1596, - "step": 7253 - }, - { - "epoch": 0.983393208161052, - "grad_norm": 1.653398457282989, - "learning_rate": 1.434787745505317e-09, - "loss": 1.1459, - "step": 7254 - }, - { - "epoch": 0.9835287738087168, - "grad_norm": 1.7115020176672784, - "learning_rate": 1.4113685446368595e-09, - "loss": 1.0971, - "step": 7255 - }, - { - "epoch": 0.9836643394563818, - "grad_norm": 1.7257730876789483, - "learning_rate": 1.388141913703511e-09, - "loss": 1.1313, - "step": 7256 - }, - { - "epoch": 0.9837999051040466, - "grad_norm": 2.1699962380813838, - "learning_rate": 1.3651078571844664e-09, - "loss": 1.1066, - "step": 7257 - }, - { - "epoch": 0.9839354707517115, - "grad_norm": 1.4911502093815314, - "learning_rate": 1.3422663795215062e-09, - "loss": 1.1194, - "step": 7258 - }, - { - "epoch": 0.9840710363993764, - "grad_norm": 1.6729600800754403, - "learning_rate": 1.3196174851196617e-09, - "loss": 1.1033, - "step": 7259 - }, - { - "epoch": 0.9842066020470412, - "grad_norm": 1.3830231095469774, - "learning_rate": 1.2971611783465507e-09, - "loss": 1.0896, - "step": 7260 - }, - { - "epoch": 0.9843421676947062, - "grad_norm": 3.681292708926958, - "learning_rate": 1.274897463532487e-09, - "loss": 1.1369, - "step": 7261 - }, - { - "epoch": 0.9844777333423711, - "grad_norm": 1.85426889268654, - "learning_rate": 1.2528263449710363e-09, - "loss": 1.1328, - "step": 7262 - }, - { - "epoch": 0.9846132989900359, - "grad_norm": 1.9431162663522894, - "learning_rate": 1.2309478269184602e-09, - "loss": 1.1313, - "step": 7263 - }, - { - "epoch": 0.9847488646377008, - "grad_norm": 1.4773329895320648, - "learning_rate": 1.2092619135937177e-09, - "loss": 1.163, - "step": 7264 - }, - { - "epoch": 0.9848844302853657, - "grad_norm": 1.6405068215678367, - "learning_rate": 1.1877686091787963e-09, - "loss": 1.1068, - "step": 7265 - }, - { - "epoch": 0.9850199959330306, - "grad_norm": 1.6989520793794661, - "learning_rate": 1.1664679178186032e-09, - "loss": 1.0761, - "step": 7266 - }, - { - "epoch": 0.9851555615806955, - "grad_norm": 1.702021528687978, - "learning_rate": 1.1453598436208522e-09, - "loss": 1.1465, - "step": 7267 - }, - { - "epoch": 0.9852911272283603, - "grad_norm": 1.7429534303034626, - "learning_rate": 1.1244443906558432e-09, - "loss": 1.1274, - "step": 7268 - }, - { - "epoch": 0.9854266928760252, - "grad_norm": 1.7406907942027194, - "learning_rate": 1.1037215629571272e-09, - "loss": 1.1362, - "step": 7269 - }, - { - "epoch": 0.9855622585236901, - "grad_norm": 1.8932931960084298, - "learning_rate": 1.0831913645209522e-09, - "loss": 1.1298, - "step": 7270 - }, - { - "epoch": 0.985697824171355, - "grad_norm": 1.7352427386614573, - "learning_rate": 1.0628537993063736e-09, - "loss": 1.1801, - "step": 7271 - }, - { - "epoch": 0.9858333898190199, - "grad_norm": 1.7239920723082987, - "learning_rate": 1.042708871235143e-09, - "loss": 1.1398, - "step": 7272 - }, - { - "epoch": 0.9859689554666847, - "grad_norm": 2.1901440698428303, - "learning_rate": 1.0227565841923746e-09, - "loss": 1.0969, - "step": 7273 - }, - { - "epoch": 0.9861045211143497, - "grad_norm": 1.7106245690977555, - "learning_rate": 1.002996942025547e-09, - "loss": 1.1243, - "step": 7274 - }, - { - "epoch": 0.9862400867620145, - "grad_norm": 1.685459944468041, - "learning_rate": 9.834299485450559e-10, - "loss": 1.1415, - "step": 7275 - }, - { - "epoch": 0.9863756524096794, - "grad_norm": 4.688780571079677, - "learning_rate": 9.640556075244388e-10, - "loss": 1.1465, - "step": 7276 - }, - { - "epoch": 0.9865112180573443, - "grad_norm": 1.7967560376704728, - "learning_rate": 9.448739226997072e-10, - "loss": 1.1331, - "step": 7277 - }, - { - "epoch": 0.9866467837050091, - "grad_norm": 1.928662704425858, - "learning_rate": 9.258848977700129e-10, - "loss": 1.1313, - "step": 7278 - }, - { - "epoch": 0.9867823493526741, - "grad_norm": 1.9613054597838901, - "learning_rate": 9.070885363972047e-10, - "loss": 1.1688, - "step": 7279 - }, - { - "epoch": 0.9869179150003389, - "grad_norm": 1.749659226495413, - "learning_rate": 8.884848422060498e-10, - "loss": 1.1608, - "step": 7280 - }, - { - "epoch": 0.9870534806480038, - "grad_norm": 1.6648974152295442, - "learning_rate": 8.700738187840118e-10, - "loss": 1.1392, - "step": 7281 - }, - { - "epoch": 0.9871890462956687, - "grad_norm": 1.892382211603729, - "learning_rate": 8.518554696815838e-10, - "loss": 1.1627, - "step": 7282 - }, - { - "epoch": 0.9873246119433335, - "grad_norm": 1.6501112844948553, - "learning_rate": 8.338297984121778e-10, - "loss": 1.1088, - "step": 7283 - }, - { - "epoch": 0.9874601775909985, - "grad_norm": 1.7644572051868976, - "learning_rate": 8.159968084515689e-10, - "loss": 1.1083, - "step": 7284 - }, - { - "epoch": 0.9875957432386633, - "grad_norm": 2.6966041013335196, - "learning_rate": 7.983565032390061e-10, - "loss": 1.0914, - "step": 7285 - }, - { - "epoch": 0.9877313088863282, - "grad_norm": 1.7506618671260525, - "learning_rate": 7.809088861762125e-10, - "loss": 1.0988, - "step": 7286 - }, - { - "epoch": 0.9878668745339931, - "grad_norm": 1.5435437759790647, - "learning_rate": 7.636539606277192e-10, - "loss": 1.1741, - "step": 7287 - }, - { - "epoch": 0.988002440181658, - "grad_norm": 1.758252587275288, - "learning_rate": 7.465917299210866e-10, - "loss": 1.1157, - "step": 7288 - }, - { - "epoch": 0.9881380058293229, - "grad_norm": 1.5287575916617557, - "learning_rate": 7.297221973465717e-10, - "loss": 1.1256, - "step": 7289 - }, - { - "epoch": 0.9882735714769877, - "grad_norm": 2.193629185375675, - "learning_rate": 7.130453661573499e-10, - "loss": 1.1283, - "step": 7290 - }, - { - "epoch": 0.9884091371246526, - "grad_norm": 1.6327581284090786, - "learning_rate": 6.965612395695153e-10, - "loss": 1.1579, - "step": 7291 - }, - { - "epoch": 0.9885447027723175, - "grad_norm": 1.5418186971137735, - "learning_rate": 6.802698207617474e-10, - "loss": 1.1256, - "step": 7292 - }, - { - "epoch": 0.9886802684199824, - "grad_norm": 2.863059737573039, - "learning_rate": 6.641711128758665e-10, - "loss": 1.0991, - "step": 7293 - }, - { - "epoch": 0.9888158340676473, - "grad_norm": 1.5473231142088129, - "learning_rate": 6.48265119016278e-10, - "loss": 1.0904, - "step": 7294 - }, - { - "epoch": 0.9889513997153121, - "grad_norm": 1.4516835053908697, - "learning_rate": 6.325518422503063e-10, - "loss": 1.1191, - "step": 7295 - }, - { - "epoch": 0.989086965362977, - "grad_norm": 2.556621016364256, - "learning_rate": 6.170312856083048e-10, - "loss": 1.1471, - "step": 7296 - }, - { - "epoch": 0.989222531010642, - "grad_norm": 1.6607784710513507, - "learning_rate": 6.017034520831021e-10, - "loss": 1.1646, - "step": 7297 - }, - { - "epoch": 0.9893580966583068, - "grad_norm": 1.715507417494631, - "learning_rate": 5.865683446305558e-10, - "loss": 1.1165, - "step": 7298 - }, - { - "epoch": 0.9894936623059717, - "grad_norm": 1.4574846145659213, - "learning_rate": 5.716259661695533e-10, - "loss": 1.1379, - "step": 7299 - }, - { - "epoch": 0.9896292279536365, - "grad_norm": 1.582271575839357, - "learning_rate": 5.568763195813453e-10, - "loss": 1.1191, - "step": 7300 - }, - { - "epoch": 0.9897647936013014, - "grad_norm": 1.8607327955016504, - "learning_rate": 5.423194077104343e-10, - "loss": 1.0755, - "step": 7301 - }, - { - "epoch": 0.9899003592489664, - "grad_norm": 1.8320853283583345, - "learning_rate": 5.279552333640191e-10, - "loss": 1.0832, - "step": 7302 - }, - { - "epoch": 0.9900359248966312, - "grad_norm": 1.4823390583347391, - "learning_rate": 5.137837993121064e-10, - "loss": 1.0983, - "step": 7303 - }, - { - "epoch": 0.9901714905442961, - "grad_norm": 3.715413220303096, - "learning_rate": 4.998051082875099e-10, - "loss": 1.2052, - "step": 7304 - }, - { - "epoch": 0.9903070561919609, - "grad_norm": 2.4589387946918704, - "learning_rate": 4.860191629859623e-10, - "loss": 1.1468, - "step": 7305 - }, - { - "epoch": 0.9904426218396258, - "grad_norm": 1.6344181332801768, - "learning_rate": 4.724259660658924e-10, - "loss": 1.1489, - "step": 7306 - }, - { - "epoch": 0.9905781874872908, - "grad_norm": 2.2737258503824194, - "learning_rate": 4.5902552014864815e-10, - "loss": 1.0974, - "step": 7307 - }, - { - "epoch": 0.9907137531349556, - "grad_norm": 2.105938488139748, - "learning_rate": 4.458178278184954e-10, - "loss": 1.1132, - "step": 7308 - }, - { - "epoch": 0.9908493187826205, - "grad_norm": 2.003598053946252, - "learning_rate": 4.328028916222859e-10, - "loss": 1.1324, - "step": 7309 - }, - { - "epoch": 0.9909848844302853, - "grad_norm": 2.0024953594704162, - "learning_rate": 4.199807140700118e-10, - "loss": 1.1435, - "step": 7310 - }, - { - "epoch": 0.9911204500779502, - "grad_norm": 1.5427283002394452, - "learning_rate": 4.073512976342508e-10, - "loss": 1.101, - "step": 7311 - }, - { - "epoch": 0.9912560157256152, - "grad_norm": 1.9290061735456894, - "learning_rate": 3.9491464475049916e-10, - "loss": 1.1491, - "step": 7312 - }, - { - "epoch": 0.99139158137328, - "grad_norm": 1.6138522356975329, - "learning_rate": 3.826707578170607e-10, - "loss": 1.1297, - "step": 7313 - }, - { - "epoch": 0.9915271470209449, - "grad_norm": 1.5536762712081174, - "learning_rate": 3.7061963919504667e-10, - "loss": 1.1368, - "step": 7314 - }, - { - "epoch": 0.9916627126686097, - "grad_norm": 1.4264188384787424, - "learning_rate": 3.5876129120837596e-10, - "loss": 1.1297, - "step": 7315 - }, - { - "epoch": 0.9917982783162747, - "grad_norm": 1.4121047127966044, - "learning_rate": 3.470957161439969e-10, - "loss": 1.1126, - "step": 7316 - }, - { - "epoch": 0.9919338439639396, - "grad_norm": 1.731028168799319, - "learning_rate": 3.3562291625133245e-10, - "loss": 1.1378, - "step": 7317 - }, - { - "epoch": 0.9920694096116044, - "grad_norm": 1.9230802792603376, - "learning_rate": 3.24342893742946e-10, - "loss": 1.1104, - "step": 7318 - }, - { - "epoch": 0.9922049752592693, - "grad_norm": 1.6136834036223806, - "learning_rate": 3.1325565079409755e-10, - "loss": 1.1243, - "step": 7319 - }, - { - "epoch": 0.9923405409069341, - "grad_norm": 1.4996058577826614, - "learning_rate": 3.023611895428546e-10, - "loss": 1.0996, - "step": 7320 - }, - { - "epoch": 0.9924761065545991, - "grad_norm": 1.6168617449142415, - "learning_rate": 2.9165951209020325e-10, - "loss": 1.1101, - "step": 7321 - }, - { - "epoch": 0.992611672202264, - "grad_norm": 1.686072185851146, - "learning_rate": 2.8115062049971493e-10, - "loss": 1.091, - "step": 7322 - }, - { - "epoch": 0.9927472378499288, - "grad_norm": 1.6629707746149576, - "learning_rate": 2.7083451679799084e-10, - "loss": 1.0818, - "step": 7323 - }, - { - "epoch": 0.9928828034975937, - "grad_norm": 1.7730890475593983, - "learning_rate": 2.6071120297443963e-10, - "loss": 1.1495, - "step": 7324 - }, - { - "epoch": 0.9930183691452585, - "grad_norm": 1.9655778763658645, - "learning_rate": 2.507806809813884e-10, - "loss": 1.1052, - "step": 7325 - }, - { - "epoch": 0.9931539347929235, - "grad_norm": 2.056507731685081, - "learning_rate": 2.410429527336388e-10, - "loss": 1.1324, - "step": 7326 - }, - { - "epoch": 0.9932895004405884, - "grad_norm": 1.4850184006335412, - "learning_rate": 2.3149802010913322e-10, - "loss": 1.137, - "step": 7327 - }, - { - "epoch": 0.9934250660882532, - "grad_norm": 2.0140448826711066, - "learning_rate": 2.221458849486213e-10, - "loss": 1.1509, - "step": 7328 - }, - { - "epoch": 0.9935606317359181, - "grad_norm": 2.232749960251188, - "learning_rate": 2.1298654905543834e-10, - "loss": 1.1046, - "step": 7329 - }, - { - "epoch": 0.993696197383583, - "grad_norm": 1.6397173392746007, - "learning_rate": 2.0402001419594917e-10, - "loss": 1.1108, - "step": 7330 - }, - { - "epoch": 0.9938317630312479, - "grad_norm": 1.6447243925517394, - "learning_rate": 1.9524628209943718e-10, - "loss": 1.1567, - "step": 7331 - }, - { - "epoch": 0.9939673286789128, - "grad_norm": 1.5853965296206602, - "learning_rate": 1.8666535445754917e-10, - "loss": 1.1256, - "step": 7332 - }, - { - "epoch": 0.9941028943265776, - "grad_norm": 1.7118124411706108, - "learning_rate": 1.7827723292518358e-10, - "loss": 1.1203, - "step": 7333 - }, - { - "epoch": 0.9942384599742425, - "grad_norm": 2.803321938934508, - "learning_rate": 1.7008191912004645e-10, - "loss": 1.1252, - "step": 7334 - }, - { - "epoch": 0.9943740256219074, - "grad_norm": 1.6795398287726964, - "learning_rate": 1.6207941462242912e-10, - "loss": 1.1239, - "step": 7335 - }, - { - "epoch": 0.9945095912695723, - "grad_norm": 3.0297576749773687, - "learning_rate": 1.5426972097543068e-10, - "loss": 1.1232, - "step": 7336 - }, - { - "epoch": 0.9946451569172372, - "grad_norm": 1.5705092717887783, - "learning_rate": 1.4665283968529062e-10, - "loss": 1.0783, - "step": 7337 - }, - { - "epoch": 0.994780722564902, - "grad_norm": 1.4910709312915966, - "learning_rate": 1.3922877222083407e-10, - "loss": 1.1426, - "step": 7338 - }, - { - "epoch": 0.994916288212567, - "grad_norm": 1.619772143822431, - "learning_rate": 1.3199752001369359e-10, - "loss": 1.1492, - "step": 7339 - }, - { - "epoch": 0.9950518538602319, - "grad_norm": 5.380196955303005, - "learning_rate": 1.2495908445830928e-10, - "loss": 1.1044, - "step": 7340 - }, - { - "epoch": 0.9951874195078967, - "grad_norm": 1.6881572309655417, - "learning_rate": 1.1811346691203982e-10, - "loss": 1.0976, - "step": 7341 - }, - { - "epoch": 0.9953229851555616, - "grad_norm": 1.8214927865906039, - "learning_rate": 1.1146066869494042e-10, - "loss": 1.1443, - "step": 7342 - }, - { - "epoch": 0.9954585508032264, - "grad_norm": 1.6384553030455569, - "learning_rate": 1.0500069109009579e-10, - "loss": 1.1383, - "step": 7343 - }, - { - "epoch": 0.9955941164508914, - "grad_norm": 1.5121953532912003, - "learning_rate": 9.873353534317619e-11, - "loss": 1.0981, - "step": 7344 - }, - { - "epoch": 0.9957296820985563, - "grad_norm": 2.038036255652666, - "learning_rate": 9.265920266265936e-11, - "loss": 1.1168, - "step": 7345 - }, - { - "epoch": 0.9958652477462211, - "grad_norm": 1.6261583637656811, - "learning_rate": 8.677769422005266e-11, - "loss": 1.1384, - "step": 7346 - }, - { - "epoch": 0.996000813393886, - "grad_norm": 2.1995897481442235, - "learning_rate": 8.108901114955991e-11, - "loss": 1.1236, - "step": 7347 - }, - { - "epoch": 0.9961363790415508, - "grad_norm": 1.6704269673792445, - "learning_rate": 7.559315454819249e-11, - "loss": 1.0988, - "step": 7348 - }, - { - "epoch": 0.9962719446892158, - "grad_norm": 2.1384871906587715, - "learning_rate": 7.029012547576929e-11, - "loss": 1.1313, - "step": 7349 - }, - { - "epoch": 0.9964075103368807, - "grad_norm": 2.811126866614931, - "learning_rate": 6.517992495491676e-11, - "loss": 1.1378, - "step": 7350 - }, - { - "epoch": 0.9965430759845455, - "grad_norm": 2.3065960737960696, - "learning_rate": 6.026255397106884e-11, - "loss": 1.1161, - "step": 7351 - }, - { - "epoch": 0.9966786416322104, - "grad_norm": 1.605578152159696, - "learning_rate": 5.553801347257803e-11, - "loss": 1.1476, - "step": 7352 - }, - { - "epoch": 0.9968142072798752, - "grad_norm": 1.4029879694161946, - "learning_rate": 5.1006304370493355e-11, - "loss": 1.1174, - "step": 7353 - }, - { - "epoch": 0.9969497729275402, - "grad_norm": 2.9832720144266087, - "learning_rate": 4.6667427538782386e-11, - "loss": 1.106, - "step": 7354 - }, - { - "epoch": 0.9970853385752051, - "grad_norm": 1.8103886482639782, - "learning_rate": 4.252138381399817e-11, - "loss": 1.1245, - "step": 7355 - }, - { - "epoch": 0.9972209042228699, - "grad_norm": 1.405773299696744, - "learning_rate": 3.856817399594536e-11, - "loss": 1.1186, - "step": 7356 - }, - { - "epoch": 0.9973564698705348, - "grad_norm": 2.404771544856134, - "learning_rate": 3.4807798846681055e-11, - "loss": 1.1553, - "step": 7357 - }, - { - "epoch": 0.9974920355181996, - "grad_norm": 1.9439275154917557, - "learning_rate": 3.124025909151395e-11, - "loss": 1.148, - "step": 7358 - }, - { - "epoch": 0.9976276011658646, - "grad_norm": 1.7945789729856365, - "learning_rate": 2.7865555418338238e-11, - "loss": 1.1742, - "step": 7359 - }, - { - "epoch": 0.9977631668135295, - "grad_norm": 1.6143669685941981, - "learning_rate": 2.4683688477966647e-11, - "loss": 1.1369, - "step": 7360 - }, - { - "epoch": 0.9978987324611943, - "grad_norm": 2.144184300108407, - "learning_rate": 2.1694658884130468e-11, - "loss": 1.1453, - "step": 7361 - }, - { - "epoch": 0.9980342981088592, - "grad_norm": 1.6121861406230185, - "learning_rate": 1.8898467213146473e-11, - "loss": 1.138, - "step": 7362 - }, - { - "epoch": 0.9981698637565241, - "grad_norm": 1.6096134437939005, - "learning_rate": 1.6295114004138965e-11, - "loss": 1.1529, - "step": 7363 - }, - { - "epoch": 0.998305429404189, - "grad_norm": 2.0572823140665686, - "learning_rate": 1.3884599759261818e-11, - "loss": 1.148, - "step": 7364 - }, - { - "epoch": 0.9984409950518539, - "grad_norm": 1.5146281984850485, - "learning_rate": 1.1666924943254386e-11, - "loss": 1.1227, - "step": 7365 - }, - { - "epoch": 0.9985765606995187, - "grad_norm": 1.7126984056723948, - "learning_rate": 9.642089983885604e-12, - "loss": 1.1393, - "step": 7366 - }, - { - "epoch": 0.9987121263471836, - "grad_norm": 1.7243573323349441, - "learning_rate": 7.810095271620908e-12, - "loss": 1.1261, - "step": 7367 - }, - { - "epoch": 0.9988476919948485, - "grad_norm": 1.5924815091108142, - "learning_rate": 6.170941159733267e-12, - "loss": 1.118, - "step": 7368 - }, - { - "epoch": 0.9989832576425134, - "grad_norm": 1.4906155752891566, - "learning_rate": 4.724627964303174e-12, - "loss": 1.1121, - "step": 7369 - }, - { - "epoch": 0.9991188232901783, - "grad_norm": 1.5462989120298993, - "learning_rate": 3.4711559642186527e-12, - "loss": 1.1042, - "step": 7370 - }, - { - "epoch": 0.9992543889378431, - "grad_norm": 2.7375283696759416, - "learning_rate": 2.4105254012862784e-12, - "loss": 1.0976, - "step": 7371 - }, - { - "epoch": 0.9993899545855081, - "grad_norm": 1.470073145713096, - "learning_rate": 1.5427364800091325e-12, - "loss": 1.1355, - "step": 7372 - }, - { - "epoch": 0.9995255202331729, - "grad_norm": 3.1378205058696995, - "learning_rate": 8.67789367586802e-13, - "loss": 1.1058, - "step": 7373 - }, - { - "epoch": 0.9996610858808378, - "grad_norm": 1.6191020535732017, - "learning_rate": 3.856841943594702e-13, - "loss": 1.1435, - "step": 7374 - }, - { - "epoch": 0.9997966515285027, - "grad_norm": 1.650802410263571, - "learning_rate": 9.642105325280425e-14, - "loss": 1.0968, - "step": 7375 - }, - { - "epoch": 0.9999322171761675, - "grad_norm": 1.660676723056901, - "learning_rate": 0.0, - "loss": 1.1592, - "step": 7376 - }, - { - "epoch": 0.9999322171761675, - "step": 7376, - "total_flos": 4.243207408718971e+17, - "train_loss": 1.182327052824797, - "train_runtime": 84350.6068, - "train_samples_per_second": 8.395, - "train_steps_per_second": 0.087 - } - ], - "logging_steps": 1.0, - "max_steps": 7376, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 100, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 4.243207408718971e+17, - "train_batch_size": 6, - "trial_name": null, - "trial_params": null -}