{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 924,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.01, "learning_rate": 0.0, "loss": 1.5234, "step": 2},
    {"epoch": 0.01, "learning_rate": 0.0, "loss": 1.5583, "step": 4},
    {"epoch": 0.02, "learning_rate": 0.0, "loss": 1.5219, "step": 6},
    {"epoch": 0.03, "learning_rate": 0.0, "loss": 1.4819, "step": 8},
    {"epoch": 0.03, "learning_rate": 0.0, "loss": 1.5571, "step": 10},
    {"epoch": 0.04, "learning_rate": 0.0, "loss": 1.5618, "step": 12},
    {"epoch": 0.05, "learning_rate": 0.0, "loss": 1.5302, "step": 14},
    {"epoch": 0.05, "learning_rate": 0.0, "loss": 1.399, "step": 16},
    {"epoch": 0.06, "learning_rate": 2.0000000000000003e-06, "loss": 1.3733, "step": 18},
    {"epoch": 0.06, "learning_rate": 6e-06, "loss": 1.6481, "step": 20},
    {"epoch": 0.07, "learning_rate": 1e-05, "loss": 1.4742, "step": 22},
    {"epoch": 0.08, "learning_rate": 1.4e-05, "loss": 1.3546, "step": 24},
    {"epoch": 0.08, "learning_rate": 1.6000000000000003e-05, "loss": 1.3144, "step": 26},
    {"epoch": 0.09, "learning_rate": 2e-05, "loss": 1.5075, "step": 28},
    {"epoch": 0.1, "learning_rate": 1.9999763715279134e-05, "loss": 1.4149, "step": 30},
    {"epoch": 0.1, "learning_rate": 1.9999054872282624e-05, "loss": 1.228, "step": 32},
    {"epoch": 0.11, "learning_rate": 1.9997873504508222e-05, "loss": 1.3756, "step": 34},
    {"epoch": 0.12, "learning_rate": 1.9996219667783766e-05, "loss": 1.5109, "step": 36},
    {"epoch": 0.12, "learning_rate": 1.9994093440264524e-05, "loss": 1.4623, "step": 38},
    {"epoch": 0.13, "learning_rate": 1.9991494922429505e-05, "loss": 1.4063, "step": 40},
    {"epoch": 0.14, "learning_rate": 1.9988424237076728e-05, "loss": 1.4229, "step": 42},
    {"epoch": 0.14, "learning_rate": 1.9984881529317394e-05, "loss": 1.4154, "step": 44},
    {"epoch": 0.15, "learning_rate": 1.9980866966569053e-05, "loss": 1.3689, "step": 46},
    {"epoch": 0.16, "learning_rate": 1.9976380738547667e-05, "loss": 1.4078, "step": 48},
    {"epoch": 0.16, "learning_rate": 1.9971423057258664e-05, "loss": 1.4648, "step": 50},
    {"epoch": 0.17, "learning_rate": 1.9965994156986913e-05, "loss": 1.3209, "step": 52},
    {"epoch": 0.18, "learning_rate": 1.996310307903586e-05, "loss": 1.5759, "step": 54},
    {"epoch": 0.18, "learning_rate": 1.9956967838282882e-05, "loss": 1.3839, "step": 56},
    {"epoch": 0.19, "learning_rate": 1.9950362061656633e-05, "loss": 1.3, "step": 58},
    {"epoch": 0.19, "learning_rate": 1.9943286061325932e-05, "loss": 1.4333, "step": 60},
    {"epoch": 0.2, "learning_rate": 1.9935740171680926e-05, "loss": 1.4935, "step": 62},
    {"epoch": 0.21, "learning_rate": 1.99277247493173e-05, "loss": 1.3597, "step": 64},
    {"epoch": 0.21, "learning_rate": 1.991924017301943e-05, "loss": 1.5478, "step": 66},
    {"epoch": 0.22, "learning_rate": 1.9910286843742458e-05, "loss": 1.4124, "step": 68},
    {"epoch": 0.23, "learning_rate": 1.9900865184593364e-05, "loss": 1.4587, "step": 70},
    {"epoch": 0.23, "learning_rate": 1.9890975640810973e-05, "loss": 1.4449, "step": 72},
    {"epoch": 0.24, "learning_rate": 1.98806186797449e-05, "loss": 1.4994, "step": 74},
    {"epoch": 0.25, "learning_rate": 1.9869794790833478e-05, "loss": 1.4709, "step": 76},
    {"epoch": 0.25, "learning_rate": 1.9858504485580618e-05, "loss": 1.336, "step": 78},
    {"epoch": 0.26, "learning_rate": 1.984674829753165e-05, "loss": 1.4401, "step": 80},
    {"epoch": 0.27, "learning_rate": 1.9834526782248092e-05, "loss": 1.4636, "step": 82},
    {"epoch": 0.27, "learning_rate": 1.9821840517281415e-05, "loss": 1.5029, "step": 84},
    {"epoch": 0.28, "learning_rate": 1.9808690102145727e-05, "loss": 1.5737, "step": 86},
    {"epoch": 0.29, "learning_rate": 1.9795076158289466e-05, "loss": 1.4333, "step": 88},
    {"epoch": 0.29, "learning_rate": 1.9780999329066018e-05, "loss": 1.472, "step": 90},
    {"epoch": 0.3, "learning_rate": 1.9766460279703312e-05, "loss": 1.5217, "step": 92},
    {"epoch": 0.31, "learning_rate": 1.9751459697272396e-05, "loss": 1.3852, "step": 94},
    {"epoch": 0.31, "learning_rate": 1.973599829065495e-05, "loss": 1.4694, "step": 96},
    {"epoch": 0.32, "learning_rate": 1.9720076790509814e-05, "loss": 1.2218, "step": 98},
    {"epoch": 0.32, "learning_rate": 1.970369594923842e-05, "loss": 1.3959, "step": 100},
    {"epoch": 0.33, "learning_rate": 1.968685654094928e-05, "loss": 1.2564, "step": 102},
    {"epoch": 0.34, "learning_rate": 1.9669559361421366e-05, "loss": 1.3412, "step": 104},
    {"epoch": 0.34, "learning_rate": 1.9651805228066522e-05, "loss": 1.5168, "step": 106},
    {"epoch": 0.35, "learning_rate": 1.9633594979890842e-05, "loss": 1.4346, "step": 108},
    {"epoch": 0.36, "learning_rate": 1.9614929477455007e-05, "loss": 1.3223, "step": 110},
    {"epoch": 0.36, "learning_rate": 1.959580960283362e-05, "loss": 1.4993, "step": 112},
    {"epoch": 0.37, "learning_rate": 1.9576236259573537e-05, "loss": 1.5611, "step": 114},
    {"epoch": 0.38, "learning_rate": 1.9556210372651134e-05, "loss": 1.4156, "step": 116},
    {"epoch": 0.38, "learning_rate": 1.9535732888428644e-05, "loss": 1.3979, "step": 118},
    {"epoch": 0.39, "learning_rate": 1.951480477460939e-05, "loss": 1.3124, "step": 120},
    {"epoch": 0.4, "learning_rate": 1.949342702019208e-05, "loss": 1.4111, "step": 122},
    {"epoch": 0.4, "learning_rate": 1.9471600635424058e-05, "loss": 1.4656, "step": 124},
    {"epoch": 0.41, "learning_rate": 1.9449326651753572e-05, "loss": 1.2345, "step": 126},
    {"epoch": 0.42, "learning_rate": 1.9426606121781028e-05, "loss": 1.3033, "step": 128},
    {"epoch": 0.42, "learning_rate": 1.940344011920924e-05, "loss": 1.3654, "step": 130},
    {"epoch": 0.43, "learning_rate": 1.93798297387927e-05, "loss": 1.2962, "step": 132},
    {"epoch": 0.44, "learning_rate": 1.9355776096285833e-05, "loss": 1.5612, "step": 134},
    {"epoch": 0.44, "learning_rate": 1.9331280328390284e-05, "loss": 1.4371, "step": 136},
    {"epoch": 0.45, "learning_rate": 1.9306343592701187e-05, "loss": 1.5658, "step": 138},
    {"epoch": 0.45, "learning_rate": 1.928096706765247e-05, "loss": 1.3471, "step": 140},
    {"epoch": 0.46, "learning_rate": 1.925515195246116e-05, "loss": 1.3567, "step": 142},
    {"epoch": 0.47, "learning_rate": 1.9228899467070713e-05, "loss": 1.3916, "step": 144},
    {"epoch": 0.47, "learning_rate": 1.9202210852093367e-05, "loss": 1.4113, "step": 146},
    {"epoch": 0.48, "learning_rate": 1.917508736875151e-05, "loss": 1.4357, "step": 148},
    {"epoch": 0.49, "learning_rate": 1.914753029881808e-05, "loss": 1.4278, "step": 150},
    {"epoch": 0.49, "learning_rate": 1.9119540944555992e-05, "loss": 1.2472, "step": 152},
    {"epoch": 0.5, "learning_rate": 1.9091120628656597e-05, "loss": 1.3739, "step": 154},
    {"epoch": 0.51, "learning_rate": 1.906227069417718e-05, "loss": 1.3371, "step": 156},
    {"epoch": 0.51, "learning_rate": 1.9032992504477475e-05, "loss": 1.362, "step": 158},
    {"epoch": 0.52, "learning_rate": 1.9003287443155274e-05, "loss": 1.3747, "step": 160},
    {"epoch": 0.53, "learning_rate": 1.8973156913980988e-05, "loss": 1.428, "step": 162},
    {"epoch": 0.53, "learning_rate": 1.8942602340831356e-05, "loss": 1.4416, "step": 164},
    {"epoch": 0.54, "learning_rate": 1.891162516762214e-05, "loss": 1.3246, "step": 166},
    {"epoch": 0.55, "learning_rate": 1.8880226858239878e-05, "loss": 1.3395, "step": 168},
    {"epoch": 0.55, "learning_rate": 1.8848408896472728e-05, "loss": 1.4583, "step": 170},
    {"epoch": 0.56, "learning_rate": 1.8816172785940335e-05, "loss": 1.36, "step": 172},
    {"epoch": 0.56, "learning_rate": 1.878352005002277e-05, "loss": 1.4412, "step": 174},
    {"epoch": 0.57, "learning_rate": 1.875045223178855e-05, "loss": 1.3123, "step": 176},
    {"epoch": 0.58, "learning_rate": 1.871697089392172e-05, "loss": 1.4006, "step": 178},
    {"epoch": 0.58, "learning_rate": 1.8683077618647997e-05, "loss": 1.2854, "step": 180},
    {"epoch": 0.59, "learning_rate": 1.8648774007659986e-05, "loss": 1.5193, "step": 182},
    {"epoch": 0.6, "learning_rate": 1.8614061682041532e-05, "loss": 1.2914, "step": 184},
    {"epoch": 0.6, "learning_rate": 1.8578942282191057e-05, "loss": 1.3192, "step": 186},
    {"epoch": 0.61, "learning_rate": 1.8543417467744085e-05, "loss": 1.428, "step": 188},
    {"epoch": 0.62, "learning_rate": 1.8507488917494788e-05, "loss": 1.4875, "step": 190},
    {"epoch": 0.62, "learning_rate": 1.847115832931666e-05, "loss": 1.6362, "step": 192},
    {"epoch": 0.63, "learning_rate": 1.8434427420082278e-05, "loss": 1.2374, "step": 194},
    {"epoch": 0.64, "learning_rate": 1.8397297925582167e-05, "loss": 1.5176, "step": 196},
    {"epoch": 0.64, "learning_rate": 1.8359771600442778e-05, "loss": 1.3912, "step": 198},
    {"epoch": 0.65, "learning_rate": 1.832185021804356e-05, "loss": 1.5812, "step": 200},
    {"epoch": 0.66, "learning_rate": 1.828353557043317e-05, "loss": 1.3373, "step": 202},
    {"epoch": 0.66, "learning_rate": 1.8244829468244768e-05, "loss": 1.3064, "step": 204},
    {"epoch": 0.67, "learning_rate": 1.8205733740610462e-05, "loss": 1.5292, "step": 206},
    {"epoch": 0.68, "learning_rate": 1.8166250235074874e-05, "loss": 1.471, "step": 208},
    {"epoch": 0.68, "learning_rate": 1.8126380817507817e-05, "loss": 1.4468, "step": 210},
    {"epoch": 0.69, "learning_rate": 1.8086127372016132e-05, "loss": 1.2668, "step": 212},
    {"epoch": 0.69, "learning_rate": 1.804549180085465e-05, "loss": 1.416, "step": 214},
    {"epoch": 0.7, "learning_rate": 1.8004476024336284e-05, "loss": 1.5767, "step": 216},
    {"epoch": 0.71, "learning_rate": 1.7963081980741298e-05, "loss": 1.2952, "step": 218},
    {"epoch": 0.71, "learning_rate": 1.79213116262257e-05, "loss": 1.5002, "step": 220},
    {"epoch": 0.72, "learning_rate": 1.78791669347288e-05, "loss": 1.2971, "step": 222},
    {"epoch": 0.73, "learning_rate": 1.7836649897879924e-05, "loss": 1.2372, "step": 224},
    {"epoch": 0.73, "learning_rate": 1.779376252490432e-05, "loss": 1.3897, "step": 226},
    {"epoch": 0.74, "learning_rate": 1.7750506842528174e-05, "loss": 1.4017, "step": 228},
    {"epoch": 0.75, "learning_rate": 1.7706884894882853e-05, "loss": 1.4475, "step": 230},
    {"epoch": 0.75, "learning_rate": 1.76628987434083e-05, "loss": 1.4873, "step": 232},
    {"epoch": 0.76, "learning_rate": 1.7618550466755626e-05, "loss": 1.4008, "step": 234},
    {"epoch": 0.77, "learning_rate": 1.7573842160688858e-05, "loss": 1.3481, "step": 236},
    {"epoch": 0.77, "learning_rate": 1.752877593798592e-05, "loss": 1.4984, "step": 238},
    {"epoch": 0.78, "learning_rate": 1.748335392833879e-05, "loss": 1.5237, "step": 240},
    {"epoch": 0.79, "learning_rate": 1.7437578278252834e-05, "loss": 1.4043, "step": 242},
    {"epoch": 0.79, "learning_rate": 1.7391451150945395e-05, "loss": 1.3393, "step": 244},
    {"epoch": 0.8, "learning_rate": 1.7344974726243555e-05, "loss": 1.3782, "step": 246},
    {"epoch": 0.81, "learning_rate": 1.729815120048112e-05, "loss": 1.3935, "step": 248},
    {"epoch": 0.81, "learning_rate": 1.7250982786394833e-05, "loss": 1.4633, "step": 250},
    {"epoch": 0.82, "learning_rate": 1.7203471713019805e-05, "loss": 1.2838, "step": 252},
    {"epoch": 0.82, "learning_rate": 1.7155620225584176e-05, "loss": 1.3142, "step": 254},
    {"epoch": 0.83, "learning_rate": 1.7107430585403024e-05, "loss": 1.2973, "step": 256},
    {"epoch": 0.84, "learning_rate": 1.7058905069771477e-05, "loss": 1.4129, "step": 258},
    {"epoch": 0.84, "learning_rate": 1.7010045971857117e-05, "loss": 1.3484, "step": 260},
    {"epoch": 0.85, "learning_rate": 1.696085560059162e-05, "loss": 1.5596, "step": 262},
    {"epoch": 0.86, "learning_rate": 1.6911336280561595e-05, "loss": 1.2894, "step": 264},
    {"epoch": 0.86, "learning_rate": 1.6861490351898798e-05, "loss": 1.2303, "step": 266},
    {"epoch": 0.87, "learning_rate": 1.6811320170169496e-05, "loss": 1.5192, "step": 268},
    {"epoch": 0.88, "learning_rate": 1.676082810626316e-05, "loss": 1.3415, "step": 270},
    {"epoch": 0.88, "learning_rate": 1.6710016546280443e-05, "loss": 1.3893, "step": 272},
    {"epoch": 0.89, "learning_rate": 1.6658887891420393e-05, "loss": 1.4108, "step": 274},
    {"epoch": 0.9, "learning_rate": 1.6607444557866995e-05, "loss": 1.401, "step": 276},
    {"epoch": 0.9, "learning_rate": 1.6555688976675e-05, "loss": 1.5258, "step": 278},
    {"epoch": 0.91, "learning_rate": 1.6503623593655013e-05, "loss": 1.4698, "step": 280},
    {"epoch": 0.92, "learning_rate": 1.645125086925794e-05, "loss": 1.3611, "step": 282},
    {"epoch": 0.92, "learning_rate": 1.6398573278458682e-05, "loss": 1.2556, "step": 284},
    {"epoch": 0.93, "learning_rate": 1.6345593310639212e-05, "loss": 1.3739, "step": 286},
    {"epoch": 0.94, "learning_rate": 1.6292313469470917e-05, "loss": 1.3527, "step": 288},
    {"epoch": 0.94, "learning_rate": 1.6238736272796267e-05, "loss": 1.358, "step": 290},
    {"epoch": 0.95, "learning_rate": 1.6184864252509866e-05, "loss": 1.3874, "step": 292},
    {"epoch": 0.95, "learning_rate": 1.6130699954438756e-05, "loss": 1.4507, "step": 294},
    {"epoch": 0.96, "learning_rate": 1.607624593822216e-05, "loss": 1.4701, "step": 296},
    {"epoch": 0.97, "learning_rate": 1.602150477719047e-05, "loss": 1.4038, "step": 298},
    {"epoch": 0.97, "learning_rate": 1.5966479058243686e-05, "loss": 1.3851, "step": 300},
    {"epoch": 0.98, "learning_rate": 1.5911171381729135e-05, "loss": 1.3512, "step": 302},
    {"epoch": 0.99, "learning_rate": 1.5855584361318594e-05, "loss": 1.3924, "step": 304},
    {"epoch": 0.99, "learning_rate": 1.579972062388479e-05, "loss": 1.3825, "step": 306},
    {"epoch": 1.0, "learning_rate": 1.5743582809377237e-05, "loss": 1.3565, "step": 308},
    {"epoch": 1.01, "learning_rate": 1.5687173570697505e-05, "loss": 0.9095, "step": 310},
    {"epoch": 1.01, "learning_rate": 1.5630495573573837e-05, "loss": 1.0694, "step": 312},
    {"epoch": 1.02, "learning_rate": 1.557355149643518e-05, "loss": 1.133, "step": 314},
    {"epoch": 1.03, "learning_rate": 1.5516344030284605e-05, "loss": 0.9732, "step": 316},
    {"epoch": 1.03, "learning_rate": 1.545887587857215e-05, "loss": 0.9751, "step": 318},
    {"epoch": 1.04, "learning_rate": 1.540114975706705e-05, "loss": 0.9463, "step": 320},
    {"epoch": 1.05, "learning_rate": 1.5343168393729406e-05, "loss": 0.9389, "step": 322},
    {"epoch": 1.05, "learning_rate": 1.528493452858127e-05, "loss": 0.8876, "step": 324},
    {"epoch": 1.06, "learning_rate": 1.522645091357716e-05, "loss": 0.8831, "step": 326},
    {"epoch": 1.06, "learning_rate": 1.5167720312473995e-05, "loss": 0.8689, "step": 328},
    {"epoch": 1.07, "learning_rate": 1.510874550070052e-05, "loss": 0.8803, "step": 330},
    {"epoch": 1.08, "learning_rate": 1.504952926522612e-05, "loss": 0.8584, "step": 332},
    {"epoch": 1.08, "learning_rate": 1.4990074404429131e-05, "loss": 0.8771, "step": 334},
    {"epoch": 1.09, "learning_rate": 1.493038372796459e-05, "loss": 0.8431, "step": 336},
    {"epoch": 1.1, "learning_rate": 1.4870460056631464e-05, "loss": 0.8739, "step": 338},
    {"epoch": 1.1, "learning_rate": 1.4810306222239337e-05, "loss": 0.9607, "step": 340},
    {"epoch": 1.11, "learning_rate": 1.4749925067474612e-05, "loss": 0.9771, "step": 342},
    {"epoch": 1.12, "learning_rate": 1.4689319445766144e-05, "loss": 0.7807, "step": 344},
    {"epoch": 1.12, "learning_rate": 1.4628492221150411e-05, "loss": 1.0123, "step": 346},
    {"epoch": 1.13, "learning_rate": 1.4567446268136179e-05, "loss": 0.9227, "step": 348},
    {"epoch": 1.14, "learning_rate": 1.4506184471568635e-05, "loss": 0.9247, "step": 350},
    {"epoch": 1.14, "learning_rate": 1.4444709726493085e-05, "loss": 0.8943, "step": 352},
    {"epoch": 1.15, "learning_rate": 1.438302493801812e-05, "loss": 0.855, "step": 354},
    {"epoch": 1.16, "learning_rate": 1.4321133021178349e-05, "loss": 0.9835, "step": 356},
    {"epoch": 1.16, "learning_rate": 1.4259036900796628e-05, "loss": 0.8994, "step": 358},
    {"epoch": 1.17, "learning_rate": 1.4196739511345851e-05, "loss": 0.8497, "step": 360},
    {"epoch": 1.18, "learning_rate": 1.4134243796810277e-05, "loss": 0.9333, "step": 362},
    {"epoch": 1.18, "learning_rate": 1.40715527105464e-05, "loss": 0.935, "step": 364},
    {"epoch": 1.19, "learning_rate": 1.4008669215143376e-05, "loss": 0.7321, "step": 366},
    {"epoch": 1.19, "learning_rate": 1.3945596282283043e-05, "loss": 0.9281, "step": 368},
    {"epoch": 1.2, "learning_rate": 1.3882336892599469e-05, "loss": 0.9498, "step": 370},
    {"epoch": 1.21, "learning_rate": 1.3818894035538093e-05, "loss": 0.8647, "step": 372},
    {"epoch": 1.21, "learning_rate": 1.3755270709214476e-05, "loss": 1.0073, "step": 374},
    {"epoch": 1.22, "learning_rate": 1.3691469920272603e-05, "loss": 0.9846, "step": 376},
    {"epoch": 1.23, "learning_rate": 1.362749468374278e-05, "loss": 0.8693, "step": 378},
    {"epoch": 1.23, "learning_rate": 1.3563348022899203e-05, "loss": 0.9208, "step": 380},
    {"epoch": 1.24, "learning_rate": 1.3499032969117036e-05, "loss": 0.9223, "step": 382},
    {"epoch": 1.25, "learning_rate": 1.3434552561729181e-05, "loss": 0.9236, "step": 384},
    {"epoch": 1.25, "learning_rate": 1.336990984788266e-05, "loss": 0.9974, "step": 386},
    {"epoch": 1.26, "learning_rate": 1.3305107882394587e-05, "loss": 0.8629, "step": 388},
    {"epoch": 1.27, "learning_rate": 1.3240149727607827e-05, "loss": 0.9514, "step": 390},
    {"epoch": 1.27, "learning_rate": 1.3175038453246274e-05, "loss": 0.982, "step": 392},
    {"epoch": 1.28, "learning_rate": 1.3109777136269788e-05, "loss": 0.9342, "step": 394},
    {"epoch": 1.29, "learning_rate": 1.3044368860728781e-05, "loss": 0.7236, "step": 396},
    {"epoch": 1.29, "learning_rate": 1.2978816717618479e-05, "loss": 0.8384, "step": 398},
    {"epoch": 1.3, "learning_rate": 1.2913123804732848e-05, "loss": 0.7538, "step": 400},
    {"epoch": 1.31, "learning_rate": 1.2847293226518207e-05, "loss": 0.9229, "step": 402},
    {"epoch": 1.31, "learning_rate": 1.2781328093926514e-05, "loss": 0.8993, "step": 404},
    {"epoch": 1.32, "learning_rate": 1.2715231524268357e-05, "loss": 0.9106, "step": 406},
    {"epoch": 1.32, "learning_rate": 1.264900664106564e-05, "loss": 0.939, "step": 408},
    {"epoch": 1.33, "learning_rate": 1.258265657390397e-05, "loss": 0.9626, "step": 410},
    {"epoch": 1.34, "learning_rate": 1.2516184458284766e-05, "loss": 0.8662, "step": 412},
    {"epoch": 1.34, "learning_rate": 1.244959343547709e-05, "loss": 0.8466, "step": 414},
    {"epoch": 1.35, "learning_rate": 1.2382886652369183e-05, "loss": 0.8591, "step": 416},
    {"epoch": 1.36, "learning_rate": 1.2316067261319773e-05, "loss": 0.7773, "step": 418},
    {"epoch": 1.36, "learning_rate": 1.2249138420009095e-05, "loss": 0.8936, "step": 420},
    {"epoch": 1.37, "learning_rate": 1.218210329128966e-05, "loss": 0.8882, "step": 422},
    {"epoch": 1.38, "learning_rate": 1.2114965043036808e-05, "loss": 0.9398, "step": 424},
    {"epoch": 1.38, "learning_rate": 1.2047726847998993e-05, "loss": 0.964, "step": 426},
    {"epoch": 1.39, "learning_rate": 1.1980391883647832e-05, "loss": 1.0241, "step": 428},
    {"epoch": 1.4, "learning_rate": 1.1912963332027987e-05, "loss": 1.0854, "step": 430},
    {"epoch": 1.4, "learning_rate": 1.1845444379606757e-05, "loss": 0.9673, "step": 432},
    {"epoch": 1.41, "learning_rate": 1.1777838217123498e-05, "loss": 0.9455, "step": 434},
    {"epoch": 1.42, "learning_rate": 1.1710148039438867e-05, "loss": 0.9575, "step": 436},
    {"epoch": 1.42, "learning_rate": 1.1642377045383806e-05, "loss": 0.8251, "step": 438},
    {"epoch": 1.43, "learning_rate": 1.1574528437608404e-05, "loss": 0.7966, "step": 440},
    {"epoch": 1.44, "learning_rate": 1.150660542243052e-05, "loss": 0.9938, "step": 442},
    {"epoch": 1.44, "learning_rate": 1.1438611209684296e-05, "loss": 0.9175, "step": 444},
    {"epoch": 1.45, "learning_rate": 1.1370549012568452e-05, "loss": 0.832, "step": 446},
    {"epoch": 1.45, "learning_rate": 1.1302422047494432e-05, "loss": 0.9767, "step": 448},
    {"epoch": 1.46, "learning_rate": 1.1234233533934425e-05, "loss": 0.859, "step": 450},
    {"epoch": 1.47, "learning_rate": 1.1165986694269208e-05, "loss": 0.9282, "step": 452},
    {"epoch": 1.47, "learning_rate": 1.109768475363587e-05, "loss": 0.9586, "step": 454},
    {"epoch": 1.48, "learning_rate": 1.1029330939775411e-05, "loss": 0.9444, "step": 456},
    {"epoch": 1.49, "learning_rate": 1.0960928482880194e-05, "loss": 0.972, "step": 458},
    {"epoch": 1.49, "learning_rate": 1.0892480615441308e-05, "loss": 0.8934, "step": 460},
    {"epoch": 1.5, "learning_rate": 1.08239905720958e-05, "loss": 0.9087, "step": 462},
    {"epoch": 1.51, "learning_rate": 1.0755461589473829e-05, "loss": 0.8701, "step": 464},
    {"epoch": 1.51, "learning_rate": 1.0686896906045695e-05, "loss": 1.0226, "step": 466},
    {"epoch": 1.52, "learning_rate": 1.0618299761968822e-05, "loss": 0.8852, "step": 468},
    {"epoch": 1.53, "learning_rate": 1.0549673398934615e-05, "loss": 0.8311, "step": 470},
    {"epoch": 1.53, "learning_rate": 1.0481021060015282e-05, "loss": 0.8665, "step": 472},
    {"epoch": 1.54, "learning_rate": 1.0412345989510569e-05, "loss": 0.8921, "step": 474},
    {"epoch": 1.55, "learning_rate": 1.0343651432794446e-05, "loss": 0.9216, "step": 476},
    {"epoch": 1.55, "learning_rate": 1.0274940636161752e-05, "loss": 0.9414, "step": 478},
    {"epoch": 1.56, "learning_rate": 1.0206216846674764e-05, "loss": 0.9655, "step": 480},
    {"epoch": 1.56, "learning_rate": 1.013748331200976e-05, "loss": 0.9586, "step": 482},
    {"epoch": 1.57, "learning_rate": 1.0068743280303563e-05, "loss": 0.9755, "step": 484},
    {"epoch": 1.58, "learning_rate": 1e-05, "loss": 0.888, "step": 486},
    {"epoch": 1.58, "learning_rate": 9.931256719696442e-06, "loss": 0.8819, "step": 488},
    {"epoch": 1.59, "learning_rate": 9.86251668799024e-06, "loss": 0.8599, "step": 490},
    {"epoch": 1.6, "learning_rate": 9.79378315332524e-06, "loss": 0.9035, "step": 492},
    {"epoch": 1.6, "learning_rate": 9.725059363838251e-06, "loss": 0.8372, "step": 494},
    {"epoch": 1.61, "learning_rate": 9.656348567205556e-06, "loss": 0.8864, "step": 496},
    {"epoch": 1.62, "learning_rate": 9.587654010489435e-06, "loss": 0.9798, "step": 498},
    {"epoch": 1.62, "learning_rate": 9.518978939984721e-06, "loss": 0.8865, "step": 500},
    {"epoch": 1.63, "learning_rate": 9.450326601065387e-06, "loss": 0.8469, "step": 502},
    {"epoch": 1.64, "learning_rate": 9.381700238031178e-06, "loss": 0.9409, "step": 504},
    {"epoch": 1.64, "learning_rate": 9.313103093954306e-06, "loss": 0.8823, "step": 506},
    {"epoch": 1.65, "learning_rate": 9.244538410526176e-06, "loss": 0.8595, "step": 508},
    {"epoch": 1.66, "learning_rate": 9.1760094279042e-06, "loss": 0.9537, "step": 510},
    {"epoch": 1.66, "learning_rate": 9.107519384558695e-06, "loss": 0.9716, "step": 512},
    {"epoch": 1.67, "learning_rate": 9.039071517119808e-06, "loss": 0.8419, "step": 514},
    {"epoch": 1.68, "learning_rate": 8.970669060224595e-06, "loss": 0.757, "step": 516},
    {"epoch": 1.68, "learning_rate": 8.902315246364131e-06, "loss": 0.8714, "step": 518},
    {"epoch": 1.69, "learning_rate": 8.834013305730795e-06, "loss": 0.7869, "step": 520},
    {"epoch": 1.69, "learning_rate": 8.76576646606558e-06, "loss": 1.0176, "step": 522},
    {"epoch": 1.7, "learning_rate": 8.697577952505568e-06, "loss": 0.8037, "step": 524},
    {"epoch": 1.71, "learning_rate": 8.62945098743155e-06, "loss": 0.9673, "step": 526},
    {"epoch": 1.71, "learning_rate": 8.561388790315707e-06, "loss": 0.817, "step": 528},
    {"epoch": 1.72, "learning_rate": 8.493394577569482e-06, "loss": 0.8556, "step": 530},
    {"epoch": 1.73, "learning_rate": 8.425471562391601e-06, "loss": 0.858, "step": 532},
    {"epoch": 1.73, "learning_rate": 8.357622954616197e-06, "loss": 0.9486, "step": 534},
    {"epoch": 1.74, "learning_rate": 8.289851960561137e-06, "loss": 0.89, "step": 536},
    {"epoch": 1.75, "learning_rate": 8.222161782876502e-06, "loss": 0.861, "step": 538},
    {"epoch": 1.75, "learning_rate": 8.154555620393248e-06, "loss": 0.8839, "step": 540},
    {"epoch": 1.76, "learning_rate": 8.087036667972018e-06, "loss": 0.9137, "step": 542},
    {"epoch": 1.77, "learning_rate": 8.019608116352168e-06, "loss": 1.0312, "step": 544},
    {"epoch": 1.77, "learning_rate": 7.95227315200101e-06, "loss": 0.8959, "step": 546},
    {"epoch": 1.78, "learning_rate": 7.885034956963195e-06, "loss": 0.8568, "step": 548},
    {"epoch": 1.79, "learning_rate": 7.81789670871034e-06, "loss": 0.8631, "step": 550},
    {"epoch": 1.79, "learning_rate": 7.750861579990909e-06, "loss": 0.8424, "step": 552},
    {"epoch": 1.8, "learning_rate": 7.68393273868023e-06, "loss": 1.04, "step": 554},
    {"epoch": 1.81, "learning_rate": 7.6171133476308216e-06, "loss": 0.9083, "step": 556},
    {"epoch": 1.81, "learning_rate": 7.550406564522911e-06, "loss": 0.9353, "step": 558},
    {"epoch": 1.82, "learning_rate": 7.483815541715236e-06, "loss": 0.925, "step": 560},
    {"epoch": 1.82, "learning_rate": 7.417343426096034e-06, "loss": 0.9781, "step": 562},
    {"epoch": 1.83, "learning_rate": 7.350993358934361e-06, "loss": 0.974, "step": 564},
    {"epoch": 1.84, "learning_rate": 7.284768475731646e-06, "loss": 0.9654, "step": 566},
    {"epoch": 1.84, "learning_rate": 7.2186719060734915e-06, "loss": 0.7186, "step": 568},
    {"epoch": 1.85, "learning_rate": 7.1527067734817975e-06, "loss": 0.8643, "step": 570},
    {"epoch": 1.86, "learning_rate": 7.086876195267154e-06, "loss": 0.9238, "step": 572},
    {"epoch": 1.86, "learning_rate": 7.021183282381525e-06, "loss": 0.8739, "step": 574},
    {"epoch": 1.87, "learning_rate": 6.955631139271223e-06, "loss": 0.8457, "step": 576},
    {"epoch": 1.88, "learning_rate": 6.890222863730214e-06, "loss": 1.0443, "step": 578},
    {"epoch": 1.88, "learning_rate": 6.824961546753728e-06, "loss": 0.9861, "step": 580},
    {"epoch": 1.89, "learning_rate": 6.7598502723921766e-06, "loss": 0.8405, "step": 582},
    {"epoch": 1.9, "learning_rate": 6.694892117605416e-06, "loss": 1.0291, "step": 584},
    {"epoch": 1.9, "learning_rate": 6.630090152117342e-06, "loss": 0.8729, "step": 586},
    {"epoch": 1.91, "learning_rate": 6.56544743827082e-06, "loss": 0.9119, "step": 588},
    {"epoch": 1.92, "learning_rate": 6.50096703088297e-06, "loss": 0.7965, "step": 590},
    {"epoch": 1.92, "learning_rate": 6.436651977100801e-06, "loss": 0.9044, "step": 592},
    {"epoch": 1.93, "learning_rate": 6.3725053162572225e-06, "loss": 0.9219, "step": 594},
    {"epoch": 1.94, "learning_rate": 6.308530079727404e-06, "loss": 0.8538, "step": 596},
    {"epoch": 1.94, "learning_rate": 6.244729290785525e-06, "loss": 0.8807, "step": 598},
    {"epoch": 1.95, "learning_rate": 6.181105964461908e-06, "loss": 0.8578, "step": 600},
    {"epoch": 1.95, "learning_rate": 6.1176631074005365e-06, "loss": 0.8694, "step": 602},
    {"epoch": 1.96, "learning_rate": 6.054403717716959e-06, "loss": 0.7783, "step": 604},
    {"epoch": 1.97, "learning_rate": 5.991330784856626e-06, "loss": 0.9314, "step": 606},
    {"epoch": 1.97, "learning_rate": 5.9284472894536045e-06, "loss": 0.8734, "step": 608},
    {"epoch": 1.98, "learning_rate": 5.8657562031897255e-06, "loss": 0.8809, "step": 610},
    {"epoch": 1.99, "learning_rate": 5.80326048865415e-06, "loss": 0.8615, "step": 612},
    {"epoch": 1.99, "learning_rate": 5.740963099203375e-06, "loss": 1.0366, "step": 614},
    {"epoch": 2.0, "learning_rate": 5.678866978821655e-06, "loss": 0.9718, "step": 616},
    {"epoch": 2.01, "learning_rate": 5.6169750619818796e-06, "loss": 0.6932, "step": 618},
    {"epoch": 2.01, "learning_rate": 5.555290273506919e-06, "loss": 0.5196, "step": 620},
    {"epoch": 2.02, "learning_rate": 5.493815528431369e-06, "loss": 0.6037, "step": 622},
    {"epoch": 2.03, "learning_rate": 5.432553731863823e-06, "loss": 0.5271, "step": 624},
    {"epoch": 2.03, "learning_rate": 5.3715077788495895e-06, "loss": 0.5792, "step": 626},
    {"epoch": 2.04, "learning_rate": 5.31068055423386e-06, "loss": 0.5071, "step": 628},
    {"epoch": 2.05, "learning_rate": 5.250074932525392e-06, "loss": 0.5189, "step": 630},
    {"epoch": 2.05, "learning_rate": 5.189693777760665e-06, "loss": 0.6438, "step": 632},
    {"epoch": 2.06, "learning_rate": 5.129539943368542e-06, "loss": 0.576, "step": 634},
    {"epoch": 2.06, "learning_rate": 5.069616272035414e-06, "loss": 0.511, "step": 636},
    {"epoch": 2.07, "learning_rate": 5.0099255955708704e-06, "loss": 0.6052, "step": 638},
    {"epoch": 2.08, "learning_rate": 4.950470734773881e-06, "loss": 0.4889, "step": 640},
    {"epoch": 2.08, "learning_rate": 4.891254499299484e-06, "loss": 0.5586, "step": 642},
    {"epoch": 2.09, "learning_rate": 4.832279687526009e-06, "loss": 0.5663, "step": 644},
    {"epoch": 2.1, "learning_rate": 4.773549086422846e-06, "loss": 0.5941, "step": 646},
    {"epoch": 2.1, "learning_rate": 4.715065471418731e-06, "loss": 0.7716, "step": 648},
    {"epoch": 2.11, "learning_rate": 4.656831606270596e-06, "loss": 0.5417, "step": 650},
    {"epoch": 2.12, "learning_rate": 4.598850242932951e-06, "loss": 0.564, "step": 652},
    {"epoch": 2.12, "learning_rate": 4.541124121427851e-06, "loss": 0.5382, "step": 654},
    {"epoch": 2.13, "learning_rate": 4.483655969715396e-06, "loss": 0.5737, "step": 656},
    {"epoch": 2.14, "learning_rate": 4.426448503564823e-06, "loss": 0.527, "step": 658},
    {"epoch": 2.14, "learning_rate": 4.369504426426165e-06, "loss": 0.6137, "step": 660},
    {"epoch": 2.15, "learning_rate": 4.312826429302499e-06, "loss": 0.7163, "step": 662},
    {"epoch": 2.16, "learning_rate": 4.256417190622768e-06, "loss": 0.4676, "step": 664},
    {"epoch": 2.16, "learning_rate": 4.2002793761152126e-06, "loss": 0.5177, "step": 666},
    {"epoch": 2.17, "learning_rate": 4.144415638681407e-06, "loss": 0.5405, "step": 668},
    {"epoch": 2.18, "learning_rate": 4.088828618270868e-06, "loss": 0.5134, "step": 670},
    {"epoch": 2.18, "learning_rate": 4.033520941756316e-06, "loss": 0.5176, "step": 672},
    {"epoch": 2.19, "learning_rate": 3.978495222809533e-06, "loss": 0.4865, "step": 674},
    {"epoch": 2.19, "learning_rate": 3.923754061777846e-06, "loss": 0.6041, "step": 676},
    {"epoch": 2.2, "learning_rate": 3.869300045561244e-06, "loss": 0.5154, "step": 678},
    {"epoch": 2.21, "learning_rate": 3.815135747490138e-06, "loss": 0.5103, "step": 680},
    {"epoch": 2.21, "learning_rate": 3.761263727203733e-06, "loss": 0.5287, "step": 682},
    {"epoch": 2.22, "learning_rate": 3.707686530529088e-06, "loss": 0.5508, "step": 684},
    {"epoch": 2.23, "learning_rate": 3.65440668936079e-06, "loss": 0.5387, "step": 686},
    {"epoch": 2.23, "learning_rate": 3.6014267215413233e-06, "loss": 0.5759, "step": 688},
    {"epoch": 2.24, "learning_rate": 3.5487491307420675e-06, "loss": 0.4253, "step": 690},
    {"epoch": 2.25, "learning_rate": 3.4963764063449858e-06, "loss": 0.5185, "step": 692},
    {"epoch": 2.25, "learning_rate": 3.444311023325e-06, "loss": 0.5245, "step": 694},
    {"epoch": 2.26, "learning_rate": 3.3925554421330044e-06, "loss": 0.527, "step": 696},
    {"epoch": 2.27, "learning_rate": 3.3411121085796106e-06, "loss": 0.4561, "step": 698},
    {"epoch": 2.27, "learning_rate": 3.2899834537195594e-06, "loss": 0.5389, "step": 700},
    {"epoch": 2.28, "learning_rate": 3.2391718937368397e-06, "loss": 0.5283, "step": 702},
    {"epoch": 2.29, "learning_rate": 3.1886798298305065e-06, "loss": 0.5664, "step": 704},
    {"epoch": 2.29, "learning_rate": 3.1385096481012e-06, "loss": 0.5734, "step": 706},
    {"epoch": 2.3, "learning_rate": 3.088663719438404e-06, "loss": 0.5811, "step": 708},
    {"epoch": 2.31, "learning_rate": 3.039144399408387e-06, "loss": 0.5713, "step": 710},
    {"epoch": 2.31, "learning_rate": 2.989954028142882e-06, "loss": 0.5543, "step": 712},
    {"epoch": 2.32, "learning_rate": 2.9410949302285262e-06, "loss": 0.5914, "step": 714},
    {"epoch": 2.32, "learning_rate": 2.8925694145969798e-06, "loss": 0.5784, "step": 716},
    {"epoch": 2.33, "learning_rate": 2.844379774415822e-06, "loss": 0.5684, "step": 718},
    {"epoch": 2.34, "learning_rate": 2.796528286980197e-06, "loss": 0.5311, "step": 720},
    {"epoch": 2.34, "learning_rate": 2.749017213605172e-06, "loss": 0.5826, "step": 722},
    {"epoch": 2.35, "learning_rate": 2.7018487995188845e-06, "loss": 0.5593, "step": 724},
    {"epoch": 2.36, "learning_rate": 2.6550252737564475e-06, "loss": 0.5497, "step": 726},
    {"epoch": 2.36, "learning_rate": 2.6085488490546075e-06, "loss": 0.5192, "step": 728},
    {"epoch": 2.37, "learning_rate": 2.562421721747169e-06, "loss": 0.5395, "step": 730},
    {"epoch": 2.38, "learning_rate": 2.5166460716612106e-06, "loss": 0.6872, "step": 732},
    {"epoch": 2.38, "learning_rate": 2.4712240620140815e-06, "loss": 0.5065, "step": 734},
    {"epoch": 2.39, "learning_rate": 2.4261578393111463e-06, "loss": 0.5618, "step": 736},
    {"epoch": 2.4, "learning_rate": 2.381449533244379e-06, "loss": 0.5709, "step": 738},
    {"epoch": 2.4, "learning_rate": 2.3371012565916995e-06, "loss": 0.5566, "step": 740},
    {"epoch": 2.41, "learning_rate": 2.2931151051171486e-06, "loss": 0.559, "step": 742},
    {"epoch": 2.42, "learning_rate": 2.249493157471828e-06, "loss": 0.4974, "step": 744},
    {"epoch": 2.42, "learning_rate": 2.2062374750956815e-06, "loss": 0.5811, "step": 746},
    {"epoch": 2.43, "learning_rate": 2.1633501021200775e-06, "loss": 0.5748, "step": 748},
    {"epoch": 2.44, "learning_rate": 2.120833065271206e-06, "loss": 0.4701, "step": 750},
    {"epoch": 2.44, "learning_rate": 2.0786883737743014e-06, "loss": 0.5965, "step": 752},
    {"epoch": 2.45, "learning_rate": 2.0369180192587014e-06, "loss": 0.5264, "step": 754},
    {"epoch": 2.45, "learning_rate": 1.9955239756637156e-06, "loss": 0.4516, "step": 756},
    {"epoch": 2.46, "learning_rate": 1.9545081991453507e-06, "loss": 0.6062, "step": 758},
    {"epoch": 2.47, "learning_rate": 1.913872627983868e-06, "loss": 0.4955, "step": 760},
    {"epoch": 2.47, "learning_rate": 1.8736191824921868e-06, "loss": 0.4711, "step": 762},
    {"epoch": 2.48, "learning_rate": 1.8337497649251313e-06, "loss": 0.5683, "step": 764},
    {"epoch": 2.49, "learning_rate": 1.7942662593895399e-06, "loss": 0.6317, "step": 766},
    {"epoch": 2.49, "learning_rate": 1.7551705317552348e-06, "loss": 0.4903, "step": 768},
    {"epoch": 2.5, "learning_rate": 1.716464429566832e-06, "loss": 0.4769, "step": 770},
    {"epoch": 2.51, "learning_rate": 1.6781497819564407e-06, "loss": 0.4629, "step": 772},
    {"epoch": 2.51, "learning_rate": 1.6402283995572255e-06, "loss": 0.5897, "step": 774},
    {"epoch": 2.52, "learning_rate": 1.6027020744178367e-06, "loss": 0.5047, "step": 776},
    {"epoch": 2.53, "learning_rate": 1.5655725799177268e-06, "loss": 0.5175, "step": 778},
    {"epoch": 2.53, "learning_rate": 1.528841670683341e-06, "loss": 0.5433, "step": 780},
    {"epoch": 2.54, "learning_rate": 1.4925110825052125e-06, "loss": 0.489, "step": 782},
    {"epoch": 2.55, "learning_rate": 1.456582532255917e-06, "loss": 0.5022, "step": 784},
    {"epoch": 2.55, "learning_rate": 1.4210577178089447e-06, "loss": 0.4123, "step": 786},
    {"epoch": 2.56, "learning_rate": 1.3859383179584717e-06, "loss": 0.5422, "step": 788},
    {"epoch": 2.56, "learning_rate": 1.351225992340015e-06, "loss": 0.6037, "step": 790},
    {"epoch": 2.57, "learning_rate": 1.3169223813520072e-06, "loss": 0.5819, "step": 792},
    {"epoch": 2.58, "learning_rate": 1.2830291060782806e-06, "loss": 0.4759, "step": 794},
    {"epoch": 2.58, "learning_rate": 1.2495477682114509e-06, "loss": 0.5633, "step": 796},
    {"epoch": 2.59, "learning_rate": 1.216479949977234e-06, "loss": 0.6078, "step": 798},
    {"epoch": 2.6, "learning_rate": 1.1838272140596684e-06, "loss": 0.5128, "step": 800},
    {"epoch": 2.6, "learning_rate": 1.151591103527273e-06, "loss": 0.5576, "step": 802},
    {"epoch": 2.61, "learning_rate": 1.1197731417601233e-06, "loss": 0.6094, "step": 804},
    {"epoch": 2.62, "learning_rate": 1.088374832377861e-06, "loss": 0.6498, "step": 806},
    {"epoch": 2.62, "learning_rate": 1.057397659168643e-06, "loss": 0.517, "step": 808},
    {"epoch": 2.63, "learning_rate": 1.0268430860190136e-06, "loss": 0.5233, "step": 810},
    {"epoch": 2.64, "learning_rate": 9.96712556844729e-07, "loss": 0.4652, "step": 812},
    {"epoch": 2.64, "learning_rate": 9.67007495522524e-07, "loss": 0.6261, "step": 814},
    {"epoch": 2.65, "learning_rate": 9.377293058228243e-07, "loss": 0.4634, "step": 816},
    {"epoch": 2.66, "learning_rate": 9.088793713434063e-07, "loss": 0.5321, "step": 818},
    {"epoch": 2.66, "learning_rate": 8.804590554440096e-07, "loss": 0.5212, "step": 820},
    {"epoch": 2.67, "learning_rate": 8.524697011819216e-07, "loss": 0.5385, "step": 822},
    {"epoch": 2.68, "learning_rate": 8.249126312484934e-07, "loss": 0.4638, "step": 824},
    {"epoch": 2.68, "learning_rate": 7.977891479066347e-07, "loss": 0.4914, "step": 826},
    {"epoch": 2.69, "learning_rate": 7.711005329292887e-07, "loss": 0.5311, "step": 828},
    {"epoch": 2.69, "learning_rate": 7.44848047538842e-07, "loss": 0.5352, "step": 830},
    {"epoch": 2.7, "learning_rate": 7.190329323475321e-07, "loss": 0.5172, "step": 832},
    {"epoch": 2.71, "learning_rate": 6.936564072988128e-07, "loss": 0.5763, "step": 834},
    {"epoch": 2.71, "learning_rate": 6.687196716097188e-07, "loss": 0.4306, "step": 836},
    {"epoch": 2.72, "learning_rate": 6.442239037141696e-07, "loss": 0.6044, "step": 838},
    {"epoch": 2.73, "learning_rate": 6.201702612073024e-07, "loss": 0.5409, "step": 840},
    {"epoch": 2.73, "learning_rate": 5.965598807907602e-07, "loss": 0.5242, "step": 842},
    {"epoch": 2.74, "learning_rate": 5.73393878218973e-07, "loss": 0.5805, "step": 844},
    {"epoch": 2.75, "learning_rate": 5.506733482464277e-07, "loss": 0.4793, "step": 846},
    {"epoch": 2.75, "learning_rate": 5.283993645759456e-07, "loss": 0.498, "step": 848},
    {"epoch": 2.76, "learning_rate": 5.065729798079245e-07, "loss": 0.5173, "step": 850},
    {"epoch": 2.77, "learning_rate": 4.851952253906133e-07, "loss": 0.5124, "step": 852},
    {"epoch": 2.77, "learning_rate": 4.6426711157135815e-07, "loss": 0.5415, "step": 854},
    {"epoch": 2.78, "learning_rate": 4.437896273488673e-07, "loss": 0.651, "step": 856},
    {"epoch": 2.79, "learning_rate": 4.2376374042646827e-07, "loss": 0.5693, "step": 858},
    {"epoch": 2.79, "learning_rate": 4.0419039716638076e-07, "loss": 0.5002, "step": 860},
    {"epoch": 2.8, "learning_rate": 3.8507052254499575e-07, "loss": 0.5297, "step": 862},
    {"epoch": 2.81, "learning_rate": 3.6640502010915957e-07, "loss": 0.5375, "step": 864},
    {"epoch": 2.81, "learning_rate": 3.4819477193347885e-07, "loss": 0.6165, "step": 866},
    {"epoch": 2.82, "learning_rate": 3.304406385786374e-07, "loss": 0.5443, "step": 868},
    {"epoch": 2.82, "learning_rate": 3.131434590507221e-07, "loss": 0.5885, "step": 870},
    {"epoch": 2.83, "learning_rate": 2.9630405076158e-07, "loss": 0.4871, "step": 872},
    {"epoch": 2.84, "learning_rate": 2.7992320949018827e-07, "loss": 0.5719, "step": 874},
    {"epoch": 2.84, "learning_rate": 2.6400170934504976e-07, "loss": 0.5361, "step": 876},
    {"epoch": 2.85, "learning_rate": 2.485403027276079e-07, "loss": 0.5775, "step": 878},
    {"epoch": 2.86, "learning_rate": 2.3353972029668936e-07, "loss": 0.4652, "step": 880},
    {"epoch": 2.86, "learning_rate": 2.1900067093398425e-07, "loss": 0.54, "step": 882},
    {"epoch": 2.87, "learning_rate": 2.049238417105348e-07, "loss": 0.491, "step": 884},
    {"epoch": 2.88, "learning_rate": 1.913098978542749e-07, "loss": 0.4532, "step": 886},
    {"epoch": 2.88, "learning_rate": 1.781594827185884e-07, "loss": 0.69, "step": 888},
    {"epoch": 2.89, "learning_rate": 1.6547321775190806e-07, "loss": 0.4711, "step": 890},
    {"epoch": 2.9, "learning_rate": 1.5325170246835218e-07, "loss": 0.5634, "step": 892},
    {"epoch": 2.9, "learning_rate": 1.4149551441938303e-07, "loss": 0.444, "step": 894},
    {"epoch": 2.91, "learning_rate": 1.3020520916652513e-07, "loss": 0.5638, "step": 896},
    {"epoch": 2.92, "learning_rate": 1.1938132025510308e-07, "loss": 0.5102, "step": 898},
    {"epoch": 2.92, "learning_rate": 1.0902435918903053e-07, "loss": 0.5644, "step": 900},
    {"epoch": 2.93, "learning_rate": 9.913481540663849e-08, "loss": 0.5643, "step": 902},
    {"epoch": 2.94, "learning_rate": 8.971315625754595e-08, "loss": 0.683, "step": 904},
    {"epoch": 2.94, "learning_rate": 8.075982698057205e-08, "loss": 0.4953, "step": 906},
    {"epoch": 2.95, "learning_rate": 7.227525068269958e-08, "loss": 0.5235, "step": 908},
    {"epoch": 2.95, "learning_rate": 6.425982831907651e-08, "loss": 0.5531, "step": 910},
    {"epoch": 2.96, "learning_rate": 5.6713938674071156e-08, "loss": 0.5424, "step": 912},
    {"epoch": 2.97, "learning_rate": 4.9637938343367607e-08, "loss": 0.5042, "step": 914},
    {"epoch": 2.97, "learning_rate": 4.3032161717119216e-08, "loss": 0.5134, "step": 916},
    {"epoch": 2.98, "learning_rate": 3.6896920964143435e-08, "loss": 0.5131, "step": 918},
    {"epoch": 2.99, "learning_rate": 3.1232506017169206e-08, "loss": 0.5912, "step": 920},
    {"epoch": 2.99, "learning_rate": 2.6039184559139007e-08, "loss": 0.5772, "step": 922},
    {"epoch": 3.0, "learning_rate": 2.1317202010553428e-08, "loss": 0.4642, "step": 924},
    {"epoch": 3.0, "step": 924, "total_flos": 412039810383872.0, "train_loss": 0.9543006863944974, "train_runtime": 21744.4539, "train_samples_per_second": 1.358, "train_steps_per_second": 0.042}
  ],
  "max_steps": 924,
  "num_train_epochs": 3,
  "total_flos": 412039810383872.0,
  "trial_name": null,
  "trial_params": null
}