|
{ |
|
"best_metric": 0.07131176441907883, |
|
"best_model_checkpoint": "/scratch/pkadambi/pllr_distil_mdls_logprobmse_CKPT/checkpoint-73200", |
|
"epoch": 8.623483492792628, |
|
"global_step": 76500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1006, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0496, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0433, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 0.07973845303058624, |
|
"eval_runtime": 132.2791, |
|
"eval_samples_per_second": 28.455, |
|
"eval_steps_per_second": 14.227, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0405, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5e-05, |
|
"loss": 0.039, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6e-05, |
|
"loss": 0.0385, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 0.07679147273302078, |
|
"eval_runtime": 111.8017, |
|
"eval_samples_per_second": 33.667, |
|
"eval_steps_per_second": 16.833, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7e-05, |
|
"loss": 0.0372, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8e-05, |
|
"loss": 0.0392, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9e-05, |
|
"loss": 0.04, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 0.08369360119104385, |
|
"eval_runtime": 112.8127, |
|
"eval_samples_per_second": 33.365, |
|
"eval_steps_per_second": 16.683, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0394, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.99242797107485e-05, |
|
"loss": 0.0367, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.984855942149698e-05, |
|
"loss": 0.0373, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.08127009123563766, |
|
"eval_runtime": 111.9619, |
|
"eval_samples_per_second": 33.619, |
|
"eval_steps_per_second": 16.809, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.97728391322455e-05, |
|
"loss": 0.0367, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.969711884299399e-05, |
|
"loss": 0.0373, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.962139855374249e-05, |
|
"loss": 0.0365, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 0.07595643401145935, |
|
"eval_runtime": 112.3556, |
|
"eval_samples_per_second": 33.501, |
|
"eval_steps_per_second": 16.75, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.954567826449097e-05, |
|
"loss": 0.0383, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.946995797523946e-05, |
|
"loss": 0.0374, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.939423768598796e-05, |
|
"loss": 0.0373, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.08062821626663208, |
|
"eval_runtime": 113.4977, |
|
"eval_samples_per_second": 33.164, |
|
"eval_steps_per_second": 16.582, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.931851739673647e-05, |
|
"loss": 0.0364, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.924279710748495e-05, |
|
"loss": 0.0392, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.916707681823345e-05, |
|
"loss": 0.037, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.07755022495985031, |
|
"eval_runtime": 112.8386, |
|
"eval_samples_per_second": 33.357, |
|
"eval_steps_per_second": 16.679, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.909135652898195e-05, |
|
"loss": 0.0366, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.901563623973044e-05, |
|
"loss": 0.0365, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.893991595047894e-05, |
|
"loss": 0.0368, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 0.07821377366781235, |
|
"eval_runtime": 112.693, |
|
"eval_samples_per_second": 33.4, |
|
"eval_steps_per_second": 16.7, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.886419566122743e-05, |
|
"loss": 0.037, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.878847537197593e-05, |
|
"loss": 0.0413, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.871275508272441e-05, |
|
"loss": 0.0376, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 0.0812031701207161, |
|
"eval_runtime": 113.1924, |
|
"eval_samples_per_second": 33.253, |
|
"eval_steps_per_second": 16.627, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.863703479347291e-05, |
|
"loss": 0.0377, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.856131450422142e-05, |
|
"loss": 0.0378, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.84855942149699e-05, |
|
"loss": 0.0368, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 0.07830669730901718, |
|
"eval_runtime": 112.5898, |
|
"eval_samples_per_second": 33.431, |
|
"eval_steps_per_second": 16.716, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.84098739257184e-05, |
|
"loss": 0.0383, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.83341536364669e-05, |
|
"loss": 0.038, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.825843334721539e-05, |
|
"loss": 0.037, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 0.07889199256896973, |
|
"eval_runtime": 112.7491, |
|
"eval_samples_per_second": 33.384, |
|
"eval_steps_per_second": 16.692, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.818271305796389e-05, |
|
"loss": 0.0367, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.810699276871238e-05, |
|
"loss": 0.0379, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.803127247946088e-05, |
|
"loss": 0.0369, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 0.07736033946275711, |
|
"eval_runtime": 114.7801, |
|
"eval_samples_per_second": 32.793, |
|
"eval_steps_per_second": 16.397, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.795555219020938e-05, |
|
"loss": 0.0363, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.787983190095786e-05, |
|
"loss": 0.0369, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.780411161170636e-05, |
|
"loss": 0.0372, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 0.07993236929178238, |
|
"eval_runtime": 111.8819, |
|
"eval_samples_per_second": 33.643, |
|
"eval_steps_per_second": 16.821, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.772839132245486e-05, |
|
"loss": 0.0388, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.765267103320335e-05, |
|
"loss": 0.0369, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.757695074395184e-05, |
|
"loss": 0.0366, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 0.07971027493476868, |
|
"eval_runtime": 112.5724, |
|
"eval_samples_per_second": 33.436, |
|
"eval_steps_per_second": 16.718, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.750123045470034e-05, |
|
"loss": 0.0374, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.742551016544884e-05, |
|
"loss": 0.0368, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.734978987619733e-05, |
|
"loss": 0.0367, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 0.07913076877593994, |
|
"eval_runtime": 111.8618, |
|
"eval_samples_per_second": 33.649, |
|
"eval_steps_per_second": 16.824, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.727406958694583e-05, |
|
"loss": 0.0371, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.719834929769433e-05, |
|
"loss": 0.0376, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.712262900844281e-05, |
|
"loss": 0.0369, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 0.07797858864068985, |
|
"eval_runtime": 111.9782, |
|
"eval_samples_per_second": 33.614, |
|
"eval_steps_per_second": 16.807, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.70469087191913e-05, |
|
"loss": 0.0366, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.697118842993981e-05, |
|
"loss": 0.0365, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.689546814068831e-05, |
|
"loss": 0.0362, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 0.07814334332942963, |
|
"eval_runtime": 112.8567, |
|
"eval_samples_per_second": 33.352, |
|
"eval_steps_per_second": 16.676, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.681974785143679e-05, |
|
"loss": 0.0363, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.674402756218529e-05, |
|
"loss": 0.0363, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.666830727293379e-05, |
|
"loss": 0.036, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 0.08152509480714798, |
|
"eval_runtime": 113.7844, |
|
"eval_samples_per_second": 33.08, |
|
"eval_steps_per_second": 16.54, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.659258698368228e-05, |
|
"loss": 0.036, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.651686669443078e-05, |
|
"loss": 0.0377, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.644114640517927e-05, |
|
"loss": 0.0362, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 0.07828745245933533, |
|
"eval_runtime": 113.8804, |
|
"eval_samples_per_second": 33.052, |
|
"eval_steps_per_second": 16.526, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.636542611592777e-05, |
|
"loss": 0.0371, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.628970582667625e-05, |
|
"loss": 0.0368, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.621398553742475e-05, |
|
"loss": 0.0416, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.0812540277838707, |
|
"eval_runtime": 114.0213, |
|
"eval_samples_per_second": 33.011, |
|
"eval_steps_per_second": 16.506, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.613826524817326e-05, |
|
"loss": 0.0371, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.606330216181426e-05, |
|
"loss": 0.0402, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.598758187256276e-05, |
|
"loss": 0.0372, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 0.07858484238386154, |
|
"eval_runtime": 117.0686, |
|
"eval_samples_per_second": 32.152, |
|
"eval_steps_per_second": 16.076, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.591186158331126e-05, |
|
"loss": 0.0371, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.583614129405974e-05, |
|
"loss": 0.0365, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.576042100480824e-05, |
|
"loss": 0.0368, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 0.0838443860411644, |
|
"eval_runtime": 113.086, |
|
"eval_samples_per_second": 33.284, |
|
"eval_steps_per_second": 16.642, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.568470071555675e-05, |
|
"loss": 0.0361, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.560898042630523e-05, |
|
"loss": 0.0374, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.553326013705373e-05, |
|
"loss": 0.0368, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 0.07696254551410675, |
|
"eval_runtime": 124.4437, |
|
"eval_samples_per_second": 30.247, |
|
"eval_steps_per_second": 15.123, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.545753984780222e-05, |
|
"loss": 0.037, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.538181955855072e-05, |
|
"loss": 0.0365, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.530609926929921e-05, |
|
"loss": 0.0366, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 0.07776732742786407, |
|
"eval_runtime": 116.9397, |
|
"eval_samples_per_second": 32.188, |
|
"eval_steps_per_second": 16.094, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.523037898004771e-05, |
|
"loss": 0.0367, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.51546586907962e-05, |
|
"loss": 0.0358, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.50789384015447e-05, |
|
"loss": 0.0393, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 0.08190678805112839, |
|
"eval_runtime": 112.5791, |
|
"eval_samples_per_second": 33.434, |
|
"eval_steps_per_second": 16.717, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.500321811229319e-05, |
|
"loss": 0.0367, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.49274978230417e-05, |
|
"loss": 0.0364, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.485177753379019e-05, |
|
"loss": 0.0365, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 0.0807182639837265, |
|
"eval_runtime": 113.3895, |
|
"eval_samples_per_second": 33.195, |
|
"eval_steps_per_second": 16.598, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.477605724453867e-05, |
|
"loss": 0.0366, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.470033695528717e-05, |
|
"loss": 0.0366, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.462461666603567e-05, |
|
"loss": 0.0396, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 0.07820982486009598, |
|
"eval_runtime": 113.1441, |
|
"eval_samples_per_second": 33.267, |
|
"eval_steps_per_second": 16.634, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.454889637678416e-05, |
|
"loss": 0.0371, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.447317608753266e-05, |
|
"loss": 0.036, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.439745579828116e-05, |
|
"loss": 0.0363, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.07800594717264175, |
|
"eval_runtime": 112.869, |
|
"eval_samples_per_second": 33.348, |
|
"eval_steps_per_second": 16.674, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.432173550902965e-05, |
|
"loss": 0.0363, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.424677242267066e-05, |
|
"loss": 0.0369, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.417105213341915e-05, |
|
"loss": 0.036, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 0.0792468786239624, |
|
"eval_runtime": 112.9199, |
|
"eval_samples_per_second": 33.333, |
|
"eval_steps_per_second": 16.667, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.409533184416765e-05, |
|
"loss": 0.0369, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.401961155491615e-05, |
|
"loss": 0.0364, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.394389126566464e-05, |
|
"loss": 0.036, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.0804230123758316, |
|
"eval_runtime": 116.8987, |
|
"eval_samples_per_second": 32.199, |
|
"eval_steps_per_second": 16.099, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.386817097641314e-05, |
|
"loss": 0.0362, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.379245068716162e-05, |
|
"loss": 0.0393, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.371673039791012e-05, |
|
"loss": 0.0388, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 0.07973662763834, |
|
"eval_runtime": 114.807, |
|
"eval_samples_per_second": 32.785, |
|
"eval_steps_per_second": 16.393, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.364101010865863e-05, |
|
"loss": 0.0362, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.356528981940711e-05, |
|
"loss": 0.0367, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.34895695301556e-05, |
|
"loss": 0.0365, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 0.08074788749217987, |
|
"eval_runtime": 112.7642, |
|
"eval_samples_per_second": 33.379, |
|
"eval_steps_per_second": 16.69, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.34138492409041e-05, |
|
"loss": 0.0362, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.33381289516526e-05, |
|
"loss": 0.0356, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.32624086624011e-05, |
|
"loss": 0.0377, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 0.08139072358608246, |
|
"eval_runtime": 112.547, |
|
"eval_samples_per_second": 33.444, |
|
"eval_steps_per_second": 16.722, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.318668837314959e-05, |
|
"loss": 0.0363, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.311096808389809e-05, |
|
"loss": 0.0364, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.303524779464658e-05, |
|
"loss": 0.0359, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 0.08024642616510391, |
|
"eval_runtime": 113.2959, |
|
"eval_samples_per_second": 33.223, |
|
"eval_steps_per_second": 16.611, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 9.295952750539507e-05, |
|
"loss": 0.0363, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.288380721614358e-05, |
|
"loss": 0.0362, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 9.280808692689207e-05, |
|
"loss": 0.0362, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 0.08062873780727386, |
|
"eval_runtime": 118.7559, |
|
"eval_samples_per_second": 31.695, |
|
"eval_steps_per_second": 15.848, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 9.273236663764056e-05, |
|
"loss": 0.0367, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 9.265664634838905e-05, |
|
"loss": 0.0377, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 9.258092605913755e-05, |
|
"loss": 0.0366, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 0.07990479469299316, |
|
"eval_runtime": 113.3503, |
|
"eval_samples_per_second": 33.207, |
|
"eval_steps_per_second": 16.603, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 9.250520576988604e-05, |
|
"loss": 0.0363, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 9.242948548063454e-05, |
|
"loss": 0.0362, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.235376519138304e-05, |
|
"loss": 0.0373, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.08071722835302353, |
|
"eval_runtime": 112.9624, |
|
"eval_samples_per_second": 33.321, |
|
"eval_steps_per_second": 16.66, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 9.227804490213153e-05, |
|
"loss": 0.0366, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.220232461288003e-05, |
|
"loss": 0.0361, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 9.212660432362851e-05, |
|
"loss": 0.0366, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 0.08084140717983246, |
|
"eval_runtime": 113.0148, |
|
"eval_samples_per_second": 33.305, |
|
"eval_steps_per_second": 16.653, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.205088403437702e-05, |
|
"loss": 0.0368, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 9.197516374512552e-05, |
|
"loss": 0.0367, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 9.1899443455874e-05, |
|
"loss": 0.0358, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 0.07723592221736908, |
|
"eval_runtime": 114.2103, |
|
"eval_samples_per_second": 32.957, |
|
"eval_steps_per_second": 16.478, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 9.18237231666225e-05, |
|
"loss": 0.0356, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.174800287737099e-05, |
|
"loss": 0.0361, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.16722825881195e-05, |
|
"loss": 0.0359, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 0.07802087813615799, |
|
"eval_runtime": 119.4601, |
|
"eval_samples_per_second": 31.508, |
|
"eval_steps_per_second": 15.754, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.159656229886799e-05, |
|
"loss": 0.0356, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 9.152084200961648e-05, |
|
"loss": 0.0362, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.144512172036498e-05, |
|
"loss": 0.0363, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 0.07936106622219086, |
|
"eval_runtime": 113.7864, |
|
"eval_samples_per_second": 33.08, |
|
"eval_steps_per_second": 16.54, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 9.136940143111346e-05, |
|
"loss": 0.0365, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 9.129368114186197e-05, |
|
"loss": 0.0366, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 9.121796085261047e-05, |
|
"loss": 0.036, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 0.07988855987787247, |
|
"eval_runtime": 113.8155, |
|
"eval_samples_per_second": 33.071, |
|
"eval_steps_per_second": 16.536, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.114224056335896e-05, |
|
"loss": 0.0362, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 9.106652027410745e-05, |
|
"loss": 0.0358, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 9.099079998485594e-05, |
|
"loss": 0.0365, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 0.07837213575839996, |
|
"eval_runtime": 112.3824, |
|
"eval_samples_per_second": 33.493, |
|
"eval_steps_per_second": 16.746, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.091507969560444e-05, |
|
"loss": 0.0359, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 9.084011660924546e-05, |
|
"loss": 0.0357, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.076439631999395e-05, |
|
"loss": 0.0363, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 0.0799858570098877, |
|
"eval_runtime": 113.5694, |
|
"eval_samples_per_second": 33.143, |
|
"eval_steps_per_second": 16.571, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.068867603074244e-05, |
|
"loss": 0.0364, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.061295574149093e-05, |
|
"loss": 0.0361, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.053723545223943e-05, |
|
"loss": 0.0369, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 0.07944396883249283, |
|
"eval_runtime": 113.0753, |
|
"eval_samples_per_second": 33.288, |
|
"eval_steps_per_second": 16.644, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.046151516298793e-05, |
|
"loss": 0.0362, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.038579487373642e-05, |
|
"loss": 0.0369, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.031007458448492e-05, |
|
"loss": 0.0365, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 0.07925315946340561, |
|
"eval_runtime": 114.7305, |
|
"eval_samples_per_second": 32.807, |
|
"eval_steps_per_second": 16.404, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.023435429523341e-05, |
|
"loss": 0.0355, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.015863400598191e-05, |
|
"loss": 0.0362, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.008291371673039e-05, |
|
"loss": 0.0367, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 0.07759507745504379, |
|
"eval_runtime": 115.9731, |
|
"eval_samples_per_second": 32.456, |
|
"eval_steps_per_second": 16.228, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.00071934274789e-05, |
|
"loss": 0.0365, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 8.99314731382274e-05, |
|
"loss": 0.0364, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 8.985575284897588e-05, |
|
"loss": 0.0369, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 0.07654128968715668, |
|
"eval_runtime": 114.6416, |
|
"eval_samples_per_second": 32.833, |
|
"eval_steps_per_second": 16.416, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 8.978003255972438e-05, |
|
"loss": 0.0361, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.970431227047287e-05, |
|
"loss": 0.0357, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.962859198122138e-05, |
|
"loss": 0.0361, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 0.07798641175031662, |
|
"eval_runtime": 114.8415, |
|
"eval_samples_per_second": 32.776, |
|
"eval_steps_per_second": 16.388, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.955287169196987e-05, |
|
"loss": 0.036, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.947715140271836e-05, |
|
"loss": 0.0355, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.940143111346686e-05, |
|
"loss": 0.0356, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 0.07912832498550415, |
|
"eval_runtime": 134.2375, |
|
"eval_samples_per_second": 28.04, |
|
"eval_steps_per_second": 14.02, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.932571082421534e-05, |
|
"loss": 0.0359, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.924999053496385e-05, |
|
"loss": 0.0359, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.917427024571235e-05, |
|
"loss": 0.0365, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 0.07893016189336777, |
|
"eval_runtime": 115.7069, |
|
"eval_samples_per_second": 32.53, |
|
"eval_steps_per_second": 16.265, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.909854995646084e-05, |
|
"loss": 0.0363, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.902282966720933e-05, |
|
"loss": 0.0362, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.894710937795782e-05, |
|
"loss": 0.0361, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 0.08188512921333313, |
|
"eval_runtime": 115.1415, |
|
"eval_samples_per_second": 32.69, |
|
"eval_steps_per_second": 16.345, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.887138908870632e-05, |
|
"loss": 0.0364, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.879566879945483e-05, |
|
"loss": 0.036, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 8.871994851020331e-05, |
|
"loss": 0.0361, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 0.0779973492026329, |
|
"eval_runtime": 116.8002, |
|
"eval_samples_per_second": 32.226, |
|
"eval_steps_per_second": 16.113, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 8.864422822095181e-05, |
|
"loss": 0.0364, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 8.85685079317003e-05, |
|
"loss": 0.0359, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 8.849354484534131e-05, |
|
"loss": 0.0361, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 0.0758824571967125, |
|
"eval_runtime": 116.8127, |
|
"eval_samples_per_second": 32.223, |
|
"eval_steps_per_second": 16.111, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 8.84178245560898e-05, |
|
"loss": 0.0372, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 8.83421042668383e-05, |
|
"loss": 0.036, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 8.82663839775868e-05, |
|
"loss": 0.0362, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 0.0769895613193512, |
|
"eval_runtime": 117.447, |
|
"eval_samples_per_second": 32.048, |
|
"eval_steps_per_second": 16.024, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 8.81906636883353e-05, |
|
"loss": 0.0364, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 8.811494339908379e-05, |
|
"loss": 0.0357, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 8.803922310983227e-05, |
|
"loss": 0.0359, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 0.07869885116815567, |
|
"eval_runtime": 118.1092, |
|
"eval_samples_per_second": 31.869, |
|
"eval_steps_per_second": 15.934, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 8.796350282058078e-05, |
|
"loss": 0.0357, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.788778253132928e-05, |
|
"loss": 0.0356, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 8.781206224207776e-05, |
|
"loss": 0.0359, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 0.07759717106819153, |
|
"eval_runtime": 115.442, |
|
"eval_samples_per_second": 32.605, |
|
"eval_steps_per_second": 16.303, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 8.773634195282626e-05, |
|
"loss": 0.0362, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 8.766062166357476e-05, |
|
"loss": 0.0366, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 8.758490137432326e-05, |
|
"loss": 0.036, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 0.07808942347764969, |
|
"eval_runtime": 135.2162, |
|
"eval_samples_per_second": 27.837, |
|
"eval_steps_per_second": 13.918, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 8.750918108507175e-05, |
|
"loss": 0.0368, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 8.743346079582024e-05, |
|
"loss": 0.0382, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 8.735774050656874e-05, |
|
"loss": 0.0362, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.08313935250043869, |
|
"eval_runtime": 127.2974, |
|
"eval_samples_per_second": 29.569, |
|
"eval_steps_per_second": 14.784, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 8.728202021731724e-05, |
|
"loss": 0.0365, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 8.720629992806573e-05, |
|
"loss": 0.0357, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 8.713057963881423e-05, |
|
"loss": 0.0361, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.07743257284164429, |
|
"eval_runtime": 125.0251, |
|
"eval_samples_per_second": 30.106, |
|
"eval_steps_per_second": 15.053, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 8.705485934956273e-05, |
|
"loss": 0.0359, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 8.697913906031121e-05, |
|
"loss": 0.0354, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 8.69034187710597e-05, |
|
"loss": 0.036, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 0.0795595720410347, |
|
"eval_runtime": 119.464, |
|
"eval_samples_per_second": 31.507, |
|
"eval_steps_per_second": 15.754, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 8.68276984818082e-05, |
|
"loss": 0.0352, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 8.675197819255671e-05, |
|
"loss": 0.0355, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 8.667625790330519e-05, |
|
"loss": 0.0356, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 0.07749467343091965, |
|
"eval_runtime": 135.0035, |
|
"eval_samples_per_second": 27.881, |
|
"eval_steps_per_second": 13.94, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.660053761405369e-05, |
|
"loss": 0.0354, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.65255745276947e-05, |
|
"loss": 0.0356, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.644985423844319e-05, |
|
"loss": 0.0356, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 0.08037791401147842, |
|
"eval_runtime": 118.3916, |
|
"eval_samples_per_second": 31.793, |
|
"eval_steps_per_second": 15.896, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.637413394919169e-05, |
|
"loss": 0.0357, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.629841365994018e-05, |
|
"loss": 0.0357, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.622269337068868e-05, |
|
"loss": 0.0362, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 0.07724020630121231, |
|
"eval_runtime": 122.4205, |
|
"eval_samples_per_second": 30.746, |
|
"eval_steps_per_second": 15.373, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.614697308143718e-05, |
|
"loss": 0.0365, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.607125279218567e-05, |
|
"loss": 0.0362, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.599553250293416e-05, |
|
"loss": 0.0358, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 0.0782192125916481, |
|
"eval_runtime": 116.6226, |
|
"eval_samples_per_second": 32.275, |
|
"eval_steps_per_second": 16.138, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.591981221368266e-05, |
|
"loss": 0.0354, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.584409192443116e-05, |
|
"loss": 0.0355, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.576837163517964e-05, |
|
"loss": 0.0358, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_loss": 0.07782430946826935, |
|
"eval_runtime": 123.6999, |
|
"eval_samples_per_second": 30.428, |
|
"eval_steps_per_second": 15.214, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.569265134592814e-05, |
|
"loss": 0.0356, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.561693105667664e-05, |
|
"loss": 0.0358, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.554121076742515e-05, |
|
"loss": 0.0353, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 0.07807427644729614, |
|
"eval_runtime": 119.5559, |
|
"eval_samples_per_second": 31.483, |
|
"eval_steps_per_second": 15.742, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.546549047817363e-05, |
|
"loss": 0.0354, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.538977018892213e-05, |
|
"loss": 0.0353, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 8.531404989967062e-05, |
|
"loss": 0.0356, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_loss": 0.07546605169773102, |
|
"eval_runtime": 150.6173, |
|
"eval_samples_per_second": 24.99, |
|
"eval_steps_per_second": 12.495, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 8.523832961041912e-05, |
|
"loss": 0.0362, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 8.516260932116761e-05, |
|
"loss": 0.0361, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 8.508688903191611e-05, |
|
"loss": 0.038, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 0.07998595386743546, |
|
"eval_runtime": 136.8586, |
|
"eval_samples_per_second": 27.503, |
|
"eval_steps_per_second": 13.751, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 8.50111687426646e-05, |
|
"loss": 0.037, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 8.493544845341309e-05, |
|
"loss": 0.0357, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 8.485972816416159e-05, |
|
"loss": 0.0357, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.07483500242233276, |
|
"eval_runtime": 134.3333, |
|
"eval_samples_per_second": 28.02, |
|
"eval_steps_per_second": 14.01, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 8.478400787491008e-05, |
|
"loss": 0.0358, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 8.470828758565859e-05, |
|
"loss": 0.0358, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 8.463256729640707e-05, |
|
"loss": 0.036, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.07638426870107651, |
|
"eval_runtime": 115.3414, |
|
"eval_samples_per_second": 32.634, |
|
"eval_steps_per_second": 16.317, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 8.455684700715557e-05, |
|
"loss": 0.0357, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 8.448112671790407e-05, |
|
"loss": 0.0358, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 8.440540642865256e-05, |
|
"loss": 0.0364, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 0.07863634079694748, |
|
"eval_runtime": 115.0055, |
|
"eval_samples_per_second": 32.729, |
|
"eval_steps_per_second": 16.364, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 8.432968613940106e-05, |
|
"loss": 0.0355, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.425396585014956e-05, |
|
"loss": 0.0356, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.417900276379056e-05, |
|
"loss": 0.0369, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 0.08041872084140778, |
|
"eval_runtime": 115.1802, |
|
"eval_samples_per_second": 32.679, |
|
"eval_steps_per_second": 16.34, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.410328247453906e-05, |
|
"loss": 0.036, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.402756218528755e-05, |
|
"loss": 0.0359, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.395184189603605e-05, |
|
"loss": 0.0356, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.0775662437081337, |
|
"eval_runtime": 115.4158, |
|
"eval_samples_per_second": 32.613, |
|
"eval_steps_per_second": 16.306, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.387612160678455e-05, |
|
"loss": 0.036, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.380040131753304e-05, |
|
"loss": 0.0355, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.372468102828154e-05, |
|
"loss": 0.0362, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 0.07657490670681, |
|
"eval_runtime": 113.6194, |
|
"eval_samples_per_second": 33.128, |
|
"eval_steps_per_second": 16.564, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.364896073903002e-05, |
|
"loss": 0.0371, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.357324044977852e-05, |
|
"loss": 0.036, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.349752016052703e-05, |
|
"loss": 0.0362, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 0.07671009004116058, |
|
"eval_runtime": 112.6702, |
|
"eval_samples_per_second": 33.407, |
|
"eval_steps_per_second": 16.704, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 8.342179987127551e-05, |
|
"loss": 0.0357, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 8.3346079582024e-05, |
|
"loss": 0.0356, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 8.32703592927725e-05, |
|
"loss": 0.0359, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 0.07723435014486313, |
|
"eval_runtime": 114.56, |
|
"eval_samples_per_second": 32.856, |
|
"eval_steps_per_second": 16.428, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 8.319539620641351e-05, |
|
"loss": 0.0386, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.3119675917162e-05, |
|
"loss": 0.0355, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 8.304395562791051e-05, |
|
"loss": 0.0356, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_loss": 0.08305002748966217, |
|
"eval_runtime": 113.3103, |
|
"eval_samples_per_second": 33.219, |
|
"eval_steps_per_second": 16.609, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 8.2968235338659e-05, |
|
"loss": 0.0358, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 8.28925150494075e-05, |
|
"loss": 0.0354, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 8.281679476015599e-05, |
|
"loss": 0.0357, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 0.0788276344537735, |
|
"eval_runtime": 114.3528, |
|
"eval_samples_per_second": 32.916, |
|
"eval_steps_per_second": 16.458, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 8.274107447090447e-05, |
|
"loss": 0.0358, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 8.266535418165298e-05, |
|
"loss": 0.036, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 8.258963389240148e-05, |
|
"loss": 0.036, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 0.07712933421134949, |
|
"eval_runtime": 112.7369, |
|
"eval_samples_per_second": 33.387, |
|
"eval_steps_per_second": 16.694, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 8.251391360314997e-05, |
|
"loss": 0.036, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 8.243819331389846e-05, |
|
"loss": 0.0354, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.236247302464695e-05, |
|
"loss": 0.0351, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 0.07657415419816971, |
|
"eval_runtime": 134.3132, |
|
"eval_samples_per_second": 28.024, |
|
"eval_steps_per_second": 14.012, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.228675273539545e-05, |
|
"loss": 0.0357, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.221103244614395e-05, |
|
"loss": 0.0354, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.213531215689244e-05, |
|
"loss": 0.0355, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 0.07925111800432205, |
|
"eval_runtime": 113.1895, |
|
"eval_samples_per_second": 33.254, |
|
"eval_steps_per_second": 16.627, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 8.205959186764094e-05, |
|
"loss": 0.0358, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 8.198387157838943e-05, |
|
"loss": 0.0359, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 8.190815128913793e-05, |
|
"loss": 0.0357, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 0.07652386277914047, |
|
"eval_runtime": 121.0346, |
|
"eval_samples_per_second": 31.099, |
|
"eval_steps_per_second": 15.549, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 8.183243099988643e-05, |
|
"loss": 0.0362, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 8.175671071063492e-05, |
|
"loss": 0.0378, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 8.168099042138342e-05, |
|
"loss": 0.0349, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 0.07597441971302032, |
|
"eval_runtime": 114.3036, |
|
"eval_samples_per_second": 32.93, |
|
"eval_steps_per_second": 16.465, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.16052701321319e-05, |
|
"loss": 0.0358, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 8.15295498428804e-05, |
|
"loss": 0.0361, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.145382955362891e-05, |
|
"loss": 0.036, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 0.0766075849533081, |
|
"eval_runtime": 114.2717, |
|
"eval_samples_per_second": 32.939, |
|
"eval_steps_per_second": 16.47, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 8.137810926437739e-05, |
|
"loss": 0.0365, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 8.130238897512589e-05, |
|
"loss": 0.0357, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.122666868587438e-05, |
|
"loss": 0.0357, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 0.07591988146305084, |
|
"eval_runtime": 114.209, |
|
"eval_samples_per_second": 32.957, |
|
"eval_steps_per_second": 16.479, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.115094839662288e-05, |
|
"loss": 0.0363, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.107522810737138e-05, |
|
"loss": 0.0359, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 8.099950781811987e-05, |
|
"loss": 0.0356, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 0.0761919841170311, |
|
"eval_runtime": 114.3543, |
|
"eval_samples_per_second": 32.915, |
|
"eval_steps_per_second": 16.458, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.092378752886837e-05, |
|
"loss": 0.0354, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.084806723961685e-05, |
|
"loss": 0.0358, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 8.077234695036535e-05, |
|
"loss": 0.0356, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 0.07746852934360504, |
|
"eval_runtime": 118.398, |
|
"eval_samples_per_second": 31.791, |
|
"eval_steps_per_second": 15.896, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.069662666111384e-05, |
|
"loss": 0.0356, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.062090637186235e-05, |
|
"loss": 0.0359, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 8.054518608261084e-05, |
|
"loss": 0.0357, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_loss": 0.07902507483959198, |
|
"eval_runtime": 114.5226, |
|
"eval_samples_per_second": 32.867, |
|
"eval_steps_per_second": 16.433, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 8.046946579335933e-05, |
|
"loss": 0.0356, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 8.039374550410783e-05, |
|
"loss": 0.035, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 8.031802521485633e-05, |
|
"loss": 0.0353, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_loss": 0.08476170897483826, |
|
"eval_runtime": 113.6318, |
|
"eval_samples_per_second": 33.125, |
|
"eval_steps_per_second": 16.562, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 8.024230492560482e-05, |
|
"loss": 0.0352, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 8.016658463635332e-05, |
|
"loss": 0.0352, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 8.009086434710181e-05, |
|
"loss": 0.0354, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_loss": 0.07601426541805267, |
|
"eval_runtime": 112.1225, |
|
"eval_samples_per_second": 33.57, |
|
"eval_steps_per_second": 16.785, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 8.00151440578503e-05, |
|
"loss": 0.0347, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 7.993942376859879e-05, |
|
"loss": 0.0348, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 7.986446068223981e-05, |
|
"loss": 0.0359, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_loss": 0.07497977465391159, |
|
"eval_runtime": 111.8895, |
|
"eval_samples_per_second": 33.64, |
|
"eval_steps_per_second": 16.82, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 7.978874039298831e-05, |
|
"loss": 0.0354, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 7.97130201037368e-05, |
|
"loss": 0.0359, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 7.96372998144853e-05, |
|
"loss": 0.036, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 0.0736338198184967, |
|
"eval_runtime": 112.4468, |
|
"eval_samples_per_second": 33.474, |
|
"eval_steps_per_second": 16.737, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.956157952523378e-05, |
|
"loss": 0.0354, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.948585923598228e-05, |
|
"loss": 0.0351, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.941013894673079e-05, |
|
"loss": 0.0353, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_loss": 0.07945719361305237, |
|
"eval_runtime": 112.053, |
|
"eval_samples_per_second": 33.591, |
|
"eval_steps_per_second": 16.796, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.933441865747927e-05, |
|
"loss": 0.0356, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 7.925869836822777e-05, |
|
"loss": 0.0358, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 7.918297807897626e-05, |
|
"loss": 0.0358, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_loss": 0.07664191722869873, |
|
"eval_runtime": 111.7472, |
|
"eval_samples_per_second": 33.683, |
|
"eval_steps_per_second": 16.842, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 7.910725778972476e-05, |
|
"loss": 0.0353, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 7.903153750047326e-05, |
|
"loss": 0.0357, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 7.895581721122175e-05, |
|
"loss": 0.0361, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_loss": 0.07565232366323471, |
|
"eval_runtime": 114.0331, |
|
"eval_samples_per_second": 33.008, |
|
"eval_steps_per_second": 16.504, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 7.888009692197025e-05, |
|
"loss": 0.036, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 7.880437663271875e-05, |
|
"loss": 0.0352, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 7.872865634346723e-05, |
|
"loss": 0.0352, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 0.0762295052409172, |
|
"eval_runtime": 113.0111, |
|
"eval_samples_per_second": 33.306, |
|
"eval_steps_per_second": 16.653, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 7.865293605421573e-05, |
|
"loss": 0.0351, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 7.857721576496423e-05, |
|
"loss": 0.0349, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 7.850149547571272e-05, |
|
"loss": 0.0358, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_loss": 0.07571133971214294, |
|
"eval_runtime": 113.0141, |
|
"eval_samples_per_second": 33.306, |
|
"eval_steps_per_second": 16.653, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 7.842577518646121e-05, |
|
"loss": 0.0354, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 7.835005489720971e-05, |
|
"loss": 0.0349, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 7.82743346079582e-05, |
|
"loss": 0.035, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_loss": 0.07601726055145264, |
|
"eval_runtime": 113.6207, |
|
"eval_samples_per_second": 33.128, |
|
"eval_steps_per_second": 16.564, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 7.81986143187067e-05, |
|
"loss": 0.0356, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 7.81228940294552e-05, |
|
"loss": 0.0352, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 7.80471737402037e-05, |
|
"loss": 0.035, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 0.076691173017025, |
|
"eval_runtime": 113.7193, |
|
"eval_samples_per_second": 33.099, |
|
"eval_steps_per_second": 16.55, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 7.797145345095218e-05, |
|
"loss": 0.0349, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 7.789573316170067e-05, |
|
"loss": 0.0352, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 7.782001287244918e-05, |
|
"loss": 0.0354, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_loss": 0.07372647523880005, |
|
"eval_runtime": 113.7343, |
|
"eval_samples_per_second": 33.095, |
|
"eval_steps_per_second": 16.547, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 7.774429258319768e-05, |
|
"loss": 0.0356, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 7.766932949683869e-05, |
|
"loss": 0.037, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 7.759360920758718e-05, |
|
"loss": 0.0351, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_loss": 0.07602939754724503, |
|
"eval_runtime": 114.023, |
|
"eval_samples_per_second": 33.011, |
|
"eval_steps_per_second": 16.505, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 7.751788891833566e-05, |
|
"loss": 0.0352, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 7.744216862908416e-05, |
|
"loss": 0.0349, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 7.736644833983267e-05, |
|
"loss": 0.0353, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_loss": 0.07627377659082413, |
|
"eval_runtime": 114.2389, |
|
"eval_samples_per_second": 32.948, |
|
"eval_steps_per_second": 16.474, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 7.729072805058115e-05, |
|
"loss": 0.0349, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 7.721500776132965e-05, |
|
"loss": 0.0352, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 7.713928747207815e-05, |
|
"loss": 0.035, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_loss": 0.07616809755563736, |
|
"eval_runtime": 113.5611, |
|
"eval_samples_per_second": 33.145, |
|
"eval_steps_per_second": 16.573, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 7.706356718282664e-05, |
|
"loss": 0.0353, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 7.698784689357514e-05, |
|
"loss": 0.0353, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 7.691212660432363e-05, |
|
"loss": 0.0354, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_loss": 0.07693105936050415, |
|
"eval_runtime": 113.519, |
|
"eval_samples_per_second": 33.157, |
|
"eval_steps_per_second": 16.579, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 7.683640631507213e-05, |
|
"loss": 0.0348, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 7.676068602582063e-05, |
|
"loss": 0.0347, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 7.668496573656911e-05, |
|
"loss": 0.0358, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"eval_loss": 0.07934340089559555, |
|
"eval_runtime": 114.2912, |
|
"eval_samples_per_second": 32.933, |
|
"eval_steps_per_second": 16.467, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 7.66092454473176e-05, |
|
"loss": 0.0348, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 7.653352515806612e-05, |
|
"loss": 0.0353, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 7.64578048688146e-05, |
|
"loss": 0.0349, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_loss": 0.0772014930844307, |
|
"eval_runtime": 114.1546, |
|
"eval_samples_per_second": 32.973, |
|
"eval_steps_per_second": 16.486, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 7.63820845795631e-05, |
|
"loss": 0.0349, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 7.630636429031159e-05, |
|
"loss": 0.0354, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 7.623064400106009e-05, |
|
"loss": 0.0357, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 0.0794532299041748, |
|
"eval_runtime": 113.6919, |
|
"eval_samples_per_second": 33.107, |
|
"eval_steps_per_second": 16.554, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 7.615492371180858e-05, |
|
"loss": 0.0348, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 7.607920342255708e-05, |
|
"loss": 0.0354, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 7.600348313330558e-05, |
|
"loss": 0.0351, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_loss": 0.07594738155603409, |
|
"eval_runtime": 113.8476, |
|
"eval_samples_per_second": 33.062, |
|
"eval_steps_per_second": 16.531, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 7.592776284405407e-05, |
|
"loss": 0.0348, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 7.585204255480256e-05, |
|
"loss": 0.0359, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 7.577632226555106e-05, |
|
"loss": 0.035, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_loss": 0.0763004794716835, |
|
"eval_runtime": 114.4274, |
|
"eval_samples_per_second": 32.894, |
|
"eval_steps_per_second": 16.447, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 7.570060197629956e-05, |
|
"loss": 0.0369, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 7.562488168704804e-05, |
|
"loss": 0.0358, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 7.554916139779654e-05, |
|
"loss": 0.0351, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_loss": 0.08864033222198486, |
|
"eval_runtime": 113.7353, |
|
"eval_samples_per_second": 33.094, |
|
"eval_steps_per_second": 16.547, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 7.547344110854504e-05, |
|
"loss": 0.0352, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 7.539772081929353e-05, |
|
"loss": 0.0355, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 7.532200053004203e-05, |
|
"loss": 0.0351, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_loss": 0.0805099830031395, |
|
"eval_runtime": 113.3959, |
|
"eval_samples_per_second": 33.193, |
|
"eval_steps_per_second": 16.597, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 7.524628024079053e-05, |
|
"loss": 0.0353, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 7.517055995153902e-05, |
|
"loss": 0.0355, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 7.50948396622875e-05, |
|
"loss": 0.035, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"eval_loss": 0.08216110616922379, |
|
"eval_runtime": 113.3929, |
|
"eval_samples_per_second": 33.194, |
|
"eval_steps_per_second": 16.597, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 7.5019119373036e-05, |
|
"loss": 0.0352, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 7.494339908378451e-05, |
|
"loss": 0.0355, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 7.4867678794533e-05, |
|
"loss": 0.0347, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"eval_loss": 0.075843445956707, |
|
"eval_runtime": 113.7455, |
|
"eval_samples_per_second": 33.091, |
|
"eval_steps_per_second": 16.546, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 7.479195850528149e-05, |
|
"loss": 0.0351, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 7.471623821602999e-05, |
|
"loss": 0.0353, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 7.464051792677848e-05, |
|
"loss": 0.0345, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_loss": 0.07579231262207031, |
|
"eval_runtime": 114.0621, |
|
"eval_samples_per_second": 33.0, |
|
"eval_steps_per_second": 16.5, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 7.456479763752698e-05, |
|
"loss": 0.0357, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 7.448907734827547e-05, |
|
"loss": 0.0349, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 7.441335705902397e-05, |
|
"loss": 0.0348, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_loss": 0.07482566684484482, |
|
"eval_runtime": 113.2359, |
|
"eval_samples_per_second": 33.24, |
|
"eval_steps_per_second": 16.62, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 7.433763676977247e-05, |
|
"loss": 0.0352, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 7.426191648052095e-05, |
|
"loss": 0.0359, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 7.418695339416197e-05, |
|
"loss": 0.0362, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"eval_loss": 0.07597967982292175, |
|
"eval_runtime": 114.2165, |
|
"eval_samples_per_second": 32.955, |
|
"eval_steps_per_second": 16.477, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 7.411123310491046e-05, |
|
"loss": 0.035, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 7.403551281565896e-05, |
|
"loss": 0.0351, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 7.395979252640746e-05, |
|
"loss": 0.0356, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_loss": 0.08180462568998337, |
|
"eval_runtime": 113.5698, |
|
"eval_samples_per_second": 33.143, |
|
"eval_steps_per_second": 16.571, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 7.388407223715595e-05, |
|
"loss": 0.0353, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 7.380835194790444e-05, |
|
"loss": 0.035, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 7.373263165865295e-05, |
|
"loss": 0.0352, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 0.07536729425191879, |
|
"eval_runtime": 112.8085, |
|
"eval_samples_per_second": 33.366, |
|
"eval_steps_per_second": 16.683, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 7.365691136940144e-05, |
|
"loss": 0.0347, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 7.358119108014993e-05, |
|
"loss": 0.0348, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 7.350547079089842e-05, |
|
"loss": 0.0344, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_loss": 0.0756198838353157, |
|
"eval_runtime": 114.0122, |
|
"eval_samples_per_second": 33.014, |
|
"eval_steps_per_second": 16.507, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 7.342975050164692e-05, |
|
"loss": 0.0346, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 7.335403021239541e-05, |
|
"loss": 0.0353, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 7.327830992314391e-05, |
|
"loss": 0.0347, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"eval_loss": 0.0754874125123024, |
|
"eval_runtime": 113.1924, |
|
"eval_samples_per_second": 33.253, |
|
"eval_steps_per_second": 16.627, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 7.32025896338924e-05, |
|
"loss": 0.0346, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 7.31268693446409e-05, |
|
"loss": 0.0359, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 7.305114905538939e-05, |
|
"loss": 0.0349, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_loss": 0.07801900804042816, |
|
"eval_runtime": 113.1078, |
|
"eval_samples_per_second": 33.278, |
|
"eval_steps_per_second": 16.639, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 7.297542876613788e-05, |
|
"loss": 0.0345, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 7.289970847688639e-05, |
|
"loss": 0.0355, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 7.282398818763489e-05, |
|
"loss": 0.0349, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_loss": 0.07637841254472733, |
|
"eval_runtime": 113.663, |
|
"eval_samples_per_second": 33.115, |
|
"eval_steps_per_second": 16.558, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 7.274826789838337e-05, |
|
"loss": 0.0356, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 7.267254760913187e-05, |
|
"loss": 0.0351, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 7.259682731988036e-05, |
|
"loss": 0.0344, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_loss": 0.07504291087388992, |
|
"eval_runtime": 114.64, |
|
"eval_samples_per_second": 32.833, |
|
"eval_steps_per_second": 16.417, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 7.252110703062886e-05, |
|
"loss": 0.0349, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 7.244538674137736e-05, |
|
"loss": 0.035, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 7.236966645212585e-05, |
|
"loss": 0.0347, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_loss": 0.07671190053224564, |
|
"eval_runtime": 113.5495, |
|
"eval_samples_per_second": 33.149, |
|
"eval_steps_per_second": 16.574, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 7.229470336576686e-05, |
|
"loss": 0.0347, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 7.221898307651535e-05, |
|
"loss": 0.035, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 7.214326278726385e-05, |
|
"loss": 0.0354, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_loss": 0.07560531795024872, |
|
"eval_runtime": 113.1212, |
|
"eval_samples_per_second": 33.274, |
|
"eval_steps_per_second": 16.637, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 7.206754249801235e-05, |
|
"loss": 0.035, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 7.199182220876084e-05, |
|
"loss": 0.0351, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 7.191610191950934e-05, |
|
"loss": 0.0347, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_loss": 0.07543208450078964, |
|
"eval_runtime": 113.3143, |
|
"eval_samples_per_second": 33.217, |
|
"eval_steps_per_second": 16.609, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 7.184038163025783e-05, |
|
"loss": 0.0345, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 7.176466134100632e-05, |
|
"loss": 0.0348, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 7.168894105175483e-05, |
|
"loss": 0.0345, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_loss": 0.07596579194068909, |
|
"eval_runtime": 114.2163, |
|
"eval_samples_per_second": 32.955, |
|
"eval_steps_per_second": 16.478, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 7.161322076250332e-05, |
|
"loss": 0.0349, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 7.15375004732518e-05, |
|
"loss": 0.0346, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 7.14617801840003e-05, |
|
"loss": 0.0348, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_loss": 0.07596199959516525, |
|
"eval_runtime": 119.9286, |
|
"eval_samples_per_second": 31.385, |
|
"eval_steps_per_second": 15.693, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 7.13860598947488e-05, |
|
"loss": 0.0351, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 7.13103396054973e-05, |
|
"loss": 0.0348, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 7.123461931624579e-05, |
|
"loss": 0.0349, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_loss": 0.0768192708492279, |
|
"eval_runtime": 117.1329, |
|
"eval_samples_per_second": 32.134, |
|
"eval_steps_per_second": 16.067, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 7.115965622988681e-05, |
|
"loss": 0.0355, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 7.10839359406353e-05, |
|
"loss": 0.035, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 7.100821565138379e-05, |
|
"loss": 0.0349, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_loss": 0.08052736520767212, |
|
"eval_runtime": 118.9481, |
|
"eval_samples_per_second": 31.644, |
|
"eval_steps_per_second": 15.822, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 7.093249536213229e-05, |
|
"loss": 0.0355, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 7.085677507288078e-05, |
|
"loss": 0.0352, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 7.078105478362928e-05, |
|
"loss": 0.0352, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"eval_loss": 0.0750809758901596, |
|
"eval_runtime": 115.8124, |
|
"eval_samples_per_second": 32.501, |
|
"eval_steps_per_second": 16.25, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 7.070533449437777e-05, |
|
"loss": 0.0344, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 7.062961420512627e-05, |
|
"loss": 0.0345, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 7.055389391587475e-05, |
|
"loss": 0.0348, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_loss": 0.07553275674581528, |
|
"eval_runtime": 115.0839, |
|
"eval_samples_per_second": 32.707, |
|
"eval_steps_per_second": 16.353, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 7.047817362662325e-05, |
|
"loss": 0.0347, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 7.040245333737176e-05, |
|
"loss": 0.0349, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 7.032673304812026e-05, |
|
"loss": 0.0351, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"eval_loss": 0.07605579495429993, |
|
"eval_runtime": 115.3815, |
|
"eval_samples_per_second": 32.622, |
|
"eval_steps_per_second": 16.311, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 7.025101275886874e-05, |
|
"loss": 0.0346, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 7.017529246961723e-05, |
|
"loss": 0.0353, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 7.009957218036573e-05, |
|
"loss": 0.0351, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_loss": 0.07649397104978561, |
|
"eval_runtime": 114.9326, |
|
"eval_samples_per_second": 32.75, |
|
"eval_steps_per_second": 16.375, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 7.002385189111423e-05, |
|
"loss": 0.0355, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 6.994813160186272e-05, |
|
"loss": 0.0349, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 6.987241131261122e-05, |
|
"loss": 0.0346, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_loss": 0.07548332214355469, |
|
"eval_runtime": 115.3811, |
|
"eval_samples_per_second": 32.622, |
|
"eval_steps_per_second": 16.311, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 6.979669102335972e-05, |
|
"loss": 0.0348, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 6.97209707341082e-05, |
|
"loss": 0.0345, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 6.964525044485671e-05, |
|
"loss": 0.0349, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_loss": 0.07825972139835358, |
|
"eval_runtime": 114.6259, |
|
"eval_samples_per_second": 32.837, |
|
"eval_steps_per_second": 16.419, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 6.95695301556052e-05, |
|
"loss": 0.0348, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 6.949380986635369e-05, |
|
"loss": 0.0352, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 6.941808957710218e-05, |
|
"loss": 0.0354, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_loss": 0.07569003850221634, |
|
"eval_runtime": 112.4937, |
|
"eval_samples_per_second": 33.46, |
|
"eval_steps_per_second": 16.73, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 6.934236928785068e-05, |
|
"loss": 0.0355, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 6.926664899859918e-05, |
|
"loss": 0.035, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 6.919092870934767e-05, |
|
"loss": 0.035, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_loss": 0.0746193379163742, |
|
"eval_runtime": 112.964, |
|
"eval_samples_per_second": 33.32, |
|
"eval_steps_per_second": 16.66, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 6.911520842009617e-05, |
|
"loss": 0.035, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 6.903948813084466e-05, |
|
"loss": 0.0352, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 6.896376784159316e-05, |
|
"loss": 0.035, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_loss": 0.0757523626089096, |
|
"eval_runtime": 113.8615, |
|
"eval_samples_per_second": 33.058, |
|
"eval_steps_per_second": 16.529, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 6.888804755234164e-05, |
|
"loss": 0.0345, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 6.881232726309015e-05, |
|
"loss": 0.0347, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 6.873660697383865e-05, |
|
"loss": 0.0349, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"eval_loss": 0.07621181011199951, |
|
"eval_runtime": 112.6648, |
|
"eval_samples_per_second": 33.409, |
|
"eval_steps_per_second": 16.704, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 6.866088668458713e-05, |
|
"loss": 0.0349, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 6.858516639533563e-05, |
|
"loss": 0.0349, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 6.850944610608412e-05, |
|
"loss": 0.0346, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 0.07498028874397278, |
|
"eval_runtime": 113.2194, |
|
"eval_samples_per_second": 33.245, |
|
"eval_steps_per_second": 16.623, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 6.843372581683262e-05, |
|
"loss": 0.0346, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 6.835800552758112e-05, |
|
"loss": 0.0351, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 6.828228523832961e-05, |
|
"loss": 0.0352, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"eval_loss": 0.07786377519369125, |
|
"eval_runtime": 115.3475, |
|
"eval_samples_per_second": 32.632, |
|
"eval_steps_per_second": 16.316, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 6.820656494907811e-05, |
|
"loss": 0.0348, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 6.81308446598266e-05, |
|
"loss": 0.0352, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 6.80551243705751e-05, |
|
"loss": 0.0349, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"eval_loss": 0.0752381980419159, |
|
"eval_runtime": 113.0064, |
|
"eval_samples_per_second": 33.308, |
|
"eval_steps_per_second": 16.654, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 6.79794040813236e-05, |
|
"loss": 0.0344, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 6.790519819785713e-05, |
|
"loss": 0.0382, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 6.782947790860561e-05, |
|
"loss": 0.0347, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_loss": 0.07460816204547882, |
|
"eval_runtime": 113.1095, |
|
"eval_samples_per_second": 33.277, |
|
"eval_steps_per_second": 16.639, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 6.775375761935411e-05, |
|
"loss": 0.0347, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 6.76780373301026e-05, |
|
"loss": 0.0353, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 6.76023170408511e-05, |
|
"loss": 0.0351, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_loss": 0.07471829652786255, |
|
"eval_runtime": 112.6959, |
|
"eval_samples_per_second": 33.4, |
|
"eval_steps_per_second": 16.7, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 6.75265967515996e-05, |
|
"loss": 0.0341, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 6.745087646234809e-05, |
|
"loss": 0.0348, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 6.737515617309659e-05, |
|
"loss": 0.035, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_loss": 0.07476358860731125, |
|
"eval_runtime": 113.4313, |
|
"eval_samples_per_second": 33.183, |
|
"eval_steps_per_second": 16.592, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 6.729943588384507e-05, |
|
"loss": 0.0351, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 6.722371559459357e-05, |
|
"loss": 0.0353, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 6.714799530534208e-05, |
|
"loss": 0.0355, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_loss": 0.07447104156017303, |
|
"eval_runtime": 112.1108, |
|
"eval_samples_per_second": 33.574, |
|
"eval_steps_per_second": 16.787, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 6.707227501609057e-05, |
|
"loss": 0.0343, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 6.699655472683906e-05, |
|
"loss": 0.0342, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 6.692083443758755e-05, |
|
"loss": 0.0345, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_loss": 0.07709582149982452, |
|
"eval_runtime": 112.6189, |
|
"eval_samples_per_second": 33.422, |
|
"eval_steps_per_second": 16.711, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 6.684511414833605e-05, |
|
"loss": 0.0343, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 6.676939385908454e-05, |
|
"loss": 0.0341, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 6.669367356983304e-05, |
|
"loss": 0.0342, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_loss": 0.07542753219604492, |
|
"eval_runtime": 112.6741, |
|
"eval_samples_per_second": 33.406, |
|
"eval_steps_per_second": 16.703, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 6.661795328058154e-05, |
|
"loss": 0.0347, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 6.654223299133003e-05, |
|
"loss": 0.0338, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 6.646651270207852e-05, |
|
"loss": 0.0343, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_loss": 0.07512263208627701, |
|
"eval_runtime": 112.271, |
|
"eval_samples_per_second": 33.526, |
|
"eval_steps_per_second": 16.763, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 6.639079241282701e-05, |
|
"loss": 0.0349, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 6.631507212357552e-05, |
|
"loss": 0.035, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 6.623935183432402e-05, |
|
"loss": 0.0341, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"eval_loss": 0.07442734390497208, |
|
"eval_runtime": 112.5001, |
|
"eval_samples_per_second": 33.458, |
|
"eval_steps_per_second": 16.729, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 6.616438874796502e-05, |
|
"loss": 0.0375, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 6.608866845871352e-05, |
|
"loss": 0.0374, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 6.6012948169462e-05, |
|
"loss": 0.0354, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"eval_loss": 0.07625485956668854, |
|
"eval_runtime": 112.4439, |
|
"eval_samples_per_second": 33.474, |
|
"eval_steps_per_second": 16.737, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 6.59372278802105e-05, |
|
"loss": 0.0345, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 6.586150759095901e-05, |
|
"loss": 0.035, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 6.578578730170749e-05, |
|
"loss": 0.0349, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"eval_loss": 0.0745326355099678, |
|
"eval_runtime": 112.881, |
|
"eval_samples_per_second": 33.345, |
|
"eval_steps_per_second": 16.672, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 6.571006701245599e-05, |
|
"loss": 0.0349, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 6.563434672320448e-05, |
|
"loss": 0.0346, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 6.555862643395298e-05, |
|
"loss": 0.0347, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 0.07392705231904984, |
|
"eval_runtime": 112.4306, |
|
"eval_samples_per_second": 33.478, |
|
"eval_steps_per_second": 16.739, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 6.548290614470148e-05, |
|
"loss": 0.0348, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 6.540718585544997e-05, |
|
"loss": 0.0344, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 6.533146556619847e-05, |
|
"loss": 0.0351, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_loss": 0.07434208691120148, |
|
"eval_runtime": 112.6099, |
|
"eval_samples_per_second": 33.425, |
|
"eval_steps_per_second": 16.713, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 6.525574527694695e-05, |
|
"loss": 0.0346, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 6.518002498769545e-05, |
|
"loss": 0.034, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 6.510430469844396e-05, |
|
"loss": 0.0346, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"eval_loss": 0.07515175640583038, |
|
"eval_runtime": 112.3676, |
|
"eval_samples_per_second": 33.497, |
|
"eval_steps_per_second": 16.749, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 6.502858440919245e-05, |
|
"loss": 0.0337, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 6.495286411994094e-05, |
|
"loss": 0.0347, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 6.487714383068943e-05, |
|
"loss": 0.0345, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_loss": 0.07697822153568268, |
|
"eval_runtime": 112.875, |
|
"eval_samples_per_second": 33.347, |
|
"eval_steps_per_second": 16.673, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 6.480142354143793e-05, |
|
"loss": 0.0345, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 6.472570325218643e-05, |
|
"loss": 0.0352, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 6.464998296293492e-05, |
|
"loss": 0.0338, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"eval_loss": 0.07330728322267532, |
|
"eval_runtime": 112.726, |
|
"eval_samples_per_second": 33.391, |
|
"eval_steps_per_second": 16.695, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 6.457426267368342e-05, |
|
"loss": 0.0346, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 6.449854238443191e-05, |
|
"loss": 0.0352, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 6.44228220951804e-05, |
|
"loss": 0.0347, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 0.07405757158994675, |
|
"eval_runtime": 112.5781, |
|
"eval_samples_per_second": 33.435, |
|
"eval_steps_per_second": 16.717, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 6.43471018059289e-05, |
|
"loss": 0.0341, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 6.42713815166774e-05, |
|
"loss": 0.0346, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 6.41956612274259e-05, |
|
"loss": 0.0341, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"eval_loss": 0.07745511829853058, |
|
"eval_runtime": 112.9061, |
|
"eval_samples_per_second": 33.337, |
|
"eval_steps_per_second": 16.669, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 6.411994093817438e-05, |
|
"loss": 0.0351, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 6.404422064892288e-05, |
|
"loss": 0.0348, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 6.396850035967137e-05, |
|
"loss": 0.0348, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_loss": 0.07592994719743729, |
|
"eval_runtime": 112.679, |
|
"eval_samples_per_second": 33.405, |
|
"eval_steps_per_second": 16.702, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 6.389278007041987e-05, |
|
"loss": 0.0351, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 6.381705978116837e-05, |
|
"loss": 0.0342, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 6.374133949191686e-05, |
|
"loss": 0.0353, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"eval_loss": 0.07491569221019745, |
|
"eval_runtime": 112.9518, |
|
"eval_samples_per_second": 33.324, |
|
"eval_steps_per_second": 16.662, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 6.366561920266536e-05, |
|
"loss": 0.0347, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 6.358989891341384e-05, |
|
"loss": 0.034, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 6.351417862416235e-05, |
|
"loss": 0.0347, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_loss": 0.07395059615373611, |
|
"eval_runtime": 112.0898, |
|
"eval_samples_per_second": 33.58, |
|
"eval_steps_per_second": 16.79, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 6.343845833491085e-05, |
|
"loss": 0.0353, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 6.336273804565934e-05, |
|
"loss": 0.035, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 6.328701775640783e-05, |
|
"loss": 0.0351, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"eval_loss": 0.07327116280794144, |
|
"eval_runtime": 112.8259, |
|
"eval_samples_per_second": 33.361, |
|
"eval_steps_per_second": 16.681, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 6.321129746715632e-05, |
|
"loss": 0.0345, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 6.313557717790482e-05, |
|
"loss": 0.0346, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 6.305985688865332e-05, |
|
"loss": 0.0345, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 0.07566232234239578, |
|
"eval_runtime": 112.8497, |
|
"eval_samples_per_second": 33.354, |
|
"eval_steps_per_second": 16.677, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 6.298413659940181e-05, |
|
"loss": 0.0335, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 6.290841631015031e-05, |
|
"loss": 0.0343, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 6.28326960208988e-05, |
|
"loss": 0.0349, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_loss": 0.07591845095157623, |
|
"eval_runtime": 113.5061, |
|
"eval_samples_per_second": 33.161, |
|
"eval_steps_per_second": 16.581, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 6.275697573164729e-05, |
|
"loss": 0.0347, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 6.26812554423958e-05, |
|
"loss": 0.035, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 6.26062923560368e-05, |
|
"loss": 0.036, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_loss": 0.0736907348036766, |
|
"eval_runtime": 113.2526, |
|
"eval_samples_per_second": 33.235, |
|
"eval_steps_per_second": 16.618, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 6.25305720667853e-05, |
|
"loss": 0.0343, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 6.24548517775338e-05, |
|
"loss": 0.0342, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 6.237913148828228e-05, |
|
"loss": 0.0343, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"eval_loss": 0.07372862100601196, |
|
"eval_runtime": 113.3232, |
|
"eval_samples_per_second": 33.215, |
|
"eval_steps_per_second": 16.607, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 6.230341119903077e-05, |
|
"loss": 0.0347, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 6.222769090977928e-05, |
|
"loss": 0.0338, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 6.215197062052778e-05, |
|
"loss": 0.0347, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_loss": 0.07471512258052826, |
|
"eval_runtime": 112.821, |
|
"eval_samples_per_second": 33.363, |
|
"eval_steps_per_second": 16.681, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 6.207625033127626e-05, |
|
"loss": 0.0354, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 6.200128724491728e-05, |
|
"loss": 0.0348, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 6.192556695566577e-05, |
|
"loss": 0.0351, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"eval_loss": 0.07300514727830887, |
|
"eval_runtime": 113.2788, |
|
"eval_samples_per_second": 33.228, |
|
"eval_steps_per_second": 16.614, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 6.184984666641426e-05, |
|
"loss": 0.0349, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 6.177412637716277e-05, |
|
"loss": 0.0349, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 6.169840608791125e-05, |
|
"loss": 0.0342, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_loss": 0.07366597652435303, |
|
"eval_runtime": 119.9548, |
|
"eval_samples_per_second": 31.378, |
|
"eval_steps_per_second": 15.689, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 6.162268579865975e-05, |
|
"loss": 0.0351, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 6.154696550940825e-05, |
|
"loss": 0.0339, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 6.147124522015674e-05, |
|
"loss": 0.0344, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"eval_loss": 0.07214556634426117, |
|
"eval_runtime": 112.7662, |
|
"eval_samples_per_second": 33.379, |
|
"eval_steps_per_second": 16.689, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 6.139552493090524e-05, |
|
"loss": 0.0346, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 6.131980464165374e-05, |
|
"loss": 0.0352, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 6.124408435240223e-05, |
|
"loss": 0.0347, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_loss": 0.07323683798313141, |
|
"eval_runtime": 112.2967, |
|
"eval_samples_per_second": 33.518, |
|
"eval_steps_per_second": 16.759, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 6.116836406315073e-05, |
|
"loss": 0.0344, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 6.109264377389921e-05, |
|
"loss": 0.0344, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 6.1016923484647714e-05, |
|
"loss": 0.0339, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"eval_loss": 0.07283404469490051, |
|
"eval_runtime": 113.4587, |
|
"eval_samples_per_second": 33.175, |
|
"eval_steps_per_second": 16.588, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 6.094120319539621e-05, |
|
"loss": 0.0345, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 6.08654829061447e-05, |
|
"loss": 0.0348, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 6.0789762616893196e-05, |
|
"loss": 0.0344, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_loss": 0.0768737643957138, |
|
"eval_runtime": 112.9144, |
|
"eval_samples_per_second": 33.335, |
|
"eval_steps_per_second": 16.667, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 6.07140423276417e-05, |
|
"loss": 0.0341, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 6.0638322038390195e-05, |
|
"loss": 0.0349, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 6.0562601749138684e-05, |
|
"loss": 0.034, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_loss": 0.07310787588357925, |
|
"eval_runtime": 113.4681, |
|
"eval_samples_per_second": 33.172, |
|
"eval_steps_per_second": 16.586, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 6.048688145988718e-05, |
|
"loss": 0.0347, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 6.041116117063568e-05, |
|
"loss": 0.0349, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 6.0335440881384166e-05, |
|
"loss": 0.0347, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_loss": 0.07271888107061386, |
|
"eval_runtime": 111.9488, |
|
"eval_samples_per_second": 33.623, |
|
"eval_steps_per_second": 16.811, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 6.025972059213266e-05, |
|
"loss": 0.0342, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 6.018400030288116e-05, |
|
"loss": 0.0341, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 6.010828001362966e-05, |
|
"loss": 0.0345, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"eval_loss": 0.0807260125875473, |
|
"eval_runtime": 114.3881, |
|
"eval_samples_per_second": 32.906, |
|
"eval_steps_per_second": 16.453, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 6.0032559724378144e-05, |
|
"loss": 0.0343, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 5.995683943512664e-05, |
|
"loss": 0.0344, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 5.9881119145875144e-05, |
|
"loss": 0.0341, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"eval_loss": 0.07396366447210312, |
|
"eval_runtime": 112.662, |
|
"eval_samples_per_second": 33.41, |
|
"eval_steps_per_second": 16.705, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 5.980539885662364e-05, |
|
"loss": 0.0346, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 5.972967856737213e-05, |
|
"loss": 0.0342, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 5.9653958278120626e-05, |
|
"loss": 0.0343, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_loss": 0.07657469063997269, |
|
"eval_runtime": 113.7341, |
|
"eval_samples_per_second": 33.095, |
|
"eval_steps_per_second": 16.547, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 5.957823798886912e-05, |
|
"loss": 0.0346, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 5.950251769961761e-05, |
|
"loss": 0.0339, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 5.942679741036611e-05, |
|
"loss": 0.0344, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"eval_loss": 0.07259540259838104, |
|
"eval_runtime": 112.7714, |
|
"eval_samples_per_second": 33.377, |
|
"eval_steps_per_second": 16.689, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 5.9351077121114604e-05, |
|
"loss": 0.0344, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 5.927535683186311e-05, |
|
"loss": 0.0342, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 5.919963654261159e-05, |
|
"loss": 0.0346, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"eval_loss": 0.07272990792989731, |
|
"eval_runtime": 112.0126, |
|
"eval_samples_per_second": 33.603, |
|
"eval_steps_per_second": 16.802, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 5.912391625336009e-05, |
|
"loss": 0.0342, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 5.904819596410859e-05, |
|
"loss": 0.0344, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 5.8972475674857085e-05, |
|
"loss": 0.0349, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"eval_loss": 0.07258746773004532, |
|
"eval_runtime": 112.1197, |
|
"eval_samples_per_second": 33.571, |
|
"eval_steps_per_second": 16.786, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 5.8896755385605575e-05, |
|
"loss": 0.0349, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 5.882103509635407e-05, |
|
"loss": 0.0342, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 5.874531480710257e-05, |
|
"loss": 0.0337, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_loss": 0.0732216015458107, |
|
"eval_runtime": 112.2957, |
|
"eval_samples_per_second": 33.519, |
|
"eval_steps_per_second": 16.759, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 5.867035172074358e-05, |
|
"loss": 0.0351, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 5.8594631431492076e-05, |
|
"loss": 0.034, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 5.851966834513308e-05, |
|
"loss": 0.0346, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"eval_loss": 0.0736905038356781, |
|
"eval_runtime": 112.9513, |
|
"eval_samples_per_second": 33.324, |
|
"eval_steps_per_second": 16.662, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 5.844394805588158e-05, |
|
"loss": 0.0343, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 5.836822776663007e-05, |
|
"loss": 0.0344, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 5.8292507477378564e-05, |
|
"loss": 0.0347, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"eval_loss": 0.0742163360118866, |
|
"eval_runtime": 113.5576, |
|
"eval_samples_per_second": 33.146, |
|
"eval_steps_per_second": 16.573, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 5.821678718812707e-05, |
|
"loss": 0.0342, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 5.814106689887555e-05, |
|
"loss": 0.0347, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 5.806534660962405e-05, |
|
"loss": 0.0345, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_loss": 0.08524000644683838, |
|
"eval_runtime": 113.1586, |
|
"eval_samples_per_second": 33.263, |
|
"eval_steps_per_second": 16.632, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 5.798962632037255e-05, |
|
"loss": 0.0346, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 5.7913906031121045e-05, |
|
"loss": 0.0341, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 5.7838185741869535e-05, |
|
"loss": 0.0337, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_loss": 0.07483002543449402, |
|
"eval_runtime": 112.4393, |
|
"eval_samples_per_second": 33.476, |
|
"eval_steps_per_second": 16.738, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 5.776246545261803e-05, |
|
"loss": 0.0338, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 5.768674516336653e-05, |
|
"loss": 0.0342, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 5.761102487411503e-05, |
|
"loss": 0.0347, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 0.07542693614959717, |
|
"eval_runtime": 112.0708, |
|
"eval_samples_per_second": 33.586, |
|
"eval_steps_per_second": 16.793, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 5.753530458486351e-05, |
|
"loss": 0.0346, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 5.745958429561201e-05, |
|
"loss": 0.0343, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 5.738386400636051e-05, |
|
"loss": 0.0343, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"eval_loss": 0.07504470646381378, |
|
"eval_runtime": 113.1258, |
|
"eval_samples_per_second": 33.273, |
|
"eval_steps_per_second": 16.636, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 5.7308143717108995e-05, |
|
"loss": 0.0342, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 5.72324234278575e-05, |
|
"loss": 0.0343, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 5.7156703138605994e-05, |
|
"loss": 0.0348, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"eval_loss": 0.07590635865926743, |
|
"eval_runtime": 112.9114, |
|
"eval_samples_per_second": 33.336, |
|
"eval_steps_per_second": 16.668, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 5.708098284935449e-05, |
|
"loss": 0.0347, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 5.700526256010298e-05, |
|
"loss": 0.0346, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 5.6929542270851476e-05, |
|
"loss": 0.0347, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_loss": 0.07634126394987106, |
|
"eval_runtime": 112.7278, |
|
"eval_samples_per_second": 33.39, |
|
"eval_steps_per_second": 16.695, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 5.685382198159997e-05, |
|
"loss": 0.0346, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 5.677810169234846e-05, |
|
"loss": 0.034, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 5.670238140309696e-05, |
|
"loss": 0.0344, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 0.07448583841323853, |
|
"eval_runtime": 112.6897, |
|
"eval_samples_per_second": 33.401, |
|
"eval_steps_per_second": 16.701, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 5.662666111384546e-05, |
|
"loss": 0.0341, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 5.655094082459396e-05, |
|
"loss": 0.035, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 5.647522053534245e-05, |
|
"loss": 0.0348, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_loss": 0.07628956437110901, |
|
"eval_runtime": 114.1953, |
|
"eval_samples_per_second": 32.961, |
|
"eval_steps_per_second": 16.481, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 5.639950024609094e-05, |
|
"loss": 0.0341, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 5.632377995683944e-05, |
|
"loss": 0.0343, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 5.6248059667587935e-05, |
|
"loss": 0.0342, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"eval_loss": 0.07423391193151474, |
|
"eval_runtime": 112.6962, |
|
"eval_samples_per_second": 33.4, |
|
"eval_steps_per_second": 16.7, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 5.6172339378336425e-05, |
|
"loss": 0.0342, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 5.609661908908492e-05, |
|
"loss": 0.0344, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 5.6020898799833424e-05, |
|
"loss": 0.0345, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"eval_loss": 0.07315324246883392, |
|
"eval_runtime": 112.2224, |
|
"eval_samples_per_second": 33.541, |
|
"eval_steps_per_second": 16.77, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 5.594517851058191e-05, |
|
"loss": 0.034, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 5.58694582213304e-05, |
|
"loss": 0.0343, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 5.5793737932078906e-05, |
|
"loss": 0.0339, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_loss": 0.07429691404104233, |
|
"eval_runtime": 112.0124, |
|
"eval_samples_per_second": 33.603, |
|
"eval_steps_per_second": 16.802, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 5.57180176428274e-05, |
|
"loss": 0.0344, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 5.564229735357589e-05, |
|
"loss": 0.0338, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 5.556657706432439e-05, |
|
"loss": 0.0345, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"eval_loss": 0.07223436236381531, |
|
"eval_runtime": 113.1772, |
|
"eval_samples_per_second": 33.258, |
|
"eval_steps_per_second": 16.629, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 5.5490856775072884e-05, |
|
"loss": 0.0342, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 5.541513648582139e-05, |
|
"loss": 0.0339, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 5.533941619656987e-05, |
|
"loss": 0.0337, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"eval_loss": 0.07364470511674881, |
|
"eval_runtime": 112.7525, |
|
"eval_samples_per_second": 33.383, |
|
"eval_steps_per_second": 16.691, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 5.5263695907318366e-05, |
|
"loss": 0.0341, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 5.518797561806687e-05, |
|
"loss": 0.0343, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 5.511225532881535e-05, |
|
"loss": 0.0347, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_loss": 0.0728161558508873, |
|
"eval_runtime": 113.1279, |
|
"eval_samples_per_second": 33.272, |
|
"eval_steps_per_second": 16.636, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 5.503729224245637e-05, |
|
"loss": 0.0344, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 5.496157195320486e-05, |
|
"loss": 0.0342, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 5.488585166395336e-05, |
|
"loss": 0.0339, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"eval_loss": 0.07324323803186417, |
|
"eval_runtime": 112.2927, |
|
"eval_samples_per_second": 33.52, |
|
"eval_steps_per_second": 16.76, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 5.4810131374701853e-05, |
|
"loss": 0.0343, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 5.4734411085450356e-05, |
|
"loss": 0.0339, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 5.465869079619884e-05, |
|
"loss": 0.0341, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"eval_loss": 0.07426012307405472, |
|
"eval_runtime": 112.3605, |
|
"eval_samples_per_second": 33.499, |
|
"eval_steps_per_second": 16.75, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 5.458297050694734e-05, |
|
"loss": 0.0342, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 5.450725021769584e-05, |
|
"loss": 0.0341, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 5.443152992844433e-05, |
|
"loss": 0.0352, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"eval_loss": 0.07148529589176178, |
|
"eval_runtime": 112.8953, |
|
"eval_samples_per_second": 33.341, |
|
"eval_steps_per_second": 16.67, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 5.4355809639192824e-05, |
|
"loss": 0.0342, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 5.428008934994132e-05, |
|
"loss": 0.034, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 5.420436906068982e-05, |
|
"loss": 0.0344, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"eval_loss": 0.07299716770648956, |
|
"eval_runtime": 112.5155, |
|
"eval_samples_per_second": 33.453, |
|
"eval_steps_per_second": 16.727, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 5.4128648771438306e-05, |
|
"loss": 0.0344, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 5.40529284821868e-05, |
|
"loss": 0.0342, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 5.3977208192935305e-05, |
|
"loss": 0.0344, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_loss": 0.07337184995412827, |
|
"eval_runtime": 112.7442, |
|
"eval_samples_per_second": 33.385, |
|
"eval_steps_per_second": 16.693, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 5.390148790368379e-05, |
|
"loss": 0.0345, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 5.3825767614432284e-05, |
|
"loss": 0.0343, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 5.375004732518079e-05, |
|
"loss": 0.0343, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.07349181920289993, |
|
"eval_runtime": 112.219, |
|
"eval_samples_per_second": 33.542, |
|
"eval_steps_per_second": 16.771, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 5.3674327035929284e-05, |
|
"loss": 0.0342, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 5.359860674667777e-05, |
|
"loss": 0.0334, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 5.352288645742627e-05, |
|
"loss": 0.0337, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"eval_loss": 0.07557845860719681, |
|
"eval_runtime": 112.8297, |
|
"eval_samples_per_second": 33.36, |
|
"eval_steps_per_second": 16.68, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 5.3447166168174766e-05, |
|
"loss": 0.0335, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 5.337144587892327e-05, |
|
"loss": 0.0338, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 5.329572558967175e-05, |
|
"loss": 0.0342, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_loss": 0.07301827520132065, |
|
"eval_runtime": 111.948, |
|
"eval_samples_per_second": 33.623, |
|
"eval_steps_per_second": 16.811, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 5.322000530042025e-05, |
|
"loss": 0.0339, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 5.314428501116875e-05, |
|
"loss": 0.0346, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 5.306856472191723e-05, |
|
"loss": 0.0336, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_loss": 0.0720408484339714, |
|
"eval_runtime": 114.2002, |
|
"eval_samples_per_second": 32.96, |
|
"eval_steps_per_second": 16.48, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 5.2992844432665736e-05, |
|
"loss": 0.0338, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 5.291788134630674e-05, |
|
"loss": 0.0338, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 5.284216105705524e-05, |
|
"loss": 0.0338, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 0.0730295181274414, |
|
"eval_runtime": 112.1253, |
|
"eval_samples_per_second": 33.57, |
|
"eval_steps_per_second": 16.785, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 5.2766440767803735e-05, |
|
"loss": 0.0346, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 5.269072047855224e-05, |
|
"loss": 0.0343, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 5.261500018930072e-05, |
|
"loss": 0.0342, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"eval_loss": 0.07264556735754013, |
|
"eval_runtime": 112.4533, |
|
"eval_samples_per_second": 33.472, |
|
"eval_steps_per_second": 16.736, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 5.2539279900049223e-05, |
|
"loss": 0.034, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 5.246355961079772e-05, |
|
"loss": 0.034, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 5.238783932154621e-05, |
|
"loss": 0.0338, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_loss": 0.07210873812437057, |
|
"eval_runtime": 112.1068, |
|
"eval_samples_per_second": 33.575, |
|
"eval_steps_per_second": 16.788, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 5.2312119032294705e-05, |
|
"loss": 0.0336, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 5.22363987430432e-05, |
|
"loss": 0.0338, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 5.21606784537917e-05, |
|
"loss": 0.0341, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 0.07246097177267075, |
|
"eval_runtime": 116.2175, |
|
"eval_samples_per_second": 32.388, |
|
"eval_steps_per_second": 16.194, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 5.208495816454019e-05, |
|
"loss": 0.0343, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 5.2009237875288684e-05, |
|
"loss": 0.034, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 5.1933517586037187e-05, |
|
"loss": 0.0345, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"eval_loss": 0.07464263588190079, |
|
"eval_runtime": 112.0209, |
|
"eval_samples_per_second": 33.601, |
|
"eval_steps_per_second": 16.8, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 5.185779729678567e-05, |
|
"loss": 0.0334, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 5.1782077007534165e-05, |
|
"loss": 0.0339, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 5.170635671828267e-05, |
|
"loss": 0.0337, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"eval_loss": 0.07431815564632416, |
|
"eval_runtime": 112.3195, |
|
"eval_samples_per_second": 33.512, |
|
"eval_steps_per_second": 16.756, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 5.1630636429031165e-05, |
|
"loss": 0.0334, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 5.1554916139779654e-05, |
|
"loss": 0.0337, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 5.147919585052815e-05, |
|
"loss": 0.0342, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 0.07142894715070724, |
|
"eval_runtime": 112.0766, |
|
"eval_samples_per_second": 33.584, |
|
"eval_steps_per_second": 16.792, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 5.140347556127665e-05, |
|
"loss": 0.0338, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 5.132775527202515e-05, |
|
"loss": 0.0345, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 5.125203498277363e-05, |
|
"loss": 0.0341, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"eval_loss": 0.07273725420236588, |
|
"eval_runtime": 112.638, |
|
"eval_samples_per_second": 33.417, |
|
"eval_steps_per_second": 16.708, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 5.117631469352213e-05, |
|
"loss": 0.0337, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 5.110059440427063e-05, |
|
"loss": 0.0343, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 5.102563131791164e-05, |
|
"loss": 0.0336, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 0.07513102144002914, |
|
"eval_runtime": 112.0798, |
|
"eval_samples_per_second": 33.583, |
|
"eval_steps_per_second": 16.792, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 5.0949911028660134e-05, |
|
"loss": 0.0338, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 5.087419073940862e-05, |
|
"loss": 0.0338, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 5.079847045015712e-05, |
|
"loss": 0.0344, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 0.07184793055057526, |
|
"eval_runtime": 113.8705, |
|
"eval_samples_per_second": 33.055, |
|
"eval_steps_per_second": 16.528, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 5.0722750160905616e-05, |
|
"loss": 0.0343, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 5.064702987165412e-05, |
|
"loss": 0.0347, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 5.05713095824026e-05, |
|
"loss": 0.034, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"eval_loss": 0.07399240136146545, |
|
"eval_runtime": 112.4809, |
|
"eval_samples_per_second": 33.463, |
|
"eval_steps_per_second": 16.732, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 5.0495589293151105e-05, |
|
"loss": 0.0335, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 5.04198690038996e-05, |
|
"loss": 0.0339, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 5.034414871464809e-05, |
|
"loss": 0.0339, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"eval_loss": 0.07266144454479218, |
|
"eval_runtime": 112.555, |
|
"eval_samples_per_second": 33.441, |
|
"eval_steps_per_second": 16.721, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 5.0268428425396587e-05, |
|
"loss": 0.0336, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 5.019270813614508e-05, |
|
"loss": 0.0342, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 5.011698784689358e-05, |
|
"loss": 0.0341, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"eval_loss": 0.07241350412368774, |
|
"eval_runtime": 112.5399, |
|
"eval_samples_per_second": 33.446, |
|
"eval_steps_per_second": 16.723, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 5.004126755764207e-05, |
|
"loss": 0.0342, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 4.9965547268390565e-05, |
|
"loss": 0.034, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 4.988982697913907e-05, |
|
"loss": 0.034, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_loss": 0.0732254683971405, |
|
"eval_runtime": 112.7632, |
|
"eval_samples_per_second": 33.38, |
|
"eval_steps_per_second": 16.69, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 4.981410668988756e-05, |
|
"loss": 0.0344, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 4.973838640063605e-05, |
|
"loss": 0.0344, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 4.966266611138455e-05, |
|
"loss": 0.0339, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_loss": 0.07184404134750366, |
|
"eval_runtime": 112.034, |
|
"eval_samples_per_second": 33.597, |
|
"eval_steps_per_second": 16.798, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 4.958694582213304e-05, |
|
"loss": 0.0341, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 4.951198273577405e-05, |
|
"loss": 0.0352, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 4.943626244652255e-05, |
|
"loss": 0.0337, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"eval_loss": 0.07546329498291016, |
|
"eval_runtime": 112.62, |
|
"eval_samples_per_second": 33.422, |
|
"eval_steps_per_second": 16.711, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 4.9360542157271044e-05, |
|
"loss": 0.0344, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 4.9284821868019534e-05, |
|
"loss": 0.0339, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 4.920910157876804e-05, |
|
"loss": 0.0342, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_loss": 0.07388905435800552, |
|
"eval_runtime": 112.7065, |
|
"eval_samples_per_second": 33.396, |
|
"eval_steps_per_second": 16.698, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 4.9133381289516526e-05, |
|
"loss": 0.0348, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 4.905766100026503e-05, |
|
"loss": 0.034, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 4.898194071101352e-05, |
|
"loss": 0.0341, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_loss": 0.07326843589544296, |
|
"eval_runtime": 112.5254, |
|
"eval_samples_per_second": 33.45, |
|
"eval_steps_per_second": 16.725, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 4.890622042176201e-05, |
|
"loss": 0.0341, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 4.883050013251051e-05, |
|
"loss": 0.0336, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 4.8754779843259e-05, |
|
"loss": 0.034, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"eval_loss": 0.07398169487714767, |
|
"eval_runtime": 112.579, |
|
"eval_samples_per_second": 33.434, |
|
"eval_steps_per_second": 16.717, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 4.86790595540075e-05, |
|
"loss": 0.0336, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 4.860333926475599e-05, |
|
"loss": 0.0335, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 4.852761897550449e-05, |
|
"loss": 0.0342, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_loss": 0.07327091693878174, |
|
"eval_runtime": 113.0271, |
|
"eval_samples_per_second": 33.302, |
|
"eval_steps_per_second": 16.651, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 4.8451898686252986e-05, |
|
"loss": 0.034, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 4.837617839700148e-05, |
|
"loss": 0.0334, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 4.830045810774997e-05, |
|
"loss": 0.0346, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_loss": 0.07217860966920853, |
|
"eval_runtime": 112.7729, |
|
"eval_samples_per_second": 33.377, |
|
"eval_steps_per_second": 16.688, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 4.822473781849847e-05, |
|
"loss": 0.0338, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 4.8149017529246964e-05, |
|
"loss": 0.0343, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 4.807329723999546e-05, |
|
"loss": 0.0343, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_loss": 0.07317784428596497, |
|
"eval_runtime": 112.9669, |
|
"eval_samples_per_second": 33.32, |
|
"eval_steps_per_second": 16.66, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 4.7997576950743956e-05, |
|
"loss": 0.034, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 4.7921856661492446e-05, |
|
"loss": 0.0335, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 4.784613637224095e-05, |
|
"loss": 0.0339, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"eval_loss": 0.07431201636791229, |
|
"eval_runtime": 112.6345, |
|
"eval_samples_per_second": 33.418, |
|
"eval_steps_per_second": 16.709, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 4.777041608298944e-05, |
|
"loss": 0.0342, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 4.7694695793737935e-05, |
|
"loss": 0.0344, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 4.761897550448643e-05, |
|
"loss": 0.0338, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"eval_loss": 0.07180227339267731, |
|
"eval_runtime": 112.6264, |
|
"eval_samples_per_second": 33.42, |
|
"eval_steps_per_second": 16.71, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 4.754325521523492e-05, |
|
"loss": 0.0334, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 4.746753492598342e-05, |
|
"loss": 0.0336, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 4.739181463673191e-05, |
|
"loss": 0.0336, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 0.07384290546178818, |
|
"eval_runtime": 112.3928, |
|
"eval_samples_per_second": 33.49, |
|
"eval_steps_per_second": 16.745, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 4.731609434748041e-05, |
|
"loss": 0.0337, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 4.7240374058228905e-05, |
|
"loss": 0.0352, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 4.71646537689774e-05, |
|
"loss": 0.0341, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"eval_loss": 0.07421711087226868, |
|
"eval_runtime": 114.0697, |
|
"eval_samples_per_second": 32.997, |
|
"eval_steps_per_second": 16.499, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 4.708893347972589e-05, |
|
"loss": 0.0336, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.7013213190474394e-05, |
|
"loss": 0.0336, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.6937492901222884e-05, |
|
"loss": 0.034, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"eval_loss": 0.07209596037864685, |
|
"eval_runtime": 111.5901, |
|
"eval_samples_per_second": 33.731, |
|
"eval_steps_per_second": 16.865, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 4.686177261197138e-05, |
|
"loss": 0.0333, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 4.6786052322719876e-05, |
|
"loss": 0.0339, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 4.6710332033468365e-05, |
|
"loss": 0.0338, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_loss": 0.07271593809127808, |
|
"eval_runtime": 112.5367, |
|
"eval_samples_per_second": 33.447, |
|
"eval_steps_per_second": 16.723, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 4.663461174421687e-05, |
|
"loss": 0.0334, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 4.655889145496536e-05, |
|
"loss": 0.0335, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 4.6483171165713854e-05, |
|
"loss": 0.0337, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 0.07174564898014069, |
|
"eval_runtime": 112.5266, |
|
"eval_samples_per_second": 33.45, |
|
"eval_steps_per_second": 16.725, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 4.640745087646235e-05, |
|
"loss": 0.0334, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 4.633173058721085e-05, |
|
"loss": 0.0338, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 4.625601029795934e-05, |
|
"loss": 0.034, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"eval_loss": 0.07253672927618027, |
|
"eval_runtime": 111.6687, |
|
"eval_samples_per_second": 33.707, |
|
"eval_steps_per_second": 16.853, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 4.618029000870783e-05, |
|
"loss": 0.0336, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 4.610456971945633e-05, |
|
"loss": 0.0338, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 4.6028849430204825e-05, |
|
"loss": 0.0344, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 0.07169464230537415, |
|
"eval_runtime": 112.1693, |
|
"eval_samples_per_second": 33.556, |
|
"eval_steps_per_second": 16.778, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 4.595312914095332e-05, |
|
"loss": 0.0331, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 4.587740885170182e-05, |
|
"loss": 0.0336, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 4.5801688562450314e-05, |
|
"loss": 0.0337, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"eval_loss": 0.07270840555429459, |
|
"eval_runtime": 112.0566, |
|
"eval_samples_per_second": 33.59, |
|
"eval_steps_per_second": 16.795, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 4.57259682731988e-05, |
|
"loss": 0.0342, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 4.5650247983947306e-05, |
|
"loss": 0.0338, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 4.5574527694695796e-05, |
|
"loss": 0.0342, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 0.072144515812397, |
|
"eval_runtime": 113.1575, |
|
"eval_samples_per_second": 33.263, |
|
"eval_steps_per_second": 16.632, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 4.549880740544429e-05, |
|
"loss": 0.0338, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 4.542308711619279e-05, |
|
"loss": 0.0332, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 4.534736682694128e-05, |
|
"loss": 0.034, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"eval_loss": 0.07131176441907883, |
|
"eval_runtime": 114.0394, |
|
"eval_samples_per_second": 33.006, |
|
"eval_steps_per_second": 16.503, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 4.5271646537689774e-05, |
|
"loss": 0.0336, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 4.519592624843827e-05, |
|
"loss": 0.0337, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 4.5120205959186766e-05, |
|
"loss": 0.0339, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"eval_loss": 0.07340256869792938, |
|
"eval_runtime": 114.0856, |
|
"eval_samples_per_second": 32.993, |
|
"eval_steps_per_second": 16.496, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 4.504448566993526e-05, |
|
"loss": 0.0343, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 4.496876538068376e-05, |
|
"loss": 0.0339, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 4.489304509143225e-05, |
|
"loss": 0.0334, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"eval_loss": 0.07161632180213928, |
|
"eval_runtime": 113.2386, |
|
"eval_samples_per_second": 33.24, |
|
"eval_steps_per_second": 16.62, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 4.481732480218075e-05, |
|
"loss": 0.0338, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 4.4743118918714274e-05, |
|
"loss": 0.0334, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 4.466739862946276e-05, |
|
"loss": 0.0329, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_loss": 0.07321549952030182, |
|
"eval_runtime": 111.9449, |
|
"eval_samples_per_second": 33.624, |
|
"eval_steps_per_second": 16.812, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 4.459167834021126e-05, |
|
"loss": 0.0336, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 4.4515958050959756e-05, |
|
"loss": 0.0336, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 4.444023776170825e-05, |
|
"loss": 0.034, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_loss": 0.07254920899868011, |
|
"eval_runtime": 115.4895, |
|
"eval_samples_per_second": 32.592, |
|
"eval_steps_per_second": 16.296, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 4.436451747245675e-05, |
|
"loss": 0.0336, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 4.4288797183205244e-05, |
|
"loss": 0.0345, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 4.4213076893953734e-05, |
|
"loss": 0.0338, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 0.07417631149291992, |
|
"eval_runtime": 112.7985, |
|
"eval_samples_per_second": 33.369, |
|
"eval_steps_per_second": 16.685, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 4.413735660470224e-05, |
|
"loss": 0.0336, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 4.4061636315450726e-05, |
|
"loss": 0.0342, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 4.398591602619922e-05, |
|
"loss": 0.0338, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_loss": 0.07506504654884338, |
|
"eval_runtime": 112.4975, |
|
"eval_samples_per_second": 33.459, |
|
"eval_steps_per_second": 16.729, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 4.391019573694772e-05, |
|
"loss": 0.0342, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 4.383447544769621e-05, |
|
"loss": 0.033, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 4.375875515844471e-05, |
|
"loss": 0.0342, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 0.07256604731082916, |
|
"eval_runtime": 118.8991, |
|
"eval_samples_per_second": 31.657, |
|
"eval_steps_per_second": 15.829, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 4.36830348691932e-05, |
|
"loss": 0.0339, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 4.36073145799417e-05, |
|
"loss": 0.0341, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 4.353159429069019e-05, |
|
"loss": 0.0342, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_loss": 0.07219333946704865, |
|
"eval_runtime": 112.2209, |
|
"eval_samples_per_second": 33.541, |
|
"eval_steps_per_second": 16.77, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 4.345587400143869e-05, |
|
"loss": 0.0345, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 4.3380153712187186e-05, |
|
"loss": 0.0335, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 4.3304433422935675e-05, |
|
"loss": 0.0342, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 0.07268434017896652, |
|
"eval_runtime": 118.9844, |
|
"eval_samples_per_second": 31.634, |
|
"eval_steps_per_second": 15.817, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 4.322871313368417e-05, |
|
"loss": 0.0343, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 4.315299284443267e-05, |
|
"loss": 0.0342, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 4.307802975807368e-05, |
|
"loss": 0.034, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_loss": 0.07335703819990158, |
|
"eval_runtime": 117.7972, |
|
"eval_samples_per_second": 31.953, |
|
"eval_steps_per_second": 15.977, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 4.300230946882217e-05, |
|
"loss": 0.0339, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 4.292658917957067e-05, |
|
"loss": 0.0342, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 4.285086889031916e-05, |
|
"loss": 0.0338, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"eval_loss": 0.07344873249530792, |
|
"eval_runtime": 117.2132, |
|
"eval_samples_per_second": 32.112, |
|
"eval_steps_per_second": 16.056, |
|
"step": 76500 |
|
} |
|
], |
|
"max_steps": 133065, |
|
"num_train_epochs": 15, |
|
"total_flos": 3.571939832452273e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|