{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.682926829268293,
  "eval_steps": 500,
  "global_step": 217,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 1.9999451693655125e-05,
      "loss": 1.3914,
      "step": 1
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.9986295347545738e-05,
      "loss": 1.1489,
      "step": 5
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.9945218953682736e-05,
      "loss": 1.0541,
      "step": 10
    },
    {
      "epoch": 0.68,
      "eval_loss": 1.0341095924377441,
      "eval_runtime": 379.2893,
      "eval_samples_per_second": 60.93,
      "eval_steps_per_second": 0.477,
      "step": 14
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.9876883405951378e-05,
      "loss": 1.0441,
      "step": 15
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.9781476007338058e-05,
      "loss": 0.9858,
      "step": 20
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.9659258262890683e-05,
      "loss": 0.9708,
      "step": 25
    },
    {
      "epoch": 1.71,
      "eval_loss": 1.0142368078231812,
      "eval_runtime": 378.349,
      "eval_samples_per_second": 61.081,
      "eval_steps_per_second": 0.478,
      "step": 29
    },
    {
      "epoch": 2.05,
      "learning_rate": 1.9510565162951538e-05,
      "loss": 0.9679,
      "step": 30
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.9335804264972018e-05,
      "loss": 0.9274,
      "step": 35
    },
    {
      "epoch": 2.54,
      "learning_rate": 1.913545457642601e-05,
      "loss": 0.9142,
      "step": 40
    },
    {
      "epoch": 2.68,
      "eval_loss": 1.0111455917358398,
      "eval_runtime": 378.1977,
      "eval_samples_per_second": 61.106,
      "eval_steps_per_second": 0.479,
      "step": 43
    },
    {
      "epoch": 3.07,
      "learning_rate": 1.891006524188368e-05,
      "loss": 0.8994,
      "step": 45
    },
    {
      "epoch": 3.32,
      "learning_rate": 1.866025403784439e-05,
      "loss": 0.8775,
      "step": 50
    },
    {
      "epoch": 3.56,
      "learning_rate": 1.8386705679454243e-05,
      "loss": 0.8637,
      "step": 55
    },
    {
      "epoch": 3.71,
      "eval_loss": 1.023858666419983,
      "eval_runtime": 378.3558,
      "eval_samples_per_second": 61.08,
      "eval_steps_per_second": 0.478,
      "step": 58
    },
    {
      "epoch": 4.1,
      "learning_rate": 1.8090169943749477e-05,
      "loss": 0.8323,
      "step": 60
    },
    {
      "epoch": 4.34,
      "learning_rate": 1.777145961456971e-05,
      "loss": 0.8209,
      "step": 65
    },
    {
      "epoch": 4.59,
      "learning_rate": 1.7431448254773943e-05,
      "loss": 0.8091,
      "step": 70
    },
    {
      "epoch": 4.68,
      "eval_loss": 1.0362672805786133,
      "eval_runtime": 378.1433,
      "eval_samples_per_second": 61.114,
      "eval_steps_per_second": 0.479,
      "step": 72
    },
    {
      "epoch": 5.12,
      "learning_rate": 1.7071067811865477e-05,
      "loss": 0.7588,
      "step": 75
    },
    {
      "epoch": 5.37,
      "learning_rate": 1.6691306063588583e-05,
      "loss": 0.7629,
      "step": 80
    },
    {
      "epoch": 5.61,
      "learning_rate": 1.6293203910498375e-05,
      "loss": 0.7516,
      "step": 85
    },
    {
      "epoch": 5.71,
      "eval_loss": 1.0779954195022583,
      "eval_runtime": 378.7856,
      "eval_samples_per_second": 61.011,
      "eval_steps_per_second": 0.478,
      "step": 87
    },
    {
      "epoch": 6.15,
      "learning_rate": 1.5877852522924733e-05,
      "loss": 0.6866,
      "step": 90
    },
    {
      "epoch": 6.39,
      "learning_rate": 1.5446390350150272e-05,
      "loss": 0.6993,
      "step": 95
    },
    {
      "epoch": 6.63,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.6884,
      "step": 100
    },
    {
      "epoch": 6.68,
      "eval_loss": 1.0987329483032227,
      "eval_runtime": 378.7519,
      "eval_samples_per_second": 61.016,
      "eval_steps_per_second": 0.478,
      "step": 101
    },
    {
      "epoch": 7.17,
      "learning_rate": 1.4539904997395468e-05,
      "loss": 0.6135,
      "step": 105
    },
    {
      "epoch": 7.41,
      "learning_rate": 1.4067366430758004e-05,
      "loss": 0.6301,
      "step": 110
    },
    {
      "epoch": 7.66,
      "learning_rate": 1.3583679495453e-05,
      "loss": 0.6309,
      "step": 115
    },
    {
      "epoch": 7.71,
      "eval_loss": 1.1394284963607788,
      "eval_runtime": 378.7271,
      "eval_samples_per_second": 61.02,
      "eval_steps_per_second": 0.478,
      "step": 116
    },
    {
      "epoch": 8.2,
      "learning_rate": 1.3090169943749475e-05,
      "loss": 0.5431,
      "step": 120
    },
    {
      "epoch": 8.44,
      "learning_rate": 1.2588190451025209e-05,
      "loss": 0.5686,
      "step": 125
    },
    {
      "epoch": 8.68,
      "learning_rate": 1.2079116908177592e-05,
      "loss": 0.5696,
      "step": 130
    },
    {
      "epoch": 8.68,
      "eval_loss": 1.1820148229599,
      "eval_runtime": 378.7544,
      "eval_samples_per_second": 61.016,
      "eval_steps_per_second": 0.478,
      "step": 130
    },
    {
      "epoch": 9.22,
      "learning_rate": 1.156434465040231e-05,
      "loss": 0.489,
      "step": 135
    },
    {
      "epoch": 9.46,
      "learning_rate": 1.1045284632676535e-05,
      "loss": 0.5067,
      "step": 140
    },
    {
      "epoch": 9.71,
      "learning_rate": 1.0523359562429441e-05,
      "loss": 0.4752,
      "step": 145
    },
    {
      "epoch": 9.71,
      "eval_loss": 1.2695468664169312,
      "eval_runtime": 379.3356,
      "eval_samples_per_second": 60.922,
      "eval_steps_per_second": 0.477,
      "step": 145
    },
    {
      "epoch": 10.24,
      "learning_rate": 1e-05,
      "loss": 0.4579,
      "step": 150
    },
    {
      "epoch": 10.49,
      "learning_rate": 9.476640437570562e-06,
      "loss": 0.448,
      "step": 155
    },
    {
      "epoch": 10.68,
      "eval_loss": 1.3108839988708496,
      "eval_runtime": 379.098,
      "eval_samples_per_second": 60.96,
      "eval_steps_per_second": 0.477,
      "step": 159
    },
    {
      "epoch": 11.02,
      "learning_rate": 8.954715367323468e-06,
      "loss": 0.4175,
      "step": 160
    },
    {
      "epoch": 11.27,
      "learning_rate": 8.43565534959769e-06,
      "loss": 0.4065,
      "step": 165
    },
    {
      "epoch": 11.51,
      "learning_rate": 7.92088309182241e-06,
      "loss": 0.3955,
      "step": 170
    },
    {
      "epoch": 11.71,
      "eval_loss": 1.387709379196167,
      "eval_runtime": 379.1872,
      "eval_samples_per_second": 60.946,
      "eval_steps_per_second": 0.477,
      "step": 174
    },
    {
      "epoch": 12.05,
      "learning_rate": 7.411809548974792e-06,
      "loss": 0.3736,
      "step": 175
    },
    {
      "epoch": 12.29,
      "learning_rate": 6.909830056250527e-06,
      "loss": 0.3623,
      "step": 180
    },
    {
      "epoch": 12.54,
      "learning_rate": 6.4163205045469975e-06,
      "loss": 0.3579,
      "step": 185
    },
    {
      "epoch": 12.68,
      "eval_loss": 1.3922604322433472,
      "eval_runtime": 378.9155,
      "eval_samples_per_second": 60.99,
      "eval_steps_per_second": 0.478,
      "step": 188
    },
    {
      "epoch": 13.07,
      "learning_rate": 5.932633569242e-06,
      "loss": 0.3336,
      "step": 190
    },
    {
      "epoch": 13.32,
      "learning_rate": 5.460095002604533e-06,
      "loss": 0.3216,
      "step": 195
    },
    {
      "epoch": 13.56,
      "learning_rate": 5.000000000000003e-06,
      "loss": 0.3228,
      "step": 200
    },
    {
      "epoch": 13.71,
      "eval_loss": 1.4064093828201294,
      "eval_runtime": 378.8946,
      "eval_samples_per_second": 60.993,
      "eval_steps_per_second": 0.478,
      "step": 203
    },
    {
      "epoch": 14.1,
      "learning_rate": 4.5536096498497295e-06,
      "loss": 0.2974,
      "step": 205
    },
    {
      "epoch": 14.34,
      "learning_rate": 4.12214747707527e-06,
      "loss": 0.29,
      "step": 210
    },
    {
      "epoch": 14.59,
      "learning_rate": 3.7067960895016277e-06,
      "loss": 0.2914,
      "step": 215
    },
    {
      "epoch": 14.68,
      "eval_loss": 1.4377079010009766,
      "eval_runtime": 378.9608,
      "eval_samples_per_second": 60.983,
      "eval_steps_per_second": 0.478,
      "step": 217
    },
    {
      "epoch": 14.68,
      "step": 217,
      "total_flos": 353275824046080.0,
      "train_loss": 0.6393404012451523,
      "train_runtime": 14968.9879,
      "train_samples_per_second": 10.415,
      "train_steps_per_second": 0.02
    }
  ],
  "logging_steps": 5,
  "max_steps": 300,
  "num_train_epochs": 15,
  "save_steps": 500,
  "total_flos": 353275824046080.0,
  "trial_name": null,
  "trial_params": null
}