|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9337068160597572, |
|
"eval_steps": 50, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.6599863767623901, |
|
"eval_runtime": 3.0665, |
|
"eval_samples_per_second": 32.611, |
|
"eval_steps_per_second": 4.239, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.5268713235855103, |
|
"eval_runtime": 3.0649, |
|
"eval_samples_per_second": 32.627, |
|
"eval_steps_per_second": 4.242, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.4272453784942627, |
|
"eval_runtime": 3.0703, |
|
"eval_samples_per_second": 32.57, |
|
"eval_steps_per_second": 4.234, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.3691519498825073, |
|
"eval_runtime": 3.0755, |
|
"eval_samples_per_second": 32.515, |
|
"eval_steps_per_second": 4.227, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.3439632654190063, |
|
"eval_runtime": 3.0781, |
|
"eval_samples_per_second": 32.488, |
|
"eval_steps_per_second": 4.223, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.2951898574829102, |
|
"eval_runtime": 3.0608, |
|
"eval_samples_per_second": 32.671, |
|
"eval_steps_per_second": 4.247, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.2598179578781128, |
|
"eval_runtime": 3.0651, |
|
"eval_samples_per_second": 32.626, |
|
"eval_steps_per_second": 4.241, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.2034798860549927, |
|
"eval_runtime": 3.0667, |
|
"eval_samples_per_second": 32.608, |
|
"eval_steps_per_second": 4.239, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.1628668308258057, |
|
"eval_runtime": 3.0674, |
|
"eval_samples_per_second": 32.601, |
|
"eval_steps_per_second": 4.238, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.75e-05, |
|
"loss": 1.6585, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.1156766414642334, |
|
"eval_runtime": 3.0714, |
|
"eval_samples_per_second": 32.558, |
|
"eval_steps_per_second": 4.233, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.0700278282165527, |
|
"eval_runtime": 3.0584, |
|
"eval_samples_per_second": 32.696, |
|
"eval_steps_per_second": 4.251, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.0359002351760864, |
|
"eval_runtime": 3.0694, |
|
"eval_samples_per_second": 32.58, |
|
"eval_steps_per_second": 4.235, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.019242286682129, |
|
"eval_runtime": 3.0562, |
|
"eval_samples_per_second": 32.72, |
|
"eval_steps_per_second": 4.254, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.9841110706329346, |
|
"eval_runtime": 3.0664, |
|
"eval_samples_per_second": 32.611, |
|
"eval_steps_per_second": 4.239, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 0.993296205997467, |
|
"eval_runtime": 3.074, |
|
"eval_samples_per_second": 32.531, |
|
"eval_steps_per_second": 4.229, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.9330427646636963, |
|
"eval_runtime": 3.0586, |
|
"eval_samples_per_second": 32.695, |
|
"eval_steps_per_second": 4.25, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.9203641414642334, |
|
"eval_runtime": 3.0859, |
|
"eval_samples_per_second": 32.405, |
|
"eval_steps_per_second": 4.213, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 0.9058796763420105, |
|
"eval_runtime": 3.068, |
|
"eval_samples_per_second": 32.595, |
|
"eval_steps_per_second": 4.237, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 0.9017823934555054, |
|
"eval_runtime": 3.0852, |
|
"eval_samples_per_second": 32.413, |
|
"eval_steps_per_second": 4.214, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.3142, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 0.8834471702575684, |
|
"eval_runtime": 3.0917, |
|
"eval_samples_per_second": 32.344, |
|
"eval_steps_per_second": 4.205, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4000, |
|
"num_train_epochs": 4, |
|
"save_steps": 1000, |
|
"total_flos": 1.369514704896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|