|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 128, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9230769230769231e-07, |
|
"loss": 2.3131, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"loss": 2.3205, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.99626950870707e-07, |
|
"loss": 2.1741, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.954429235188896e-07, |
|
"loss": 1.9213, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.866867588977609e-07, |
|
"loss": 1.8027, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.7352156778691276e-07, |
|
"loss": 1.5125, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.4529484510421753, |
|
"eval_runtime": 16.1705, |
|
"eval_samples_per_second": 7.05, |
|
"eval_steps_per_second": 0.928, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.561925927872421e-07, |
|
"loss": 1.4467, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.3502263991531294e-07, |
|
"loss": 1.3768, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.104060653380402e-07, |
|
"loss": 1.2423, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.828014292634508e-07, |
|
"loss": 1.1306, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.527229538316371e-07, |
|
"loss": 1.0165, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.207309441292325e-07, |
|
"loss": 0.9656, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.9357134699821472, |
|
"eval_runtime": 15.8011, |
|
"eval_samples_per_second": 7.215, |
|
"eval_steps_per_second": 0.949, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.8742135076578607e-07, |
|
"loss": 0.9752, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.5341466844148774e-07, |
|
"loss": 0.8735, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.1934437730492543e-07, |
|
"loss": 0.8911, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.8584514241650663e-07, |
|
"loss": 0.8266, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.5354099113921612e-07, |
|
"loss": 0.8245, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.2303368868954847e-07, |
|
"loss": 0.7816, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 9.489152839010797e-08, |
|
"loss": 0.7992, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.8069692850112915, |
|
"eval_runtime": 15.7689, |
|
"eval_samples_per_second": 7.229, |
|
"eval_steps_per_second": 0.951, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 6.963874544026108e-08, |
|
"loss": 0.7874, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 4.774575140626316e-08, |
|
"loss": 0.7714, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 2.9620371343832106e-08, |
|
"loss": 0.7559, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 1.5600246788994938e-08, |
|
"loss": 0.7602, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 5.946546135113861e-09, |
|
"loss": 0.7548, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 8.390995598676065e-10, |
|
"loss": 0.7865, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.7899278998374939, |
|
"eval_runtime": 15.7827, |
|
"eval_samples_per_second": 7.223, |
|
"eval_steps_per_second": 0.95, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 128, |
|
"total_flos": 89583526084608.0, |
|
"train_loss": 1.1804139949381351, |
|
"train_runtime": 4681.6725, |
|
"train_samples_per_second": 0.871, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 128, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 89583526084608.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|