|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.99722479185939, |
|
"eval_steps": 30, |
|
"global_step": 810, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.157894736842105e-06, |
|
"loss": 11.8786, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 11.730859756469727, |
|
"eval_runtime": 60.9075, |
|
"eval_samples_per_second": 0.542, |
|
"eval_steps_per_second": 0.542, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.31578947368421e-06, |
|
"loss": 11.386, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 10.65054702758789, |
|
"eval_runtime": 60.8988, |
|
"eval_samples_per_second": 0.542, |
|
"eval_steps_per_second": 0.542, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.473684210526315e-06, |
|
"loss": 9.8984, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 8.621472358703613, |
|
"eval_runtime": 60.8346, |
|
"eval_samples_per_second": 0.542, |
|
"eval_steps_per_second": 0.542, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.263157894736842e-05, |
|
"loss": 7.9555, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 6.96145486831665, |
|
"eval_runtime": 60.9577, |
|
"eval_samples_per_second": 0.541, |
|
"eval_steps_per_second": 0.541, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.578947368421053e-05, |
|
"loss": 6.6507, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 6.004063606262207, |
|
"eval_runtime": 60.9978, |
|
"eval_samples_per_second": 0.541, |
|
"eval_steps_per_second": 0.541, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.894736842105263e-05, |
|
"loss": 5.8685, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 5.406551361083984, |
|
"eval_runtime": 60.6662, |
|
"eval_samples_per_second": 0.544, |
|
"eval_steps_per_second": 0.544, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.9993250234920638e-05, |
|
"loss": 5.3925, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 5.084054946899414, |
|
"eval_runtime": 61.1004, |
|
"eval_samples_per_second": 0.54, |
|
"eval_steps_per_second": 0.54, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.9957838880989076e-05, |
|
"loss": 5.1386, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_loss": 4.918034553527832, |
|
"eval_runtime": 60.9721, |
|
"eval_samples_per_second": 0.541, |
|
"eval_steps_per_second": 0.541, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.989218589765658e-05, |
|
"loss": 5.0205, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 4.811317443847656, |
|
"eval_runtime": 60.7077, |
|
"eval_samples_per_second": 0.544, |
|
"eval_steps_per_second": 0.544, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.979649067087574e-05, |
|
"loss": 4.9088, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 4.734280109405518, |
|
"eval_runtime": 61.3359, |
|
"eval_samples_per_second": 0.538, |
|
"eval_steps_per_second": 0.538, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.967104382390511e-05, |
|
"loss": 4.8469, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 4.680157661437988, |
|
"eval_runtime": 60.9063, |
|
"eval_samples_per_second": 0.542, |
|
"eval_steps_per_second": 0.542, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.951622633469592e-05, |
|
"loss": 4.7796, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 4.634165287017822, |
|
"eval_runtime": 61.2818, |
|
"eval_samples_per_second": 0.538, |
|
"eval_steps_per_second": 0.538, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.933250837887457e-05, |
|
"loss": 4.7591, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 4.599093437194824, |
|
"eval_runtime": 61.1388, |
|
"eval_samples_per_second": 0.54, |
|
"eval_steps_per_second": 0.54, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.9120447901834708e-05, |
|
"loss": 4.7235, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_loss": 4.5706963539123535, |
|
"eval_runtime": 60.7134, |
|
"eval_samples_per_second": 0.544, |
|
"eval_steps_per_second": 0.544, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.888068892427538e-05, |
|
"loss": 6.7181, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 5.124680995941162, |
|
"eval_runtime": 55.4445, |
|
"eval_samples_per_second": 0.595, |
|
"eval_steps_per_second": 0.595, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.8613959586331364e-05, |
|
"loss": 5.0297, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_loss": 4.7116570472717285, |
|
"eval_runtime": 54.0242, |
|
"eval_samples_per_second": 0.611, |
|
"eval_steps_per_second": 0.611, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.8321069936235503e-05, |
|
"loss": 4.7846, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 4.612066268920898, |
|
"eval_runtime": 55.4504, |
|
"eval_samples_per_second": 0.595, |
|
"eval_steps_per_second": 0.595, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.800290947022884e-05, |
|
"loss": 4.733, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 4.56680965423584, |
|
"eval_runtime": 55.4115, |
|
"eval_samples_per_second": 0.596, |
|
"eval_steps_per_second": 0.596, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.766044443118978e-05, |
|
"loss": 4.6863, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_loss": 4.538435459136963, |
|
"eval_runtime": 55.5112, |
|
"eval_samples_per_second": 0.594, |
|
"eval_steps_per_second": 0.594, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.729471487418621e-05, |
|
"loss": 4.6477, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_loss": 4.5178914070129395, |
|
"eval_runtime": 55.4747, |
|
"eval_samples_per_second": 0.595, |
|
"eval_steps_per_second": 0.595, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 1.6906831507862446e-05, |
|
"loss": 4.6356, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_loss": 4.49837589263916, |
|
"eval_runtime": 55.4914, |
|
"eval_samples_per_second": 0.595, |
|
"eval_steps_per_second": 0.595, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 1.64979723212536e-05, |
|
"loss": 4.6119, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_loss": 4.483817100524902, |
|
"eval_runtime": 55.3919, |
|
"eval_samples_per_second": 0.596, |
|
"eval_steps_per_second": 0.596, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 1.606937900627157e-05, |
|
"loss": 4.5994, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_loss": 4.471835613250732, |
|
"eval_runtime": 55.4563, |
|
"eval_samples_per_second": 0.595, |
|
"eval_steps_per_second": 0.595, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 1.5622353186727542e-05, |
|
"loss": 4.5703, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_loss": 4.460899353027344, |
|
"eval_runtime": 55.4262, |
|
"eval_samples_per_second": 0.595, |
|
"eval_steps_per_second": 0.595, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 1.5158252465343242e-05, |
|
"loss": 4.5625, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_loss": 4.450038909912109, |
|
"eval_runtime": 55.4182, |
|
"eval_samples_per_second": 0.595, |
|
"eval_steps_per_second": 0.595, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 1.467848630075608e-05, |
|
"loss": 4.5649, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"eval_loss": 4.4421467781066895, |
|
"eval_runtime": 55.3928, |
|
"eval_samples_per_second": 0.596, |
|
"eval_steps_per_second": 0.596, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.4184511727039612e-05, |
|
"loss": 4.559, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 4.432287693023682, |
|
"eval_runtime": 54.0497, |
|
"eval_samples_per_second": 0.611, |
|
"eval_steps_per_second": 0.611, |
|
"step": 810 |
|
} |
|
], |
|
"logging_steps": 30, |
|
"max_steps": 1900, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 30, |
|
"total_flos": 3.3050813792256e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|