Phi-2_PT_3.0_rev5 / trainer_state.json
vitorandrade's picture
Upload 14 files
d5cc123 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.99722479185939,
"eval_steps": 30,
"global_step": 810,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22,
"learning_rate": 3.157894736842105e-06,
"loss": 11.8786,
"step": 30
},
{
"epoch": 0.22,
"eval_loss": 11.730859756469727,
"eval_runtime": 60.9075,
"eval_samples_per_second": 0.542,
"eval_steps_per_second": 0.542,
"step": 30
},
{
"epoch": 0.44,
"learning_rate": 6.31578947368421e-06,
"loss": 11.386,
"step": 60
},
{
"epoch": 0.44,
"eval_loss": 10.65054702758789,
"eval_runtime": 60.8988,
"eval_samples_per_second": 0.542,
"eval_steps_per_second": 0.542,
"step": 60
},
{
"epoch": 0.67,
"learning_rate": 9.473684210526315e-06,
"loss": 9.8984,
"step": 90
},
{
"epoch": 0.67,
"eval_loss": 8.621472358703613,
"eval_runtime": 60.8346,
"eval_samples_per_second": 0.542,
"eval_steps_per_second": 0.542,
"step": 90
},
{
"epoch": 0.89,
"learning_rate": 1.263157894736842e-05,
"loss": 7.9555,
"step": 120
},
{
"epoch": 0.89,
"eval_loss": 6.96145486831665,
"eval_runtime": 60.9577,
"eval_samples_per_second": 0.541,
"eval_steps_per_second": 0.541,
"step": 120
},
{
"epoch": 1.11,
"learning_rate": 1.578947368421053e-05,
"loss": 6.6507,
"step": 150
},
{
"epoch": 1.11,
"eval_loss": 6.004063606262207,
"eval_runtime": 60.9978,
"eval_samples_per_second": 0.541,
"eval_steps_per_second": 0.541,
"step": 150
},
{
"epoch": 1.33,
"learning_rate": 1.894736842105263e-05,
"loss": 5.8685,
"step": 180
},
{
"epoch": 1.33,
"eval_loss": 5.406551361083984,
"eval_runtime": 60.6662,
"eval_samples_per_second": 0.544,
"eval_steps_per_second": 0.544,
"step": 180
},
{
"epoch": 1.55,
"learning_rate": 1.9993250234920638e-05,
"loss": 5.3925,
"step": 210
},
{
"epoch": 1.55,
"eval_loss": 5.084054946899414,
"eval_runtime": 61.1004,
"eval_samples_per_second": 0.54,
"eval_steps_per_second": 0.54,
"step": 210
},
{
"epoch": 1.78,
"learning_rate": 1.9957838880989076e-05,
"loss": 5.1386,
"step": 240
},
{
"epoch": 1.78,
"eval_loss": 4.918034553527832,
"eval_runtime": 60.9721,
"eval_samples_per_second": 0.541,
"eval_steps_per_second": 0.541,
"step": 240
},
{
"epoch": 2.0,
"learning_rate": 1.989218589765658e-05,
"loss": 5.0205,
"step": 270
},
{
"epoch": 2.0,
"eval_loss": 4.811317443847656,
"eval_runtime": 60.7077,
"eval_samples_per_second": 0.544,
"eval_steps_per_second": 0.544,
"step": 270
},
{
"epoch": 2.22,
"learning_rate": 1.979649067087574e-05,
"loss": 4.9088,
"step": 300
},
{
"epoch": 2.22,
"eval_loss": 4.734280109405518,
"eval_runtime": 61.3359,
"eval_samples_per_second": 0.538,
"eval_steps_per_second": 0.538,
"step": 300
},
{
"epoch": 2.44,
"learning_rate": 1.967104382390511e-05,
"loss": 4.8469,
"step": 330
},
{
"epoch": 2.44,
"eval_loss": 4.680157661437988,
"eval_runtime": 60.9063,
"eval_samples_per_second": 0.542,
"eval_steps_per_second": 0.542,
"step": 330
},
{
"epoch": 2.66,
"learning_rate": 1.951622633469592e-05,
"loss": 4.7796,
"step": 360
},
{
"epoch": 2.66,
"eval_loss": 4.634165287017822,
"eval_runtime": 61.2818,
"eval_samples_per_second": 0.538,
"eval_steps_per_second": 0.538,
"step": 360
},
{
"epoch": 2.89,
"learning_rate": 1.933250837887457e-05,
"loss": 4.7591,
"step": 390
},
{
"epoch": 2.89,
"eval_loss": 4.599093437194824,
"eval_runtime": 61.1388,
"eval_samples_per_second": 0.54,
"eval_steps_per_second": 0.54,
"step": 390
},
{
"epoch": 3.11,
"learning_rate": 1.9120447901834708e-05,
"loss": 4.7235,
"step": 420
},
{
"epoch": 3.11,
"eval_loss": 4.5706963539123535,
"eval_runtime": 60.7134,
"eval_samples_per_second": 0.544,
"eval_steps_per_second": 0.544,
"step": 420
},
{
"epoch": 3.33,
"learning_rate": 1.888068892427538e-05,
"loss": 6.7181,
"step": 450
},
{
"epoch": 3.33,
"eval_loss": 5.124680995941162,
"eval_runtime": 55.4445,
"eval_samples_per_second": 0.595,
"eval_steps_per_second": 0.595,
"step": 450
},
{
"epoch": 3.56,
"learning_rate": 1.8613959586331364e-05,
"loss": 5.0297,
"step": 480
},
{
"epoch": 3.56,
"eval_loss": 4.7116570472717285,
"eval_runtime": 54.0242,
"eval_samples_per_second": 0.611,
"eval_steps_per_second": 0.611,
"step": 480
},
{
"epoch": 3.78,
"learning_rate": 1.8321069936235503e-05,
"loss": 4.7846,
"step": 510
},
{
"epoch": 3.78,
"eval_loss": 4.612066268920898,
"eval_runtime": 55.4504,
"eval_samples_per_second": 0.595,
"eval_steps_per_second": 0.595,
"step": 510
},
{
"epoch": 4.0,
"learning_rate": 1.800290947022884e-05,
"loss": 4.733,
"step": 540
},
{
"epoch": 4.0,
"eval_loss": 4.56680965423584,
"eval_runtime": 55.4115,
"eval_samples_per_second": 0.596,
"eval_steps_per_second": 0.596,
"step": 540
},
{
"epoch": 4.22,
"learning_rate": 1.766044443118978e-05,
"loss": 4.6863,
"step": 570
},
{
"epoch": 4.22,
"eval_loss": 4.538435459136963,
"eval_runtime": 55.5112,
"eval_samples_per_second": 0.594,
"eval_steps_per_second": 0.594,
"step": 570
},
{
"epoch": 4.44,
"learning_rate": 1.729471487418621e-05,
"loss": 4.6477,
"step": 600
},
{
"epoch": 4.44,
"eval_loss": 4.5178914070129395,
"eval_runtime": 55.4747,
"eval_samples_per_second": 0.595,
"eval_steps_per_second": 0.595,
"step": 600
},
{
"epoch": 4.67,
"learning_rate": 1.6906831507862446e-05,
"loss": 4.6356,
"step": 630
},
{
"epoch": 4.67,
"eval_loss": 4.49837589263916,
"eval_runtime": 55.4914,
"eval_samples_per_second": 0.595,
"eval_steps_per_second": 0.595,
"step": 630
},
{
"epoch": 4.89,
"learning_rate": 1.64979723212536e-05,
"loss": 4.6119,
"step": 660
},
{
"epoch": 4.89,
"eval_loss": 4.483817100524902,
"eval_runtime": 55.3919,
"eval_samples_per_second": 0.596,
"eval_steps_per_second": 0.596,
"step": 660
},
{
"epoch": 5.11,
"learning_rate": 1.606937900627157e-05,
"loss": 4.5994,
"step": 690
},
{
"epoch": 5.11,
"eval_loss": 4.471835613250732,
"eval_runtime": 55.4563,
"eval_samples_per_second": 0.595,
"eval_steps_per_second": 0.595,
"step": 690
},
{
"epoch": 5.33,
"learning_rate": 1.5622353186727542e-05,
"loss": 4.5703,
"step": 720
},
{
"epoch": 5.33,
"eval_loss": 4.460899353027344,
"eval_runtime": 55.4262,
"eval_samples_per_second": 0.595,
"eval_steps_per_second": 0.595,
"step": 720
},
{
"epoch": 5.55,
"learning_rate": 1.5158252465343242e-05,
"loss": 4.5625,
"step": 750
},
{
"epoch": 5.55,
"eval_loss": 4.450038909912109,
"eval_runtime": 55.4182,
"eval_samples_per_second": 0.595,
"eval_steps_per_second": 0.595,
"step": 750
},
{
"epoch": 5.78,
"learning_rate": 1.467848630075608e-05,
"loss": 4.5649,
"step": 780
},
{
"epoch": 5.78,
"eval_loss": 4.4421467781066895,
"eval_runtime": 55.3928,
"eval_samples_per_second": 0.596,
"eval_steps_per_second": 0.596,
"step": 780
},
{
"epoch": 6.0,
"learning_rate": 1.4184511727039612e-05,
"loss": 4.559,
"step": 810
},
{
"epoch": 6.0,
"eval_loss": 4.432287693023682,
"eval_runtime": 54.0497,
"eval_samples_per_second": 0.611,
"eval_steps_per_second": 0.611,
"step": 810
}
],
"logging_steps": 30,
"max_steps": 1900,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 30,
"total_flos": 3.3050813792256e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}