|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 4035, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 13.118809700012207, |
|
"learning_rate": 9.335811648079307e-06, |
|
"loss": 0.8716, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6197183132171631, |
|
"eval_loss": 0.9822000861167908, |
|
"eval_runtime": 32.2509, |
|
"eval_samples_per_second": 2.201, |
|
"eval_steps_per_second": 2.201, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 10.717401504516602, |
|
"learning_rate": 8.66914498141264e-06, |
|
"loss": 0.8143, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5352112650871277, |
|
"eval_loss": 1.232447624206543, |
|
"eval_runtime": 32.3951, |
|
"eval_samples_per_second": 2.192, |
|
"eval_steps_per_second": 2.192, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 23.701391220092773, |
|
"learning_rate": 8.004956629491945e-06, |
|
"loss": 0.7584, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6478873491287231, |
|
"eval_loss": 1.0226496458053589, |
|
"eval_runtime": 32.3, |
|
"eval_samples_per_second": 2.198, |
|
"eval_steps_per_second": 2.198, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 33.54118728637695, |
|
"learning_rate": 7.340768277571252e-06, |
|
"loss": 0.6715, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6619718074798584, |
|
"eval_loss": 0.9550462961196899, |
|
"eval_runtime": 32.4715, |
|
"eval_samples_per_second": 2.187, |
|
"eval_steps_per_second": 2.187, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 30.0217227935791, |
|
"learning_rate": 6.674101610904585e-06, |
|
"loss": 0.6471, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6760563254356384, |
|
"eval_loss": 1.1272403001785278, |
|
"eval_runtime": 32.2183, |
|
"eval_samples_per_second": 2.204, |
|
"eval_steps_per_second": 2.204, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.36277323961257935, |
|
"learning_rate": 6.009913258983892e-06, |
|
"loss": 0.5759, |
|
"step": 1614 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6760563254356384, |
|
"eval_loss": 1.2193043231964111, |
|
"eval_runtime": 32.3011, |
|
"eval_samples_per_second": 2.198, |
|
"eval_steps_per_second": 2.198, |
|
"step": 1614 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 17.32307243347168, |
|
"learning_rate": 5.343246592317225e-06, |
|
"loss": 0.4963, |
|
"step": 1883 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7183098793029785, |
|
"eval_loss": 1.221394658088684, |
|
"eval_runtime": 32.1815, |
|
"eval_samples_per_second": 2.206, |
|
"eval_steps_per_second": 2.206, |
|
"step": 1883 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.09576527029275894, |
|
"learning_rate": 4.679058240396531e-06, |
|
"loss": 0.4053, |
|
"step": 2152 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7464788556098938, |
|
"eval_loss": 1.3082976341247559, |
|
"eval_runtime": 32.7134, |
|
"eval_samples_per_second": 2.17, |
|
"eval_steps_per_second": 2.17, |
|
"step": 2152 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 73.52884674072266, |
|
"learning_rate": 4.012391573729864e-06, |
|
"loss": 0.3344, |
|
"step": 2421 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6619718074798584, |
|
"eval_loss": 1.6390645503997803, |
|
"eval_runtime": 32.8911, |
|
"eval_samples_per_second": 2.159, |
|
"eval_steps_per_second": 2.159, |
|
"step": 2421 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 242.48890686035156, |
|
"learning_rate": 3.34820322180917e-06, |
|
"loss": 0.3216, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6478873491287231, |
|
"eval_loss": 1.722383737564087, |
|
"eval_runtime": 32.4813, |
|
"eval_samples_per_second": 2.186, |
|
"eval_steps_per_second": 2.186, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 376.0904541015625, |
|
"learning_rate": 2.6815365551425034e-06, |
|
"loss": 0.2248, |
|
"step": 2959 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6760563254356384, |
|
"eval_loss": 1.7972948551177979, |
|
"eval_runtime": 32.2962, |
|
"eval_samples_per_second": 2.198, |
|
"eval_steps_per_second": 2.198, |
|
"step": 2959 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 38.31782913208008, |
|
"learning_rate": 2.0148698884758364e-06, |
|
"loss": 0.1982, |
|
"step": 3228 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6478873491287231, |
|
"eval_loss": 2.02411150932312, |
|
"eval_runtime": 32.4081, |
|
"eval_samples_per_second": 2.191, |
|
"eval_steps_per_second": 2.191, |
|
"step": 3228 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.5905938744544983, |
|
"learning_rate": 1.3482032218091697e-06, |
|
"loss": 0.1362, |
|
"step": 3497 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6478873491287231, |
|
"eval_loss": 1.9932571649551392, |
|
"eval_runtime": 32.4096, |
|
"eval_samples_per_second": 2.191, |
|
"eval_steps_per_second": 2.191, |
|
"step": 3497 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 2.5972626209259033, |
|
"learning_rate": 6.815365551425032e-07, |
|
"loss": 0.0879, |
|
"step": 3766 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6478873491287231, |
|
"eval_loss": 2.086475372314453, |
|
"eval_runtime": 32.8946, |
|
"eval_samples_per_second": 2.158, |
|
"eval_steps_per_second": 2.158, |
|
"step": 3766 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.036518827080726624, |
|
"learning_rate": 1.4869888475836432e-08, |
|
"loss": 0.0712, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6338028311729431, |
|
"eval_loss": 2.1690945625305176, |
|
"eval_runtime": 32.3643, |
|
"eval_samples_per_second": 2.194, |
|
"eval_steps_per_second": 2.194, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 4035, |
|
"total_flos": 7.1272809219168e+18, |
|
"train_loss": 0.44098070326643213, |
|
"train_runtime": 9434.6926, |
|
"train_samples_per_second": 1.283, |
|
"train_steps_per_second": 0.428 |
|
} |
|
], |
|
"logging_steps": 8, |
|
"max_steps": 4035, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.1272809219168e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|