{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 500, "global_step": 4035, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 13.118809700012207, "learning_rate": 9.335811648079307e-06, "loss": 0.8716, "step": 269 }, { "epoch": 1.0, "eval_accuracy": 0.6197183132171631, "eval_loss": 0.9822000861167908, "eval_runtime": 32.2509, "eval_samples_per_second": 2.201, "eval_steps_per_second": 2.201, "step": 269 }, { "epoch": 2.0, "grad_norm": 10.717401504516602, "learning_rate": 8.66914498141264e-06, "loss": 0.8143, "step": 538 }, { "epoch": 2.0, "eval_accuracy": 0.5352112650871277, "eval_loss": 1.232447624206543, "eval_runtime": 32.3951, "eval_samples_per_second": 2.192, "eval_steps_per_second": 2.192, "step": 538 }, { "epoch": 3.0, "grad_norm": 23.701391220092773, "learning_rate": 8.004956629491945e-06, "loss": 0.7584, "step": 807 }, { "epoch": 3.0, "eval_accuracy": 0.6478873491287231, "eval_loss": 1.0226496458053589, "eval_runtime": 32.3, "eval_samples_per_second": 2.198, "eval_steps_per_second": 2.198, "step": 807 }, { "epoch": 4.0, "grad_norm": 33.54118728637695, "learning_rate": 7.340768277571252e-06, "loss": 0.6715, "step": 1076 }, { "epoch": 4.0, "eval_accuracy": 0.6619718074798584, "eval_loss": 0.9550462961196899, "eval_runtime": 32.4715, "eval_samples_per_second": 2.187, "eval_steps_per_second": 2.187, "step": 1076 }, { "epoch": 5.0, "grad_norm": 30.0217227935791, "learning_rate": 6.674101610904585e-06, "loss": 0.6471, "step": 1345 }, { "epoch": 5.0, "eval_accuracy": 0.6760563254356384, "eval_loss": 1.1272403001785278, "eval_runtime": 32.2183, "eval_samples_per_second": 2.204, "eval_steps_per_second": 2.204, "step": 1345 }, { "epoch": 6.0, "grad_norm": 0.36277323961257935, "learning_rate": 6.009913258983892e-06, "loss": 0.5759, "step": 1614 }, { "epoch": 6.0, "eval_accuracy": 0.6760563254356384, "eval_loss": 1.2193043231964111, "eval_runtime": 32.3011, "eval_samples_per_second": 2.198, "eval_steps_per_second": 2.198, "step": 1614 }, { "epoch": 7.0, "grad_norm": 17.32307243347168, "learning_rate": 5.343246592317225e-06, "loss": 0.4963, "step": 1883 }, { "epoch": 7.0, "eval_accuracy": 0.7183098793029785, "eval_loss": 1.221394658088684, "eval_runtime": 32.1815, "eval_samples_per_second": 2.206, "eval_steps_per_second": 2.206, "step": 1883 }, { "epoch": 8.0, "grad_norm": 0.09576527029275894, "learning_rate": 4.679058240396531e-06, "loss": 0.4053, "step": 2152 }, { "epoch": 8.0, "eval_accuracy": 0.7464788556098938, "eval_loss": 1.3082976341247559, "eval_runtime": 32.7134, "eval_samples_per_second": 2.17, "eval_steps_per_second": 2.17, "step": 2152 }, { "epoch": 9.0, "grad_norm": 73.52884674072266, "learning_rate": 4.012391573729864e-06, "loss": 0.3344, "step": 2421 }, { "epoch": 9.0, "eval_accuracy": 0.6619718074798584, "eval_loss": 1.6390645503997803, "eval_runtime": 32.8911, "eval_samples_per_second": 2.159, "eval_steps_per_second": 2.159, "step": 2421 }, { "epoch": 10.0, "grad_norm": 242.48890686035156, "learning_rate": 3.34820322180917e-06, "loss": 0.3216, "step": 2690 }, { "epoch": 10.0, "eval_accuracy": 0.6478873491287231, "eval_loss": 1.722383737564087, "eval_runtime": 32.4813, "eval_samples_per_second": 2.186, "eval_steps_per_second": 2.186, "step": 2690 }, { "epoch": 11.0, "grad_norm": 376.0904541015625, "learning_rate": 2.6815365551425034e-06, "loss": 0.2248, "step": 2959 }, { "epoch": 11.0, "eval_accuracy": 0.6760563254356384, "eval_loss": 1.7972948551177979, "eval_runtime": 32.2962, "eval_samples_per_second": 2.198, "eval_steps_per_second": 2.198, "step": 2959 }, { "epoch": 12.0, "grad_norm": 38.31782913208008, "learning_rate": 2.0148698884758364e-06, "loss": 0.1982, "step": 3228 }, { "epoch": 12.0, "eval_accuracy": 0.6478873491287231, "eval_loss": 2.02411150932312, "eval_runtime": 32.4081, "eval_samples_per_second": 2.191, "eval_steps_per_second": 2.191, "step": 3228 }, { "epoch": 13.0, "grad_norm": 0.5905938744544983, "learning_rate": 1.3482032218091697e-06, "loss": 0.1362, "step": 3497 }, { "epoch": 13.0, "eval_accuracy": 0.6478873491287231, "eval_loss": 1.9932571649551392, "eval_runtime": 32.4096, "eval_samples_per_second": 2.191, "eval_steps_per_second": 2.191, "step": 3497 }, { "epoch": 14.0, "grad_norm": 2.5972626209259033, "learning_rate": 6.815365551425032e-07, "loss": 0.0879, "step": 3766 }, { "epoch": 14.0, "eval_accuracy": 0.6478873491287231, "eval_loss": 2.086475372314453, "eval_runtime": 32.8946, "eval_samples_per_second": 2.158, "eval_steps_per_second": 2.158, "step": 3766 }, { "epoch": 15.0, "grad_norm": 0.036518827080726624, "learning_rate": 1.4869888475836432e-08, "loss": 0.0712, "step": 4035 }, { "epoch": 15.0, "eval_accuracy": 0.6338028311729431, "eval_loss": 2.1690945625305176, "eval_runtime": 32.3643, "eval_samples_per_second": 2.194, "eval_steps_per_second": 2.194, "step": 4035 }, { "epoch": 15.0, "step": 4035, "total_flos": 7.1272809219168e+18, "train_loss": 0.44098070326643213, "train_runtime": 9434.6926, "train_samples_per_second": 1.283, "train_steps_per_second": 0.428 } ], "logging_steps": 8, "max_steps": 4035, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.1272809219168e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }