{ "best_metric": 0.5298831429205758, "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-7/checkpoint-3207", "epoch": 3.0, "eval_steps": 500, "global_step": 3207, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4677268475210477, "grad_norm": 12.974456787109375, "learning_rate": 1.1101348047567246e-05, "loss": 0.5482, "step": 500 }, { "epoch": 0.9354536950420954, "grad_norm": 6.763242721557617, "learning_rate": 9.955698094257931e-06, "loss": 0.4949, "step": 1000 }, { "epoch": 1.0, "eval_loss": 0.47264614701271057, "eval_matthews_correlation": 0.46472372045336796, "eval_runtime": 0.7997, "eval_samples_per_second": 1304.217, "eval_steps_per_second": 82.53, "step": 1069 }, { "epoch": 1.4031805425631432, "grad_norm": 21.93242835998535, "learning_rate": 8.810048140948619e-06, "loss": 0.3822, "step": 1500 }, { "epoch": 1.8709073900841908, "grad_norm": 23.08902359008789, "learning_rate": 7.664398187639304e-06, "loss": 0.3651, "step": 2000 }, { "epoch": 2.0, "eval_loss": 0.5118151903152466, "eval_matthews_correlation": 0.5238946707778435, "eval_runtime": 0.7256, "eval_samples_per_second": 1437.525, "eval_steps_per_second": 90.965, "step": 2138 }, { "epoch": 2.3386342376052385, "grad_norm": 0.19134408235549927, "learning_rate": 6.5187482343299915e-06, "loss": 0.2818, "step": 2500 }, { "epoch": 2.8063610851262863, "grad_norm": 25.71369743347168, "learning_rate": 5.373098281020679e-06, "loss": 0.2996, "step": 3000 }, { "epoch": 3.0, "eval_loss": 0.739141583442688, "eval_matthews_correlation": 0.5298831429205758, "eval_runtime": 0.7927, "eval_samples_per_second": 1315.739, "eval_steps_per_second": 83.259, "step": 3207 } ], "logging_steps": 500, "max_steps": 5345, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 220519927404816.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "learning_rate": 1.2246998000876558e-05, "num_train_epochs": 5, "per_device_train_batch_size": 8, "seed": 6 } }