{ "best_metric": 0.7326732673267327, "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-8/checkpoint-24", "epoch": 6.0, "eval_steps": 500, "global_step": 144, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21, "grad_norm": 1.6496813297271729, "learning_rate": 2.9441483262927863e-06, "loss": 0.6992, "step": 5 }, { "epoch": 0.42, "grad_norm": 0.620004415512085, "learning_rate": 5.888296652585573e-06, "loss": 0.6939, "step": 10 }, { "epoch": 0.62, "grad_norm": 0.5726878046989441, "learning_rate": 8.832444978878358e-06, "loss": 0.6835, "step": 15 }, { "epoch": 0.83, "grad_norm": 1.4239176511764526, "learning_rate": 1.1776593305171145e-05, "loss": 0.6673, "step": 20 }, { "epoch": 1.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.6805419921875, "eval_runtime": 1.3716, "eval_samples_per_second": 46.661, "eval_steps_per_second": 5.833, "step": 24 }, { "epoch": 1.04, "grad_norm": 0.9195191264152527, "learning_rate": 1.406648644784331e-05, "loss": 0.6673, "step": 25 }, { "epoch": 1.25, "grad_norm": 1.8524231910705566, "learning_rate": 1.3739358856033002e-05, "loss": 0.6155, "step": 30 }, { "epoch": 1.46, "grad_norm": 1.8213531970977783, "learning_rate": 1.3412231264222692e-05, "loss": 0.5895, "step": 35 }, { "epoch": 1.67, "grad_norm": 0.4818130433559418, "learning_rate": 1.3085103672412383e-05, "loss": 0.6468, "step": 40 }, { "epoch": 1.88, "grad_norm": 0.6597484946250916, "learning_rate": 1.2757976080602073e-05, "loss": 0.6173, "step": 45 }, { "epoch": 2.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.6970634460449219, "eval_runtime": 1.3632, "eval_samples_per_second": 46.949, "eval_steps_per_second": 5.869, "step": 48 }, { "epoch": 2.08, "grad_norm": 0.48903289437294006, "learning_rate": 1.2430848488791764e-05, "loss": 0.6302, "step": 50 }, { "epoch": 2.29, "grad_norm": 0.6064260601997375, "learning_rate": 1.2103720896981454e-05, "loss": 0.5867, "step": 55 }, { "epoch": 2.5, "grad_norm": 0.6802453398704529, "learning_rate": 1.1776593305171145e-05, "loss": 0.6321, "step": 60 }, { "epoch": 2.71, "grad_norm": 1.2592875957489014, "learning_rate": 1.1449465713360835e-05, "loss": 0.6223, "step": 65 }, { "epoch": 2.92, "grad_norm": 1.1591824293136597, "learning_rate": 1.1122338121550526e-05, "loss": 0.4922, "step": 70 }, { "epoch": 3.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.7079887390136719, "eval_runtime": 1.3669, "eval_samples_per_second": 46.821, "eval_steps_per_second": 5.853, "step": 72 }, { "epoch": 3.12, "grad_norm": 0.5332023501396179, "learning_rate": 1.0795210529740214e-05, "loss": 0.5989, "step": 75 }, { "epoch": 3.33, "grad_norm": 0.5555600523948669, "learning_rate": 1.0468082937929906e-05, "loss": 0.6104, "step": 80 }, { "epoch": 3.54, "grad_norm": 1.2928024530410767, "learning_rate": 1.0140955346119596e-05, "loss": 0.4936, "step": 85 }, { "epoch": 3.75, "grad_norm": 1.1424989700317383, "learning_rate": 9.813827754309287e-06, "loss": 0.6191, "step": 90 }, { "epoch": 3.96, "grad_norm": 1.119732141494751, "learning_rate": 9.486700162498977e-06, "loss": 0.6004, "step": 95 }, { "epoch": 4.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.7053489685058594, "eval_runtime": 1.3556, "eval_samples_per_second": 47.211, "eval_steps_per_second": 5.901, "step": 96 }, { "epoch": 4.17, "grad_norm": 0.8135461211204529, "learning_rate": 9.159572570688668e-06, "loss": 0.5154, "step": 100 }, { "epoch": 4.38, "grad_norm": 1.8034342527389526, "learning_rate": 8.832444978878358e-06, "loss": 0.6067, "step": 105 }, { "epoch": 4.58, "grad_norm": 0.8029685020446777, "learning_rate": 8.505317387068049e-06, "loss": 0.5499, "step": 110 }, { "epoch": 4.79, "grad_norm": 1.019626259803772, "learning_rate": 8.178189795257739e-06, "loss": 0.5542, "step": 115 }, { "epoch": 5.0, "grad_norm": 1.861674427986145, "learning_rate": 7.85106220344743e-06, "loss": 0.5545, "step": 120 }, { "epoch": 5.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.6832504272460938, "eval_runtime": 1.3811, "eval_samples_per_second": 46.341, "eval_steps_per_second": 5.793, "step": 120 }, { "epoch": 5.21, "grad_norm": 1.5949212312698364, "learning_rate": 7.523934611637121e-06, "loss": 0.4806, "step": 125 }, { "epoch": 5.42, "grad_norm": 3.002861738204956, "learning_rate": 7.196807019826811e-06, "loss": 0.5832, "step": 130 }, { "epoch": 5.62, "grad_norm": 1.4606820344924927, "learning_rate": 6.9351049463785626e-06, "loss": 0.5481, "step": 135 }, { "epoch": 5.83, "grad_norm": 1.6088628768920898, "learning_rate": 6.607977354568253e-06, "loss": 0.5333, "step": 140 }, { "epoch": 6.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.6599597930908203, "eval_runtime": 1.3747, "eval_samples_per_second": 46.555, "eval_steps_per_second": 5.819, "step": 144 } ], "logging_steps": 5, "max_steps": 240, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 4911381340990080.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "learning_rate": 1.4131911966205373e-05, "per_device_train_batch_size": 8 } }