{ "best_metric": 0.7326732673267327, "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-14/checkpoint-48", "epoch": 2.0, "eval_steps": 500, "global_step": 96, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 1.263960838317871, "learning_rate": 4.556569949810922e-06, "loss": 0.7021, "step": 5 }, { "epoch": 0.21, "grad_norm": 1.1750551462173462, "learning_rate": 9.113139899621844e-06, "loss": 0.6857, "step": 10 }, { "epoch": 0.31, "grad_norm": 1.3693294525146484, "learning_rate": 1.3669709849432766e-05, "loss": 0.6849, "step": 15 }, { "epoch": 0.42, "grad_norm": 0.9440478086471558, "learning_rate": 1.822627979924369e-05, "loss": 0.6652, "step": 20 }, { "epoch": 0.52, "grad_norm": 0.9551452398300171, "learning_rate": 2.2782849749054612e-05, "loss": 0.64, "step": 25 }, { "epoch": 0.62, "grad_norm": 0.777751088142395, "learning_rate": 2.7339419698865533e-05, "loss": 0.6269, "step": 30 }, { "epoch": 0.73, "grad_norm": 2.4816713333129883, "learning_rate": 3.1895989648676456e-05, "loss": 0.5586, "step": 35 }, { "epoch": 0.83, "grad_norm": 1.696999430656433, "learning_rate": 3.645255959848738e-05, "loss": 0.6586, "step": 40 }, { "epoch": 0.94, "grad_norm": 3.2336232662200928, "learning_rate": 4.10091295482983e-05, "loss": 0.6259, "step": 45 }, { "epoch": 1.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.7427406311035156, "eval_runtime": 1.4051, "eval_samples_per_second": 45.548, "eval_steps_per_second": 5.694, "step": 48 }, { "epoch": 1.04, "grad_norm": 4.544165134429932, "learning_rate": 4.354055729819326e-05, "loss": 0.7, "step": 50 }, { "epoch": 1.15, "grad_norm": 0.8700355887413025, "learning_rate": 4.3034271748214265e-05, "loss": 0.6972, "step": 55 }, { "epoch": 1.25, "grad_norm": 1.7657314538955688, "learning_rate": 4.252798619823527e-05, "loss": 0.4085, "step": 60 }, { "epoch": 1.35, "grad_norm": 0.6718289852142334, "learning_rate": 4.202170064825628e-05, "loss": 0.5618, "step": 65 }, { "epoch": 1.46, "grad_norm": 1.5080432891845703, "learning_rate": 4.151541509827729e-05, "loss": 0.4822, "step": 70 }, { "epoch": 1.56, "grad_norm": Infinity, "learning_rate": 4.11103866582941e-05, "loss": 0.7679, "step": 75 }, { "epoch": 1.67, "grad_norm": 1.8398224115371704, "learning_rate": 4.0604101108315106e-05, "loss": 0.6183, "step": 80 }, { "epoch": 1.77, "grad_norm": 1.7079250812530518, "learning_rate": 4.0097815558336114e-05, "loss": 0.5954, "step": 85 }, { "epoch": 1.88, "grad_norm": 1.8899829387664795, "learning_rate": 3.959153000835712e-05, "loss": 0.596, "step": 90 }, { "epoch": 1.98, "grad_norm": 3.904428243637085, "learning_rate": 3.908524445837813e-05, "loss": 0.6094, "step": 95 }, { "epoch": 2.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.673431396484375, "eval_runtime": 1.3605, "eval_samples_per_second": 47.041, "eval_steps_per_second": 5.88, "step": 96 } ], "logging_steps": 5, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1442567462539200.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "learning_rate": 4.374307151818485e-05, "per_device_train_batch_size": 4 } }