{ "best_metric": 0.7326732673267327, "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-17/checkpoint-48", "epoch": 1.0, "eval_steps": 500, "global_step": 48, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 1.2570356130599976, "learning_rate": 7.514138837771986e-06, "loss": 0.7017, "step": 5 }, { "epoch": 0.21, "grad_norm": 1.132084846496582, "learning_rate": 1.5028277675543972e-05, "loss": 0.678, "step": 10 }, { "epoch": 0.31, "grad_norm": 1.1500951051712036, "learning_rate": 2.2542416513315956e-05, "loss": 0.6796, "step": 15 }, { "epoch": 0.42, "grad_norm": 0.8457872867584229, "learning_rate": 3.0056555351087943e-05, "loss": 0.6488, "step": 20 }, { "epoch": 0.52, "grad_norm": 0.7979240417480469, "learning_rate": 3.757069418885993e-05, "loss": 0.6218, "step": 25 }, { "epoch": 0.62, "grad_norm": 0.605031430721283, "learning_rate": 4.508483302663191e-05, "loss": 0.612, "step": 30 }, { "epoch": 0.73, "grad_norm": 3.2487857341766357, "learning_rate": 5.2598971864403895e-05, "loss": 0.5324, "step": 35 }, { "epoch": 0.83, "grad_norm": 1.5509129762649536, "learning_rate": 6.0113110702175886e-05, "loss": 0.6915, "step": 40 }, { "epoch": 0.94, "grad_norm": 3.975639581680298, "learning_rate": 6.762724953994786e-05, "loss": 0.643, "step": 45 }, { "epoch": 1.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.7643775939941406, "eval_runtime": 1.3793, "eval_samples_per_second": 46.401, "eval_steps_per_second": 5.8, "step": 48 } ], "logging_steps": 5, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 670686130935120.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "learning_rate": 7.213573284261106e-05, "per_device_train_batch_size": 4 } }