{ "best_metric": 0.7346938775510203, "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-14/checkpoint-144", "epoch": 3.0, "eval_steps": 500, "global_step": 144, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 1.263960838317871, "learning_rate": 4.556569949810922e-06, "loss": 0.7021, "step": 5 }, { "epoch": 0.21, "grad_norm": 1.1750551462173462, "learning_rate": 9.113139899621844e-06, "loss": 0.6857, "step": 10 }, { "epoch": 0.31, "grad_norm": 1.3693294525146484, "learning_rate": 1.3669709849432766e-05, "loss": 0.6849, "step": 15 }, { "epoch": 0.42, "grad_norm": 0.9440478086471558, "learning_rate": 1.822627979924369e-05, "loss": 0.6652, "step": 20 }, { "epoch": 0.52, "grad_norm": 0.9551452398300171, "learning_rate": 2.2782849749054612e-05, "loss": 0.64, "step": 25 }, { "epoch": 0.62, "grad_norm": 0.777751088142395, "learning_rate": 2.7339419698865533e-05, "loss": 0.6269, "step": 30 }, { "epoch": 0.73, "grad_norm": 2.4816713333129883, "learning_rate": 3.1895989648676456e-05, "loss": 0.5586, "step": 35 }, { "epoch": 0.83, "grad_norm": 1.696999430656433, "learning_rate": 3.645255959848738e-05, "loss": 0.6586, "step": 40 }, { "epoch": 0.94, "grad_norm": 3.2336232662200928, "learning_rate": 4.10091295482983e-05, "loss": 0.6259, "step": 45 }, { "epoch": 1.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.7427406311035156, "eval_runtime": 1.4051, "eval_samples_per_second": 45.548, "eval_steps_per_second": 5.694, "step": 48 }, { "epoch": 1.04, "grad_norm": 4.544165134429932, "learning_rate": 4.354055729819326e-05, "loss": 0.7, "step": 50 }, { "epoch": 1.15, "grad_norm": 0.8700355887413025, "learning_rate": 4.3034271748214265e-05, "loss": 0.6972, "step": 55 }, { "epoch": 1.25, "grad_norm": 1.7657314538955688, "learning_rate": 4.252798619823527e-05, "loss": 0.4085, "step": 60 }, { "epoch": 1.35, "grad_norm": 0.6718289852142334, "learning_rate": 4.202170064825628e-05, "loss": 0.5618, "step": 65 }, { "epoch": 1.46, "grad_norm": 1.5080432891845703, "learning_rate": 4.151541509827729e-05, "loss": 0.4822, "step": 70 }, { "epoch": 1.56, "grad_norm": Infinity, "learning_rate": 4.11103866582941e-05, "loss": 0.7679, "step": 75 }, { "epoch": 1.67, "grad_norm": 1.8398224115371704, "learning_rate": 4.0604101108315106e-05, "loss": 0.6183, "step": 80 }, { "epoch": 1.77, "grad_norm": 1.7079250812530518, "learning_rate": 4.0097815558336114e-05, "loss": 0.5954, "step": 85 }, { "epoch": 1.88, "grad_norm": 1.8899829387664795, "learning_rate": 3.959153000835712e-05, "loss": 0.596, "step": 90 }, { "epoch": 1.98, "grad_norm": 3.904428243637085, "learning_rate": 3.908524445837813e-05, "loss": 0.6094, "step": 95 }, { "epoch": 2.0, "eval_f1": 0.7326732673267327, "eval_loss": 0.673431396484375, "eval_runtime": 1.3605, "eval_samples_per_second": 47.041, "eval_steps_per_second": 5.88, "step": 96 }, { "epoch": 2.08, "grad_norm": 1.0948293209075928, "learning_rate": 3.857895890839914e-05, "loss": 0.5749, "step": 100 }, { "epoch": 2.19, "grad_norm": 1.2664576768875122, "learning_rate": 3.807267335842015e-05, "loss": 0.4808, "step": 105 }, { "epoch": 2.29, "grad_norm": 1.7244511842727661, "learning_rate": 3.7566387808441155e-05, "loss": 0.5641, "step": 110 }, { "epoch": 2.4, "grad_norm": 1.1561750173568726, "learning_rate": 3.706010225846216e-05, "loss": 0.5395, "step": 115 }, { "epoch": 2.5, "grad_norm": Infinity, "learning_rate": 3.665507381847897e-05, "loss": 0.5224, "step": 120 }, { "epoch": 2.6, "grad_norm": 6.8102827072143555, "learning_rate": 3.614878826849998e-05, "loss": 0.4427, "step": 125 }, { "epoch": 2.71, "grad_norm": Infinity, "learning_rate": 3.574375982851679e-05, "loss": 0.6088, "step": 130 }, { "epoch": 2.81, "grad_norm": 3.278684139251709, "learning_rate": 3.52374742785378e-05, "loss": 0.3563, "step": 135 }, { "epoch": 2.92, "grad_norm": 4.377391815185547, "learning_rate": 3.4731188728558806e-05, "loss": 0.3177, "step": 140 }, { "epoch": 3.0, "eval_f1": 0.7346938775510203, "eval_loss": 0.804656982421875, "eval_runtime": 1.3676, "eval_samples_per_second": 46.796, "eval_steps_per_second": 5.85, "step": 144 } ], "logging_steps": 5, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2121874430755872.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "learning_rate": 4.374307151818485e-05, "per_device_train_batch_size": 4 } }