{ "best_metric": 0.2997977137565613, "best_model_checkpoint": "models/toxic-bert-hubert/checkpoint-150", "epoch": 0.23088023088023088, "eval_steps": 10, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.9600000000000002e-05, "loss": 0.206, "step": 10 }, { "epoch": 0.01, "eval_f1": 0.7730744144170906, "eval_loss": 0.6420192122459412, "eval_runtime": 22.3453, "eval_samples_per_second": 247.837, "eval_steps_per_second": 7.787, "step": 10 }, { "epoch": 0.03, "learning_rate": 1.9200000000000003e-05, "loss": 0.5054, "step": 20 }, { "epoch": 0.03, "eval_f1": 0.8048163482788003, "eval_loss": 0.5228331685066223, "eval_runtime": 22.8204, "eval_samples_per_second": 242.677, "eval_steps_per_second": 7.625, "step": 20 }, { "epoch": 0.04, "learning_rate": 1.88e-05, "loss": 0.5716, "step": 30 }, { "epoch": 0.04, "eval_f1": 0.8136074625725318, "eval_loss": 0.4328407943248749, "eval_runtime": 23.1876, "eval_samples_per_second": 238.834, "eval_steps_per_second": 7.504, "step": 30 }, { "epoch": 0.06, "learning_rate": 1.8400000000000003e-05, "loss": 0.4257, "step": 40 }, { "epoch": 0.06, "eval_f1": 0.8259426500452947, "eval_loss": 0.4285435080528259, "eval_runtime": 23.2092, "eval_samples_per_second": 238.613, "eval_steps_per_second": 7.497, "step": 40 }, { "epoch": 0.07, "learning_rate": 1.8e-05, "loss": 0.4937, "step": 50 }, { "epoch": 0.07, "eval_f1": 0.8317824244449398, "eval_loss": 0.39296483993530273, "eval_runtime": 23.2217, "eval_samples_per_second": 238.484, "eval_steps_per_second": 7.493, "step": 50 }, { "epoch": 0.09, "learning_rate": 1.76e-05, "loss": 0.4411, "step": 60 }, { "epoch": 0.09, "eval_f1": 0.8356587695730946, "eval_loss": 0.3781413733959198, "eval_runtime": 23.4641, "eval_samples_per_second": 236.021, "eval_steps_per_second": 7.416, "step": 60 }, { "epoch": 0.1, "learning_rate": 1.72e-05, "loss": 0.4197, "step": 70 }, { "epoch": 0.1, "eval_f1": 0.8497564694972085, "eval_loss": 0.3615292012691498, "eval_runtime": 23.1269, "eval_samples_per_second": 239.462, "eval_steps_per_second": 7.524, "step": 70 }, { "epoch": 0.12, "learning_rate": 1.6800000000000002e-05, "loss": 0.3702, "step": 80 }, { "epoch": 0.12, "eval_f1": 0.8725646676802798, "eval_loss": 0.3257770836353302, "eval_runtime": 22.9073, "eval_samples_per_second": 241.757, "eval_steps_per_second": 7.596, "step": 80 }, { "epoch": 0.13, "learning_rate": 1.64e-05, "loss": 0.3357, "step": 90 }, { "epoch": 0.13, "eval_f1": 0.8735307760659007, "eval_loss": 0.31817400455474854, "eval_runtime": 23.2006, "eval_samples_per_second": 238.701, "eval_steps_per_second": 7.5, "step": 90 }, { "epoch": 0.14, "learning_rate": 1.6000000000000003e-05, "loss": 0.4082, "step": 100 }, { "epoch": 0.14, "eval_f1": 0.8404628911456754, "eval_loss": 0.35047250986099243, "eval_runtime": 22.7192, "eval_samples_per_second": 243.758, "eval_steps_per_second": 7.659, "step": 100 }, { "epoch": 0.16, "learning_rate": 1.5600000000000003e-05, "loss": 0.4107, "step": 110 }, { "epoch": 0.16, "eval_f1": 0.8586341129440507, "eval_loss": 0.3375680148601532, "eval_runtime": 23.371, "eval_samples_per_second": 236.96, "eval_steps_per_second": 7.445, "step": 110 }, { "epoch": 0.17, "learning_rate": 1.5200000000000002e-05, "loss": 0.2864, "step": 120 }, { "epoch": 0.17, "eval_f1": 0.8561894871027176, "eval_loss": 0.32658323645591736, "eval_runtime": 23.2113, "eval_samples_per_second": 238.591, "eval_steps_per_second": 7.496, "step": 120 }, { "epoch": 0.19, "learning_rate": 1.48e-05, "loss": 0.3483, "step": 130 }, { "epoch": 0.19, "eval_f1": 0.8645368802572093, "eval_loss": 0.3245397210121155, "eval_runtime": 23.0332, "eval_samples_per_second": 240.436, "eval_steps_per_second": 7.554, "step": 130 }, { "epoch": 0.2, "learning_rate": 1.4400000000000001e-05, "loss": 0.317, "step": 140 }, { "epoch": 0.2, "eval_f1": 0.8735823993344022, "eval_loss": 0.32588475942611694, "eval_runtime": 23.3446, "eval_samples_per_second": 237.228, "eval_steps_per_second": 7.454, "step": 140 }, { "epoch": 0.22, "learning_rate": 1.4e-05, "loss": 0.3114, "step": 150 }, { "epoch": 0.22, "eval_f1": 0.8704014314485559, "eval_loss": 0.2997977137565613, "eval_runtime": 22.6714, "eval_samples_per_second": 244.273, "eval_steps_per_second": 7.675, "step": 150 }, { "epoch": 0.23, "learning_rate": 1.3600000000000002e-05, "loss": 0.3323, "step": 160 }, { "epoch": 0.23, "eval_f1": 0.8661724356256255, "eval_loss": 0.3152276575565338, "eval_runtime": 23.046, "eval_samples_per_second": 240.302, "eval_steps_per_second": 7.55, "step": 160 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "total_flos": 359241714253440.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }