{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.582800284292822, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 8.333333333333334e-06, "loss": 5.2679, "step": 250 }, { "epoch": 0.47, "learning_rate": 1.6666666666666667e-05, "loss": 4.0457, "step": 500 }, { "epoch": 0.71, "learning_rate": 2.5e-05, "loss": 3.7746, "step": 750 }, { "epoch": 0.71, "eval_loss": 3.4735970497131348, "eval_rouge2_fmeasure": 0.0608, "eval_rouge2_precision": 0.0752, "eval_rouge2_recall": 0.0676, "eval_runtime": 10444.2518, "eval_samples_per_second": 0.09, "eval_steps_per_second": 0.09, "step": 750 }, { "epoch": 0.95, "learning_rate": 3.3333333333333335e-05, "loss": 3.6193, "step": 1000 }, { "epoch": 1.18, "learning_rate": 4.166666666666667e-05, "loss": 3.5271, "step": 1250 }, { "epoch": 1.42, "learning_rate": 5e-05, "loss": 3.4793, "step": 1500 }, { "epoch": 1.42, "eval_loss": 3.331468105316162, "eval_rouge2_fmeasure": 0.0653, "eval_rouge2_precision": 0.0796, "eval_rouge2_recall": 0.0735, "eval_runtime": 10901.7035, "eval_samples_per_second": 0.086, "eval_steps_per_second": 0.086, "step": 1500 }, { "epoch": 1.66, "learning_rate": 4.8529411764705885e-05, "loss": 3.4251, "step": 1750 }, { "epoch": 1.9, "learning_rate": 4.705882352941177e-05, "loss": 3.4313, "step": 2000 }, { "epoch": 2.13, "learning_rate": 4.558823529411765e-05, "loss": 3.3459, "step": 2250 }, { "epoch": 2.13, "eval_loss": 3.2606232166290283, "eval_rouge2_fmeasure": 0.0685, "eval_rouge2_precision": 0.0825, "eval_rouge2_recall": 0.077, "eval_runtime": 10929.9339, "eval_samples_per_second": 0.086, "eval_steps_per_second": 0.086, "step": 2250 }, { "epoch": 2.37, "learning_rate": 4.411764705882353e-05, "loss": 3.291, "step": 2500 }, { "epoch": 2.61, "learning_rate": 4.2647058823529415e-05, "loss": 3.2803, "step": 2750 }, { "epoch": 2.84, "learning_rate": 4.11764705882353e-05, "loss": 3.2948, "step": 3000 }, { "epoch": 2.84, "eval_loss": 3.227094888687134, "eval_rouge2_fmeasure": 0.0684, "eval_rouge2_precision": 0.0815, "eval_rouge2_recall": 0.078, "eval_runtime": 11536.812, "eval_samples_per_second": 0.081, "eval_steps_per_second": 0.081, "step": 3000 }, { "epoch": 3.08, "learning_rate": 3.970588235294117e-05, "loss": 3.2314, "step": 3250 }, { "epoch": 3.32, "learning_rate": 3.8235294117647055e-05, "loss": 3.2134, "step": 3500 }, { "epoch": 3.55, "learning_rate": 3.6764705882352945e-05, "loss": 3.1879, "step": 3750 }, { "epoch": 3.55, "eval_loss": 3.2067530155181885, "eval_rouge2_fmeasure": 0.0704, "eval_rouge2_precision": 0.083, "eval_rouge2_recall": 0.0809, "eval_runtime": 11358.1561, "eval_samples_per_second": 0.083, "eval_steps_per_second": 0.083, "step": 3750 }, { "epoch": 3.79, "learning_rate": 3.529411764705883e-05, "loss": 3.1617, "step": 4000 }, { "epoch": 4.03, "learning_rate": 3.382352941176471e-05, "loss": 3.172, "step": 4250 }, { "epoch": 4.27, "learning_rate": 3.235294117647059e-05, "loss": 3.1038, "step": 4500 }, { "epoch": 4.27, "eval_loss": 3.1938178539276123, "eval_rouge2_fmeasure": 0.0716, "eval_rouge2_precision": 0.0858, "eval_rouge2_recall": 0.0813, "eval_runtime": 10958.7861, "eval_samples_per_second": 0.086, "eval_steps_per_second": 0.086, "step": 4500 }, { "epoch": 4.5, "learning_rate": 3.0882352941176475e-05, "loss": 3.1153, "step": 4750 }, { "epoch": 4.74, "learning_rate": 2.9411764705882354e-05, "loss": 3.1037, "step": 5000 }, { "epoch": 4.98, "learning_rate": 2.7941176470588236e-05, "loss": 3.1155, "step": 5250 }, { "epoch": 4.98, "eval_loss": 3.187068462371826, "eval_rouge2_fmeasure": 0.0725, "eval_rouge2_precision": 0.0856, "eval_rouge2_recall": 0.0839, "eval_runtime": 11370.9481, "eval_samples_per_second": 0.083, "eval_steps_per_second": 0.083, "step": 5250 }, { "epoch": 5.21, "learning_rate": 2.647058823529412e-05, "loss": 3.048, "step": 5500 }, { "epoch": 5.45, "learning_rate": 2.5e-05, "loss": 3.0603, "step": 5750 }, { "epoch": 5.69, "learning_rate": 2.3529411764705884e-05, "loss": 3.0534, "step": 6000 }, { "epoch": 5.69, "eval_loss": 3.181117057800293, "eval_rouge2_fmeasure": 0.0735, "eval_rouge2_precision": 0.0865, "eval_rouge2_recall": 0.0848, "eval_runtime": 11629.6122, "eval_samples_per_second": 0.081, "eval_steps_per_second": 0.081, "step": 6000 }, { "epoch": 5.92, "learning_rate": 2.2058823529411766e-05, "loss": 3.0664, "step": 6250 }, { "epoch": 6.16, "learning_rate": 2.058823529411765e-05, "loss": 3.0255, "step": 6500 }, { "epoch": 6.4, "learning_rate": 1.9117647058823528e-05, "loss": 3.0338, "step": 6750 }, { "epoch": 6.4, "eval_loss": 3.1781060695648193, "eval_rouge2_fmeasure": 0.0747, "eval_rouge2_precision": 0.0879, "eval_rouge2_recall": 0.0865, "eval_runtime": 11275.9298, "eval_samples_per_second": 0.083, "eval_steps_per_second": 0.083, "step": 6750 }, { "epoch": 6.63, "learning_rate": 1.7647058823529414e-05, "loss": 2.9872, "step": 7000 }, { "epoch": 6.87, "learning_rate": 1.6176470588235296e-05, "loss": 3.0128, "step": 7250 }, { "epoch": 7.11, "learning_rate": 1.4705882352941177e-05, "loss": 3.0266, "step": 7500 }, { "epoch": 7.11, "eval_loss": 3.1731793880462646, "eval_rouge2_fmeasure": 0.074, "eval_rouge2_precision": 0.0886, "eval_rouge2_recall": 0.0836, "eval_runtime": 11197.1963, "eval_samples_per_second": 0.084, "eval_steps_per_second": 0.084, "step": 7500 }, { "epoch": 7.35, "learning_rate": 1.323529411764706e-05, "loss": 2.9676, "step": 7750 }, { "epoch": 7.58, "learning_rate": 1.1764705882352942e-05, "loss": 2.9834, "step": 8000 } ], "max_steps": 10000, "num_train_epochs": 10, "total_flos": 1.248414149407408e+18, "trial_name": null, "trial_params": null }