|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.582800284292822, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 5.2679, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 4.0457, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.5e-05, |
|
"loss": 3.7746, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 3.4735970497131348, |
|
"eval_rouge2_fmeasure": 0.0608, |
|
"eval_rouge2_precision": 0.0752, |
|
"eval_rouge2_recall": 0.0676, |
|
"eval_runtime": 10444.2518, |
|
"eval_samples_per_second": 0.09, |
|
"eval_steps_per_second": 0.09, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 3.6193, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 3.5271, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4793, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 3.331468105316162, |
|
"eval_rouge2_fmeasure": 0.0653, |
|
"eval_rouge2_precision": 0.0796, |
|
"eval_rouge2_recall": 0.0735, |
|
"eval_runtime": 10901.7035, |
|
"eval_samples_per_second": 0.086, |
|
"eval_steps_per_second": 0.086, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.8529411764705885e-05, |
|
"loss": 3.4251, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 3.4313, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.558823529411765e-05, |
|
"loss": 3.3459, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 3.2606232166290283, |
|
"eval_rouge2_fmeasure": 0.0685, |
|
"eval_rouge2_precision": 0.0825, |
|
"eval_rouge2_recall": 0.077, |
|
"eval_runtime": 10929.9339, |
|
"eval_samples_per_second": 0.086, |
|
"eval_steps_per_second": 0.086, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 3.291, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.2647058823529415e-05, |
|
"loss": 3.2803, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.11764705882353e-05, |
|
"loss": 3.2948, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 3.227094888687134, |
|
"eval_rouge2_fmeasure": 0.0684, |
|
"eval_rouge2_precision": 0.0815, |
|
"eval_rouge2_recall": 0.078, |
|
"eval_runtime": 11536.812, |
|
"eval_samples_per_second": 0.081, |
|
"eval_steps_per_second": 0.081, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.970588235294117e-05, |
|
"loss": 3.2314, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 3.2134, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.6764705882352945e-05, |
|
"loss": 3.1879, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_loss": 3.2067530155181885, |
|
"eval_rouge2_fmeasure": 0.0704, |
|
"eval_rouge2_precision": 0.083, |
|
"eval_rouge2_recall": 0.0809, |
|
"eval_runtime": 11358.1561, |
|
"eval_samples_per_second": 0.083, |
|
"eval_steps_per_second": 0.083, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 3.1617, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.382352941176471e-05, |
|
"loss": 3.172, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 3.1038, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_loss": 3.1938178539276123, |
|
"eval_rouge2_fmeasure": 0.0716, |
|
"eval_rouge2_precision": 0.0858, |
|
"eval_rouge2_recall": 0.0813, |
|
"eval_runtime": 10958.7861, |
|
"eval_samples_per_second": 0.086, |
|
"eval_steps_per_second": 0.086, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 3.0882352941176475e-05, |
|
"loss": 3.1153, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 3.1037, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 2.7941176470588236e-05, |
|
"loss": 3.1155, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_loss": 3.187068462371826, |
|
"eval_rouge2_fmeasure": 0.0725, |
|
"eval_rouge2_precision": 0.0856, |
|
"eval_rouge2_recall": 0.0839, |
|
"eval_runtime": 11370.9481, |
|
"eval_samples_per_second": 0.083, |
|
"eval_steps_per_second": 0.083, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 3.048, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 2.5e-05, |
|
"loss": 3.0603, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 3.0534, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_loss": 3.181117057800293, |
|
"eval_rouge2_fmeasure": 0.0735, |
|
"eval_rouge2_precision": 0.0865, |
|
"eval_rouge2_recall": 0.0848, |
|
"eval_runtime": 11629.6122, |
|
"eval_samples_per_second": 0.081, |
|
"eval_steps_per_second": 0.081, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 2.2058823529411766e-05, |
|
"loss": 3.0664, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 3.0255, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.9117647058823528e-05, |
|
"loss": 3.0338, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 3.1781060695648193, |
|
"eval_rouge2_fmeasure": 0.0747, |
|
"eval_rouge2_precision": 0.0879, |
|
"eval_rouge2_recall": 0.0865, |
|
"eval_runtime": 11275.9298, |
|
"eval_samples_per_second": 0.083, |
|
"eval_steps_per_second": 0.083, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 2.9872, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.6176470588235296e-05, |
|
"loss": 3.0128, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 3.0266, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 3.1731793880462646, |
|
"eval_rouge2_fmeasure": 0.074, |
|
"eval_rouge2_precision": 0.0886, |
|
"eval_rouge2_recall": 0.0836, |
|
"eval_runtime": 11197.1963, |
|
"eval_samples_per_second": 0.084, |
|
"eval_steps_per_second": 0.084, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 2.9676, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 2.9834, |
|
"step": 8000 |
|
} |
|
], |
|
"max_steps": 10000, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.248414149407408e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|