|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.988739357319417, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 5.2801, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 4.0432, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.5e-05, |
|
"loss": 3.7257, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 3.4456169605255127, |
|
"eval_rouge2_fmeasure": 0.0619, |
|
"eval_rouge2_precision": 0.0751, |
|
"eval_rouge2_recall": 0.0686, |
|
"eval_runtime": 10201.4024, |
|
"eval_samples_per_second": 0.089, |
|
"eval_steps_per_second": 0.089, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 3.6021, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 3.505, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4611, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 3.324451446533203, |
|
"eval_rouge2_fmeasure": 0.064, |
|
"eval_rouge2_precision": 0.0771, |
|
"eval_rouge2_recall": 0.0724, |
|
"eval_runtime": 10238.3889, |
|
"eval_samples_per_second": 0.089, |
|
"eval_steps_per_second": 0.089, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.8529411764705885e-05, |
|
"loss": 3.4327, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 3.3307, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 4.558823529411765e-05, |
|
"loss": 3.2866, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 3.268772602081299, |
|
"eval_rouge2_fmeasure": 0.0675, |
|
"eval_rouge2_precision": 0.0787, |
|
"eval_rouge2_recall": 0.0787, |
|
"eval_runtime": 11096.9726, |
|
"eval_samples_per_second": 0.082, |
|
"eval_steps_per_second": 0.082, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 3.3072, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.2647058823529415e-05, |
|
"loss": 3.2516, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 4.11764705882353e-05, |
|
"loss": 3.1896, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 3.2407822608947754, |
|
"eval_rouge2_fmeasure": 0.0688, |
|
"eval_rouge2_precision": 0.0806, |
|
"eval_rouge2_recall": 0.0802, |
|
"eval_runtime": 10753.114, |
|
"eval_samples_per_second": 0.085, |
|
"eval_steps_per_second": 0.085, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.970588235294117e-05, |
|
"loss": 3.1495, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 3.1904, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.6764705882352945e-05, |
|
"loss": 3.1541, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_loss": 3.230734348297119, |
|
"eval_rouge2_fmeasure": 0.0704, |
|
"eval_rouge2_precision": 0.081, |
|
"eval_rouge2_recall": 0.083, |
|
"eval_runtime": 11594.4275, |
|
"eval_samples_per_second": 0.078, |
|
"eval_steps_per_second": 0.078, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 3.0879, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 3.382352941176471e-05, |
|
"loss": 3.0753, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 3.1243, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_loss": 3.2160983085632324, |
|
"eval_rouge2_fmeasure": 0.0707, |
|
"eval_rouge2_precision": 0.0821, |
|
"eval_rouge2_recall": 0.0817, |
|
"eval_runtime": 11304.2042, |
|
"eval_samples_per_second": 0.081, |
|
"eval_steps_per_second": 0.081, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 3.0882352941176475e-05, |
|
"loss": 3.0364, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 3.0249, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 2.7941176470588236e-05, |
|
"loss": 3.0428, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"eval_loss": 3.2113523483276367, |
|
"eval_rouge2_fmeasure": 0.072, |
|
"eval_rouge2_precision": 0.0828, |
|
"eval_rouge2_recall": 0.0848, |
|
"eval_runtime": 11113.1913, |
|
"eval_samples_per_second": 0.082, |
|
"eval_steps_per_second": 0.082, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 3.0229, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.979, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 2.975, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_loss": 3.213066339492798, |
|
"eval_rouge2_fmeasure": 0.0728, |
|
"eval_rouge2_precision": 0.0841, |
|
"eval_rouge2_recall": 0.085, |
|
"eval_runtime": 11135.6948, |
|
"eval_samples_per_second": 0.082, |
|
"eval_steps_per_second": 0.082, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 2.2058823529411766e-05, |
|
"loss": 3.0022, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 2.9825, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 1.9117647058823528e-05, |
|
"loss": 2.9392, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"eval_loss": 3.2107818126678467, |
|
"eval_rouge2_fmeasure": 0.073, |
|
"eval_rouge2_precision": 0.0839, |
|
"eval_rouge2_recall": 0.0848, |
|
"eval_runtime": 11088.831, |
|
"eval_samples_per_second": 0.082, |
|
"eval_steps_per_second": 0.082, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 2.9429, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 1.6176470588235296e-05, |
|
"loss": 2.9304, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 2.9204, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"eval_loss": 3.2088212966918945, |
|
"eval_rouge2_fmeasure": 0.073, |
|
"eval_rouge2_precision": 0.0833, |
|
"eval_rouge2_recall": 0.0865, |
|
"eval_runtime": 11327.9603, |
|
"eval_samples_per_second": 0.08, |
|
"eval_steps_per_second": 0.08, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 2.8969, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 2.9162, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.0294117647058824e-05, |
|
"loss": 2.9233, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"eval_loss": 3.210251808166504, |
|
"eval_rouge2_fmeasure": 0.0738, |
|
"eval_rouge2_precision": 0.0849, |
|
"eval_rouge2_recall": 0.0859, |
|
"eval_runtime": 11276.9504, |
|
"eval_samples_per_second": 0.081, |
|
"eval_steps_per_second": 0.081, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 2.8888, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 7.3529411764705884e-06, |
|
"loss": 2.8771, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 2.9142, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"eval_loss": 3.206127643585205, |
|
"eval_rouge2_fmeasure": 0.0728, |
|
"eval_rouge2_precision": 0.0831, |
|
"eval_rouge2_recall": 0.0855, |
|
"eval_runtime": 11283.9811, |
|
"eval_samples_per_second": 0.081, |
|
"eval_steps_per_second": 0.081, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 4.411764705882353e-06, |
|
"loss": 2.857, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 2.8846, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 1.4705882352941177e-06, |
|
"loss": 2.8889, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"eval_loss": 3.2091023921966553, |
|
"eval_rouge2_fmeasure": 0.0729, |
|
"eval_rouge2_precision": 0.0835, |
|
"eval_rouge2_recall": 0.0849, |
|
"eval_runtime": 11160.0848, |
|
"eval_samples_per_second": 0.082, |
|
"eval_steps_per_second": 0.082, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0, |
|
"loss": 2.8707, |
|
"step": 10000 |
|
} |
|
], |
|
"max_steps": 10000, |
|
"num_train_epochs": 11, |
|
"total_flos": 1.560566442271703e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|