|
{ |
|
"best_metric": 2.681736707687378, |
|
"best_model_checkpoint": "longt5_xl_summ_screen_memsum_bp_30/checkpoint-100", |
|
"epoch": 9.73913043478261, |
|
"eval_steps": 500, |
|
"global_step": 140, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.001, |
|
"loss": 0.0739, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.0732, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.001, |
|
"loss": 0.0657, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.0699, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.001, |
|
"loss": 0.0667, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.001, |
|
"loss": 0.0705, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.0707, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_gen_len": 453.5295857988166, |
|
"eval_loss": 2.709702730178833, |
|
"eval_rouge1": 41.4751, |
|
"eval_rouge2": 15.5831, |
|
"eval_rougeL": 25.1976, |
|
"eval_rougeLsum": 39.9229, |
|
"eval_runtime": 757.2273, |
|
"eval_samples_per_second": 0.446, |
|
"eval_steps_per_second": 0.057, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.001, |
|
"loss": 0.0649, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.0535, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.001, |
|
"loss": 0.0715, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.001, |
|
"loss": 0.0731, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.001, |
|
"loss": 0.0931, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.001, |
|
"loss": 0.0583, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.0608, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_gen_len": 387.41715976331363, |
|
"eval_loss": 2.7271180152893066, |
|
"eval_rouge1": 45.691, |
|
"eval_rouge2": 17.905, |
|
"eval_rougeL": 27.9519, |
|
"eval_rougeLsum": 43.8787, |
|
"eval_runtime": 765.753, |
|
"eval_samples_per_second": 0.441, |
|
"eval_steps_per_second": 0.056, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.001, |
|
"loss": 0.0521, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.0396, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.001, |
|
"loss": 0.0505, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.001, |
|
"loss": 0.0496, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.0591, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.001, |
|
"loss": 0.061, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.0851, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_gen_len": 261.56804733727813, |
|
"eval_loss": 3.0001442432403564, |
|
"eval_rouge1": 47.1647, |
|
"eval_rouge2": 17.8993, |
|
"eval_rougeL": 28.7561, |
|
"eval_rougeLsum": 45.661, |
|
"eval_runtime": 691.7097, |
|
"eval_samples_per_second": 0.489, |
|
"eval_steps_per_second": 0.062, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.001, |
|
"loss": 0.0519, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.0481, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.001, |
|
"loss": 0.0535, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.0404, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.001, |
|
"loss": 0.0468, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.001, |
|
"loss": 0.0535, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.0697, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_gen_len": 365.3047337278106, |
|
"eval_loss": 2.9297378063201904, |
|
"eval_rouge1": 46.6892, |
|
"eval_rouge2": 17.8922, |
|
"eval_rougeL": 28.0724, |
|
"eval_rougeLsum": 44.8821, |
|
"eval_runtime": 770.7359, |
|
"eval_samples_per_second": 0.439, |
|
"eval_steps_per_second": 0.056, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.001, |
|
"loss": 0.0371, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.001, |
|
"loss": 0.0306, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.0285, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.001, |
|
"loss": 0.033, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.0411, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.001, |
|
"loss": 0.0326, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.0296, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_gen_len": 440.6390532544379, |
|
"eval_loss": 2.901674270629883, |
|
"eval_rouge1": 44.2702, |
|
"eval_rouge2": 17.7874, |
|
"eval_rougeL": 26.7598, |
|
"eval_rougeLsum": 42.6857, |
|
"eval_runtime": 764.3617, |
|
"eval_samples_per_second": 0.442, |
|
"eval_steps_per_second": 0.056, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.001, |
|
"loss": 0.0384, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.0429, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.001, |
|
"loss": 0.0459, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.0336, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.001, |
|
"loss": 0.0406, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.001, |
|
"loss": 0.0361, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.0394, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.001, |
|
"loss": 0.0312, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_gen_len": 306.6715976331361, |
|
"eval_loss": 3.04889178276062, |
|
"eval_rouge1": 47.7884, |
|
"eval_rouge2": 18.1788, |
|
"eval_rougeL": 28.6688, |
|
"eval_rougeLsum": 46.0744, |
|
"eval_runtime": 746.8476, |
|
"eval_samples_per_second": 0.453, |
|
"eval_steps_per_second": 0.058, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.0276, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.001, |
|
"loss": 0.0338, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.001, |
|
"loss": 0.0487, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.0379, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.001, |
|
"loss": 0.0309, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.0366, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.001, |
|
"loss": 0.0383, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_gen_len": 308.9082840236686, |
|
"eval_loss": 2.681736707687378, |
|
"eval_rouge1": 47.1842, |
|
"eval_rouge2": 18.22, |
|
"eval_rougeL": 28.4626, |
|
"eval_rougeLsum": 45.5778, |
|
"eval_runtime": 764.2847, |
|
"eval_samples_per_second": 0.442, |
|
"eval_steps_per_second": 0.056, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.0474, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.0376, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.001, |
|
"loss": 0.0535, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.036, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.001, |
|
"loss": 0.0339, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.0557, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 0.001, |
|
"loss": 0.0367, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 227.8550295857988, |
|
"eval_loss": 3.0245203971862793, |
|
"eval_rouge1": 45.5573, |
|
"eval_rouge2": 17.2161, |
|
"eval_rougeL": 28.0573, |
|
"eval_rougeLsum": 43.7772, |
|
"eval_runtime": 668.0023, |
|
"eval_samples_per_second": 0.506, |
|
"eval_steps_per_second": 0.064, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.033, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.001, |
|
"loss": 0.0379, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 0.001, |
|
"loss": 0.0274, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.026, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.001, |
|
"loss": 0.0269, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.0314, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.04, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"eval_gen_len": 429.8757396449704, |
|
"eval_loss": 3.2872769832611084, |
|
"eval_rouge1": 44.0164, |
|
"eval_rouge2": 17.1682, |
|
"eval_rougeL": 26.4769, |
|
"eval_rougeLsum": 42.3752, |
|
"eval_runtime": 779.6058, |
|
"eval_samples_per_second": 0.434, |
|
"eval_steps_per_second": 0.055, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.001, |
|
"loss": 0.0288, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.0252, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 0.001, |
|
"loss": 0.0262, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.0297, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.001, |
|
"loss": 0.0286, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.028, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"eval_gen_len": 337.4822485207101, |
|
"eval_loss": 2.9815244674682617, |
|
"eval_rouge1": 46.6542, |
|
"eval_rouge2": 17.8515, |
|
"eval_rougeL": 28.146, |
|
"eval_rougeLsum": 45.0274, |
|
"eval_runtime": 765.5763, |
|
"eval_samples_per_second": 0.441, |
|
"eval_steps_per_second": 0.056, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"step": 140, |
|
"total_flos": 2.7436549259892326e+17, |
|
"train_loss": 0.045875947869249756, |
|
"train_runtime": 15096.5898, |
|
"train_samples_per_second": 2.433, |
|
"train_steps_per_second": 0.009 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 140, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2.7436549259892326e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|