{
  "best_metric": 1.2010900974273682,
  "best_model_checkpoint": "./checkpoints/pegasus-large/checkpoint-11412",
  "epoch": 12.0,
  "global_step": 11412,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.27,
      "learning_rate": 5e-06,
      "loss": 8.7673,
      "step": 256
    },
    {
      "epoch": 0.54,
      "learning_rate": 1e-05,
      "loss": 7.6286,
      "step": 512
    },
    {
      "epoch": 0.81,
      "learning_rate": 9.93178426774675e-06,
      "loss": 7.0271,
      "step": 768
    },
    {
      "epoch": 1.0,
      "eval_loss": 6.360767364501953,
      "eval_runtime": 8.5969,
      "eval_samples_per_second": 58.16,
      "eval_steps_per_second": 7.328,
      "step": 951
    },
    {
      "epoch": 1.08,
      "learning_rate": 9.863568535493498e-06,
      "loss": 6.6826,
      "step": 1024
    },
    {
      "epoch": 1.35,
      "learning_rate": 9.795352803240248e-06,
      "loss": 6.0794,
      "step": 1280
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.727137070986997e-06,
      "loss": 4.7646,
      "step": 1536
    },
    {
      "epoch": 1.88,
      "learning_rate": 9.658921338733747e-06,
      "loss": 2.6262,
      "step": 1792
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.3431365489959717,
      "eval_runtime": 8.7742,
      "eval_samples_per_second": 56.985,
      "eval_steps_per_second": 7.18,
      "step": 1902
    },
    {
      "epoch": 2.15,
      "learning_rate": 9.590705606480494e-06,
      "loss": 1.5925,
      "step": 2048
    },
    {
      "epoch": 2.42,
      "learning_rate": 9.522489874227244e-06,
      "loss": 1.4186,
      "step": 2304
    },
    {
      "epoch": 2.69,
      "learning_rate": 9.454274141973993e-06,
      "loss": 1.4248,
      "step": 2560
    },
    {
      "epoch": 2.96,
      "learning_rate": 9.386058409720743e-06,
      "loss": 1.4015,
      "step": 2816
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.2660728693008423,
      "eval_runtime": 8.6854,
      "eval_samples_per_second": 57.568,
      "eval_steps_per_second": 7.254,
      "step": 2853
    },
    {
      "epoch": 3.23,
      "learning_rate": 9.31784267746749e-06,
      "loss": 1.3516,
      "step": 3072
    },
    {
      "epoch": 3.5,
      "learning_rate": 9.24962694521424e-06,
      "loss": 1.2862,
      "step": 3328
    },
    {
      "epoch": 3.77,
      "learning_rate": 9.18141121296099e-06,
      "loss": 1.2847,
      "step": 3584
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.238619327545166,
      "eval_runtime": 8.6335,
      "eval_samples_per_second": 57.914,
      "eval_steps_per_second": 7.297,
      "step": 3804
    },
    {
      "epoch": 4.04,
      "learning_rate": 9.11319548070774e-06,
      "loss": 1.3335,
      "step": 3840
    },
    {
      "epoch": 4.31,
      "learning_rate": 9.044979748454487e-06,
      "loss": 1.3177,
      "step": 4096
    },
    {
      "epoch": 4.58,
      "learning_rate": 8.976764016201236e-06,
      "loss": 1.2456,
      "step": 4352
    },
    {
      "epoch": 4.85,
      "learning_rate": 8.908548283947986e-06,
      "loss": 1.269,
      "step": 4608
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.2249763011932373,
      "eval_runtime": 8.6401,
      "eval_samples_per_second": 57.87,
      "eval_steps_per_second": 7.292,
      "step": 4755
    },
    {
      "epoch": 5.11,
      "learning_rate": 8.840332551694735e-06,
      "loss": 1.2381,
      "step": 4864
    },
    {
      "epoch": 5.38,
      "learning_rate": 8.772116819441483e-06,
      "loss": 1.224,
      "step": 5120
    },
    {
      "epoch": 5.65,
      "learning_rate": 8.703901087188233e-06,
      "loss": 1.2753,
      "step": 5376
    },
    {
      "epoch": 5.92,
      "learning_rate": 8.635685354934982e-06,
      "loss": 1.2279,
      "step": 5632
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.2167253494262695,
      "eval_runtime": 8.6769,
      "eval_samples_per_second": 57.625,
      "eval_steps_per_second": 7.261,
      "step": 5706
    },
    {
      "epoch": 6.19,
      "learning_rate": 8.567469622681732e-06,
      "loss": 1.2545,
      "step": 5888
    },
    {
      "epoch": 6.46,
      "learning_rate": 8.499253890428481e-06,
      "loss": 1.1907,
      "step": 6144
    },
    {
      "epoch": 6.73,
      "learning_rate": 8.431038158175229e-06,
      "loss": 1.2113,
      "step": 6400
    },
    {
      "epoch": 7.0,
      "learning_rate": 8.362822425921979e-06,
      "loss": 1.1956,
      "step": 6656
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.210858941078186,
      "eval_runtime": 8.6349,
      "eval_samples_per_second": 57.904,
      "eval_steps_per_second": 7.296,
      "step": 6657
    },
    {
      "epoch": 7.27,
      "learning_rate": 8.294606693668728e-06,
      "loss": 1.1814,
      "step": 6912
    },
    {
      "epoch": 7.54,
      "learning_rate": 8.226390961415478e-06,
      "loss": 1.1784,
      "step": 7168
    },
    {
      "epoch": 7.81,
      "learning_rate": 8.158175229162227e-06,
      "loss": 1.179,
      "step": 7424
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.2075146436691284,
      "eval_runtime": 8.6767,
      "eval_samples_per_second": 57.625,
      "eval_steps_per_second": 7.261,
      "step": 7608
    },
    {
      "epoch": 8.08,
      "learning_rate": 8.089959496908975e-06,
      "loss": 1.2127,
      "step": 7680
    },
    {
      "epoch": 8.34,
      "learning_rate": 8.021743764655724e-06,
      "loss": 1.1417,
      "step": 7936
    },
    {
      "epoch": 8.61,
      "learning_rate": 7.953528032402474e-06,
      "loss": 1.1988,
      "step": 8192
    },
    {
      "epoch": 8.88,
      "learning_rate": 7.885312300149223e-06,
      "loss": 1.183,
      "step": 8448
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.2050586938858032,
      "eval_runtime": 8.769,
      "eval_samples_per_second": 57.019,
      "eval_steps_per_second": 7.184,
      "step": 8559
    },
    {
      "epoch": 9.15,
      "learning_rate": 7.817096567895973e-06,
      "loss": 1.1557,
      "step": 8704
    },
    {
      "epoch": 9.42,
      "learning_rate": 7.74888083564272e-06,
      "loss": 1.149,
      "step": 8960
    },
    {
      "epoch": 9.69,
      "learning_rate": 7.68066510338947e-06,
      "loss": 1.1279,
      "step": 9216
    },
    {
      "epoch": 9.96,
      "learning_rate": 7.612449371136219e-06,
      "loss": 1.1471,
      "step": 9472
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.203889012336731,
      "eval_runtime": 8.6047,
      "eval_samples_per_second": 58.108,
      "eval_steps_per_second": 7.322,
      "step": 9510
    },
    {
      "epoch": 10.23,
      "learning_rate": 7.544233638882968e-06,
      "loss": 1.1345,
      "step": 9728
    },
    {
      "epoch": 10.5,
      "learning_rate": 7.476017906629717e-06,
      "loss": 1.1573,
      "step": 9984
    },
    {
      "epoch": 10.77,
      "learning_rate": 7.4078021743764664e-06,
      "loss": 1.1148,
      "step": 10240
    },
    {
      "epoch": 11.0,
      "eval_loss": 1.2023481130599976,
      "eval_runtime": 8.7109,
      "eval_samples_per_second": 57.4,
      "eval_steps_per_second": 7.232,
      "step": 10461
    },
    {
      "epoch": 11.04,
      "learning_rate": 7.339586442123215e-06,
      "loss": 1.1417,
      "step": 10496
    },
    {
      "epoch": 11.31,
      "learning_rate": 7.2713707098699646e-06,
      "loss": 1.1067,
      "step": 10752
    },
    {
      "epoch": 11.58,
      "learning_rate": 7.203154977616713e-06,
      "loss": 1.1289,
      "step": 11008
    },
    {
      "epoch": 11.84,
      "learning_rate": 7.134939245363463e-06,
      "loss": 1.1112,
      "step": 11264
    },
    {
      "epoch": 12.0,
      "eval_loss": 1.2010900974273682,
      "eval_runtime": 8.6876,
      "eval_samples_per_second": 57.553,
      "eval_steps_per_second": 7.252,
      "step": 11412
    }
  ],
  "max_steps": 38040,
  "num_train_epochs": 40,
  "total_flos": 2.5573624673245594e+17,
  "trial_name": null,
  "trial_params": null
}