ntr-base-qrecc / trainer_state.json
3v324v23's picture
udpate new checkpint 15K
dce943d
raw
history blame
9.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.8896447467876039,
"eval_steps": 500,
"global_step": 15000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 0.000975,
"loss": 1.0238,
"step": 500
},
{
"epoch": 0.06,
"eval_loss": 0.8929525017738342,
"eval_runtime": 4.0089,
"eval_samples_per_second": 74.833,
"eval_steps_per_second": 37.417,
"step": 500
},
{
"epoch": 0.13,
"learning_rate": 0.00095,
"loss": 0.9971,
"step": 1000
},
{
"epoch": 0.13,
"eval_loss": 0.8510345220565796,
"eval_runtime": 4.0089,
"eval_samples_per_second": 74.833,
"eval_steps_per_second": 37.417,
"step": 1000
},
{
"epoch": 0.19,
"learning_rate": 0.000925,
"loss": 0.9522,
"step": 1500
},
{
"epoch": 0.19,
"eval_loss": 0.8565580248832703,
"eval_runtime": 4.0278,
"eval_samples_per_second": 74.483,
"eval_steps_per_second": 37.241,
"step": 1500
},
{
"epoch": 0.25,
"learning_rate": 0.0009000000000000001,
"loss": 0.9143,
"step": 2000
},
{
"epoch": 0.25,
"eval_loss": 0.8810315132141113,
"eval_runtime": 4.0097,
"eval_samples_per_second": 74.819,
"eval_steps_per_second": 37.409,
"step": 2000
},
{
"epoch": 0.31,
"learning_rate": 0.000875,
"loss": 0.9151,
"step": 2500
},
{
"epoch": 0.31,
"eval_loss": 0.8461073040962219,
"eval_runtime": 4.0108,
"eval_samples_per_second": 74.798,
"eval_steps_per_second": 37.399,
"step": 2500
},
{
"epoch": 0.38,
"learning_rate": 0.00085,
"loss": 0.9263,
"step": 3000
},
{
"epoch": 0.38,
"eval_loss": 0.8521466255187988,
"eval_runtime": 4.0059,
"eval_samples_per_second": 74.89,
"eval_steps_per_second": 37.445,
"step": 3000
},
{
"epoch": 0.44,
"learning_rate": 0.000825,
"loss": 0.9102,
"step": 3500
},
{
"epoch": 0.44,
"eval_loss": 0.8521981835365295,
"eval_runtime": 4.0413,
"eval_samples_per_second": 74.234,
"eval_steps_per_second": 37.117,
"step": 3500
},
{
"epoch": 0.5,
"learning_rate": 0.0008,
"loss": 0.8866,
"step": 4000
},
{
"epoch": 0.5,
"eval_loss": 0.8259674906730652,
"eval_runtime": 4.0316,
"eval_samples_per_second": 74.412,
"eval_steps_per_second": 37.206,
"step": 4000
},
{
"epoch": 0.57,
"learning_rate": 0.0007750000000000001,
"loss": 0.911,
"step": 4500
},
{
"epoch": 0.57,
"eval_loss": 0.8008900284767151,
"eval_runtime": 4.0224,
"eval_samples_per_second": 74.583,
"eval_steps_per_second": 37.292,
"step": 4500
},
{
"epoch": 0.63,
"learning_rate": 0.00075,
"loss": 0.8536,
"step": 5000
},
{
"epoch": 0.63,
"eval_loss": 0.808663547039032,
"eval_runtime": 5.0615,
"eval_samples_per_second": 59.271,
"eval_steps_per_second": 29.635,
"step": 5000
},
{
"epoch": 0.69,
"learning_rate": 0.000725,
"loss": 0.8277,
"step": 5500
},
{
"epoch": 0.69,
"eval_loss": 0.7903586626052856,
"eval_runtime": 4.0081,
"eval_samples_per_second": 74.849,
"eval_steps_per_second": 37.425,
"step": 5500
},
{
"epoch": 0.76,
"learning_rate": 0.0007,
"loss": 0.8413,
"step": 6000
},
{
"epoch": 0.76,
"eval_loss": 0.8013813495635986,
"eval_runtime": 3.994,
"eval_samples_per_second": 75.113,
"eval_steps_per_second": 37.556,
"step": 6000
},
{
"epoch": 0.82,
"learning_rate": 0.000675,
"loss": 0.8491,
"step": 6500
},
{
"epoch": 0.82,
"eval_loss": 0.7867017984390259,
"eval_runtime": 4.0072,
"eval_samples_per_second": 74.864,
"eval_steps_per_second": 37.432,
"step": 6500
},
{
"epoch": 0.88,
"learning_rate": 0.0006500000000000001,
"loss": 0.8077,
"step": 7000
},
{
"epoch": 0.88,
"eval_loss": 0.759165346622467,
"eval_runtime": 4.0166,
"eval_samples_per_second": 74.691,
"eval_steps_per_second": 37.345,
"step": 7000
},
{
"epoch": 0.94,
"learning_rate": 0.000625,
"loss": 0.796,
"step": 7500
},
{
"epoch": 0.94,
"eval_loss": 0.7527943849563599,
"eval_runtime": 3.998,
"eval_samples_per_second": 75.037,
"eval_steps_per_second": 37.518,
"step": 7500
},
{
"epoch": 1.01,
"learning_rate": 0.0006,
"loss": 0.7952,
"step": 8000
},
{
"epoch": 1.01,
"eval_loss": 0.7407393455505371,
"eval_runtime": 3.9421,
"eval_samples_per_second": 76.102,
"eval_steps_per_second": 38.051,
"step": 8000
},
{
"epoch": 1.07,
"learning_rate": 0.000575,
"loss": 0.5761,
"step": 8500
},
{
"epoch": 1.07,
"eval_loss": 0.7574229836463928,
"eval_runtime": 3.9855,
"eval_samples_per_second": 75.273,
"eval_steps_per_second": 37.636,
"step": 8500
},
{
"epoch": 1.13,
"learning_rate": 0.00055,
"loss": 0.6084,
"step": 9000
},
{
"epoch": 1.13,
"eval_loss": 0.7349967956542969,
"eval_runtime": 3.995,
"eval_samples_per_second": 75.093,
"eval_steps_per_second": 37.547,
"step": 9000
},
{
"epoch": 1.2,
"learning_rate": 0.0005250000000000001,
"loss": 0.5863,
"step": 9500
},
{
"epoch": 1.2,
"eval_loss": 0.7407773733139038,
"eval_runtime": 3.9874,
"eval_samples_per_second": 75.238,
"eval_steps_per_second": 37.619,
"step": 9500
},
{
"epoch": 1.26,
"learning_rate": 0.0005,
"loss": 0.5835,
"step": 10000
},
{
"epoch": 1.26,
"eval_loss": 0.719527006149292,
"eval_runtime": 3.9471,
"eval_samples_per_second": 76.006,
"eval_steps_per_second": 38.003,
"step": 10000
},
{
"epoch": 1.32,
"learning_rate": 0.000475,
"loss": 0.5751,
"step": 10500
},
{
"epoch": 1.32,
"eval_loss": 0.7423205971717834,
"eval_runtime": 3.9978,
"eval_samples_per_second": 75.041,
"eval_steps_per_second": 37.52,
"step": 10500
},
{
"epoch": 1.39,
"learning_rate": 0.00045000000000000004,
"loss": 0.5746,
"step": 11000
},
{
"epoch": 1.39,
"eval_loss": 0.7284040451049805,
"eval_runtime": 3.9657,
"eval_samples_per_second": 75.649,
"eval_steps_per_second": 37.824,
"step": 11000
},
{
"epoch": 1.45,
"learning_rate": 0.000425,
"loss": 0.5847,
"step": 11500
},
{
"epoch": 1.45,
"eval_loss": 0.7247716188430786,
"eval_runtime": 3.962,
"eval_samples_per_second": 75.719,
"eval_steps_per_second": 37.86,
"step": 11500
},
{
"epoch": 1.51,
"learning_rate": 0.0004,
"loss": 0.5759,
"step": 12000
},
{
"epoch": 1.51,
"eval_loss": 0.7151039838790894,
"eval_runtime": 3.9922,
"eval_samples_per_second": 75.147,
"eval_steps_per_second": 37.573,
"step": 12000
},
{
"epoch": 1.57,
"learning_rate": 0.000375,
"loss": 0.5922,
"step": 12500
},
{
"epoch": 1.57,
"eval_loss": 0.7031030058860779,
"eval_runtime": 4.0386,
"eval_samples_per_second": 74.282,
"eval_steps_per_second": 37.141,
"step": 12500
},
{
"epoch": 1.64,
"learning_rate": 0.00035,
"loss": 0.5678,
"step": 13000
},
{
"epoch": 1.64,
"eval_loss": 0.710035502910614,
"eval_runtime": 4.0808,
"eval_samples_per_second": 73.515,
"eval_steps_per_second": 36.758,
"step": 13000
},
{
"epoch": 1.7,
"learning_rate": 0.00032500000000000004,
"loss": 0.5627,
"step": 13500
},
{
"epoch": 1.7,
"eval_loss": 0.7093074321746826,
"eval_runtime": 4.0326,
"eval_samples_per_second": 74.393,
"eval_steps_per_second": 37.197,
"step": 13500
},
{
"epoch": 1.76,
"learning_rate": 0.0003,
"loss": 0.5568,
"step": 14000
},
{
"epoch": 1.76,
"eval_loss": 0.6942310333251953,
"eval_runtime": 4.0875,
"eval_samples_per_second": 73.395,
"eval_steps_per_second": 36.697,
"step": 14000
},
{
"epoch": 1.83,
"learning_rate": 0.000275,
"loss": 0.5589,
"step": 14500
},
{
"epoch": 1.83,
"eval_loss": 0.6947352290153503,
"eval_runtime": 3.9856,
"eval_samples_per_second": 75.27,
"eval_steps_per_second": 37.635,
"step": 14500
},
{
"epoch": 1.89,
"learning_rate": 0.00025,
"loss": 0.5675,
"step": 15000
},
{
"epoch": 1.89,
"eval_loss": 0.6902616620063782,
"eval_runtime": 4.0066,
"eval_samples_per_second": 74.876,
"eval_steps_per_second": 37.438,
"step": 15000
}
],
"logging_steps": 500,
"max_steps": 20000,
"num_train_epochs": 3,
"save_steps": 5000,
"total_flos": 2.66437480937472e+16,
"trial_name": null,
"trial_params": null
}