ntr-base-qrecc / trainer_state.json
3v324v23's picture
update new models
ccb7d61
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.8896447467876039,
"eval_steps": 500,
"global_step": 15000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 0.000975,
"loss": 1.0238,
"step": 500
},
{
"epoch": 0.06,
"eval_loss": 0.884382426738739,
"eval_runtime": 4.1887,
"eval_samples_per_second": 238.736,
"eval_steps_per_second": 29.842,
"step": 500
},
{
"epoch": 0.13,
"learning_rate": 0.00095,
"loss": 0.9971,
"step": 1000
},
{
"epoch": 0.13,
"eval_loss": 0.8562089800834656,
"eval_runtime": 4.1399,
"eval_samples_per_second": 241.551,
"eval_steps_per_second": 30.194,
"step": 1000
},
{
"epoch": 0.19,
"learning_rate": 0.000925,
"loss": 0.9522,
"step": 1500
},
{
"epoch": 0.19,
"eval_loss": 0.8645691871643066,
"eval_runtime": 4.1915,
"eval_samples_per_second": 238.576,
"eval_steps_per_second": 29.822,
"step": 1500
},
{
"epoch": 0.25,
"learning_rate": 0.0009000000000000001,
"loss": 0.9143,
"step": 2000
},
{
"epoch": 0.25,
"eval_loss": 0.8668216466903687,
"eval_runtime": 4.1753,
"eval_samples_per_second": 239.502,
"eval_steps_per_second": 29.938,
"step": 2000
},
{
"epoch": 0.31,
"learning_rate": 0.000875,
"loss": 0.9151,
"step": 2500
},
{
"epoch": 0.31,
"eval_loss": 0.8562004566192627,
"eval_runtime": 4.2561,
"eval_samples_per_second": 234.956,
"eval_steps_per_second": 29.369,
"step": 2500
},
{
"epoch": 0.38,
"learning_rate": 0.00085,
"loss": 0.9263,
"step": 3000
},
{
"epoch": 0.38,
"eval_loss": 0.850217342376709,
"eval_runtime": 4.1404,
"eval_samples_per_second": 241.521,
"eval_steps_per_second": 30.19,
"step": 3000
},
{
"epoch": 0.44,
"learning_rate": 0.000825,
"loss": 0.9102,
"step": 3500
},
{
"epoch": 0.44,
"eval_loss": 0.8581109642982483,
"eval_runtime": 4.1878,
"eval_samples_per_second": 238.788,
"eval_steps_per_second": 29.849,
"step": 3500
},
{
"epoch": 0.5,
"learning_rate": 0.0008,
"loss": 0.8866,
"step": 4000
},
{
"epoch": 0.5,
"eval_loss": 0.8308799862861633,
"eval_runtime": 4.1108,
"eval_samples_per_second": 243.259,
"eval_steps_per_second": 30.407,
"step": 4000
},
{
"epoch": 0.57,
"learning_rate": 0.0007750000000000001,
"loss": 0.911,
"step": 4500
},
{
"epoch": 0.57,
"eval_loss": 0.801396369934082,
"eval_runtime": 4.1725,
"eval_samples_per_second": 239.665,
"eval_steps_per_second": 29.958,
"step": 4500
},
{
"epoch": 0.63,
"learning_rate": 0.00075,
"loss": 0.8536,
"step": 5000
},
{
"epoch": 0.63,
"eval_loss": 0.8181760907173157,
"eval_runtime": 4.1738,
"eval_samples_per_second": 239.59,
"eval_steps_per_second": 29.949,
"step": 5000
},
{
"epoch": 0.69,
"learning_rate": 0.000725,
"loss": 0.8277,
"step": 5500
},
{
"epoch": 0.69,
"eval_loss": 0.7971529960632324,
"eval_runtime": 4.1414,
"eval_samples_per_second": 241.466,
"eval_steps_per_second": 30.183,
"step": 5500
},
{
"epoch": 0.76,
"learning_rate": 0.0007,
"loss": 0.8413,
"step": 6000
},
{
"epoch": 0.76,
"eval_loss": 0.8046051263809204,
"eval_runtime": 4.278,
"eval_samples_per_second": 233.755,
"eval_steps_per_second": 29.219,
"step": 6000
},
{
"epoch": 0.82,
"learning_rate": 0.000675,
"loss": 0.8491,
"step": 6500
},
{
"epoch": 0.82,
"eval_loss": 0.8008071184158325,
"eval_runtime": 4.2965,
"eval_samples_per_second": 232.746,
"eval_steps_per_second": 29.093,
"step": 6500
},
{
"epoch": 0.88,
"learning_rate": 0.0006500000000000001,
"loss": 0.8077,
"step": 7000
},
{
"epoch": 0.88,
"eval_loss": 0.7875821590423584,
"eval_runtime": 4.1627,
"eval_samples_per_second": 240.231,
"eval_steps_per_second": 30.029,
"step": 7000
},
{
"epoch": 0.94,
"learning_rate": 0.000625,
"loss": 0.796,
"step": 7500
},
{
"epoch": 0.94,
"eval_loss": 0.7883646488189697,
"eval_runtime": 4.2129,
"eval_samples_per_second": 237.368,
"eval_steps_per_second": 29.671,
"step": 7500
},
{
"epoch": 1.01,
"learning_rate": 0.0006,
"loss": 0.7952,
"step": 8000
},
{
"epoch": 1.01,
"eval_loss": 0.7713276743888855,
"eval_runtime": 4.1008,
"eval_samples_per_second": 243.858,
"eval_steps_per_second": 30.482,
"step": 8000
},
{
"epoch": 1.07,
"learning_rate": 0.000575,
"loss": 0.5761,
"step": 8500
},
{
"epoch": 1.07,
"eval_loss": 0.7843192219734192,
"eval_runtime": 4.1527,
"eval_samples_per_second": 240.808,
"eval_steps_per_second": 30.101,
"step": 8500
},
{
"epoch": 1.13,
"learning_rate": 0.00055,
"loss": 0.6084,
"step": 9000
},
{
"epoch": 1.13,
"eval_loss": 0.7597091197967529,
"eval_runtime": 4.2348,
"eval_samples_per_second": 236.137,
"eval_steps_per_second": 29.517,
"step": 9000
},
{
"epoch": 1.2,
"learning_rate": 0.0005250000000000001,
"loss": 0.5863,
"step": 9500
},
{
"epoch": 1.2,
"eval_loss": 0.7753661274909973,
"eval_runtime": 4.2434,
"eval_samples_per_second": 235.658,
"eval_steps_per_second": 29.457,
"step": 9500
},
{
"epoch": 1.26,
"learning_rate": 0.0005,
"loss": 0.5835,
"step": 10000
},
{
"epoch": 1.26,
"eval_loss": 0.7525186538696289,
"eval_runtime": 4.1329,
"eval_samples_per_second": 241.962,
"eval_steps_per_second": 30.245,
"step": 10000
},
{
"epoch": 1.32,
"learning_rate": 0.000475,
"loss": 0.5751,
"step": 10500
},
{
"epoch": 1.32,
"eval_loss": 0.7624223828315735,
"eval_runtime": 4.1841,
"eval_samples_per_second": 239.002,
"eval_steps_per_second": 29.875,
"step": 10500
},
{
"epoch": 1.39,
"learning_rate": 0.00045000000000000004,
"loss": 0.5746,
"step": 11000
},
{
"epoch": 1.39,
"eval_loss": 0.7706524729728699,
"eval_runtime": 4.2408,
"eval_samples_per_second": 235.806,
"eval_steps_per_second": 29.476,
"step": 11000
},
{
"epoch": 1.45,
"learning_rate": 0.000425,
"loss": 0.5847,
"step": 11500
},
{
"epoch": 1.45,
"eval_loss": 0.7473410367965698,
"eval_runtime": 4.1712,
"eval_samples_per_second": 239.742,
"eval_steps_per_second": 29.968,
"step": 11500
},
{
"epoch": 1.51,
"learning_rate": 0.0004,
"loss": 0.5759,
"step": 12000
},
{
"epoch": 1.51,
"eval_loss": 0.7421715259552002,
"eval_runtime": 4.1644,
"eval_samples_per_second": 240.128,
"eval_steps_per_second": 30.016,
"step": 12000
},
{
"epoch": 1.57,
"learning_rate": 0.000375,
"loss": 0.5922,
"step": 12500
},
{
"epoch": 1.57,
"eval_loss": 0.7362275123596191,
"eval_runtime": 4.1889,
"eval_samples_per_second": 238.726,
"eval_steps_per_second": 29.841,
"step": 12500
},
{
"epoch": 1.64,
"learning_rate": 0.00035,
"loss": 0.5678,
"step": 13000
},
{
"epoch": 1.64,
"eval_loss": 0.7327093482017517,
"eval_runtime": 4.151,
"eval_samples_per_second": 240.907,
"eval_steps_per_second": 30.113,
"step": 13000
},
{
"epoch": 1.7,
"learning_rate": 0.00032500000000000004,
"loss": 0.5627,
"step": 13500
},
{
"epoch": 1.7,
"eval_loss": 0.7284647226333618,
"eval_runtime": 4.2289,
"eval_samples_per_second": 236.47,
"eval_steps_per_second": 29.559,
"step": 13500
},
{
"epoch": 1.76,
"learning_rate": 0.0003,
"loss": 0.5568,
"step": 14000
},
{
"epoch": 1.76,
"eval_loss": 0.7164832949638367,
"eval_runtime": 4.2221,
"eval_samples_per_second": 236.848,
"eval_steps_per_second": 29.606,
"step": 14000
},
{
"epoch": 1.83,
"learning_rate": 0.000275,
"loss": 0.5589,
"step": 14500
},
{
"epoch": 1.83,
"eval_loss": 0.7153368592262268,
"eval_runtime": 4.2183,
"eval_samples_per_second": 237.06,
"eval_steps_per_second": 29.633,
"step": 14500
},
{
"epoch": 1.89,
"learning_rate": 0.00025,
"loss": 0.5675,
"step": 15000
},
{
"epoch": 1.89,
"eval_loss": 0.7135257720947266,
"eval_runtime": 4.2335,
"eval_samples_per_second": 236.21,
"eval_steps_per_second": 29.526,
"step": 15000
}
],
"logging_steps": 500,
"max_steps": 20000,
"num_train_epochs": 3,
"save_steps": 5000,
"total_flos": 2.66437480937472e+16,
"trial_name": null,
"trial_params": null
}