starcoder-schemapile-fk / trainer_state.json
tdoehmen's picture
Upload 14 files
b4c8c1c verified
raw
history blame
3.18 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9943289224952743,
"global_step": 198,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 0.0,
"loss": 1.0585,
"step": 1
},
{
"epoch": 0.12,
"learning_rate": 2e-05,
"loss": 0.6923,
"step": 8
},
{
"epoch": 0.24,
"learning_rate": 2e-05,
"loss": 0.5149,
"step": 16
},
{
"epoch": 0.36,
"learning_rate": 2e-05,
"loss": 0.3899,
"step": 24
},
{
"epoch": 0.48,
"learning_rate": 2e-05,
"loss": 0.3511,
"step": 32
},
{
"epoch": 0.6,
"learning_rate": 2e-05,
"loss": 0.3321,
"step": 40
},
{
"epoch": 0.73,
"learning_rate": 2e-05,
"loss": 0.3254,
"step": 48
},
{
"epoch": 0.85,
"learning_rate": 2e-05,
"loss": 0.3104,
"step": 56
},
{
"epoch": 0.97,
"learning_rate": 2e-05,
"loss": 0.3007,
"step": 64
},
{
"epoch": 1.09,
"learning_rate": 2e-05,
"loss": 0.2617,
"step": 72
},
{
"epoch": 1.21,
"learning_rate": 2e-05,
"loss": 0.2454,
"step": 80
},
{
"epoch": 1.33,
"learning_rate": 2e-05,
"loss": 0.2362,
"step": 88
},
{
"epoch": 1.45,
"learning_rate": 2e-05,
"loss": 0.2334,
"step": 96
},
{
"epoch": 1.57,
"learning_rate": 2e-05,
"loss": 0.2275,
"step": 104
},
{
"epoch": 1.69,
"learning_rate": 2e-05,
"loss": 0.23,
"step": 112
},
{
"epoch": 1.81,
"learning_rate": 2e-05,
"loss": 0.217,
"step": 120
},
{
"epoch": 1.94,
"learning_rate": 2e-05,
"loss": 0.2205,
"step": 128
},
{
"epoch": 2.06,
"learning_rate": 2e-05,
"loss": 0.2031,
"step": 136
},
{
"epoch": 2.18,
"learning_rate": 2e-05,
"loss": 0.1629,
"step": 144
},
{
"epoch": 2.3,
"learning_rate": 2e-05,
"loss": 0.167,
"step": 152
},
{
"epoch": 2.42,
"learning_rate": 2e-05,
"loss": 0.1627,
"step": 160
},
{
"epoch": 2.54,
"learning_rate": 2e-05,
"loss": 0.1594,
"step": 168
},
{
"epoch": 2.66,
"learning_rate": 2e-05,
"loss": 0.1553,
"step": 176
},
{
"epoch": 2.78,
"learning_rate": 2e-05,
"loss": 0.1623,
"step": 184
},
{
"epoch": 2.9,
"learning_rate": 2e-05,
"loss": 0.1576,
"step": 192
},
{
"epoch": 2.99,
"step": 198,
"total_flos": 424189693526016.0,
"train_loss": 0.2658990031540996,
"train_runtime": 7827.5947,
"train_samples_per_second": 3.244,
"train_steps_per_second": 0.025
}
],
"max_steps": 198,
"num_train_epochs": 3,
"total_flos": 424189693526016.0,
"trial_name": null,
"trial_params": null
}