w2v-bert-bem-bl / trainer_state.json
csikasote's picture
End of training
9ebe00c verified
{
"best_metric": 0.21356959640979767,
"best_model_checkpoint": "/scratch/skscla001/results/w2v-bert-bem-bl/checkpoint-5500",
"epoch": 9.99297259311314,
"eval_steps": 500,
"global_step": 7110,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.35137034434293746,
"grad_norm": 6.841489791870117,
"learning_rate": 0.00014879999999999998,
"loss": 1.1986,
"step": 250
},
{
"epoch": 0.7027406886858749,
"grad_norm": 5.7157793045043945,
"learning_rate": 0.0002988,
"loss": 0.5344,
"step": 500
},
{
"epoch": 0.7027406886858749,
"eval_loss": 0.5448043942451477,
"eval_runtime": 58.822,
"eval_samples_per_second": 25.365,
"eval_steps_per_second": 3.179,
"eval_wer": 0.7378665974565274,
"step": 500
},
{
"epoch": 1.0541110330288124,
"grad_norm": 1.5800367593765259,
"learning_rate": 0.00028874432677760965,
"loss": 0.4918,
"step": 750
},
{
"epoch": 1.4054813773717498,
"grad_norm": 1.6567301750183105,
"learning_rate": 0.0002774432677760968,
"loss": 0.4242,
"step": 1000
},
{
"epoch": 1.4054813773717498,
"eval_loss": 0.3054925501346588,
"eval_runtime": 56.7685,
"eval_samples_per_second": 26.282,
"eval_steps_per_second": 3.294,
"eval_wer": 0.6024742624794532,
"step": 1000
},
{
"epoch": 1.7568517217146873,
"grad_norm": 1.094335675239563,
"learning_rate": 0.0002660968229954614,
"loss": 0.3958,
"step": 1250
},
{
"epoch": 2.1082220660576247,
"grad_norm": 1.0848636627197266,
"learning_rate": 0.000254750378214826,
"loss": 0.3603,
"step": 1500
},
{
"epoch": 2.1082220660576247,
"eval_loss": 0.26925167441368103,
"eval_runtime": 56.9762,
"eval_samples_per_second": 26.186,
"eval_steps_per_second": 3.282,
"eval_wer": 0.5385413963145601,
"step": 1500
},
{
"epoch": 2.459592410400562,
"grad_norm": 0.9221932888031006,
"learning_rate": 0.00024340393343419062,
"loss": 0.3228,
"step": 1750
},
{
"epoch": 2.8109627547434997,
"grad_norm": 1.12798273563385,
"learning_rate": 0.0002320574886535552,
"loss": 0.3144,
"step": 2000
},
{
"epoch": 2.8109627547434997,
"eval_loss": 0.2683100700378418,
"eval_runtime": 56.9393,
"eval_samples_per_second": 26.203,
"eval_steps_per_second": 3.284,
"eval_wer": 0.5529025002162817,
"step": 2000
},
{
"epoch": 3.162333099086437,
"grad_norm": 0.6111273169517517,
"learning_rate": 0.0002207110438729198,
"loss": 0.2931,
"step": 2250
},
{
"epoch": 3.5137034434293746,
"grad_norm": 0.5683824419975281,
"learning_rate": 0.0002093645990922844,
"loss": 0.2656,
"step": 2500
},
{
"epoch": 3.5137034434293746,
"eval_loss": 0.24722729623317719,
"eval_runtime": 57.2449,
"eval_samples_per_second": 26.063,
"eval_steps_per_second": 3.267,
"eval_wer": 0.5258240332208669,
"step": 2500
},
{
"epoch": 3.865073787772312,
"grad_norm": 0.7263462543487549,
"learning_rate": 0.000198018154311649,
"loss": 0.2574,
"step": 2750
},
{
"epoch": 4.2164441321152495,
"grad_norm": 0.422783762216568,
"learning_rate": 0.0001866717095310136,
"loss": 0.2311,
"step": 3000
},
{
"epoch": 4.2164441321152495,
"eval_loss": 0.2352277785539627,
"eval_runtime": 56.81,
"eval_samples_per_second": 26.263,
"eval_steps_per_second": 3.292,
"eval_wer": 0.5025521238861493,
"step": 3000
},
{
"epoch": 4.567814476458187,
"grad_norm": 0.5190473794937134,
"learning_rate": 0.00017532526475037822,
"loss": 0.2133,
"step": 3250
},
{
"epoch": 4.919184820801124,
"grad_norm": 0.7750300765037537,
"learning_rate": 0.0001639788199697428,
"loss": 0.2106,
"step": 3500
},
{
"epoch": 4.919184820801124,
"eval_loss": 0.2326769232749939,
"eval_runtime": 56.8929,
"eval_samples_per_second": 26.225,
"eval_steps_per_second": 3.287,
"eval_wer": 0.5003027943593736,
"step": 3500
},
{
"epoch": 5.270555165144062,
"grad_norm": 0.37189096212387085,
"learning_rate": 0.0001526323751891074,
"loss": 0.1835,
"step": 3750
},
{
"epoch": 5.621925509486999,
"grad_norm": 0.5811314582824707,
"learning_rate": 0.000141285930408472,
"loss": 0.1816,
"step": 4000
},
{
"epoch": 5.621925509486999,
"eval_loss": 0.22979679703712463,
"eval_runtime": 56.9781,
"eval_samples_per_second": 26.185,
"eval_steps_per_second": 3.282,
"eval_wer": 0.49865905355134527,
"step": 4000
},
{
"epoch": 5.973295853829937,
"grad_norm": 1.2501654624938965,
"learning_rate": 0.0001299394856278366,
"loss": 0.1804,
"step": 4250
},
{
"epoch": 6.324666198172874,
"grad_norm": 0.40958958864212036,
"learning_rate": 0.0001185930408472012,
"loss": 0.1432,
"step": 4500
},
{
"epoch": 6.324666198172874,
"eval_loss": 0.21775686740875244,
"eval_runtime": 57.4498,
"eval_samples_per_second": 25.97,
"eval_steps_per_second": 3.255,
"eval_wer": 0.46855264296219395,
"step": 4500
},
{
"epoch": 6.676036542515812,
"grad_norm": 0.5354466438293457,
"learning_rate": 0.0001072465960665658,
"loss": 0.1475,
"step": 4750
},
{
"epoch": 7.027406886858749,
"grad_norm": 0.44123438000679016,
"learning_rate": 9.59001512859304e-05,
"loss": 0.1431,
"step": 5000
},
{
"epoch": 7.027406886858749,
"eval_loss": 0.21715782582759857,
"eval_runtime": 57.0515,
"eval_samples_per_second": 26.152,
"eval_steps_per_second": 3.278,
"eval_wer": 0.47469504282377367,
"step": 5000
},
{
"epoch": 7.378777231201687,
"grad_norm": 0.29582667350769043,
"learning_rate": 8.4553706505295e-05,
"loss": 0.1111,
"step": 5250
},
{
"epoch": 7.730147575544624,
"grad_norm": 0.5549105405807495,
"learning_rate": 7.32072617246596e-05,
"loss": 0.1069,
"step": 5500
},
{
"epoch": 7.730147575544624,
"eval_loss": 0.21356959640979767,
"eval_runtime": 56.8619,
"eval_samples_per_second": 26.239,
"eval_steps_per_second": 3.289,
"eval_wer": 0.45393200103815207,
"step": 5500
},
{
"epoch": 8.081517919887562,
"grad_norm": 0.632247269153595,
"learning_rate": 6.18608169440242e-05,
"loss": 0.1045,
"step": 5750
},
{
"epoch": 8.432888264230499,
"grad_norm": 0.3406793475151062,
"learning_rate": 5.05143721633888e-05,
"loss": 0.0767,
"step": 6000
},
{
"epoch": 8.432888264230499,
"eval_loss": 0.22697694599628448,
"eval_runtime": 56.8774,
"eval_samples_per_second": 26.232,
"eval_steps_per_second": 3.288,
"eval_wer": 0.4403495112033913,
"step": 6000
},
{
"epoch": 8.784258608573436,
"grad_norm": 0.4026772379875183,
"learning_rate": 3.91679273827534e-05,
"loss": 0.0781,
"step": 6250
},
{
"epoch": 9.135628952916374,
"grad_norm": 0.4836815595626831,
"learning_rate": 2.7821482602118e-05,
"loss": 0.0667,
"step": 6500
},
{
"epoch": 9.135628952916374,
"eval_loss": 0.23749949038028717,
"eval_runtime": 56.671,
"eval_samples_per_second": 26.327,
"eval_steps_per_second": 3.3,
"eval_wer": 0.4385327450471494,
"step": 6500
},
{
"epoch": 9.486999297259311,
"grad_norm": 0.17178182303905487,
"learning_rate": 1.64750378214826e-05,
"loss": 0.0502,
"step": 6750
},
{
"epoch": 9.838369641602249,
"grad_norm": 0.3071761131286621,
"learning_rate": 5.128593040847201e-06,
"loss": 0.0468,
"step": 7000
},
{
"epoch": 9.838369641602249,
"eval_loss": 0.2403486669063568,
"eval_runtime": 56.6826,
"eval_samples_per_second": 26.322,
"eval_steps_per_second": 3.299,
"eval_wer": 0.4353317761051994,
"step": 7000
},
{
"epoch": 9.99297259311314,
"step": 7110,
"total_flos": 2.094794810533582e+19,
"train_loss": 0.25149581422282674,
"train_runtime": 9293.6781,
"train_samples_per_second": 12.242,
"train_steps_per_second": 0.765
}
],
"logging_steps": 250,
"max_steps": 7110,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.094794810533582e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}