|
{ |
|
"best_metric": 0.21356959640979767, |
|
"best_model_checkpoint": "/scratch/skscla001/results/w2v-bert-bem-bl/checkpoint-5500", |
|
"epoch": 9.99297259311314, |
|
"eval_steps": 500, |
|
"global_step": 7110, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.35137034434293746, |
|
"grad_norm": 6.841489791870117, |
|
"learning_rate": 0.00014879999999999998, |
|
"loss": 1.1986, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7027406886858749, |
|
"grad_norm": 5.7157793045043945, |
|
"learning_rate": 0.0002988, |
|
"loss": 0.5344, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7027406886858749, |
|
"eval_loss": 0.5448043942451477, |
|
"eval_runtime": 58.822, |
|
"eval_samples_per_second": 25.365, |
|
"eval_steps_per_second": 3.179, |
|
"eval_wer": 0.7378665974565274, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0541110330288124, |
|
"grad_norm": 1.5800367593765259, |
|
"learning_rate": 0.00028874432677760965, |
|
"loss": 0.4918, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.4054813773717498, |
|
"grad_norm": 1.6567301750183105, |
|
"learning_rate": 0.0002774432677760968, |
|
"loss": 0.4242, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4054813773717498, |
|
"eval_loss": 0.3054925501346588, |
|
"eval_runtime": 56.7685, |
|
"eval_samples_per_second": 26.282, |
|
"eval_steps_per_second": 3.294, |
|
"eval_wer": 0.6024742624794532, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7568517217146873, |
|
"grad_norm": 1.094335675239563, |
|
"learning_rate": 0.0002660968229954614, |
|
"loss": 0.3958, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.1082220660576247, |
|
"grad_norm": 1.0848636627197266, |
|
"learning_rate": 0.000254750378214826, |
|
"loss": 0.3603, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1082220660576247, |
|
"eval_loss": 0.26925167441368103, |
|
"eval_runtime": 56.9762, |
|
"eval_samples_per_second": 26.186, |
|
"eval_steps_per_second": 3.282, |
|
"eval_wer": 0.5385413963145601, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.459592410400562, |
|
"grad_norm": 0.9221932888031006, |
|
"learning_rate": 0.00024340393343419062, |
|
"loss": 0.3228, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.8109627547434997, |
|
"grad_norm": 1.12798273563385, |
|
"learning_rate": 0.0002320574886535552, |
|
"loss": 0.3144, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.8109627547434997, |
|
"eval_loss": 0.2683100700378418, |
|
"eval_runtime": 56.9393, |
|
"eval_samples_per_second": 26.203, |
|
"eval_steps_per_second": 3.284, |
|
"eval_wer": 0.5529025002162817, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.162333099086437, |
|
"grad_norm": 0.6111273169517517, |
|
"learning_rate": 0.0002207110438729198, |
|
"loss": 0.2931, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.5137034434293746, |
|
"grad_norm": 0.5683824419975281, |
|
"learning_rate": 0.0002093645990922844, |
|
"loss": 0.2656, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.5137034434293746, |
|
"eval_loss": 0.24722729623317719, |
|
"eval_runtime": 57.2449, |
|
"eval_samples_per_second": 26.063, |
|
"eval_steps_per_second": 3.267, |
|
"eval_wer": 0.5258240332208669, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.865073787772312, |
|
"grad_norm": 0.7263462543487549, |
|
"learning_rate": 0.000198018154311649, |
|
"loss": 0.2574, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 4.2164441321152495, |
|
"grad_norm": 0.422783762216568, |
|
"learning_rate": 0.0001866717095310136, |
|
"loss": 0.2311, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.2164441321152495, |
|
"eval_loss": 0.2352277785539627, |
|
"eval_runtime": 56.81, |
|
"eval_samples_per_second": 26.263, |
|
"eval_steps_per_second": 3.292, |
|
"eval_wer": 0.5025521238861493, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.567814476458187, |
|
"grad_norm": 0.5190473794937134, |
|
"learning_rate": 0.00017532526475037822, |
|
"loss": 0.2133, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.919184820801124, |
|
"grad_norm": 0.7750300765037537, |
|
"learning_rate": 0.0001639788199697428, |
|
"loss": 0.2106, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.919184820801124, |
|
"eval_loss": 0.2326769232749939, |
|
"eval_runtime": 56.8929, |
|
"eval_samples_per_second": 26.225, |
|
"eval_steps_per_second": 3.287, |
|
"eval_wer": 0.5003027943593736, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.270555165144062, |
|
"grad_norm": 0.37189096212387085, |
|
"learning_rate": 0.0001526323751891074, |
|
"loss": 0.1835, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 5.621925509486999, |
|
"grad_norm": 0.5811314582824707, |
|
"learning_rate": 0.000141285930408472, |
|
"loss": 0.1816, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.621925509486999, |
|
"eval_loss": 0.22979679703712463, |
|
"eval_runtime": 56.9781, |
|
"eval_samples_per_second": 26.185, |
|
"eval_steps_per_second": 3.282, |
|
"eval_wer": 0.49865905355134527, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.973295853829937, |
|
"grad_norm": 1.2501654624938965, |
|
"learning_rate": 0.0001299394856278366, |
|
"loss": 0.1804, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 6.324666198172874, |
|
"grad_norm": 0.40958958864212036, |
|
"learning_rate": 0.0001185930408472012, |
|
"loss": 0.1432, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.324666198172874, |
|
"eval_loss": 0.21775686740875244, |
|
"eval_runtime": 57.4498, |
|
"eval_samples_per_second": 25.97, |
|
"eval_steps_per_second": 3.255, |
|
"eval_wer": 0.46855264296219395, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.676036542515812, |
|
"grad_norm": 0.5354466438293457, |
|
"learning_rate": 0.0001072465960665658, |
|
"loss": 0.1475, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 7.027406886858749, |
|
"grad_norm": 0.44123438000679016, |
|
"learning_rate": 9.59001512859304e-05, |
|
"loss": 0.1431, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.027406886858749, |
|
"eval_loss": 0.21715782582759857, |
|
"eval_runtime": 57.0515, |
|
"eval_samples_per_second": 26.152, |
|
"eval_steps_per_second": 3.278, |
|
"eval_wer": 0.47469504282377367, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.378777231201687, |
|
"grad_norm": 0.29582667350769043, |
|
"learning_rate": 8.4553706505295e-05, |
|
"loss": 0.1111, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 7.730147575544624, |
|
"grad_norm": 0.5549105405807495, |
|
"learning_rate": 7.32072617246596e-05, |
|
"loss": 0.1069, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.730147575544624, |
|
"eval_loss": 0.21356959640979767, |
|
"eval_runtime": 56.8619, |
|
"eval_samples_per_second": 26.239, |
|
"eval_steps_per_second": 3.289, |
|
"eval_wer": 0.45393200103815207, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.081517919887562, |
|
"grad_norm": 0.632247269153595, |
|
"learning_rate": 6.18608169440242e-05, |
|
"loss": 0.1045, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 8.432888264230499, |
|
"grad_norm": 0.3406793475151062, |
|
"learning_rate": 5.05143721633888e-05, |
|
"loss": 0.0767, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.432888264230499, |
|
"eval_loss": 0.22697694599628448, |
|
"eval_runtime": 56.8774, |
|
"eval_samples_per_second": 26.232, |
|
"eval_steps_per_second": 3.288, |
|
"eval_wer": 0.4403495112033913, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.784258608573436, |
|
"grad_norm": 0.4026772379875183, |
|
"learning_rate": 3.91679273827534e-05, |
|
"loss": 0.0781, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 9.135628952916374, |
|
"grad_norm": 0.4836815595626831, |
|
"learning_rate": 2.7821482602118e-05, |
|
"loss": 0.0667, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.135628952916374, |
|
"eval_loss": 0.23749949038028717, |
|
"eval_runtime": 56.671, |
|
"eval_samples_per_second": 26.327, |
|
"eval_steps_per_second": 3.3, |
|
"eval_wer": 0.4385327450471494, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.486999297259311, |
|
"grad_norm": 0.17178182303905487, |
|
"learning_rate": 1.64750378214826e-05, |
|
"loss": 0.0502, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 9.838369641602249, |
|
"grad_norm": 0.3071761131286621, |
|
"learning_rate": 5.128593040847201e-06, |
|
"loss": 0.0468, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.838369641602249, |
|
"eval_loss": 0.2403486669063568, |
|
"eval_runtime": 56.6826, |
|
"eval_samples_per_second": 26.322, |
|
"eval_steps_per_second": 3.299, |
|
"eval_wer": 0.4353317761051994, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.99297259311314, |
|
"step": 7110, |
|
"total_flos": 2.094794810533582e+19, |
|
"train_loss": 0.25149581422282674, |
|
"train_runtime": 9293.6781, |
|
"train_samples_per_second": 12.242, |
|
"train_steps_per_second": 0.765 |
|
} |
|
], |
|
"logging_steps": 250, |
|
"max_steps": 7110, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.094794810533582e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|