|
{ |
|
"best_metric": 0.24260137975215912, |
|
"best_model_checkpoint": "wave2vec2-xlsr-Persian/checkpoint-240000", |
|
"epoch": 3.0, |
|
"eval_steps": 10000, |
|
"global_step": 255012, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1176415227518705, |
|
"grad_norm": 1.416466236114502, |
|
"learning_rate": 9.646079712769477e-06, |
|
"loss": 3.7691, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.1176415227518705, |
|
"eval_loss": 0.7940966486930847, |
|
"eval_runtime": 534.4277, |
|
"eval_samples_per_second": 16.743, |
|
"eval_steps_per_second": 2.094, |
|
"eval_wer": 0.6079105283797882, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.235283045503741, |
|
"grad_norm": 2.3572049140930176, |
|
"learning_rate": 9.252436892745226e-06, |
|
"loss": 0.8658, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.235283045503741, |
|
"eval_loss": 0.5118501782417297, |
|
"eval_runtime": 536.8454, |
|
"eval_samples_per_second": 16.668, |
|
"eval_steps_per_second": 2.084, |
|
"eval_wer": 0.4575246579700923, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.3529245682556115, |
|
"grad_norm": 2.209596872329712, |
|
"learning_rate": 8.858872809158622e-06, |
|
"loss": 0.6829, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.3529245682556115, |
|
"eval_loss": 0.4285117983818054, |
|
"eval_runtime": 539.6896, |
|
"eval_samples_per_second": 16.58, |
|
"eval_steps_per_second": 2.073, |
|
"eval_wer": 0.40392825696083584, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.470566091007482, |
|
"grad_norm": 2.2252326011657715, |
|
"learning_rate": 8.465348093790845e-06, |
|
"loss": 0.6078, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.470566091007482, |
|
"eval_loss": 0.38793477416038513, |
|
"eval_runtime": 547.0046, |
|
"eval_samples_per_second": 16.358, |
|
"eval_steps_per_second": 2.046, |
|
"eval_wer": 0.3664437028760849, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.5882076137593525, |
|
"grad_norm": 2.7398128509521484, |
|
"learning_rate": 8.071784010204244e-06, |
|
"loss": 0.5561, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.5882076137593525, |
|
"eval_loss": 0.3593791723251343, |
|
"eval_runtime": 551.125, |
|
"eval_samples_per_second": 16.236, |
|
"eval_steps_per_second": 2.03, |
|
"eval_wer": 0.34282162650112097, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.705849136511223, |
|
"grad_norm": 2.960217237472534, |
|
"learning_rate": 7.678259294836465e-06, |
|
"loss": 0.5168, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.705849136511223, |
|
"eval_loss": 0.33377397060394287, |
|
"eval_runtime": 546.4831, |
|
"eval_samples_per_second": 16.374, |
|
"eval_steps_per_second": 2.048, |
|
"eval_wer": 0.3202835389089079, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.8234906592630935, |
|
"grad_norm": 2.531191349029541, |
|
"learning_rate": 7.284734579468687e-06, |
|
"loss": 0.499, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.8234906592630935, |
|
"eval_loss": 0.3143016993999481, |
|
"eval_runtime": 550.3802, |
|
"eval_samples_per_second": 16.258, |
|
"eval_steps_per_second": 2.033, |
|
"eval_wer": 0.30924387157877603, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.941132182014964, |
|
"grad_norm": 2.0980894565582275, |
|
"learning_rate": 6.891131127663261e-06, |
|
"loss": 0.4768, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.941132182014964, |
|
"eval_loss": 0.3023754954338074, |
|
"eval_runtime": 546.9538, |
|
"eval_samples_per_second": 16.36, |
|
"eval_steps_per_second": 2.046, |
|
"eval_wer": 0.29365367113334173, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.0587737047668344, |
|
"grad_norm": 1.7364046573638916, |
|
"learning_rate": 6.497606412295483e-06, |
|
"loss": 0.4529, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.0587737047668344, |
|
"eval_loss": 0.29402047395706177, |
|
"eval_runtime": 544.3376, |
|
"eval_samples_per_second": 16.438, |
|
"eval_steps_per_second": 2.056, |
|
"eval_wer": 0.2827878859629002, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.176415227518705, |
|
"grad_norm": 3.4313299655914307, |
|
"learning_rate": 6.1040816969277054e-06, |
|
"loss": 0.44, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.176415227518705, |
|
"eval_loss": 0.2909528613090515, |
|
"eval_runtime": 543.2034, |
|
"eval_samples_per_second": 16.473, |
|
"eval_steps_per_second": 2.06, |
|
"eval_wer": 0.27463762217996435, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.2940567502705755, |
|
"grad_norm": 1.1621043682098389, |
|
"learning_rate": 5.710478245122278e-06, |
|
"loss": 0.4264, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.2940567502705755, |
|
"eval_loss": 0.2814837098121643, |
|
"eval_runtime": 547.8907, |
|
"eval_samples_per_second": 16.332, |
|
"eval_steps_per_second": 2.042, |
|
"eval_wer": 0.2683001724023115, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.4116982730224459, |
|
"grad_norm": 2.507716178894043, |
|
"learning_rate": 5.316914161535675e-06, |
|
"loss": 0.4189, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.4116982730224459, |
|
"eval_loss": 0.27175650000572205, |
|
"eval_runtime": 548.1953, |
|
"eval_samples_per_second": 16.323, |
|
"eval_steps_per_second": 2.041, |
|
"eval_wer": 0.26368304611946813, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.5293397957743164, |
|
"grad_norm": 2.3356528282165527, |
|
"learning_rate": 4.923428814386722e-06, |
|
"loss": 0.4052, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.5293397957743164, |
|
"eval_loss": 0.2673029899597168, |
|
"eval_runtime": 544.6822, |
|
"eval_samples_per_second": 16.428, |
|
"eval_steps_per_second": 2.054, |
|
"eval_wer": 0.25851467639420195, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.646981318526187, |
|
"grad_norm": 2.476557970046997, |
|
"learning_rate": 4.529864730800119e-06, |
|
"loss": 0.4044, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 1.646981318526187, |
|
"eval_loss": 0.26591917872428894, |
|
"eval_runtime": 545.1182, |
|
"eval_samples_per_second": 16.415, |
|
"eval_steps_per_second": 2.053, |
|
"eval_wer": 0.2534720937631799, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 1.7646228412780576, |
|
"grad_norm": 1.8470633029937744, |
|
"learning_rate": 4.136418751869991e-06, |
|
"loss": 0.4046, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 1.7646228412780576, |
|
"eval_loss": 0.2603091299533844, |
|
"eval_runtime": 545.5751, |
|
"eval_samples_per_second": 16.401, |
|
"eval_steps_per_second": 2.051, |
|
"eval_wer": 0.24952459877616556, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 1.8822643640299281, |
|
"grad_norm": 2.6165308952331543, |
|
"learning_rate": 3.742815300064564e-06, |
|
"loss": 0.3944, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.8822643640299281, |
|
"eval_loss": 0.2575734555721283, |
|
"eval_runtime": 545.6155, |
|
"eval_samples_per_second": 16.4, |
|
"eval_steps_per_second": 2.051, |
|
"eval_wer": 0.24606545368445198, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.9999058867817985, |
|
"grad_norm": 2.0350422859191895, |
|
"learning_rate": 3.3492512164779615e-06, |
|
"loss": 0.3876, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 1.9999058867817985, |
|
"eval_loss": 0.25538763403892517, |
|
"eval_runtime": 543.8449, |
|
"eval_samples_per_second": 16.453, |
|
"eval_steps_per_second": 2.058, |
|
"eval_wer": 0.24299106911630866, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 2.117547409533669, |
|
"grad_norm": 2.234062433242798, |
|
"learning_rate": 2.955726501110184e-06, |
|
"loss": 0.3836, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 2.117547409533669, |
|
"eval_loss": 0.25172147154808044, |
|
"eval_runtime": 543.9926, |
|
"eval_samples_per_second": 16.449, |
|
"eval_steps_per_second": 2.057, |
|
"eval_wer": 0.24226224389377649, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 2.2351889322855394, |
|
"grad_norm": 2.7782626152038574, |
|
"learning_rate": 2.5621624175235817e-06, |
|
"loss": 0.3767, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 2.2351889322855394, |
|
"eval_loss": 0.2503082752227783, |
|
"eval_runtime": 546.7703, |
|
"eval_samples_per_second": 16.365, |
|
"eval_steps_per_second": 2.047, |
|
"eval_wer": 0.23942833465286462, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 2.35283045503741, |
|
"grad_norm": 2.365490198135376, |
|
"learning_rate": 2.1686770703746284e-06, |
|
"loss": 0.3738, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 2.35283045503741, |
|
"eval_loss": 0.24804162979125977, |
|
"eval_runtime": 545.3342, |
|
"eval_samples_per_second": 16.408, |
|
"eval_steps_per_second": 2.052, |
|
"eval_wer": 0.23909166919474062, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 2.4704719777892805, |
|
"grad_norm": 3.105099678039551, |
|
"learning_rate": 1.7751523550068502e-06, |
|
"loss": 0.3707, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 2.4704719777892805, |
|
"eval_loss": 0.24506914615631104, |
|
"eval_runtime": 547.7088, |
|
"eval_samples_per_second": 16.337, |
|
"eval_steps_per_second": 2.043, |
|
"eval_wer": 0.23790409104025928, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 2.588113500541151, |
|
"grad_norm": 0.9898041486740112, |
|
"learning_rate": 1.3815882714202481e-06, |
|
"loss": 0.3649, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 2.588113500541151, |
|
"eval_loss": 0.24370211362838745, |
|
"eval_runtime": 547.4398, |
|
"eval_samples_per_second": 16.345, |
|
"eval_steps_per_second": 2.044, |
|
"eval_wer": 0.23637614780723498, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 2.7057550232930216, |
|
"grad_norm": 4.53593635559082, |
|
"learning_rate": 9.880241878336459e-07, |
|
"loss": 0.369, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 2.7057550232930216, |
|
"eval_loss": 0.24421393871307373, |
|
"eval_runtime": 549.6273, |
|
"eval_samples_per_second": 16.28, |
|
"eval_steps_per_second": 2.036, |
|
"eval_wer": 0.23538094991453876, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 2.8233965460448918, |
|
"grad_norm": 3.995215654373169, |
|
"learning_rate": 5.945388406846922e-07, |
|
"loss": 0.3608, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.8233965460448918, |
|
"eval_loss": 0.24260137975215912, |
|
"eval_runtime": 548.4399, |
|
"eval_samples_per_second": 16.315, |
|
"eval_steps_per_second": 2.04, |
|
"eval_wer": 0.23452633759776248, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.9410380687967628, |
|
"grad_norm": 1.9774836301803589, |
|
"learning_rate": 2.0097475709808989e-07, |
|
"loss": 0.3644, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 2.9410380687967628, |
|
"eval_loss": 0.24260272085666656, |
|
"eval_runtime": 548.9964, |
|
"eval_samples_per_second": 16.299, |
|
"eval_steps_per_second": 2.038, |
|
"eval_wer": 0.234019489600367, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 255012, |
|
"total_flos": 2.7896646019563717e+20, |
|
"train_loss": 0.5826510110653187, |
|
"train_runtime": 84804.3792, |
|
"train_samples_per_second": 6.014, |
|
"train_steps_per_second": 3.007 |
|
} |
|
], |
|
"logging_steps": 10000, |
|
"max_steps": 255012, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.7896646019563717e+20, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|