xls-r-300m-ur-cv7 / trainer_state.json
HarrisDePerceptron's picture
End of training
69fe7d1
raw
history blame
43.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 500.0,
"global_step": 12000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 4.17,
"learning_rate": 7.35e-06,
"loss": 18.5133,
"step": 100
},
{
"epoch": 4.17,
"eval_loss": 12.037576675415039,
"eval_runtime": 6.8573,
"eval_samples_per_second": 20.708,
"eval_steps_per_second": 2.625,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 8.33,
"learning_rate": 1.485e-05,
"loss": 7.9152,
"step": 200
},
{
"epoch": 8.33,
"eval_loss": 6.240577220916748,
"eval_runtime": 6.2762,
"eval_samples_per_second": 22.625,
"eval_steps_per_second": 2.868,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 12.5,
"learning_rate": 2.2349999999999998e-05,
"loss": 5.363,
"step": 300
},
{
"epoch": 12.5,
"eval_loss": 4.664888858795166,
"eval_runtime": 7.3621,
"eval_samples_per_second": 19.288,
"eval_steps_per_second": 2.445,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 16.67,
"learning_rate": 2.985e-05,
"loss": 4.05,
"step": 400
},
{
"epoch": 16.67,
"eval_loss": 3.5798044204711914,
"eval_runtime": 6.7794,
"eval_samples_per_second": 20.946,
"eval_steps_per_second": 2.655,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 20.83,
"learning_rate": 3.735e-05,
"loss": 3.3467,
"step": 500
},
{
"epoch": 20.83,
"eval_loss": 3.1930062770843506,
"eval_runtime": 7.0411,
"eval_samples_per_second": 20.167,
"eval_steps_per_second": 2.556,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 25.0,
"learning_rate": 4.484999999999999e-05,
"loss": 3.1638,
"step": 600
},
{
"epoch": 25.0,
"eval_loss": 3.098419666290283,
"eval_runtime": 6.9505,
"eval_samples_per_second": 20.43,
"eval_steps_per_second": 2.59,
"eval_wer": 1.0,
"step": 600
},
{
"epoch": 29.17,
"learning_rate": 5.234999999999999e-05,
"loss": 3.043,
"step": 700
},
{
"epoch": 29.17,
"eval_loss": 2.970651865005493,
"eval_runtime": 6.7343,
"eval_samples_per_second": 21.086,
"eval_steps_per_second": 2.673,
"eval_wer": 0.998256320836966,
"step": 700
},
{
"epoch": 33.33,
"learning_rate": 5.985e-05,
"loss": 2.9566,
"step": 800
},
{
"epoch": 33.33,
"eval_loss": 2.9295754432678223,
"eval_runtime": 6.7892,
"eval_samples_per_second": 20.916,
"eval_steps_per_second": 2.651,
"eval_wer": 1.0,
"step": 800
},
{
"epoch": 37.5,
"learning_rate": 6.735e-05,
"loss": 2.8994,
"step": 900
},
{
"epoch": 37.5,
"eval_loss": 2.8289785385131836,
"eval_runtime": 6.3572,
"eval_samples_per_second": 22.337,
"eval_steps_per_second": 2.831,
"eval_wer": 0.998256320836966,
"step": 900
},
{
"epoch": 41.67,
"learning_rate": 7.484999999999999e-05,
"loss": 2.6469,
"step": 1000
},
{
"epoch": 41.67,
"eval_loss": 2.244255781173706,
"eval_runtime": 7.0587,
"eval_samples_per_second": 20.117,
"eval_steps_per_second": 2.55,
"eval_wer": 0.976460331299041,
"step": 1000
},
{
"epoch": 45.83,
"learning_rate": 7.433181818181818e-05,
"loss": 2.1557,
"step": 1100
},
{
"epoch": 45.83,
"eval_loss": 1.5079460144042969,
"eval_runtime": 6.593,
"eval_samples_per_second": 21.538,
"eval_steps_per_second": 2.73,
"eval_wer": 0.8204010462074979,
"step": 1100
},
{
"epoch": 50.0,
"learning_rate": 7.364999999999999e-05,
"loss": 1.7524,
"step": 1200
},
{
"epoch": 50.0,
"eval_loss": 1.2472423315048218,
"eval_runtime": 6.884,
"eval_samples_per_second": 20.628,
"eval_steps_per_second": 2.615,
"eval_wer": 0.7593722755013078,
"step": 1200
},
{
"epoch": 54.17,
"learning_rate": 7.296818181818182e-05,
"loss": 1.54,
"step": 1300
},
{
"epoch": 54.17,
"eval_loss": 1.1611692905426025,
"eval_runtime": 6.7498,
"eval_samples_per_second": 21.038,
"eval_steps_per_second": 2.667,
"eval_wer": 0.7096774193548387,
"step": 1300
},
{
"epoch": 58.33,
"learning_rate": 7.228636363636363e-05,
"loss": 1.3985,
"step": 1400
},
{
"epoch": 58.33,
"eval_loss": 1.1158100366592407,
"eval_runtime": 6.9751,
"eval_samples_per_second": 20.358,
"eval_steps_per_second": 2.581,
"eval_wer": 0.7358326068003488,
"step": 1400
},
{
"epoch": 62.5,
"learning_rate": 7.160454545454545e-05,
"loss": 1.2869,
"step": 1500
},
{
"epoch": 62.5,
"eval_loss": 1.0453699827194214,
"eval_runtime": 6.542,
"eval_samples_per_second": 21.706,
"eval_steps_per_second": 2.751,
"eval_wer": 0.6931124673060157,
"step": 1500
},
{
"epoch": 66.67,
"learning_rate": 7.092272727272727e-05,
"loss": 1.1952,
"step": 1600
},
{
"epoch": 66.67,
"eval_loss": 1.0130492448806763,
"eval_runtime": 7.0228,
"eval_samples_per_second": 20.22,
"eval_steps_per_second": 2.563,
"eval_wer": 0.6852659110723627,
"step": 1600
},
{
"epoch": 70.83,
"learning_rate": 7.024090909090908e-05,
"loss": 1.1022,
"step": 1700
},
{
"epoch": 70.83,
"eval_loss": 1.017554521560669,
"eval_runtime": 6.8272,
"eval_samples_per_second": 20.799,
"eval_steps_per_second": 2.637,
"eval_wer": 0.6965998256320837,
"step": 1700
},
{
"epoch": 75.0,
"learning_rate": 6.95590909090909e-05,
"loss": 1.0346,
"step": 1800
},
{
"epoch": 75.0,
"eval_loss": 1.0053126811981201,
"eval_runtime": 6.9741,
"eval_samples_per_second": 20.361,
"eval_steps_per_second": 2.581,
"eval_wer": 0.6817785527462947,
"step": 1800
},
{
"epoch": 79.17,
"learning_rate": 6.887727272727272e-05,
"loss": 0.9707,
"step": 1900
},
{
"epoch": 79.17,
"eval_loss": 1.022377610206604,
"eval_runtime": 7.0064,
"eval_samples_per_second": 20.267,
"eval_steps_per_second": 2.569,
"eval_wer": 0.6713164777680907,
"step": 1900
},
{
"epoch": 83.33,
"learning_rate": 6.819545454545453e-05,
"loss": 0.917,
"step": 2000
},
{
"epoch": 83.33,
"eval_loss": 1.0382641553878784,
"eval_runtime": 6.7309,
"eval_samples_per_second": 21.097,
"eval_steps_per_second": 2.674,
"eval_wer": 0.6530078465562337,
"step": 2000
},
{
"epoch": 87.5,
"learning_rate": 6.751363636363636e-05,
"loss": 0.8574,
"step": 2100
},
{
"epoch": 87.5,
"eval_loss": 1.0632084608078003,
"eval_runtime": 7.3312,
"eval_samples_per_second": 19.369,
"eval_steps_per_second": 2.455,
"eval_wer": 0.6756756756756757,
"step": 2100
},
{
"epoch": 91.67,
"learning_rate": 6.683181818181818e-05,
"loss": 0.8021,
"step": 2200
},
{
"epoch": 91.67,
"eval_loss": 1.012629747390747,
"eval_runtime": 6.9334,
"eval_samples_per_second": 20.481,
"eval_steps_per_second": 2.596,
"eval_wer": 0.6442894507410637,
"step": 2200
},
{
"epoch": 95.83,
"learning_rate": 6.615e-05,
"loss": 0.7563,
"step": 2300
},
{
"epoch": 95.83,
"eval_loss": 1.0677976608276367,
"eval_runtime": 7.0411,
"eval_samples_per_second": 20.167,
"eval_steps_per_second": 2.556,
"eval_wer": 0.6713164777680907,
"step": 2300
},
{
"epoch": 100.0,
"learning_rate": 6.546818181818181e-05,
"loss": 0.709,
"step": 2400
},
{
"epoch": 100.0,
"eval_loss": 1.0755900144577026,
"eval_runtime": 6.7348,
"eval_samples_per_second": 21.085,
"eval_steps_per_second": 2.673,
"eval_wer": 0.6756756756756757,
"step": 2400
},
{
"epoch": 104.17,
"learning_rate": 6.478636363636363e-05,
"loss": 0.6775,
"step": 2500
},
{
"epoch": 104.17,
"eval_loss": 1.03966224193573,
"eval_runtime": 6.7793,
"eval_samples_per_second": 20.946,
"eval_steps_per_second": 2.655,
"eval_wer": 0.6913687881429816,
"step": 2500
},
{
"epoch": 108.33,
"learning_rate": 6.410454545454546e-05,
"loss": 0.6325,
"step": 2600
},
{
"epoch": 108.33,
"eval_loss": 1.055609107017517,
"eval_runtime": 6.9878,
"eval_samples_per_second": 20.321,
"eval_steps_per_second": 2.576,
"eval_wer": 0.6512641673931997,
"step": 2600
},
{
"epoch": 112.5,
"learning_rate": 6.342272727272726e-05,
"loss": 0.617,
"step": 2700
},
{
"epoch": 112.5,
"eval_loss": 1.1218976974487305,
"eval_runtime": 6.8023,
"eval_samples_per_second": 20.875,
"eval_steps_per_second": 2.646,
"eval_wer": 0.6922406277244987,
"step": 2700
},
{
"epoch": 116.67,
"learning_rate": 6.274090909090909e-05,
"loss": 0.5801,
"step": 2800
},
{
"epoch": 116.67,
"eval_loss": 1.112548589706421,
"eval_runtime": 6.7506,
"eval_samples_per_second": 21.035,
"eval_steps_per_second": 2.666,
"eval_wer": 0.6870095902353966,
"step": 2800
},
{
"epoch": 120.83,
"learning_rate": 6.205909090909091e-05,
"loss": 0.5367,
"step": 2900
},
{
"epoch": 120.83,
"eval_loss": 1.1397372484207153,
"eval_runtime": 6.876,
"eval_samples_per_second": 20.652,
"eval_steps_per_second": 2.618,
"eval_wer": 0.6564952048823016,
"step": 2900
},
{
"epoch": 125.0,
"learning_rate": 6.137727272727272e-05,
"loss": 0.5132,
"step": 3000
},
{
"epoch": 125.0,
"eval_loss": 1.1678012609481812,
"eval_runtime": 6.8221,
"eval_samples_per_second": 20.815,
"eval_steps_per_second": 2.638,
"eval_wer": 0.7000871839581517,
"step": 3000
},
{
"epoch": 129.17,
"learning_rate": 6.069545454545454e-05,
"loss": 0.4948,
"step": 3100
},
{
"epoch": 129.17,
"eval_loss": 1.1275851726531982,
"eval_runtime": 6.9051,
"eval_samples_per_second": 20.564,
"eval_steps_per_second": 2.607,
"eval_wer": 0.6643417611159547,
"step": 3100
},
{
"epoch": 133.33,
"learning_rate": 6.001363636363636e-05,
"loss": 0.457,
"step": 3200
},
{
"epoch": 133.33,
"eval_loss": 1.1464685201644897,
"eval_runtime": 6.6513,
"eval_samples_per_second": 21.349,
"eval_steps_per_second": 2.706,
"eval_wer": 0.6791630340017437,
"step": 3200
},
{
"epoch": 137.5,
"learning_rate": 5.933181818181817e-05,
"loss": 0.4538,
"step": 3300
},
{
"epoch": 137.5,
"eval_loss": 1.143470287322998,
"eval_runtime": 5.4611,
"eval_samples_per_second": 26.002,
"eval_steps_per_second": 3.296,
"eval_wer": 0.6809067131647777,
"step": 3300
},
{
"epoch": 141.67,
"learning_rate": 5.8649999999999996e-05,
"loss": 0.4227,
"step": 3400
},
{
"epoch": 141.67,
"eval_loss": 1.1456971168518066,
"eval_runtime": 6.7115,
"eval_samples_per_second": 21.158,
"eval_steps_per_second": 2.682,
"eval_wer": 0.6599825632083697,
"step": 3400
},
{
"epoch": 145.83,
"learning_rate": 5.796818181818181e-05,
"loss": 0.4083,
"step": 3500
},
{
"epoch": 145.83,
"eval_loss": 1.179282307624817,
"eval_runtime": 7.0949,
"eval_samples_per_second": 20.014,
"eval_steps_per_second": 2.537,
"eval_wer": 0.6652136006974717,
"step": 3500
},
{
"epoch": 150.0,
"learning_rate": 5.7286363636363635e-05,
"loss": 0.3965,
"step": 3600
},
{
"epoch": 150.0,
"eval_loss": 1.2435046434402466,
"eval_runtime": 7.0689,
"eval_samples_per_second": 20.088,
"eval_steps_per_second": 2.546,
"eval_wer": 0.6878814298169137,
"step": 3600
},
{
"epoch": 154.17,
"learning_rate": 5.6604545454545445e-05,
"loss": 0.382,
"step": 3700
},
{
"epoch": 154.17,
"eval_loss": 1.2228599786758423,
"eval_runtime": 7.0115,
"eval_samples_per_second": 20.252,
"eval_steps_per_second": 2.567,
"eval_wer": 0.6826503923278117,
"step": 3700
},
{
"epoch": 158.33,
"learning_rate": 5.592272727272727e-05,
"loss": 0.3452,
"step": 3800
},
{
"epoch": 158.33,
"eval_loss": 1.229727029800415,
"eval_runtime": 6.7478,
"eval_samples_per_second": 21.044,
"eval_steps_per_second": 2.668,
"eval_wer": 0.6878814298169137,
"step": 3800
},
{
"epoch": 162.5,
"learning_rate": 5.5240909090909085e-05,
"loss": 0.3434,
"step": 3900
},
{
"epoch": 162.5,
"eval_loss": 1.2349668741226196,
"eval_runtime": 7.0134,
"eval_samples_per_second": 20.247,
"eval_steps_per_second": 2.567,
"eval_wer": 0.6887532693984307,
"step": 3900
},
{
"epoch": 166.67,
"learning_rate": 5.455909090909091e-05,
"loss": 0.3276,
"step": 4000
},
{
"epoch": 166.67,
"eval_loss": 1.2186285257339478,
"eval_runtime": 6.7562,
"eval_samples_per_second": 21.018,
"eval_steps_per_second": 2.664,
"eval_wer": 0.6922406277244987,
"step": 4000
},
{
"epoch": 170.83,
"learning_rate": 5.387727272727272e-05,
"loss": 0.3052,
"step": 4100
},
{
"epoch": 170.83,
"eval_loss": 1.233783483505249,
"eval_runtime": 6.5925,
"eval_samples_per_second": 21.54,
"eval_steps_per_second": 2.73,
"eval_wer": 0.6870095902353966,
"step": 4100
},
{
"epoch": 175.0,
"learning_rate": 5.319545454545454e-05,
"loss": 0.3025,
"step": 4200
},
{
"epoch": 175.0,
"eval_loss": 1.272440791130066,
"eval_runtime": 6.8253,
"eval_samples_per_second": 20.805,
"eval_steps_per_second": 2.637,
"eval_wer": 0.7079337401918047,
"step": 4200
},
{
"epoch": 179.17,
"learning_rate": 5.251363636363636e-05,
"loss": 0.2916,
"step": 4300
},
{
"epoch": 179.17,
"eval_loss": 1.2757943868637085,
"eval_runtime": 6.6107,
"eval_samples_per_second": 21.48,
"eval_steps_per_second": 2.723,
"eval_wer": 0.6974716652136007,
"step": 4300
},
{
"epoch": 183.33,
"learning_rate": 5.183181818181818e-05,
"loss": 0.2709,
"step": 4400
},
{
"epoch": 183.33,
"eval_loss": 1.2726093530654907,
"eval_runtime": 6.7657,
"eval_samples_per_second": 20.988,
"eval_steps_per_second": 2.66,
"eval_wer": 0.6748038360941587,
"step": 4400
},
{
"epoch": 187.5,
"learning_rate": 5.1149999999999996e-05,
"loss": 0.2707,
"step": 4500
},
{
"epoch": 187.5,
"eval_loss": 1.264315128326416,
"eval_runtime": 6.6322,
"eval_samples_per_second": 21.411,
"eval_steps_per_second": 2.714,
"eval_wer": 0.6957279860505667,
"step": 4500
},
{
"epoch": 191.67,
"learning_rate": 5.046818181818181e-05,
"loss": 0.262,
"step": 4600
},
{
"epoch": 191.67,
"eval_loss": 1.321448564529419,
"eval_runtime": 6.7016,
"eval_samples_per_second": 21.189,
"eval_steps_per_second": 2.686,
"eval_wer": 0.7131647776809067,
"step": 4600
},
{
"epoch": 195.83,
"learning_rate": 4.9786363636363636e-05,
"loss": 0.2453,
"step": 4700
},
{
"epoch": 195.83,
"eval_loss": 1.2952940464019775,
"eval_runtime": 6.8362,
"eval_samples_per_second": 20.772,
"eval_steps_per_second": 2.633,
"eval_wer": 0.6861377506538797,
"step": 4700
},
{
"epoch": 200.0,
"learning_rate": 4.910454545454545e-05,
"loss": 0.248,
"step": 4800
},
{
"epoch": 200.0,
"eval_loss": 1.362170696258545,
"eval_runtime": 6.5267,
"eval_samples_per_second": 21.757,
"eval_steps_per_second": 2.758,
"eval_wer": 0.6774193548387096,
"step": 4800
},
{
"epoch": 204.17,
"learning_rate": 4.842272727272727e-05,
"loss": 0.2325,
"step": 4900
},
{
"epoch": 204.17,
"eval_loss": 1.3594108819961548,
"eval_runtime": 6.7956,
"eval_samples_per_second": 20.896,
"eval_steps_per_second": 2.649,
"eval_wer": 0.6835222319093287,
"step": 4900
},
{
"epoch": 208.33,
"learning_rate": 4.7740909090909085e-05,
"loss": 0.2124,
"step": 5000
},
{
"epoch": 208.33,
"eval_loss": 1.3367135524749756,
"eval_runtime": 6.8257,
"eval_samples_per_second": 20.804,
"eval_steps_per_second": 2.637,
"eval_wer": 0.6652136006974717,
"step": 5000
},
{
"epoch": 212.5,
"learning_rate": 4.706590909090908e-05,
"loss": 0.2253,
"step": 5100
},
{
"epoch": 212.5,
"eval_loss": 1.415678858757019,
"eval_runtime": 6.8381,
"eval_samples_per_second": 20.766,
"eval_steps_per_second": 2.632,
"eval_wer": 0.6878814298169137,
"step": 5100
},
{
"epoch": 216.67,
"learning_rate": 4.6384090909090906e-05,
"loss": 0.2059,
"step": 5200
},
{
"epoch": 216.67,
"eval_loss": 1.4359543323516846,
"eval_runtime": 6.8901,
"eval_samples_per_second": 20.609,
"eval_steps_per_second": 2.612,
"eval_wer": 0.7131647776809067,
"step": 5200
},
{
"epoch": 220.83,
"learning_rate": 4.570227272727272e-05,
"loss": 0.1951,
"step": 5300
},
{
"epoch": 220.83,
"eval_loss": 1.4606153964996338,
"eval_runtime": 6.7349,
"eval_samples_per_second": 21.084,
"eval_steps_per_second": 2.673,
"eval_wer": 0.7157802964254577,
"step": 5300
},
{
"epoch": 225.0,
"learning_rate": 4.5020454545454545e-05,
"loss": 0.1861,
"step": 5400
},
{
"epoch": 225.0,
"eval_loss": 1.469545841217041,
"eval_runtime": 6.7814,
"eval_samples_per_second": 20.94,
"eval_steps_per_second": 2.654,
"eval_wer": 0.7018308631211857,
"step": 5400
},
{
"epoch": 229.17,
"learning_rate": 4.4338636363636355e-05,
"loss": 0.1916,
"step": 5500
},
{
"epoch": 229.17,
"eval_loss": 1.4031471014022827,
"eval_runtime": 6.9155,
"eval_samples_per_second": 20.534,
"eval_steps_per_second": 2.603,
"eval_wer": 0.6739319965126417,
"step": 5500
},
{
"epoch": 233.33,
"learning_rate": 4.365681818181818e-05,
"loss": 0.1822,
"step": 5600
},
{
"epoch": 233.33,
"eval_loss": 1.4425878524780273,
"eval_runtime": 6.658,
"eval_samples_per_second": 21.328,
"eval_steps_per_second": 2.704,
"eval_wer": 0.6870095902353966,
"step": 5600
},
{
"epoch": 237.5,
"learning_rate": 4.2974999999999994e-05,
"loss": 0.1684,
"step": 5700
},
{
"epoch": 237.5,
"eval_loss": 1.4068546295166016,
"eval_runtime": 5.8333,
"eval_samples_per_second": 24.343,
"eval_steps_per_second": 3.086,
"eval_wer": 0.7053182214472538,
"step": 5700
},
{
"epoch": 241.67,
"learning_rate": 4.229318181818182e-05,
"loss": 0.1719,
"step": 5800
},
{
"epoch": 241.67,
"eval_loss": 1.4765515327453613,
"eval_runtime": 6.853,
"eval_samples_per_second": 20.721,
"eval_steps_per_second": 2.627,
"eval_wer": 0.6965998256320837,
"step": 5800
},
{
"epoch": 245.83,
"learning_rate": 4.161136363636363e-05,
"loss": 0.1569,
"step": 5900
},
{
"epoch": 245.83,
"eval_loss": 1.4509494304656982,
"eval_runtime": 7.6508,
"eval_samples_per_second": 18.56,
"eval_steps_per_second": 2.353,
"eval_wer": 0.6931124673060157,
"step": 5900
},
{
"epoch": 250.0,
"learning_rate": 4.092954545454545e-05,
"loss": 0.159,
"step": 6000
},
{
"epoch": 250.0,
"eval_loss": 1.4466707706451416,
"eval_runtime": 6.8247,
"eval_samples_per_second": 20.807,
"eval_steps_per_second": 2.637,
"eval_wer": 0.7096774193548387,
"step": 6000
},
{
"epoch": 254.17,
"learning_rate": 4.0247727272727273e-05,
"loss": 0.1476,
"step": 6100
},
{
"epoch": 254.17,
"eval_loss": 1.4616789817810059,
"eval_runtime": 6.6822,
"eval_samples_per_second": 21.25,
"eval_steps_per_second": 2.694,
"eval_wer": 0.6870095902353966,
"step": 6100
},
{
"epoch": 258.33,
"learning_rate": 3.956590909090909e-05,
"loss": 0.1497,
"step": 6200
},
{
"epoch": 258.33,
"eval_loss": 1.4459782838821411,
"eval_runtime": 7.0114,
"eval_samples_per_second": 20.253,
"eval_steps_per_second": 2.567,
"eval_wer": 0.6843940714908456,
"step": 6200
},
{
"epoch": 262.5,
"learning_rate": 3.8884090909090906e-05,
"loss": 0.1446,
"step": 6300
},
{
"epoch": 262.5,
"eval_loss": 1.5556844472885132,
"eval_runtime": 6.9912,
"eval_samples_per_second": 20.311,
"eval_steps_per_second": 2.575,
"eval_wer": 0.7088055797733217,
"step": 6300
},
{
"epoch": 266.67,
"learning_rate": 3.820227272727272e-05,
"loss": 0.1389,
"step": 6400
},
{
"epoch": 266.67,
"eval_loss": 1.4885756969451904,
"eval_runtime": 6.5474,
"eval_samples_per_second": 21.688,
"eval_steps_per_second": 2.749,
"eval_wer": 0.7140366172624237,
"step": 6400
},
{
"epoch": 270.83,
"learning_rate": 3.7520454545454546e-05,
"loss": 0.1331,
"step": 6500
},
{
"epoch": 270.83,
"eval_loss": 1.5526471138000488,
"eval_runtime": 6.5806,
"eval_samples_per_second": 21.579,
"eval_steps_per_second": 2.735,
"eval_wer": 0.7061900610287707,
"step": 6500
},
{
"epoch": 275.0,
"learning_rate": 3.683863636363636e-05,
"loss": 0.1344,
"step": 6600
},
{
"epoch": 275.0,
"eval_loss": 1.5419210195541382,
"eval_runtime": 6.8756,
"eval_samples_per_second": 20.653,
"eval_steps_per_second": 2.618,
"eval_wer": 0.7027027027027027,
"step": 6600
},
{
"epoch": 279.17,
"learning_rate": 3.615681818181818e-05,
"loss": 0.1198,
"step": 6700
},
{
"epoch": 279.17,
"eval_loss": 1.564064383506775,
"eval_runtime": 6.7671,
"eval_samples_per_second": 20.984,
"eval_steps_per_second": 2.66,
"eval_wer": 0.7000871839581517,
"step": 6700
},
{
"epoch": 283.33,
"learning_rate": 3.5474999999999995e-05,
"loss": 0.1242,
"step": 6800
},
{
"epoch": 283.33,
"eval_loss": 1.5390304327011108,
"eval_runtime": 6.7114,
"eval_samples_per_second": 21.158,
"eval_steps_per_second": 2.682,
"eval_wer": 0.7061900610287707,
"step": 6800
},
{
"epoch": 287.5,
"learning_rate": 3.479318181818181e-05,
"loss": 0.12,
"step": 6900
},
{
"epoch": 287.5,
"eval_loss": 1.540635585784912,
"eval_runtime": 6.7141,
"eval_samples_per_second": 21.149,
"eval_steps_per_second": 2.681,
"eval_wer": 0.7105492589363557,
"step": 6900
},
{
"epoch": 291.67,
"learning_rate": 3.4111363636363634e-05,
"loss": 0.1096,
"step": 7000
},
{
"epoch": 291.67,
"eval_loss": 1.5737296342849731,
"eval_runtime": 6.6657,
"eval_samples_per_second": 21.303,
"eval_steps_per_second": 2.7,
"eval_wer": 0.6974716652136007,
"step": 7000
},
{
"epoch": 295.83,
"learning_rate": 3.342954545454545e-05,
"loss": 0.113,
"step": 7100
},
{
"epoch": 295.83,
"eval_loss": 1.5494580268859863,
"eval_runtime": 6.8629,
"eval_samples_per_second": 20.691,
"eval_steps_per_second": 2.623,
"eval_wer": 0.7210113339145597,
"step": 7100
},
{
"epoch": 300.0,
"learning_rate": 3.2747727272727274e-05,
"loss": 0.108,
"step": 7200
},
{
"epoch": 300.0,
"eval_loss": 1.5374633073806763,
"eval_runtime": 7.1279,
"eval_samples_per_second": 19.922,
"eval_steps_per_second": 2.525,
"eval_wer": 0.6948561464690497,
"step": 7200
},
{
"epoch": 304.17,
"learning_rate": 3.207272727272727e-05,
"loss": 0.1072,
"step": 7300
},
{
"epoch": 304.17,
"eval_loss": 1.5337363481521606,
"eval_runtime": 7.1049,
"eval_samples_per_second": 19.986,
"eval_steps_per_second": 2.533,
"eval_wer": 0.7009590235396687,
"step": 7300
},
{
"epoch": 308.33,
"learning_rate": 3.139090909090909e-05,
"loss": 0.0979,
"step": 7400
},
{
"epoch": 308.33,
"eval_loss": 1.5927180051803589,
"eval_runtime": 6.6123,
"eval_samples_per_second": 21.475,
"eval_steps_per_second": 2.722,
"eval_wer": 0.7061900610287707,
"step": 7400
},
{
"epoch": 312.5,
"learning_rate": 3.0709090909090904e-05,
"loss": 0.0983,
"step": 7500
},
{
"epoch": 312.5,
"eval_loss": 1.5882079601287842,
"eval_runtime": 6.7832,
"eval_samples_per_second": 20.934,
"eval_steps_per_second": 2.654,
"eval_wer": 0.6843940714908456,
"step": 7500
},
{
"epoch": 316.67,
"learning_rate": 3.0027272727272724e-05,
"loss": 0.0977,
"step": 7600
},
{
"epoch": 316.67,
"eval_loss": 1.6189255714416504,
"eval_runtime": 6.9471,
"eval_samples_per_second": 20.44,
"eval_steps_per_second": 2.591,
"eval_wer": 0.6957279860505667,
"step": 7600
},
{
"epoch": 320.83,
"learning_rate": 2.9352272727272724e-05,
"loss": 0.0947,
"step": 7700
},
{
"epoch": 320.83,
"eval_loss": 1.5098397731781006,
"eval_runtime": 6.6241,
"eval_samples_per_second": 21.437,
"eval_steps_per_second": 2.717,
"eval_wer": 0.6817785527462947,
"step": 7700
},
{
"epoch": 325.0,
"learning_rate": 2.867045454545454e-05,
"loss": 0.0996,
"step": 7800
},
{
"epoch": 325.0,
"eval_loss": 1.6268917322158813,
"eval_runtime": 6.6505,
"eval_samples_per_second": 21.352,
"eval_steps_per_second": 2.707,
"eval_wer": 0.7253705318221447,
"step": 7800
},
{
"epoch": 329.17,
"learning_rate": 2.798863636363636e-05,
"loss": 0.0846,
"step": 7900
},
{
"epoch": 329.17,
"eval_loss": 1.6366547346115112,
"eval_runtime": 6.9003,
"eval_samples_per_second": 20.579,
"eval_steps_per_second": 2.609,
"eval_wer": 0.7088055797733217,
"step": 7900
},
{
"epoch": 333.33,
"learning_rate": 2.7306818181818177e-05,
"loss": 0.0953,
"step": 8000
},
{
"epoch": 333.33,
"eval_loss": 1.5965033769607544,
"eval_runtime": 7.0137,
"eval_samples_per_second": 20.246,
"eval_steps_per_second": 2.566,
"eval_wer": 0.7122929380993898,
"step": 8000
},
{
"epoch": 337.5,
"learning_rate": 2.6624999999999997e-05,
"loss": 0.0906,
"step": 8100
},
{
"epoch": 337.5,
"eval_loss": 1.6095737218856812,
"eval_runtime": 6.5521,
"eval_samples_per_second": 21.672,
"eval_steps_per_second": 2.747,
"eval_wer": 0.7122929380993898,
"step": 8100
},
{
"epoch": 341.67,
"learning_rate": 2.5943181818181813e-05,
"loss": 0.093,
"step": 8200
},
{
"epoch": 341.67,
"eval_loss": 1.5953401327133179,
"eval_runtime": 6.9512,
"eval_samples_per_second": 20.428,
"eval_steps_per_second": 2.589,
"eval_wer": 0.6983435047951178,
"step": 8200
},
{
"epoch": 345.83,
"learning_rate": 2.5261363636363633e-05,
"loss": 0.0784,
"step": 8300
},
{
"epoch": 345.83,
"eval_loss": 1.5884095430374146,
"eval_runtime": 6.8928,
"eval_samples_per_second": 20.601,
"eval_steps_per_second": 2.611,
"eval_wer": 0.6913687881429816,
"step": 8300
},
{
"epoch": 350.0,
"learning_rate": 2.457954545454545e-05,
"loss": 0.0769,
"step": 8400
},
{
"epoch": 350.0,
"eval_loss": 1.5793629884719849,
"eval_runtime": 6.8179,
"eval_samples_per_second": 20.828,
"eval_steps_per_second": 2.64,
"eval_wer": 0.6870095902353966,
"step": 8400
},
{
"epoch": 354.17,
"learning_rate": 2.3897727272727272e-05,
"loss": 0.0782,
"step": 8500
},
{
"epoch": 354.17,
"eval_loss": 1.6580848693847656,
"eval_runtime": 6.9462,
"eval_samples_per_second": 20.443,
"eval_steps_per_second": 2.591,
"eval_wer": 0.6817785527462947,
"step": 8500
},
{
"epoch": 358.33,
"learning_rate": 2.3215909090909092e-05,
"loss": 0.0764,
"step": 8600
},
{
"epoch": 358.33,
"eval_loss": 1.6554986238479614,
"eval_runtime": 7.0359,
"eval_samples_per_second": 20.182,
"eval_steps_per_second": 2.558,
"eval_wer": 0.7088055797733217,
"step": 8600
},
{
"epoch": 362.5,
"learning_rate": 2.253409090909091e-05,
"loss": 0.073,
"step": 8700
},
{
"epoch": 362.5,
"eval_loss": 1.6465544700622559,
"eval_runtime": 6.8477,
"eval_samples_per_second": 20.737,
"eval_steps_per_second": 2.629,
"eval_wer": 0.6931124673060157,
"step": 8700
},
{
"epoch": 366.67,
"learning_rate": 2.1852272727272725e-05,
"loss": 0.0703,
"step": 8800
},
{
"epoch": 366.67,
"eval_loss": 1.6614816188812256,
"eval_runtime": 7.0115,
"eval_samples_per_second": 20.252,
"eval_steps_per_second": 2.567,
"eval_wer": 0.7114210985178727,
"step": 8800
},
{
"epoch": 370.83,
"learning_rate": 2.1170454545454545e-05,
"loss": 0.0707,
"step": 8900
},
{
"epoch": 370.83,
"eval_loss": 1.6742826700210571,
"eval_runtime": 6.7568,
"eval_samples_per_second": 21.016,
"eval_steps_per_second": 2.664,
"eval_wer": 0.7079337401918047,
"step": 8900
},
{
"epoch": 375.0,
"learning_rate": 2.048863636363636e-05,
"loss": 0.0647,
"step": 9000
},
{
"epoch": 375.0,
"eval_loss": 1.6451914310455322,
"eval_runtime": 6.781,
"eval_samples_per_second": 20.941,
"eval_steps_per_second": 2.654,
"eval_wer": 0.7166521360069747,
"step": 9000
},
{
"epoch": 379.17,
"learning_rate": 1.980681818181818e-05,
"loss": 0.0614,
"step": 9100
},
{
"epoch": 379.17,
"eval_loss": 1.7081646919250488,
"eval_runtime": 6.6825,
"eval_samples_per_second": 21.25,
"eval_steps_per_second": 2.694,
"eval_wer": 0.7122929380993898,
"step": 9100
},
{
"epoch": 383.33,
"learning_rate": 1.9124999999999997e-05,
"loss": 0.0646,
"step": 9200
},
{
"epoch": 383.33,
"eval_loss": 1.684810757637024,
"eval_runtime": 6.7457,
"eval_samples_per_second": 21.05,
"eval_steps_per_second": 2.668,
"eval_wer": 0.7183958151700087,
"step": 9200
},
{
"epoch": 387.5,
"learning_rate": 1.8443181818181817e-05,
"loss": 0.0648,
"step": 9300
},
{
"epoch": 387.5,
"eval_loss": 1.65809166431427,
"eval_runtime": 6.5015,
"eval_samples_per_second": 21.841,
"eval_steps_per_second": 2.769,
"eval_wer": 0.7088055797733217,
"step": 9300
},
{
"epoch": 391.67,
"learning_rate": 1.7761363636363633e-05,
"loss": 0.0625,
"step": 9400
},
{
"epoch": 391.67,
"eval_loss": 1.7315229177474976,
"eval_runtime": 7.2222,
"eval_samples_per_second": 19.662,
"eval_steps_per_second": 2.492,
"eval_wer": 0.7340889276373147,
"step": 9400
},
{
"epoch": 395.83,
"learning_rate": 1.7079545454545453e-05,
"loss": 0.0637,
"step": 9500
},
{
"epoch": 395.83,
"eval_loss": 1.683098316192627,
"eval_runtime": 6.8952,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 2.611,
"eval_wer": 0.7027027027027027,
"step": 9500
},
{
"epoch": 400.0,
"learning_rate": 1.639772727272727e-05,
"loss": 0.0558,
"step": 9600
},
{
"epoch": 400.0,
"eval_loss": 1.7159340381622314,
"eval_runtime": 6.6968,
"eval_samples_per_second": 21.204,
"eval_steps_per_second": 2.688,
"eval_wer": 0.7279860505666957,
"step": 9600
},
{
"epoch": 404.17,
"learning_rate": 1.571590909090909e-05,
"loss": 0.0563,
"step": 9700
},
{
"epoch": 404.17,
"eval_loss": 1.7474530935287476,
"eval_runtime": 6.7706,
"eval_samples_per_second": 20.973,
"eval_steps_per_second": 2.659,
"eval_wer": 0.7157802964254577,
"step": 9700
},
{
"epoch": 408.33,
"learning_rate": 1.5034090909090908e-05,
"loss": 0.0568,
"step": 9800
},
{
"epoch": 408.33,
"eval_loss": 1.6776412725448608,
"eval_runtime": 6.9524,
"eval_samples_per_second": 20.425,
"eval_steps_per_second": 2.589,
"eval_wer": 0.6992153443766347,
"step": 9800
},
{
"epoch": 412.5,
"learning_rate": 1.4352272727272727e-05,
"loss": 0.0574,
"step": 9900
},
{
"epoch": 412.5,
"eval_loss": 1.7150009870529175,
"eval_runtime": 6.8865,
"eval_samples_per_second": 20.62,
"eval_steps_per_second": 2.614,
"eval_wer": 0.6983435047951178,
"step": 9900
},
{
"epoch": 416.67,
"learning_rate": 1.3670454545454545e-05,
"loss": 0.0561,
"step": 10000
},
{
"epoch": 416.67,
"eval_loss": 1.7315118312835693,
"eval_runtime": 6.8566,
"eval_samples_per_second": 20.71,
"eval_steps_per_second": 2.625,
"eval_wer": 0.7140366172624237,
"step": 10000
},
{
"epoch": 420.83,
"learning_rate": 1.2988636363636363e-05,
"loss": 0.0494,
"step": 10100
},
{
"epoch": 420.83,
"eval_loss": 1.6868910789489746,
"eval_runtime": 6.9481,
"eval_samples_per_second": 20.437,
"eval_steps_per_second": 2.591,
"eval_wer": 0.7218831734960767,
"step": 10100
},
{
"epoch": 425.0,
"learning_rate": 1.2306818181818182e-05,
"loss": 0.0495,
"step": 10200
},
{
"epoch": 425.0,
"eval_loss": 1.749950885772705,
"eval_runtime": 6.8281,
"eval_samples_per_second": 20.796,
"eval_steps_per_second": 2.636,
"eval_wer": 0.7262423714036618,
"step": 10200
},
{
"epoch": 429.17,
"learning_rate": 1.1625e-05,
"loss": 0.0542,
"step": 10300
},
{
"epoch": 429.17,
"eval_loss": 1.7298214435577393,
"eval_runtime": 6.7073,
"eval_samples_per_second": 21.171,
"eval_steps_per_second": 2.684,
"eval_wer": 0.7271142109851787,
"step": 10300
},
{
"epoch": 433.33,
"learning_rate": 1.0943181818181818e-05,
"loss": 0.0509,
"step": 10400
},
{
"epoch": 433.33,
"eval_loss": 1.7334100008010864,
"eval_runtime": 6.7752,
"eval_samples_per_second": 20.959,
"eval_steps_per_second": 2.657,
"eval_wer": 0.7262423714036618,
"step": 10400
},
{
"epoch": 437.5,
"learning_rate": 1.0268181818181817e-05,
"loss": 0.046,
"step": 10500
},
{
"epoch": 437.5,
"eval_loss": 1.7047654390335083,
"eval_runtime": 6.3102,
"eval_samples_per_second": 22.503,
"eval_steps_per_second": 2.853,
"eval_wer": 0.7192676547515258,
"step": 10500
},
{
"epoch": 441.67,
"learning_rate": 9.586363636363636e-06,
"loss": 0.0423,
"step": 10600
},
{
"epoch": 441.67,
"eval_loss": 1.716819167137146,
"eval_runtime": 7.0132,
"eval_samples_per_second": 20.247,
"eval_steps_per_second": 2.567,
"eval_wer": 0.7192676547515258,
"step": 10600
},
{
"epoch": 445.83,
"learning_rate": 8.904545454545453e-06,
"loss": 0.0477,
"step": 10700
},
{
"epoch": 445.83,
"eval_loss": 1.7387615442276,
"eval_runtime": 6.7039,
"eval_samples_per_second": 21.182,
"eval_steps_per_second": 2.685,
"eval_wer": 0.7210113339145597,
"step": 10700
},
{
"epoch": 450.0,
"learning_rate": 8.222727272727273e-06,
"loss": 0.0436,
"step": 10800
},
{
"epoch": 450.0,
"eval_loss": 1.7278592586517334,
"eval_runtime": 6.9747,
"eval_samples_per_second": 20.359,
"eval_steps_per_second": 2.581,
"eval_wer": 0.7166521360069747,
"step": 10800
},
{
"epoch": 454.17,
"learning_rate": 7.540909090909091e-06,
"loss": 0.0466,
"step": 10900
},
{
"epoch": 454.17,
"eval_loss": 1.696805715560913,
"eval_runtime": 6.8023,
"eval_samples_per_second": 20.875,
"eval_steps_per_second": 2.646,
"eval_wer": 0.7053182214472538,
"step": 10900
},
{
"epoch": 458.33,
"learning_rate": 6.859090909090909e-06,
"loss": 0.0424,
"step": 11000
},
{
"epoch": 458.33,
"eval_loss": 1.7237237691879272,
"eval_runtime": 7.027,
"eval_samples_per_second": 20.208,
"eval_steps_per_second": 2.562,
"eval_wer": 0.7183958151700087,
"step": 11000
},
{
"epoch": 462.5,
"learning_rate": 6.177272727272727e-06,
"loss": 0.0447,
"step": 11100
},
{
"epoch": 462.5,
"eval_loss": 1.721848726272583,
"eval_runtime": 6.7393,
"eval_samples_per_second": 21.07,
"eval_steps_per_second": 2.671,
"eval_wer": 0.7183958151700087,
"step": 11100
},
{
"epoch": 466.67,
"learning_rate": 5.495454545454545e-06,
"loss": 0.0455,
"step": 11200
},
{
"epoch": 466.67,
"eval_loss": 1.7505738735198975,
"eval_runtime": 6.8936,
"eval_samples_per_second": 20.599,
"eval_steps_per_second": 2.611,
"eval_wer": 0.7218831734960767,
"step": 11200
},
{
"epoch": 470.83,
"learning_rate": 4.813636363636364e-06,
"loss": 0.0446,
"step": 11300
},
{
"epoch": 470.83,
"eval_loss": 1.7541626691818237,
"eval_runtime": 6.9153,
"eval_samples_per_second": 20.534,
"eval_steps_per_second": 2.603,
"eval_wer": 0.7279860505666957,
"step": 11300
},
{
"epoch": 475.0,
"learning_rate": 4.131818181818182e-06,
"loss": 0.043,
"step": 11400
},
{
"epoch": 475.0,
"eval_loss": 1.750115156173706,
"eval_runtime": 6.8671,
"eval_samples_per_second": 20.678,
"eval_steps_per_second": 2.621,
"eval_wer": 0.7201394943330427,
"step": 11400
},
{
"epoch": 479.17,
"learning_rate": 3.4499999999999996e-06,
"loss": 0.0397,
"step": 11500
},
{
"epoch": 479.17,
"eval_loss": 1.783726692199707,
"eval_runtime": 6.759,
"eval_samples_per_second": 21.009,
"eval_steps_per_second": 2.663,
"eval_wer": 0.7244986922406277,
"step": 11500
},
{
"epoch": 483.33,
"learning_rate": 2.768181818181818e-06,
"loss": 0.0402,
"step": 11600
},
{
"epoch": 483.33,
"eval_loss": 1.7762095928192139,
"eval_runtime": 6.8013,
"eval_samples_per_second": 20.878,
"eval_steps_per_second": 2.647,
"eval_wer": 0.7175239755884917,
"step": 11600
},
{
"epoch": 487.5,
"learning_rate": 2.086363636363636e-06,
"loss": 0.039,
"step": 11700
},
{
"epoch": 487.5,
"eval_loss": 1.7771002054214478,
"eval_runtime": 6.9246,
"eval_samples_per_second": 20.507,
"eval_steps_per_second": 2.599,
"eval_wer": 0.7262423714036618,
"step": 11700
},
{
"epoch": 491.67,
"learning_rate": 1.4045454545454545e-06,
"loss": 0.0402,
"step": 11800
},
{
"epoch": 491.67,
"eval_loss": 1.7563738822937012,
"eval_runtime": 7.1476,
"eval_samples_per_second": 19.867,
"eval_steps_per_second": 2.518,
"eval_wer": 0.7218831734960767,
"step": 11800
},
{
"epoch": 495.83,
"learning_rate": 7.227272727272726e-07,
"loss": 0.0368,
"step": 11900
},
{
"epoch": 495.83,
"eval_loss": 1.7552615404129028,
"eval_runtime": 6.927,
"eval_samples_per_second": 20.499,
"eval_steps_per_second": 2.599,
"eval_wer": 0.7192676547515258,
"step": 11900
},
{
"epoch": 500.0,
"learning_rate": 4.090909090909091e-08,
"loss": 0.0395,
"step": 12000
},
{
"epoch": 500.0,
"eval_loss": 1.7564287185668945,
"eval_runtime": 6.7877,
"eval_samples_per_second": 20.92,
"eval_steps_per_second": 2.652,
"eval_wer": 0.7201394943330427,
"step": 12000
},
{
"epoch": 500.0,
"step": 12000,
"total_flos": 2.5005546169759453e+19,
"train_loss": 0.7251964689095814,
"train_runtime": 16734.3269,
"train_samples_per_second": 11.294,
"train_steps_per_second": 0.717
}
],
"max_steps": 12000,
"num_train_epochs": 500,
"total_flos": 2.5005546169759453e+19,
"trial_name": null,
"trial_params": null
}