{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 500.0,
"global_step": 12000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 7.35e-06, |
|
"loss": 18.5133, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_loss": 12.037576675415039, |
|
"eval_runtime": 6.8573, |
|
"eval_samples_per_second": 20.708, |
|
"eval_steps_per_second": 2.625, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 1.485e-05, |
|
"loss": 7.9152, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_loss": 6.240577220916748, |
|
"eval_runtime": 6.2762, |
|
"eval_samples_per_second": 22.625, |
|
"eval_steps_per_second": 2.868, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 2.2349999999999998e-05, |
|
"loss": 5.363, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_loss": 4.664888858795166, |
|
"eval_runtime": 7.3621, |
|
"eval_samples_per_second": 19.288, |
|
"eval_steps_per_second": 2.445, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 2.985e-05, |
|
"loss": 4.05, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"eval_loss": 3.5798044204711914, |
|
"eval_runtime": 6.7794, |
|
"eval_samples_per_second": 20.946, |
|
"eval_steps_per_second": 2.655, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 3.735e-05, |
|
"loss": 3.3467, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"eval_loss": 3.1930062770843506, |
|
"eval_runtime": 7.0411, |
|
"eval_samples_per_second": 20.167, |
|
"eval_steps_per_second": 2.556, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 4.484999999999999e-05, |
|
"loss": 3.1638, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 3.098419666290283, |
|
"eval_runtime": 6.9505, |
|
"eval_samples_per_second": 20.43, |
|
"eval_steps_per_second": 2.59, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"learning_rate": 5.234999999999999e-05, |
|
"loss": 3.043, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"eval_loss": 2.970651865005493, |
|
"eval_runtime": 6.7343, |
|
"eval_samples_per_second": 21.086, |
|
"eval_steps_per_second": 2.673, |
|
"eval_wer": 0.998256320836966, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 5.985e-05, |
|
"loss": 2.9566, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_loss": 2.9295754432678223, |
|
"eval_runtime": 6.7892, |
|
"eval_samples_per_second": 20.916, |
|
"eval_steps_per_second": 2.651, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"learning_rate": 6.735e-05, |
|
"loss": 2.8994, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"eval_loss": 2.8289785385131836, |
|
"eval_runtime": 6.3572, |
|
"eval_samples_per_second": 22.337, |
|
"eval_steps_per_second": 2.831, |
|
"eval_wer": 0.998256320836966, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 7.484999999999999e-05, |
|
"loss": 2.6469, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"eval_loss": 2.244255781173706, |
|
"eval_runtime": 7.0587, |
|
"eval_samples_per_second": 20.117, |
|
"eval_steps_per_second": 2.55, |
|
"eval_wer": 0.976460331299041, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 45.83, |
|
"learning_rate": 7.433181818181818e-05, |
|
"loss": 2.1557, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.83, |
|
"eval_loss": 1.5079460144042969, |
|
"eval_runtime": 6.593, |
|
"eval_samples_per_second": 21.538, |
|
"eval_steps_per_second": 2.73, |
|
"eval_wer": 0.8204010462074979, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 7.364999999999999e-05, |
|
"loss": 1.7524, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 1.2472423315048218, |
|
"eval_runtime": 6.884, |
|
"eval_samples_per_second": 20.628, |
|
"eval_steps_per_second": 2.615, |
|
"eval_wer": 0.7593722755013078, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 54.17, |
|
"learning_rate": 7.296818181818182e-05, |
|
"loss": 1.54, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 54.17, |
|
"eval_loss": 1.1611692905426025, |
|
"eval_runtime": 6.7498, |
|
"eval_samples_per_second": 21.038, |
|
"eval_steps_per_second": 2.667, |
|
"eval_wer": 0.7096774193548387, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"learning_rate": 7.228636363636363e-05, |
|
"loss": 1.3985, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"eval_loss": 1.1158100366592407, |
|
"eval_runtime": 6.9751, |
|
"eval_samples_per_second": 20.358, |
|
"eval_steps_per_second": 2.581, |
|
"eval_wer": 0.7358326068003488, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"learning_rate": 7.160454545454545e-05, |
|
"loss": 1.2869, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"eval_loss": 1.0453699827194214, |
|
"eval_runtime": 6.542, |
|
"eval_samples_per_second": 21.706, |
|
"eval_steps_per_second": 2.751, |
|
"eval_wer": 0.6931124673060157, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 7.092272727272727e-05, |
|
"loss": 1.1952, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"eval_loss": 1.0130492448806763, |
|
"eval_runtime": 7.0228, |
|
"eval_samples_per_second": 20.22, |
|
"eval_steps_per_second": 2.563, |
|
"eval_wer": 0.6852659110723627, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 70.83, |
|
"learning_rate": 7.024090909090908e-05, |
|
"loss": 1.1022, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 70.83, |
|
"eval_loss": 1.017554521560669, |
|
"eval_runtime": 6.8272, |
|
"eval_samples_per_second": 20.799, |
|
"eval_steps_per_second": 2.637, |
|
"eval_wer": 0.6965998256320837, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 6.95590909090909e-05, |
|
"loss": 1.0346, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_loss": 1.0053126811981201, |
|
"eval_runtime": 6.9741, |
|
"eval_samples_per_second": 20.361, |
|
"eval_steps_per_second": 2.581, |
|
"eval_wer": 0.6817785527462947, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 79.17, |
|
"learning_rate": 6.887727272727272e-05, |
|
"loss": 0.9707, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 79.17, |
|
"eval_loss": 1.022377610206604, |
|
"eval_runtime": 7.0064, |
|
"eval_samples_per_second": 20.267, |
|
"eval_steps_per_second": 2.569, |
|
"eval_wer": 0.6713164777680907, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 6.819545454545453e-05, |
|
"loss": 0.917, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"eval_loss": 1.0382641553878784, |
|
"eval_runtime": 6.7309, |
|
"eval_samples_per_second": 21.097, |
|
"eval_steps_per_second": 2.674, |
|
"eval_wer": 0.6530078465562337, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 87.5, |
|
"learning_rate": 6.751363636363636e-05, |
|
"loss": 0.8574, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 87.5, |
|
"eval_loss": 1.0632084608078003, |
|
"eval_runtime": 7.3312, |
|
"eval_samples_per_second": 19.369, |
|
"eval_steps_per_second": 2.455, |
|
"eval_wer": 0.6756756756756757, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"learning_rate": 6.683181818181818e-05, |
|
"loss": 0.8021, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"eval_loss": 1.012629747390747, |
|
"eval_runtime": 6.9334, |
|
"eval_samples_per_second": 20.481, |
|
"eval_steps_per_second": 2.596, |
|
"eval_wer": 0.6442894507410637, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 95.83, |
|
"learning_rate": 6.615e-05, |
|
"loss": 0.7563, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 95.83, |
|
"eval_loss": 1.0677976608276367, |
|
"eval_runtime": 7.0411, |
|
"eval_samples_per_second": 20.167, |
|
"eval_steps_per_second": 2.556, |
|
"eval_wer": 0.6713164777680907, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 6.546818181818181e-05, |
|
"loss": 0.709, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 1.0755900144577026, |
|
"eval_runtime": 6.7348, |
|
"eval_samples_per_second": 21.085, |
|
"eval_steps_per_second": 2.673, |
|
"eval_wer": 0.6756756756756757, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 104.17, |
|
"learning_rate": 6.478636363636363e-05, |
|
"loss": 0.6775, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 104.17, |
|
"eval_loss": 1.03966224193573, |
|
"eval_runtime": 6.7793, |
|
"eval_samples_per_second": 20.946, |
|
"eval_steps_per_second": 2.655, |
|
"eval_wer": 0.6913687881429816, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 108.33, |
|
"learning_rate": 6.410454545454546e-05, |
|
"loss": 0.6325, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 108.33, |
|
"eval_loss": 1.055609107017517, |
|
"eval_runtime": 6.9878, |
|
"eval_samples_per_second": 20.321, |
|
"eval_steps_per_second": 2.576, |
|
"eval_wer": 0.6512641673931997, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 112.5, |
|
"learning_rate": 6.342272727272726e-05, |
|
"loss": 0.617, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 112.5, |
|
"eval_loss": 1.1218976974487305, |
|
"eval_runtime": 6.8023, |
|
"eval_samples_per_second": 20.875, |
|
"eval_steps_per_second": 2.646, |
|
"eval_wer": 0.6922406277244987, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"learning_rate": 6.274090909090909e-05, |
|
"loss": 0.5801, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"eval_loss": 1.112548589706421, |
|
"eval_runtime": 6.7506, |
|
"eval_samples_per_second": 21.035, |
|
"eval_steps_per_second": 2.666, |
|
"eval_wer": 0.6870095902353966, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 120.83, |
|
"learning_rate": 6.205909090909091e-05, |
|
"loss": 0.5367, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 120.83, |
|
"eval_loss": 1.1397372484207153, |
|
"eval_runtime": 6.876, |
|
"eval_samples_per_second": 20.652, |
|
"eval_steps_per_second": 2.618, |
|
"eval_wer": 0.6564952048823016, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 6.137727272727272e-05, |
|
"loss": 0.5132, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_loss": 1.1678012609481812, |
|
"eval_runtime": 6.8221, |
|
"eval_samples_per_second": 20.815, |
|
"eval_steps_per_second": 2.638, |
|
"eval_wer": 0.7000871839581517, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 129.17, |
|
"learning_rate": 6.069545454545454e-05, |
|
"loss": 0.4948, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 129.17, |
|
"eval_loss": 1.1275851726531982, |
|
"eval_runtime": 6.9051, |
|
"eval_samples_per_second": 20.564, |
|
"eval_steps_per_second": 2.607, |
|
"eval_wer": 0.6643417611159547, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"learning_rate": 6.001363636363636e-05, |
|
"loss": 0.457, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"eval_loss": 1.1464685201644897, |
|
"eval_runtime": 6.6513, |
|
"eval_samples_per_second": 21.349, |
|
"eval_steps_per_second": 2.706, |
|
"eval_wer": 0.6791630340017437, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 137.5, |
|
"learning_rate": 5.933181818181817e-05, |
|
"loss": 0.4538, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 137.5, |
|
"eval_loss": 1.143470287322998, |
|
"eval_runtime": 5.4611, |
|
"eval_samples_per_second": 26.002, |
|
"eval_steps_per_second": 3.296, |
|
"eval_wer": 0.6809067131647777, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 141.67, |
|
"learning_rate": 5.8649999999999996e-05, |
|
"loss": 0.4227, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 141.67, |
|
"eval_loss": 1.1456971168518066, |
|
"eval_runtime": 6.7115, |
|
"eval_samples_per_second": 21.158, |
|
"eval_steps_per_second": 2.682, |
|
"eval_wer": 0.6599825632083697, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 145.83, |
|
"learning_rate": 5.796818181818181e-05, |
|
"loss": 0.4083, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 145.83, |
|
"eval_loss": 1.179282307624817, |
|
"eval_runtime": 7.0949, |
|
"eval_samples_per_second": 20.014, |
|
"eval_steps_per_second": 2.537, |
|
"eval_wer": 0.6652136006974717, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 5.7286363636363635e-05, |
|
"loss": 0.3965, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_loss": 1.2435046434402466, |
|
"eval_runtime": 7.0689, |
|
"eval_samples_per_second": 20.088, |
|
"eval_steps_per_second": 2.546, |
|
"eval_wer": 0.6878814298169137, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 154.17, |
|
"learning_rate": 5.6604545454545445e-05, |
|
"loss": 0.382, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 154.17, |
|
"eval_loss": 1.2228599786758423, |
|
"eval_runtime": 7.0115, |
|
"eval_samples_per_second": 20.252, |
|
"eval_steps_per_second": 2.567, |
|
"eval_wer": 0.6826503923278117, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 158.33, |
|
"learning_rate": 5.592272727272727e-05, |
|
"loss": 0.3452, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 158.33, |
|
"eval_loss": 1.229727029800415, |
|
"eval_runtime": 6.7478, |
|
"eval_samples_per_second": 21.044, |
|
"eval_steps_per_second": 2.668, |
|
"eval_wer": 0.6878814298169137, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 162.5, |
|
"learning_rate": 5.5240909090909085e-05, |
|
"loss": 0.3434, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 162.5, |
|
"eval_loss": 1.2349668741226196, |
|
"eval_runtime": 7.0134, |
|
"eval_samples_per_second": 20.247, |
|
"eval_steps_per_second": 2.567, |
|
"eval_wer": 0.6887532693984307, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"learning_rate": 5.455909090909091e-05, |
|
"loss": 0.3276, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"eval_loss": 1.2186285257339478, |
|
"eval_runtime": 6.7562, |
|
"eval_samples_per_second": 21.018, |
|
"eval_steps_per_second": 2.664, |
|
"eval_wer": 0.6922406277244987, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 170.83, |
|
"learning_rate": 5.387727272727272e-05, |
|
"loss": 0.3052, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 170.83, |
|
"eval_loss": 1.233783483505249, |
|
"eval_runtime": 6.5925, |
|
"eval_samples_per_second": 21.54, |
|
"eval_steps_per_second": 2.73, |
|
"eval_wer": 0.6870095902353966, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 5.319545454545454e-05, |
|
"loss": 0.3025, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_loss": 1.272440791130066, |
|
"eval_runtime": 6.8253, |
|
"eval_samples_per_second": 20.805, |
|
"eval_steps_per_second": 2.637, |
|
"eval_wer": 0.7079337401918047, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 179.17, |
|
"learning_rate": 5.251363636363636e-05, |
|
"loss": 0.2916, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 179.17, |
|
"eval_loss": 1.2757943868637085, |
|
"eval_runtime": 6.6107, |
|
"eval_samples_per_second": 21.48, |
|
"eval_steps_per_second": 2.723, |
|
"eval_wer": 0.6974716652136007, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 183.33, |
|
"learning_rate": 5.183181818181818e-05, |
|
"loss": 0.2709, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 183.33, |
|
"eval_loss": 1.2726093530654907, |
|
"eval_runtime": 6.7657, |
|
"eval_samples_per_second": 20.988, |
|
"eval_steps_per_second": 2.66, |
|
"eval_wer": 0.6748038360941587, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 187.5, |
|
"learning_rate": 5.1149999999999996e-05, |
|
"loss": 0.2707, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 187.5, |
|
"eval_loss": 1.264315128326416, |
|
"eval_runtime": 6.6322, |
|
"eval_samples_per_second": 21.411, |
|
"eval_steps_per_second": 2.714, |
|
"eval_wer": 0.6957279860505667, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 191.67, |
|
"learning_rate": 5.046818181818181e-05, |
|
"loss": 0.262, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 191.67, |
|
"eval_loss": 1.321448564529419, |
|
"eval_runtime": 6.7016, |
|
"eval_samples_per_second": 21.189, |
|
"eval_steps_per_second": 2.686, |
|
"eval_wer": 0.7131647776809067, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 195.83, |
|
"learning_rate": 4.9786363636363636e-05, |
|
"loss": 0.2453, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 195.83, |
|
"eval_loss": 1.2952940464019775, |
|
"eval_runtime": 6.8362, |
|
"eval_samples_per_second": 20.772, |
|
"eval_steps_per_second": 2.633, |
|
"eval_wer": 0.6861377506538797, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 4.910454545454545e-05, |
|
"loss": 0.248, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_loss": 1.362170696258545, |
|
"eval_runtime": 6.5267, |
|
"eval_samples_per_second": 21.757, |
|
"eval_steps_per_second": 2.758, |
|
"eval_wer": 0.6774193548387096, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 204.17, |
|
"learning_rate": 4.842272727272727e-05, |
|
"loss": 0.2325, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 204.17, |
|
"eval_loss": 1.3594108819961548, |
|
"eval_runtime": 6.7956, |
|
"eval_samples_per_second": 20.896, |
|
"eval_steps_per_second": 2.649, |
|
"eval_wer": 0.6835222319093287, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 208.33, |
|
"learning_rate": 4.7740909090909085e-05, |
|
"loss": 0.2124, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 208.33, |
|
"eval_loss": 1.3367135524749756, |
|
"eval_runtime": 6.8257, |
|
"eval_samples_per_second": 20.804, |
|
"eval_steps_per_second": 2.637, |
|
"eval_wer": 0.6652136006974717, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 212.5, |
|
"learning_rate": 4.706590909090908e-05, |
|
"loss": 0.2253, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 212.5, |
|
"eval_loss": 1.415678858757019, |
|
"eval_runtime": 6.8381, |
|
"eval_samples_per_second": 20.766, |
|
"eval_steps_per_second": 2.632, |
|
"eval_wer": 0.6878814298169137, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 216.67, |
|
"learning_rate": 4.6384090909090906e-05, |
|
"loss": 0.2059, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 216.67, |
|
"eval_loss": 1.4359543323516846, |
|
"eval_runtime": 6.8901, |
|
"eval_samples_per_second": 20.609, |
|
"eval_steps_per_second": 2.612, |
|
"eval_wer": 0.7131647776809067, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 220.83, |
|
"learning_rate": 4.570227272727272e-05, |
|
"loss": 0.1951, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 220.83, |
|
"eval_loss": 1.4606153964996338, |
|
"eval_runtime": 6.7349, |
|
"eval_samples_per_second": 21.084, |
|
"eval_steps_per_second": 2.673, |
|
"eval_wer": 0.7157802964254577, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"learning_rate": 4.5020454545454545e-05, |
|
"loss": 0.1861, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"eval_loss": 1.469545841217041, |
|
"eval_runtime": 6.7814, |
|
"eval_samples_per_second": 20.94, |
|
"eval_steps_per_second": 2.654, |
|
"eval_wer": 0.7018308631211857, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 229.17, |
|
"learning_rate": 4.4338636363636355e-05, |
|
"loss": 0.1916, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 229.17, |
|
"eval_loss": 1.4031471014022827, |
|
"eval_runtime": 6.9155, |
|
"eval_samples_per_second": 20.534, |
|
"eval_steps_per_second": 2.603, |
|
"eval_wer": 0.6739319965126417, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 233.33, |
|
"learning_rate": 4.365681818181818e-05, |
|
"loss": 0.1822, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 233.33, |
|
"eval_loss": 1.4425878524780273, |
|
"eval_runtime": 6.658, |
|
"eval_samples_per_second": 21.328, |
|
"eval_steps_per_second": 2.704, |
|
"eval_wer": 0.6870095902353966, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 237.5, |
|
"learning_rate": 4.2974999999999994e-05, |
|
"loss": 0.1684, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 237.5, |
|
"eval_loss": 1.4068546295166016, |
|
"eval_runtime": 5.8333, |
|
"eval_samples_per_second": 24.343, |
|
"eval_steps_per_second": 3.086, |
|
"eval_wer": 0.7053182214472538, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 241.67, |
|
"learning_rate": 4.229318181818182e-05, |
|
"loss": 0.1719, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 241.67, |
|
"eval_loss": 1.4765515327453613, |
|
"eval_runtime": 6.853, |
|
"eval_samples_per_second": 20.721, |
|
"eval_steps_per_second": 2.627, |
|
"eval_wer": 0.6965998256320837, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 245.83, |
|
"learning_rate": 4.161136363636363e-05, |
|
"loss": 0.1569, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 245.83, |
|
"eval_loss": 1.4509494304656982, |
|
"eval_runtime": 7.6508, |
|
"eval_samples_per_second": 18.56, |
|
"eval_steps_per_second": 2.353, |
|
"eval_wer": 0.6931124673060157, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 4.092954545454545e-05, |
|
"loss": 0.159, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"eval_loss": 1.4466707706451416, |
|
"eval_runtime": 6.8247, |
|
"eval_samples_per_second": 20.807, |
|
"eval_steps_per_second": 2.637, |
|
"eval_wer": 0.7096774193548387, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 254.17, |
|
"learning_rate": 4.0247727272727273e-05, |
|
"loss": 0.1476, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 254.17, |
|
"eval_loss": 1.4616789817810059, |
|
"eval_runtime": 6.6822, |
|
"eval_samples_per_second": 21.25, |
|
"eval_steps_per_second": 2.694, |
|
"eval_wer": 0.6870095902353966, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 258.33, |
|
"learning_rate": 3.956590909090909e-05, |
|
"loss": 0.1497, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 258.33, |
|
"eval_loss": 1.4459782838821411, |
|
"eval_runtime": 7.0114, |
|
"eval_samples_per_second": 20.253, |
|
"eval_steps_per_second": 2.567, |
|
"eval_wer": 0.6843940714908456, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 262.5, |
|
"learning_rate": 3.8884090909090906e-05, |
|
"loss": 0.1446, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 262.5, |
|
"eval_loss": 1.5556844472885132, |
|
"eval_runtime": 6.9912, |
|
"eval_samples_per_second": 20.311, |
|
"eval_steps_per_second": 2.575, |
|
"eval_wer": 0.7088055797733217, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 266.67, |
|
"learning_rate": 3.820227272727272e-05, |
|
"loss": 0.1389, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 266.67, |
|
"eval_loss": 1.4885756969451904, |
|
"eval_runtime": 6.5474, |
|
"eval_samples_per_second": 21.688, |
|
"eval_steps_per_second": 2.749, |
|
"eval_wer": 0.7140366172624237, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 270.83, |
|
"learning_rate": 3.7520454545454546e-05, |
|
"loss": 0.1331, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 270.83, |
|
"eval_loss": 1.5526471138000488, |
|
"eval_runtime": 6.5806, |
|
"eval_samples_per_second": 21.579, |
|
"eval_steps_per_second": 2.735, |
|
"eval_wer": 0.7061900610287707, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"learning_rate": 3.683863636363636e-05, |
|
"loss": 0.1344, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"eval_loss": 1.5419210195541382, |
|
"eval_runtime": 6.8756, |
|
"eval_samples_per_second": 20.653, |
|
"eval_steps_per_second": 2.618, |
|
"eval_wer": 0.7027027027027027, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 279.17, |
|
"learning_rate": 3.615681818181818e-05, |
|
"loss": 0.1198, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 279.17, |
|
"eval_loss": 1.564064383506775, |
|
"eval_runtime": 6.7671, |
|
"eval_samples_per_second": 20.984, |
|
"eval_steps_per_second": 2.66, |
|
"eval_wer": 0.7000871839581517, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 283.33, |
|
"learning_rate": 3.5474999999999995e-05, |
|
"loss": 0.1242, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 283.33, |
|
"eval_loss": 1.5390304327011108, |
|
"eval_runtime": 6.7114, |
|
"eval_samples_per_second": 21.158, |
|
"eval_steps_per_second": 2.682, |
|
"eval_wer": 0.7061900610287707, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 287.5, |
|
"learning_rate": 3.479318181818181e-05, |
|
"loss": 0.12, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 287.5, |
|
"eval_loss": 1.540635585784912, |
|
"eval_runtime": 6.7141, |
|
"eval_samples_per_second": 21.149, |
|
"eval_steps_per_second": 2.681, |
|
"eval_wer": 0.7105492589363557, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 291.67, |
|
"learning_rate": 3.4111363636363634e-05, |
|
"loss": 0.1096, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 291.67, |
|
"eval_loss": 1.5737296342849731, |
|
"eval_runtime": 6.6657, |
|
"eval_samples_per_second": 21.303, |
|
"eval_steps_per_second": 2.7, |
|
"eval_wer": 0.6974716652136007, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 295.83, |
|
"learning_rate": 3.342954545454545e-05, |
|
"loss": 0.113, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 295.83, |
|
"eval_loss": 1.5494580268859863, |
|
"eval_runtime": 6.8629, |
|
"eval_samples_per_second": 20.691, |
|
"eval_steps_per_second": 2.623, |
|
"eval_wer": 0.7210113339145597, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"learning_rate": 3.2747727272727274e-05, |
|
"loss": 0.108, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_loss": 1.5374633073806763, |
|
"eval_runtime": 7.1279, |
|
"eval_samples_per_second": 19.922, |
|
"eval_steps_per_second": 2.525, |
|
"eval_wer": 0.6948561464690497, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 304.17, |
|
"learning_rate": 3.207272727272727e-05, |
|
"loss": 0.1072, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 304.17, |
|
"eval_loss": 1.5337363481521606, |
|
"eval_runtime": 7.1049, |
|
"eval_samples_per_second": 19.986, |
|
"eval_steps_per_second": 2.533, |
|
"eval_wer": 0.7009590235396687, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 308.33, |
|
"learning_rate": 3.139090909090909e-05, |
|
"loss": 0.0979, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 308.33, |
|
"eval_loss": 1.5927180051803589, |
|
"eval_runtime": 6.6123, |
|
"eval_samples_per_second": 21.475, |
|
"eval_steps_per_second": 2.722, |
|
"eval_wer": 0.7061900610287707, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 312.5, |
|
"learning_rate": 3.0709090909090904e-05, |
|
"loss": 0.0983, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 312.5, |
|
"eval_loss": 1.5882079601287842, |
|
"eval_runtime": 6.7832, |
|
"eval_samples_per_second": 20.934, |
|
"eval_steps_per_second": 2.654, |
|
"eval_wer": 0.6843940714908456, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 316.67, |
|
"learning_rate": 3.0027272727272724e-05, |
|
"loss": 0.0977, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 316.67, |
|
"eval_loss": 1.6189255714416504, |
|
"eval_runtime": 6.9471, |
|
"eval_samples_per_second": 20.44, |
|
"eval_steps_per_second": 2.591, |
|
"eval_wer": 0.6957279860505667, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 320.83, |
|
"learning_rate": 2.9352272727272724e-05, |
|
"loss": 0.0947, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 320.83, |
|
"eval_loss": 1.5098397731781006, |
|
"eval_runtime": 6.6241, |
|
"eval_samples_per_second": 21.437, |
|
"eval_steps_per_second": 2.717, |
|
"eval_wer": 0.6817785527462947, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"learning_rate": 2.867045454545454e-05, |
|
"loss": 0.0996, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"eval_loss": 1.6268917322158813, |
|
"eval_runtime": 6.6505, |
|
"eval_samples_per_second": 21.352, |
|
"eval_steps_per_second": 2.707, |
|
"eval_wer": 0.7253705318221447, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 329.17, |
|
"learning_rate": 2.798863636363636e-05, |
|
"loss": 0.0846, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 329.17, |
|
"eval_loss": 1.6366547346115112, |
|
"eval_runtime": 6.9003, |
|
"eval_samples_per_second": 20.579, |
|
"eval_steps_per_second": 2.609, |
|
"eval_wer": 0.7088055797733217, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 333.33, |
|
"learning_rate": 2.7306818181818177e-05, |
|
"loss": 0.0953, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 333.33, |
|
"eval_loss": 1.5965033769607544, |
|
"eval_runtime": 7.0137, |
|
"eval_samples_per_second": 20.246, |
|
"eval_steps_per_second": 2.566, |
|
"eval_wer": 0.7122929380993898, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 337.5, |
|
"learning_rate": 2.6624999999999997e-05, |
|
"loss": 0.0906, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 337.5, |
|
"eval_loss": 1.6095737218856812, |
|
"eval_runtime": 6.5521, |
|
"eval_samples_per_second": 21.672, |
|
"eval_steps_per_second": 2.747, |
|
"eval_wer": 0.7122929380993898, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 341.67, |
|
"learning_rate": 2.5943181818181813e-05, |
|
"loss": 0.093, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 341.67, |
|
"eval_loss": 1.5953401327133179, |
|
"eval_runtime": 6.9512, |
|
"eval_samples_per_second": 20.428, |
|
"eval_steps_per_second": 2.589, |
|
"eval_wer": 0.6983435047951178, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 345.83, |
|
"learning_rate": 2.5261363636363633e-05, |
|
"loss": 0.0784, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 345.83, |
|
"eval_loss": 1.5884095430374146, |
|
"eval_runtime": 6.8928, |
|
"eval_samples_per_second": 20.601, |
|
"eval_steps_per_second": 2.611, |
|
"eval_wer": 0.6913687881429816, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"learning_rate": 2.457954545454545e-05, |
|
"loss": 0.0769, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"eval_loss": 1.5793629884719849, |
|
"eval_runtime": 6.8179, |
|
"eval_samples_per_second": 20.828, |
|
"eval_steps_per_second": 2.64, |
|
"eval_wer": 0.6870095902353966, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 354.17, |
|
"learning_rate": 2.3897727272727272e-05, |
|
"loss": 0.0782, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 354.17, |
|
"eval_loss": 1.6580848693847656, |
|
"eval_runtime": 6.9462, |
|
"eval_samples_per_second": 20.443, |
|
"eval_steps_per_second": 2.591, |
|
"eval_wer": 0.6817785527462947, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 358.33, |
|
"learning_rate": 2.3215909090909092e-05, |
|
"loss": 0.0764, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 358.33, |
|
"eval_loss": 1.6554986238479614, |
|
"eval_runtime": 7.0359, |
|
"eval_samples_per_second": 20.182, |
|
"eval_steps_per_second": 2.558, |
|
"eval_wer": 0.7088055797733217, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 362.5, |
|
"learning_rate": 2.253409090909091e-05, |
|
"loss": 0.073, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 362.5, |
|
"eval_loss": 1.6465544700622559, |
|
"eval_runtime": 6.8477, |
|
"eval_samples_per_second": 20.737, |
|
"eval_steps_per_second": 2.629, |
|
"eval_wer": 0.6931124673060157, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 366.67, |
|
"learning_rate": 2.1852272727272725e-05, |
|
"loss": 0.0703, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 366.67, |
|
"eval_loss": 1.6614816188812256, |
|
"eval_runtime": 7.0115, |
|
"eval_samples_per_second": 20.252, |
|
"eval_steps_per_second": 2.567, |
|
"eval_wer": 0.7114210985178727, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 370.83, |
|
"learning_rate": 2.1170454545454545e-05, |
|
"loss": 0.0707, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 370.83, |
|
"eval_loss": 1.6742826700210571, |
|
"eval_runtime": 6.7568, |
|
"eval_samples_per_second": 21.016, |
|
"eval_steps_per_second": 2.664, |
|
"eval_wer": 0.7079337401918047, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 375.0, |
|
"learning_rate": 2.048863636363636e-05, |
|
"loss": 0.0647, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 375.0, |
|
"eval_loss": 1.6451914310455322, |
|
"eval_runtime": 6.781, |
|
"eval_samples_per_second": 20.941, |
|
"eval_steps_per_second": 2.654, |
|
"eval_wer": 0.7166521360069747, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 379.17, |
|
"learning_rate": 1.980681818181818e-05, |
|
"loss": 0.0614, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 379.17, |
|
"eval_loss": 1.7081646919250488, |
|
"eval_runtime": 6.6825, |
|
"eval_samples_per_second": 21.25, |
|
"eval_steps_per_second": 2.694, |
|
"eval_wer": 0.7122929380993898, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 383.33, |
|
"learning_rate": 1.9124999999999997e-05, |
|
"loss": 0.0646, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 383.33, |
|
"eval_loss": 1.684810757637024, |
|
"eval_runtime": 6.7457, |
|
"eval_samples_per_second": 21.05, |
|
"eval_steps_per_second": 2.668, |
|
"eval_wer": 0.7183958151700087, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 387.5, |
|
"learning_rate": 1.8443181818181817e-05, |
|
"loss": 0.0648, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 387.5, |
|
"eval_loss": 1.65809166431427, |
|
"eval_runtime": 6.5015, |
|
"eval_samples_per_second": 21.841, |
|
"eval_steps_per_second": 2.769, |
|
"eval_wer": 0.7088055797733217, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 391.67, |
|
"learning_rate": 1.7761363636363633e-05, |
|
"loss": 0.0625, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 391.67, |
|
"eval_loss": 1.7315229177474976, |
|
"eval_runtime": 7.2222, |
|
"eval_samples_per_second": 19.662, |
|
"eval_steps_per_second": 2.492, |
|
"eval_wer": 0.7340889276373147, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 395.83, |
|
"learning_rate": 1.7079545454545453e-05, |
|
"loss": 0.0637, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 395.83, |
|
"eval_loss": 1.683098316192627, |
|
"eval_runtime": 6.8952, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 2.611, |
|
"eval_wer": 0.7027027027027027, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"learning_rate": 1.639772727272727e-05, |
|
"loss": 0.0558, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_loss": 1.7159340381622314, |
|
"eval_runtime": 6.6968, |
|
"eval_samples_per_second": 21.204, |
|
"eval_steps_per_second": 2.688, |
|
"eval_wer": 0.7279860505666957, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 404.17, |
|
"learning_rate": 1.571590909090909e-05, |
|
"loss": 0.0563, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 404.17, |
|
"eval_loss": 1.7474530935287476, |
|
"eval_runtime": 6.7706, |
|
"eval_samples_per_second": 20.973, |
|
"eval_steps_per_second": 2.659, |
|
"eval_wer": 0.7157802964254577, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 408.33, |
|
"learning_rate": 1.5034090909090908e-05, |
|
"loss": 0.0568, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 408.33, |
|
"eval_loss": 1.6776412725448608, |
|
"eval_runtime": 6.9524, |
|
"eval_samples_per_second": 20.425, |
|
"eval_steps_per_second": 2.589, |
|
"eval_wer": 0.6992153443766347, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 412.5, |
|
"learning_rate": 1.4352272727272727e-05, |
|
"loss": 0.0574, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 412.5, |
|
"eval_loss": 1.7150009870529175, |
|
"eval_runtime": 6.8865, |
|
"eval_samples_per_second": 20.62, |
|
"eval_steps_per_second": 2.614, |
|
"eval_wer": 0.6983435047951178, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 416.67, |
|
"learning_rate": 1.3670454545454545e-05, |
|
"loss": 0.0561, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 416.67, |
|
"eval_loss": 1.7315118312835693, |
|
"eval_runtime": 6.8566, |
|
"eval_samples_per_second": 20.71, |
|
"eval_steps_per_second": 2.625, |
|
"eval_wer": 0.7140366172624237, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 420.83, |
|
"learning_rate": 1.2988636363636363e-05, |
|
"loss": 0.0494, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 420.83, |
|
"eval_loss": 1.6868910789489746, |
|
"eval_runtime": 6.9481, |
|
"eval_samples_per_second": 20.437, |
|
"eval_steps_per_second": 2.591, |
|
"eval_wer": 0.7218831734960767, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 425.0, |
|
"learning_rate": 1.2306818181818182e-05, |
|
"loss": 0.0495, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 425.0, |
|
"eval_loss": 1.749950885772705, |
|
"eval_runtime": 6.8281, |
|
"eval_samples_per_second": 20.796, |
|
"eval_steps_per_second": 2.636, |
|
"eval_wer": 0.7262423714036618, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 429.17, |
|
"learning_rate": 1.1625e-05, |
|
"loss": 0.0542, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 429.17, |
|
"eval_loss": 1.7298214435577393, |
|
"eval_runtime": 6.7073, |
|
"eval_samples_per_second": 21.171, |
|
"eval_steps_per_second": 2.684, |
|
"eval_wer": 0.7271142109851787, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 433.33, |
|
"learning_rate": 1.0943181818181818e-05, |
|
"loss": 0.0509, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 433.33, |
|
"eval_loss": 1.7334100008010864, |
|
"eval_runtime": 6.7752, |
|
"eval_samples_per_second": 20.959, |
|
"eval_steps_per_second": 2.657, |
|
"eval_wer": 0.7262423714036618, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 437.5, |
|
"learning_rate": 1.0268181818181817e-05, |
|
"loss": 0.046, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 437.5, |
|
"eval_loss": 1.7047654390335083, |
|
"eval_runtime": 6.3102, |
|
"eval_samples_per_second": 22.503, |
|
"eval_steps_per_second": 2.853, |
|
"eval_wer": 0.7192676547515258, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 441.67, |
|
"learning_rate": 9.586363636363636e-06, |
|
"loss": 0.0423, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 441.67, |
|
"eval_loss": 1.716819167137146, |
|
"eval_runtime": 7.0132, |
|
"eval_samples_per_second": 20.247, |
|
"eval_steps_per_second": 2.567, |
|
"eval_wer": 0.7192676547515258, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 445.83, |
|
"learning_rate": 8.904545454545453e-06, |
|
"loss": 0.0477, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 445.83, |
|
"eval_loss": 1.7387615442276, |
|
"eval_runtime": 6.7039, |
|
"eval_samples_per_second": 21.182, |
|
"eval_steps_per_second": 2.685, |
|
"eval_wer": 0.7210113339145597, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 450.0, |
|
"learning_rate": 8.222727272727273e-06, |
|
"loss": 0.0436, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 450.0, |
|
"eval_loss": 1.7278592586517334, |
|
"eval_runtime": 6.9747, |
|
"eval_samples_per_second": 20.359, |
|
"eval_steps_per_second": 2.581, |
|
"eval_wer": 0.7166521360069747, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 454.17, |
|
"learning_rate": 7.540909090909091e-06, |
|
"loss": 0.0466, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 454.17, |
|
"eval_loss": 1.696805715560913, |
|
"eval_runtime": 6.8023, |
|
"eval_samples_per_second": 20.875, |
|
"eval_steps_per_second": 2.646, |
|
"eval_wer": 0.7053182214472538, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 458.33, |
|
"learning_rate": 6.859090909090909e-06, |
|
"loss": 0.0424, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 458.33, |
|
"eval_loss": 1.7237237691879272, |
|
"eval_runtime": 7.027, |
|
"eval_samples_per_second": 20.208, |
|
"eval_steps_per_second": 2.562, |
|
"eval_wer": 0.7183958151700087, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 462.5, |
|
"learning_rate": 6.177272727272727e-06, |
|
"loss": 0.0447, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 462.5, |
|
"eval_loss": 1.721848726272583, |
|
"eval_runtime": 6.7393, |
|
"eval_samples_per_second": 21.07, |
|
"eval_steps_per_second": 2.671, |
|
"eval_wer": 0.7183958151700087, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 466.67, |
|
"learning_rate": 5.495454545454545e-06, |
|
"loss": 0.0455, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 466.67, |
|
"eval_loss": 1.7505738735198975, |
|
"eval_runtime": 6.8936, |
|
"eval_samples_per_second": 20.599, |
|
"eval_steps_per_second": 2.611, |
|
"eval_wer": 0.7218831734960767, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 470.83, |
|
"learning_rate": 4.813636363636364e-06, |
|
"loss": 0.0446, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 470.83, |
|
"eval_loss": 1.7541626691818237, |
|
"eval_runtime": 6.9153, |
|
"eval_samples_per_second": 20.534, |
|
"eval_steps_per_second": 2.603, |
|
"eval_wer": 0.7279860505666957, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 475.0, |
|
"learning_rate": 4.131818181818182e-06, |
|
"loss": 0.043, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 475.0, |
|
"eval_loss": 1.750115156173706, |
|
"eval_runtime": 6.8671, |
|
"eval_samples_per_second": 20.678, |
|
"eval_steps_per_second": 2.621, |
|
"eval_wer": 0.7201394943330427, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 479.17, |
|
"learning_rate": 3.4499999999999996e-06, |
|
"loss": 0.0397, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 479.17, |
|
"eval_loss": 1.783726692199707, |
|
"eval_runtime": 6.759, |
|
"eval_samples_per_second": 21.009, |
|
"eval_steps_per_second": 2.663, |
|
"eval_wer": 0.7244986922406277, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 483.33, |
|
"learning_rate": 2.768181818181818e-06, |
|
"loss": 0.0402, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 483.33, |
|
"eval_loss": 1.7762095928192139, |
|
"eval_runtime": 6.8013, |
|
"eval_samples_per_second": 20.878, |
|
"eval_steps_per_second": 2.647, |
|
"eval_wer": 0.7175239755884917, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 487.5, |
|
"learning_rate": 2.086363636363636e-06, |
|
"loss": 0.039, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 487.5, |
|
"eval_loss": 1.7771002054214478, |
|
"eval_runtime": 6.9246, |
|
"eval_samples_per_second": 20.507, |
|
"eval_steps_per_second": 2.599, |
|
"eval_wer": 0.7262423714036618, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 491.67, |
|
"learning_rate": 1.4045454545454545e-06, |
|
"loss": 0.0402, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 491.67, |
|
"eval_loss": 1.7563738822937012, |
|
"eval_runtime": 7.1476, |
|
"eval_samples_per_second": 19.867, |
|
"eval_steps_per_second": 2.518, |
|
"eval_wer": 0.7218831734960767, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 495.83, |
|
"learning_rate": 7.227272727272726e-07, |
|
"loss": 0.0368, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 495.83, |
|
"eval_loss": 1.7552615404129028, |
|
"eval_runtime": 6.927, |
|
"eval_samples_per_second": 20.499, |
|
"eval_steps_per_second": 2.599, |
|
"eval_wer": 0.7192676547515258, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"learning_rate": 4.090909090909091e-08, |
|
"loss": 0.0395, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"eval_loss": 1.7564287185668945, |
|
"eval_runtime": 6.7877, |
|
"eval_samples_per_second": 20.92, |
|
"eval_steps_per_second": 2.652, |
|
"eval_wer": 0.7201394943330427, |
|
"step": 12000 |
|
}, |
|
{
"epoch": 500.0,
"step": 12000,
"total_flos": 2.5005546169759453e+19,
"train_loss": 0.7251964689095814,
"train_runtime": 16734.3269,
"train_samples_per_second": 11.294,
"train_steps_per_second": 0.717
}
],
"max_steps": 12000,
"num_train_epochs": 500,
"total_flos": 2.5005546169759453e+19,
"trial_name": null,
"trial_params": null
}