{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "global_step": 11300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.88,
      "learning_rate": 5.82e-05,
      "loss": 10.446,
      "step": 100
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.0001182,
      "loss": 3.652,
      "step": 200
    },
    {
      "epoch": 2.65,
      "learning_rate": 0.00017819999999999997,
      "loss": 3.2102,
      "step": 300
    },
    {
      "epoch": 3.54,
      "learning_rate": 0.0002382,
      "loss": 3.1322,
      "step": 400
    },
    {
      "epoch": 4.42,
      "learning_rate": 0.0002982,
      "loss": 3.0798,
      "step": 500
    },
    {
      "epoch": 4.42,
      "eval_loss": 3.001023054122925,
      "eval_runtime": 56.1867,
      "eval_samples_per_second": 30.31,
      "eval_steps_per_second": 0.961,
      "eval_wer": 1.0011981566820276,
      "step": 500
    },
    {
      "epoch": 5.31,
      "learning_rate": 0.00029730555555555554,
      "loss": 2.5544,
      "step": 600
    },
    {
      "epoch": 6.19,
      "learning_rate": 0.0002945277777777777,
      "loss": 1.8568,
      "step": 700
    },
    {
      "epoch": 7.08,
      "learning_rate": 0.00029174999999999996,
      "loss": 1.6575,
      "step": 800
    },
    {
      "epoch": 7.96,
      "learning_rate": 0.0002889722222222222,
      "loss": 1.5125,
      "step": 900
    },
    {
      "epoch": 8.85,
      "learning_rate": 0.0002861944444444444,
      "loss": 1.4336,
      "step": 1000
    },
    {
      "epoch": 8.85,
      "eval_loss": 0.8481376767158508,
      "eval_runtime": 54.9915,
      "eval_samples_per_second": 30.968,
      "eval_steps_per_second": 0.982,
      "eval_wer": 0.6910599078341014,
      "step": 1000
    },
    {
      "epoch": 9.73,
      "learning_rate": 0.0002834166666666666,
      "loss": 1.3709,
      "step": 1100
    },
    {
      "epoch": 10.62,
      "learning_rate": 0.00028063888888888886,
      "loss": 1.3058,
      "step": 1200
    },
    {
      "epoch": 11.5,
      "learning_rate": 0.0002778611111111111,
      "loss": 1.2604,
      "step": 1300
    },
    {
      "epoch": 12.39,
      "learning_rate": 0.0002750833333333333,
      "loss": 1.2065,
      "step": 1400
    },
    {
      "epoch": 13.27,
      "learning_rate": 0.00027230555555555553,
      "loss": 1.2062,
      "step": 1500
    },
    {
      "epoch": 13.27,
      "eval_loss": 0.7312180995941162,
      "eval_runtime": 55.5977,
      "eval_samples_per_second": 30.631,
      "eval_steps_per_second": 0.971,
      "eval_wer": 0.6332718894009216,
      "step": 1500
    },
    {
      "epoch": 14.16,
      "learning_rate": 0.00026952777777777777,
      "loss": 1.1712,
      "step": 1600
    },
    {
      "epoch": 15.04,
      "learning_rate": 0.00026674999999999995,
      "loss": 1.1348,
      "step": 1700
    },
    {
      "epoch": 15.93,
      "learning_rate": 0.0002639722222222222,
      "loss": 1.1077,
      "step": 1800
    },
    {
      "epoch": 16.81,
      "learning_rate": 0.00026119444444444443,
      "loss": 1.0821,
      "step": 1900
    },
    {
      "epoch": 17.7,
      "learning_rate": 0.0002584166666666666,
      "loss": 1.0481,
      "step": 2000
    },
    {
      "epoch": 17.7,
      "eval_loss": 0.6849815845489502,
      "eval_runtime": 54.2403,
      "eval_samples_per_second": 31.397,
      "eval_steps_per_second": 0.996,
      "eval_wer": 0.5358525345622119,
      "step": 2000
    },
    {
      "epoch": 18.58,
      "learning_rate": 0.0002556388888888889,
      "loss": 1.0495,
      "step": 2100
    },
    {
      "epoch": 19.47,
      "learning_rate": 0.0002528611111111111,
      "loss": 1.0119,
      "step": 2200
    },
    {
      "epoch": 20.35,
      "learning_rate": 0.0002500833333333333,
      "loss": 1.0156,
      "step": 2300
    },
    {
      "epoch": 21.24,
      "learning_rate": 0.00024730555555555557,
      "loss": 0.9916,
      "step": 2400
    },
    {
      "epoch": 22.12,
      "learning_rate": 0.00024452777777777776,
      "loss": 0.9837,
      "step": 2500
    },
    {
      "epoch": 22.12,
      "eval_loss": 0.6336787343025208,
      "eval_runtime": 55.0498,
      "eval_samples_per_second": 30.936,
      "eval_steps_per_second": 0.981,
      "eval_wer": 0.5316129032258065,
      "step": 2500
    },
    {
      "epoch": 23.01,
      "learning_rate": 0.00024174999999999997,
      "loss": 0.9558,
      "step": 2600
    },
    {
      "epoch": 23.89,
      "learning_rate": 0.0002389722222222222,
      "loss": 0.9523,
      "step": 2700
    },
    {
      "epoch": 24.78,
      "learning_rate": 0.00023619444444444442,
      "loss": 0.946,
      "step": 2800
    },
    {
      "epoch": 25.66,
      "learning_rate": 0.00023341666666666663,
      "loss": 0.909,
      "step": 2900
    },
    {
      "epoch": 26.55,
      "learning_rate": 0.00023063888888888887,
      "loss": 0.9108,
      "step": 3000
    },
    {
      "epoch": 26.55,
      "eval_loss": 0.6257887482643127,
      "eval_runtime": 55.6823,
      "eval_samples_per_second": 30.584,
      "eval_steps_per_second": 0.97,
      "eval_wer": 0.507926267281106,
      "step": 3000
    },
    {
      "epoch": 27.43,
      "learning_rate": 0.00022786111111111108,
      "loss": 0.9005,
      "step": 3100
    },
    {
      "epoch": 28.32,
      "learning_rate": 0.00022508333333333332,
      "loss": 0.9069,
      "step": 3200
    },
    {
      "epoch": 29.2,
      "learning_rate": 0.00022230555555555553,
      "loss": 0.8605,
      "step": 3300
    },
    {
      "epoch": 30.09,
      "learning_rate": 0.00021952777777777774,
      "loss": 0.8815,
      "step": 3400
    },
    {
      "epoch": 30.97,
      "learning_rate": 0.00021674999999999998,
      "loss": 0.8439,
      "step": 3500
    },
    {
      "epoch": 30.97,
      "eval_loss": 0.6301265954971313,
      "eval_runtime": 54.5193,
      "eval_samples_per_second": 31.237,
      "eval_steps_per_second": 0.99,
      "eval_wer": 0.48884792626728113,
      "step": 3500
    },
    {
      "epoch": 31.86,
      "learning_rate": 0.0002139722222222222,
      "loss": 0.8522,
      "step": 3600
    },
    {
      "epoch": 32.74,
      "learning_rate": 0.0002111944444444444,
      "loss": 0.8477,
      "step": 3700
    },
    {
      "epoch": 33.63,
      "learning_rate": 0.00020841666666666665,
      "loss": 0.7978,
      "step": 3800
    },
    {
      "epoch": 34.51,
      "learning_rate": 0.00020563888888888886,
      "loss": 0.8127,
      "step": 3900
    },
    {
      "epoch": 35.4,
      "learning_rate": 0.0002028611111111111,
      "loss": 0.7901,
      "step": 4000
    },
    {
      "epoch": 35.4,
      "eval_loss": 0.6244927048683167,
      "eval_runtime": 53.6992,
      "eval_samples_per_second": 31.714,
      "eval_steps_per_second": 1.006,
      "eval_wer": 0.4976958525345622,
      "step": 4000
    },
    {
      "epoch": 36.28,
      "learning_rate": 0.0002000833333333333,
      "loss": 0.7978,
      "step": 4100
    },
    {
      "epoch": 37.17,
      "learning_rate": 0.00019730555555555552,
      "loss": 0.8046,
      "step": 4200
    },
    {
      "epoch": 38.05,
      "learning_rate": 0.00019452777777777776,
      "loss": 0.7892,
      "step": 4300
    },
    {
      "epoch": 38.94,
      "learning_rate": 0.00019174999999999997,
      "loss": 0.7657,
      "step": 4400
    },
    {
      "epoch": 39.82,
      "learning_rate": 0.00018899999999999999,
      "loss": 0.7669,
      "step": 4500
    },
    {
      "epoch": 39.82,
      "eval_loss": 0.6164370775222778,
      "eval_runtime": 54.5713,
      "eval_samples_per_second": 31.207,
      "eval_steps_per_second": 0.99,
      "eval_wer": 0.4671889400921659,
      "step": 4500
    },
    {
      "epoch": 40.71,
      "learning_rate": 0.0001862222222222222,
      "loss": 0.7389,
      "step": 4600
    },
    {
      "epoch": 41.59,
      "learning_rate": 0.00018344444444444444,
      "loss": 0.7595,
      "step": 4700
    },
    {
      "epoch": 42.48,
      "learning_rate": 0.00018066666666666665,
      "loss": 0.7264,
      "step": 4800
    },
    {
      "epoch": 43.36,
      "learning_rate": 0.00017788888888888886,
      "loss": 0.7251,
      "step": 4900
    },
    {
      "epoch": 44.25,
      "learning_rate": 0.0001751111111111111,
      "loss": 0.7196,
      "step": 5000
    },
    {
      "epoch": 44.25,
      "eval_loss": 0.6039230227470398,
      "eval_runtime": 55.8782,
      "eval_samples_per_second": 30.477,
      "eval_steps_per_second": 0.966,
      "eval_wer": 0.4688479262672811,
      "step": 5000
    },
    {
      "epoch": 45.13,
      "learning_rate": 0.0001723333333333333,
      "loss": 0.6932,
      "step": 5100
    },
    {
      "epoch": 46.02,
      "learning_rate": 0.00016955555555555555,
      "loss": 0.6983,
      "step": 5200
    },
    {
      "epoch": 46.9,
      "learning_rate": 0.00016677777777777776,
      "loss": 0.6769,
      "step": 5300
    },
    {
      "epoch": 47.79,
      "learning_rate": 0.00016399999999999997,
      "loss": 0.6928,
      "step": 5400
    },
    {
      "epoch": 48.67,
      "learning_rate": 0.00016122222222222221,
      "loss": 0.6715,
      "step": 5500
    },
    {
      "epoch": 48.67,
      "eval_loss": 0.5900057554244995,
      "eval_runtime": 53.576,
      "eval_samples_per_second": 31.787,
      "eval_steps_per_second": 1.008,
      "eval_wer": 0.45732718894009217,
      "step": 5500
    },
    {
      "epoch": 49.56,
      "learning_rate": 0.00015849999999999998,
      "loss": 0.6833,
      "step": 5600
    },
    {
      "epoch": 50.44,
      "learning_rate": 0.0001557222222222222,
      "loss": 0.6673,
      "step": 5700
    },
    {
      "epoch": 51.33,
      "learning_rate": 0.00015294444444444443,
      "loss": 0.6791,
      "step": 5800
    },
    {
      "epoch": 52.21,
      "learning_rate": 0.00015016666666666664,
      "loss": 0.6292,
      "step": 5900
    },
    {
      "epoch": 53.1,
      "learning_rate": 0.00014738888888888888,
      "loss": 0.6441,
      "step": 6000
    },
    {
      "epoch": 53.1,
      "eval_loss": 0.7002069354057312,
      "eval_runtime": 55.3769,
      "eval_samples_per_second": 30.753,
      "eval_steps_per_second": 0.975,
      "eval_wer": 0.479815668202765,
      "step": 6000
    },
    {
      "epoch": 53.98,
      "learning_rate": 0.0001446111111111111,
      "loss": 0.6217,
      "step": 6100
    },
    {
      "epoch": 54.87,
      "learning_rate": 0.00014183333333333333,
      "loss": 0.6131,
      "step": 6200
    },
    {
      "epoch": 55.75,
      "learning_rate": 0.00013905555555555554,
      "loss": 0.6211,
      "step": 6300
    },
    {
      "epoch": 56.64,
      "learning_rate": 0.00013627777777777775,
      "loss": 0.6104,
      "step": 6400
    },
    {
      "epoch": 57.52,
      "learning_rate": 0.0001335,
      "loss": 0.5938,
      "step": 6500
    },
    {
      "epoch": 57.52,
      "eval_loss": 0.6249451637268066,
      "eval_runtime": 55.668,
      "eval_samples_per_second": 30.592,
      "eval_steps_per_second": 0.97,
      "eval_wer": 0.4578801843317972,
      "step": 6500
    },
    {
      "epoch": 58.41,
      "learning_rate": 0.0001307222222222222,
      "loss": 0.6015,
      "step": 6600
    },
    {
      "epoch": 59.29,
      "learning_rate": 0.00012794444444444442,
      "loss": 0.591,
      "step": 6700
    },
    {
      "epoch": 60.18,
      "learning_rate": 0.00012516666666666666,
      "loss": 0.5734,
      "step": 6800
    },
    {
      "epoch": 61.06,
      "learning_rate": 0.0001223888888888889,
      "loss": 0.5685,
      "step": 6900
    },
    {
      "epoch": 61.95,
      "learning_rate": 0.0001196111111111111,
      "loss": 0.5541,
      "step": 7000
    },
    {
      "epoch": 61.95,
      "eval_loss": 0.6184473633766174,
      "eval_runtime": 55.2876,
      "eval_samples_per_second": 30.803,
      "eval_steps_per_second": 0.977,
      "eval_wer": 0.4424884792626728,
      "step": 7000
    },
    {
      "epoch": 62.83,
      "learning_rate": 0.00011683333333333332,
      "loss": 0.5546,
      "step": 7100
    },
    {
      "epoch": 63.72,
      "learning_rate": 0.00011405555555555554,
      "loss": 0.5473,
      "step": 7200
    },
    {
      "epoch": 64.6,
      "learning_rate": 0.00011127777777777777,
      "loss": 0.5592,
      "step": 7300
    },
    {
      "epoch": 65.49,
      "learning_rate": 0.0001085,
      "loss": 0.5349,
      "step": 7400
    },
    {
      "epoch": 66.37,
      "learning_rate": 0.0001057222222222222,
      "loss": 0.5506,
      "step": 7500
    },
    {
      "epoch": 66.37,
      "eval_loss": 0.6962713003158569,
      "eval_runtime": 55.4374,
      "eval_samples_per_second": 30.719,
      "eval_steps_per_second": 0.974,
      "eval_wer": 0.45852534562211983,
      "step": 7500
    },
    {
      "epoch": 67.26,
      "learning_rate": 0.00010294444444444443,
      "loss": 0.5313,
      "step": 7600
    },
    {
      "epoch": 68.14,
      "learning_rate": 0.00010016666666666666,
      "loss": 0.5267,
      "step": 7700
    },
    {
      "epoch": 69.03,
      "learning_rate": 9.738888888888888e-05,
      "loss": 0.5222,
      "step": 7800
    },
    {
      "epoch": 69.91,
      "learning_rate": 9.46111111111111e-05,
      "loss": 0.5101,
      "step": 7900
    },
    {
      "epoch": 70.8,
      "learning_rate": 9.183333333333332e-05,
      "loss": 0.4998,
      "step": 8000
    },
    {
      "epoch": 70.8,
      "eval_loss": 0.6778160333633423,
      "eval_runtime": 56.1738,
      "eval_samples_per_second": 30.317,
      "eval_steps_per_second": 0.961,
      "eval_wer": 0.44682027649769585,
      "step": 8000
    },
    {
      "epoch": 71.68,
      "learning_rate": 8.905555555555555e-05,
      "loss": 0.4941,
      "step": 8100
    },
    {
      "epoch": 72.57,
      "learning_rate": 8.627777777777776e-05,
      "loss": 0.492,
      "step": 8200
    },
    {
      "epoch": 73.45,
      "learning_rate": 8.349999999999998e-05,
      "loss": 0.4741,
      "step": 8300
    },
    {
      "epoch": 74.34,
      "learning_rate": 8.072222222222222e-05,
      "loss": 0.495,
      "step": 8400
    },
    {
      "epoch": 75.22,
      "learning_rate": 7.794444444444445e-05,
      "loss": 0.4729,
      "step": 8500
    },
    {
      "epoch": 75.22,
      "eval_loss": 0.6383044719696045,
      "eval_runtime": 53.9538,
      "eval_samples_per_second": 31.564,
      "eval_steps_per_second": 1.001,
      "eval_wer": 0.4392626728110599,
      "step": 8500
    },
    {
      "epoch": 76.11,
      "learning_rate": 7.516666666666665e-05,
      "loss": 0.4696,
      "step": 8600
    },
    {
      "epoch": 76.99,
      "learning_rate": 7.238888888888889e-05,
      "loss": 0.4581,
      "step": 8700
    },
    {
      "epoch": 77.88,
      "learning_rate": 6.961111111111111e-05,
      "loss": 0.4583,
      "step": 8800
    },
    {
      "epoch": 78.76,
      "learning_rate": 6.683333333333332e-05,
      "loss": 0.4451,
      "step": 8900
    },
    {
      "epoch": 79.65,
      "learning_rate": 6.405555555555555e-05,
      "loss": 0.4535,
      "step": 9000
    },
    {
      "epoch": 79.65,
      "eval_loss": 0.6592639684677124,
      "eval_runtime": 53.7527,
      "eval_samples_per_second": 31.682,
      "eval_steps_per_second": 1.005,
      "eval_wer": 0.4368663594470046,
      "step": 9000
    },
    {
      "epoch": 80.53,
      "learning_rate": 6.130555555555555e-05,
      "loss": 0.4324,
      "step": 9100
    },
    {
      "epoch": 81.42,
      "learning_rate": 5.8527777777777774e-05,
      "loss": 0.4546,
      "step": 9200
    },
    {
      "epoch": 82.3,
      "learning_rate": 5.574999999999999e-05,
      "loss": 0.4391,
      "step": 9300
    },
    {
      "epoch": 83.19,
      "learning_rate": 5.297222222222222e-05,
      "loss": 0.4306,
      "step": 9400
    },
    {
      "epoch": 84.07,
      "learning_rate": 5.019444444444444e-05,
      "loss": 0.4358,
      "step": 9500
    },
    {
      "epoch": 84.07,
      "eval_loss": 0.6913911700248718,
      "eval_runtime": 55.4673,
      "eval_samples_per_second": 30.703,
      "eval_steps_per_second": 0.974,
      "eval_wer": 0.4422119815668203,
      "step": 9500
    },
    {
      "epoch": 84.96,
      "learning_rate": 4.741666666666666e-05,
      "loss": 0.4095,
      "step": 9600
    },
    {
      "epoch": 85.84,
      "learning_rate": 4.463888888888888e-05,
      "loss": 0.4148,
      "step": 9700
    },
    {
      "epoch": 86.73,
      "learning_rate": 4.186111111111111e-05,
      "loss": 0.4113,
      "step": 9800
    },
    {
      "epoch": 87.61,
      "learning_rate": 3.9083333333333326e-05,
      "loss": 0.405,
      "step": 9900
    },
    {
      "epoch": 88.5,
      "learning_rate": 3.630555555555555e-05,
      "loss": 0.402,
      "step": 10000
    },
    {
      "epoch": 88.5,
      "eval_loss": 0.6743763089179993,
      "eval_runtime": 53.7113,
      "eval_samples_per_second": 31.707,
      "eval_steps_per_second": 1.005,
      "eval_wer": 0.4269124423963134,
      "step": 10000
    },
    {
      "epoch": 89.38,
      "learning_rate": 3.352777777777777e-05,
      "loss": 0.3915,
      "step": 10100
    },
    {
      "epoch": 90.27,
      "learning_rate": 3.0749999999999995e-05,
      "loss": 0.394,
      "step": 10200
    },
    {
      "epoch": 91.15,
      "learning_rate": 2.7972222222222217e-05,
      "loss": 0.392,
      "step": 10300
    },
    {
      "epoch": 92.04,
      "learning_rate": 2.519444444444444e-05,
      "loss": 0.3937,
      "step": 10400
    },
    {
      "epoch": 92.92,
      "learning_rate": 2.2416666666666665e-05,
      "loss": 0.3946,
      "step": 10500
    },
    {
      "epoch": 92.92,
      "eval_loss": 0.6894700527191162,
      "eval_runtime": 55.0145,
      "eval_samples_per_second": 30.955,
      "eval_steps_per_second": 0.982,
      "eval_wer": 0.42746543778801843,
      "step": 10500
    },
    {
      "epoch": 93.81,
      "learning_rate": 1.9638888888888887e-05,
      "loss": 0.3881,
      "step": 10600
    },
    {
      "epoch": 94.69,
      "learning_rate": 1.686111111111111e-05,
      "loss": 0.3778,
      "step": 10700
    },
    {
      "epoch": 95.58,
      "learning_rate": 1.4083333333333331e-05,
      "loss": 0.3783,
      "step": 10800
    },
    {
      "epoch": 96.46,
      "learning_rate": 1.1305555555555553e-05,
      "loss": 0.3612,
      "step": 10900
    },
    {
      "epoch": 97.35,
      "learning_rate": 8.527777777777777e-06,
      "loss": 0.3734,
      "step": 11000
    },
    {
      "epoch": 97.35,
      "eval_loss": 0.6888979077339172,
      "eval_runtime": 55.289,
      "eval_samples_per_second": 30.802,
      "eval_steps_per_second": 0.977,
      "eval_wer": 0.4319815668202765,
      "step": 11000
    },
    {
      "epoch": 98.23,
      "learning_rate": 5.749999999999999e-06,
      "loss": 0.3635,
      "step": 11100
    },
    {
      "epoch": 99.12,
      "learning_rate": 2.9999999999999997e-06,
      "loss": 0.3775,
      "step": 11200
    },
    {
      "epoch": 100.0,
      "learning_rate": 2.222222222222222e-07,
      "loss": 0.3737,
      "step": 11300
    },
    {
      "epoch": 100.0,
      "step": 11300,
      "total_flos": 4.438667922365353e+19,
      "train_loss": 0.9041624231254105,
      "train_runtime": 18573.3759,
      "train_samples_per_second": 19.388,
      "train_steps_per_second": 0.608
    }
  ],
  "max_steps": 11300,
  "num_train_epochs": 100,
  "total_flos": 4.438667922365353e+19,
  "trial_name": null,
  "trial_params": null
}