wav2vec2-large-xls-r-300m-greek / trainer_state.json
infinitejoy's picture
End of training
53a8894
raw
history blame
19.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 11300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.88,
"learning_rate": 5.82e-05,
"loss": 10.446,
"step": 100
},
{
"epoch": 1.77,
"learning_rate": 0.0001182,
"loss": 3.652,
"step": 200
},
{
"epoch": 2.65,
"learning_rate": 0.00017819999999999997,
"loss": 3.2102,
"step": 300
},
{
"epoch": 3.54,
"learning_rate": 0.0002382,
"loss": 3.1322,
"step": 400
},
{
"epoch": 4.42,
"learning_rate": 0.0002982,
"loss": 3.0798,
"step": 500
},
{
"epoch": 4.42,
"eval_loss": 3.001023054122925,
"eval_runtime": 56.1867,
"eval_samples_per_second": 30.31,
"eval_steps_per_second": 0.961,
"eval_wer": 1.0011981566820276,
"step": 500
},
{
"epoch": 5.31,
"learning_rate": 0.00029730555555555554,
"loss": 2.5544,
"step": 600
},
{
"epoch": 6.19,
"learning_rate": 0.0002945277777777777,
"loss": 1.8568,
"step": 700
},
{
"epoch": 7.08,
"learning_rate": 0.00029174999999999996,
"loss": 1.6575,
"step": 800
},
{
"epoch": 7.96,
"learning_rate": 0.0002889722222222222,
"loss": 1.5125,
"step": 900
},
{
"epoch": 8.85,
"learning_rate": 0.0002861944444444444,
"loss": 1.4336,
"step": 1000
},
{
"epoch": 8.85,
"eval_loss": 0.8481376767158508,
"eval_runtime": 54.9915,
"eval_samples_per_second": 30.968,
"eval_steps_per_second": 0.982,
"eval_wer": 0.6910599078341014,
"step": 1000
},
{
"epoch": 9.73,
"learning_rate": 0.0002834166666666666,
"loss": 1.3709,
"step": 1100
},
{
"epoch": 10.62,
"learning_rate": 0.00028063888888888886,
"loss": 1.3058,
"step": 1200
},
{
"epoch": 11.5,
"learning_rate": 0.0002778611111111111,
"loss": 1.2604,
"step": 1300
},
{
"epoch": 12.39,
"learning_rate": 0.0002750833333333333,
"loss": 1.2065,
"step": 1400
},
{
"epoch": 13.27,
"learning_rate": 0.00027230555555555553,
"loss": 1.2062,
"step": 1500
},
{
"epoch": 13.27,
"eval_loss": 0.7312180995941162,
"eval_runtime": 55.5977,
"eval_samples_per_second": 30.631,
"eval_steps_per_second": 0.971,
"eval_wer": 0.6332718894009216,
"step": 1500
},
{
"epoch": 14.16,
"learning_rate": 0.00026952777777777777,
"loss": 1.1712,
"step": 1600
},
{
"epoch": 15.04,
"learning_rate": 0.00026674999999999995,
"loss": 1.1348,
"step": 1700
},
{
"epoch": 15.93,
"learning_rate": 0.0002639722222222222,
"loss": 1.1077,
"step": 1800
},
{
"epoch": 16.81,
"learning_rate": 0.00026119444444444443,
"loss": 1.0821,
"step": 1900
},
{
"epoch": 17.7,
"learning_rate": 0.0002584166666666666,
"loss": 1.0481,
"step": 2000
},
{
"epoch": 17.7,
"eval_loss": 0.6849815845489502,
"eval_runtime": 54.2403,
"eval_samples_per_second": 31.397,
"eval_steps_per_second": 0.996,
"eval_wer": 0.5358525345622119,
"step": 2000
},
{
"epoch": 18.58,
"learning_rate": 0.0002556388888888889,
"loss": 1.0495,
"step": 2100
},
{
"epoch": 19.47,
"learning_rate": 0.0002528611111111111,
"loss": 1.0119,
"step": 2200
},
{
"epoch": 20.35,
"learning_rate": 0.0002500833333333333,
"loss": 1.0156,
"step": 2300
},
{
"epoch": 21.24,
"learning_rate": 0.00024730555555555557,
"loss": 0.9916,
"step": 2400
},
{
"epoch": 22.12,
"learning_rate": 0.00024452777777777776,
"loss": 0.9837,
"step": 2500
},
{
"epoch": 22.12,
"eval_loss": 0.6336787343025208,
"eval_runtime": 55.0498,
"eval_samples_per_second": 30.936,
"eval_steps_per_second": 0.981,
"eval_wer": 0.5316129032258065,
"step": 2500
},
{
"epoch": 23.01,
"learning_rate": 0.00024174999999999997,
"loss": 0.9558,
"step": 2600
},
{
"epoch": 23.89,
"learning_rate": 0.0002389722222222222,
"loss": 0.9523,
"step": 2700
},
{
"epoch": 24.78,
"learning_rate": 0.00023619444444444442,
"loss": 0.946,
"step": 2800
},
{
"epoch": 25.66,
"learning_rate": 0.00023341666666666663,
"loss": 0.909,
"step": 2900
},
{
"epoch": 26.55,
"learning_rate": 0.00023063888888888887,
"loss": 0.9108,
"step": 3000
},
{
"epoch": 26.55,
"eval_loss": 0.6257887482643127,
"eval_runtime": 55.6823,
"eval_samples_per_second": 30.584,
"eval_steps_per_second": 0.97,
"eval_wer": 0.507926267281106,
"step": 3000
},
{
"epoch": 27.43,
"learning_rate": 0.00022786111111111108,
"loss": 0.9005,
"step": 3100
},
{
"epoch": 28.32,
"learning_rate": 0.00022508333333333332,
"loss": 0.9069,
"step": 3200
},
{
"epoch": 29.2,
"learning_rate": 0.00022230555555555553,
"loss": 0.8605,
"step": 3300
},
{
"epoch": 30.09,
"learning_rate": 0.00021952777777777774,
"loss": 0.8815,
"step": 3400
},
{
"epoch": 30.97,
"learning_rate": 0.00021674999999999998,
"loss": 0.8439,
"step": 3500
},
{
"epoch": 30.97,
"eval_loss": 0.6301265954971313,
"eval_runtime": 54.5193,
"eval_samples_per_second": 31.237,
"eval_steps_per_second": 0.99,
"eval_wer": 0.48884792626728113,
"step": 3500
},
{
"epoch": 31.86,
"learning_rate": 0.0002139722222222222,
"loss": 0.8522,
"step": 3600
},
{
"epoch": 32.74,
"learning_rate": 0.0002111944444444444,
"loss": 0.8477,
"step": 3700
},
{
"epoch": 33.63,
"learning_rate": 0.00020841666666666665,
"loss": 0.7978,
"step": 3800
},
{
"epoch": 34.51,
"learning_rate": 0.00020563888888888886,
"loss": 0.8127,
"step": 3900
},
{
"epoch": 35.4,
"learning_rate": 0.0002028611111111111,
"loss": 0.7901,
"step": 4000
},
{
"epoch": 35.4,
"eval_loss": 0.6244927048683167,
"eval_runtime": 53.6992,
"eval_samples_per_second": 31.714,
"eval_steps_per_second": 1.006,
"eval_wer": 0.4976958525345622,
"step": 4000
},
{
"epoch": 36.28,
"learning_rate": 0.0002000833333333333,
"loss": 0.7978,
"step": 4100
},
{
"epoch": 37.17,
"learning_rate": 0.00019730555555555552,
"loss": 0.8046,
"step": 4200
},
{
"epoch": 38.05,
"learning_rate": 0.00019452777777777776,
"loss": 0.7892,
"step": 4300
},
{
"epoch": 38.94,
"learning_rate": 0.00019174999999999997,
"loss": 0.7657,
"step": 4400
},
{
"epoch": 39.82,
"learning_rate": 0.00018899999999999999,
"loss": 0.7669,
"step": 4500
},
{
"epoch": 39.82,
"eval_loss": 0.6164370775222778,
"eval_runtime": 54.5713,
"eval_samples_per_second": 31.207,
"eval_steps_per_second": 0.99,
"eval_wer": 0.4671889400921659,
"step": 4500
},
{
"epoch": 40.71,
"learning_rate": 0.0001862222222222222,
"loss": 0.7389,
"step": 4600
},
{
"epoch": 41.59,
"learning_rate": 0.00018344444444444444,
"loss": 0.7595,
"step": 4700
},
{
"epoch": 42.48,
"learning_rate": 0.00018066666666666665,
"loss": 0.7264,
"step": 4800
},
{
"epoch": 43.36,
"learning_rate": 0.00017788888888888886,
"loss": 0.7251,
"step": 4900
},
{
"epoch": 44.25,
"learning_rate": 0.0001751111111111111,
"loss": 0.7196,
"step": 5000
},
{
"epoch": 44.25,
"eval_loss": 0.6039230227470398,
"eval_runtime": 55.8782,
"eval_samples_per_second": 30.477,
"eval_steps_per_second": 0.966,
"eval_wer": 0.4688479262672811,
"step": 5000
},
{
"epoch": 45.13,
"learning_rate": 0.0001723333333333333,
"loss": 0.6932,
"step": 5100
},
{
"epoch": 46.02,
"learning_rate": 0.00016955555555555555,
"loss": 0.6983,
"step": 5200
},
{
"epoch": 46.9,
"learning_rate": 0.00016677777777777776,
"loss": 0.6769,
"step": 5300
},
{
"epoch": 47.79,
"learning_rate": 0.00016399999999999997,
"loss": 0.6928,
"step": 5400
},
{
"epoch": 48.67,
"learning_rate": 0.00016122222222222221,
"loss": 0.6715,
"step": 5500
},
{
"epoch": 48.67,
"eval_loss": 0.5900057554244995,
"eval_runtime": 53.576,
"eval_samples_per_second": 31.787,
"eval_steps_per_second": 1.008,
"eval_wer": 0.45732718894009217,
"step": 5500
},
{
"epoch": 49.56,
"learning_rate": 0.00015849999999999998,
"loss": 0.6833,
"step": 5600
},
{
"epoch": 50.44,
"learning_rate": 0.0001557222222222222,
"loss": 0.6673,
"step": 5700
},
{
"epoch": 51.33,
"learning_rate": 0.00015294444444444443,
"loss": 0.6791,
"step": 5800
},
{
"epoch": 52.21,
"learning_rate": 0.00015016666666666664,
"loss": 0.6292,
"step": 5900
},
{
"epoch": 53.1,
"learning_rate": 0.00014738888888888888,
"loss": 0.6441,
"step": 6000
},
{
"epoch": 53.1,
"eval_loss": 0.7002069354057312,
"eval_runtime": 55.3769,
"eval_samples_per_second": 30.753,
"eval_steps_per_second": 0.975,
"eval_wer": 0.479815668202765,
"step": 6000
},
{
"epoch": 53.98,
"learning_rate": 0.0001446111111111111,
"loss": 0.6217,
"step": 6100
},
{
"epoch": 54.87,
"learning_rate": 0.00014183333333333333,
"loss": 0.6131,
"step": 6200
},
{
"epoch": 55.75,
"learning_rate": 0.00013905555555555554,
"loss": 0.6211,
"step": 6300
},
{
"epoch": 56.64,
"learning_rate": 0.00013627777777777775,
"loss": 0.6104,
"step": 6400
},
{
"epoch": 57.52,
"learning_rate": 0.0001335,
"loss": 0.5938,
"step": 6500
},
{
"epoch": 57.52,
"eval_loss": 0.6249451637268066,
"eval_runtime": 55.668,
"eval_samples_per_second": 30.592,
"eval_steps_per_second": 0.97,
"eval_wer": 0.4578801843317972,
"step": 6500
},
{
"epoch": 58.41,
"learning_rate": 0.0001307222222222222,
"loss": 0.6015,
"step": 6600
},
{
"epoch": 59.29,
"learning_rate": 0.00012794444444444442,
"loss": 0.591,
"step": 6700
},
{
"epoch": 60.18,
"learning_rate": 0.00012516666666666666,
"loss": 0.5734,
"step": 6800
},
{
"epoch": 61.06,
"learning_rate": 0.0001223888888888889,
"loss": 0.5685,
"step": 6900
},
{
"epoch": 61.95,
"learning_rate": 0.0001196111111111111,
"loss": 0.5541,
"step": 7000
},
{
"epoch": 61.95,
"eval_loss": 0.6184473633766174,
"eval_runtime": 55.2876,
"eval_samples_per_second": 30.803,
"eval_steps_per_second": 0.977,
"eval_wer": 0.4424884792626728,
"step": 7000
},
{
"epoch": 62.83,
"learning_rate": 0.00011683333333333332,
"loss": 0.5546,
"step": 7100
},
{
"epoch": 63.72,
"learning_rate": 0.00011405555555555554,
"loss": 0.5473,
"step": 7200
},
{
"epoch": 64.6,
"learning_rate": 0.00011127777777777777,
"loss": 0.5592,
"step": 7300
},
{
"epoch": 65.49,
"learning_rate": 0.0001085,
"loss": 0.5349,
"step": 7400
},
{
"epoch": 66.37,
"learning_rate": 0.0001057222222222222,
"loss": 0.5506,
"step": 7500
},
{
"epoch": 66.37,
"eval_loss": 0.6962713003158569,
"eval_runtime": 55.4374,
"eval_samples_per_second": 30.719,
"eval_steps_per_second": 0.974,
"eval_wer": 0.45852534562211983,
"step": 7500
},
{
"epoch": 67.26,
"learning_rate": 0.00010294444444444443,
"loss": 0.5313,
"step": 7600
},
{
"epoch": 68.14,
"learning_rate": 0.00010016666666666666,
"loss": 0.5267,
"step": 7700
},
{
"epoch": 69.03,
"learning_rate": 9.738888888888888e-05,
"loss": 0.5222,
"step": 7800
},
{
"epoch": 69.91,
"learning_rate": 9.46111111111111e-05,
"loss": 0.5101,
"step": 7900
},
{
"epoch": 70.8,
"learning_rate": 9.183333333333332e-05,
"loss": 0.4998,
"step": 8000
},
{
"epoch": 70.8,
"eval_loss": 0.6778160333633423,
"eval_runtime": 56.1738,
"eval_samples_per_second": 30.317,
"eval_steps_per_second": 0.961,
"eval_wer": 0.44682027649769585,
"step": 8000
},
{
"epoch": 71.68,
"learning_rate": 8.905555555555555e-05,
"loss": 0.4941,
"step": 8100
},
{
"epoch": 72.57,
"learning_rate": 8.627777777777776e-05,
"loss": 0.492,
"step": 8200
},
{
"epoch": 73.45,
"learning_rate": 8.349999999999998e-05,
"loss": 0.4741,
"step": 8300
},
{
"epoch": 74.34,
"learning_rate": 8.072222222222222e-05,
"loss": 0.495,
"step": 8400
},
{
"epoch": 75.22,
"learning_rate": 7.794444444444445e-05,
"loss": 0.4729,
"step": 8500
},
{
"epoch": 75.22,
"eval_loss": 0.6383044719696045,
"eval_runtime": 53.9538,
"eval_samples_per_second": 31.564,
"eval_steps_per_second": 1.001,
"eval_wer": 0.4392626728110599,
"step": 8500
},
{
"epoch": 76.11,
"learning_rate": 7.516666666666665e-05,
"loss": 0.4696,
"step": 8600
},
{
"epoch": 76.99,
"learning_rate": 7.238888888888889e-05,
"loss": 0.4581,
"step": 8700
},
{
"epoch": 77.88,
"learning_rate": 6.961111111111111e-05,
"loss": 0.4583,
"step": 8800
},
{
"epoch": 78.76,
"learning_rate": 6.683333333333332e-05,
"loss": 0.4451,
"step": 8900
},
{
"epoch": 79.65,
"learning_rate": 6.405555555555555e-05,
"loss": 0.4535,
"step": 9000
},
{
"epoch": 79.65,
"eval_loss": 0.6592639684677124,
"eval_runtime": 53.7527,
"eval_samples_per_second": 31.682,
"eval_steps_per_second": 1.005,
"eval_wer": 0.4368663594470046,
"step": 9000
},
{
"epoch": 80.53,
"learning_rate": 6.130555555555555e-05,
"loss": 0.4324,
"step": 9100
},
{
"epoch": 81.42,
"learning_rate": 5.8527777777777774e-05,
"loss": 0.4546,
"step": 9200
},
{
"epoch": 82.3,
"learning_rate": 5.574999999999999e-05,
"loss": 0.4391,
"step": 9300
},
{
"epoch": 83.19,
"learning_rate": 5.297222222222222e-05,
"loss": 0.4306,
"step": 9400
},
{
"epoch": 84.07,
"learning_rate": 5.019444444444444e-05,
"loss": 0.4358,
"step": 9500
},
{
"epoch": 84.07,
"eval_loss": 0.6913911700248718,
"eval_runtime": 55.4673,
"eval_samples_per_second": 30.703,
"eval_steps_per_second": 0.974,
"eval_wer": 0.4422119815668203,
"step": 9500
},
{
"epoch": 84.96,
"learning_rate": 4.741666666666666e-05,
"loss": 0.4095,
"step": 9600
},
{
"epoch": 85.84,
"learning_rate": 4.463888888888888e-05,
"loss": 0.4148,
"step": 9700
},
{
"epoch": 86.73,
"learning_rate": 4.186111111111111e-05,
"loss": 0.4113,
"step": 9800
},
{
"epoch": 87.61,
"learning_rate": 3.9083333333333326e-05,
"loss": 0.405,
"step": 9900
},
{
"epoch": 88.5,
"learning_rate": 3.630555555555555e-05,
"loss": 0.402,
"step": 10000
},
{
"epoch": 88.5,
"eval_loss": 0.6743763089179993,
"eval_runtime": 53.7113,
"eval_samples_per_second": 31.707,
"eval_steps_per_second": 1.005,
"eval_wer": 0.4269124423963134,
"step": 10000
},
{
"epoch": 89.38,
"learning_rate": 3.352777777777777e-05,
"loss": 0.3915,
"step": 10100
},
{
"epoch": 90.27,
"learning_rate": 3.0749999999999995e-05,
"loss": 0.394,
"step": 10200
},
{
"epoch": 91.15,
"learning_rate": 2.7972222222222217e-05,
"loss": 0.392,
"step": 10300
},
{
"epoch": 92.04,
"learning_rate": 2.519444444444444e-05,
"loss": 0.3937,
"step": 10400
},
{
"epoch": 92.92,
"learning_rate": 2.2416666666666665e-05,
"loss": 0.3946,
"step": 10500
},
{
"epoch": 92.92,
"eval_loss": 0.6894700527191162,
"eval_runtime": 55.0145,
"eval_samples_per_second": 30.955,
"eval_steps_per_second": 0.982,
"eval_wer": 0.42746543778801843,
"step": 10500
},
{
"epoch": 93.81,
"learning_rate": 1.9638888888888887e-05,
"loss": 0.3881,
"step": 10600
},
{
"epoch": 94.69,
"learning_rate": 1.686111111111111e-05,
"loss": 0.3778,
"step": 10700
},
{
"epoch": 95.58,
"learning_rate": 1.4083333333333331e-05,
"loss": 0.3783,
"step": 10800
},
{
"epoch": 96.46,
"learning_rate": 1.1305555555555553e-05,
"loss": 0.3612,
"step": 10900
},
{
"epoch": 97.35,
"learning_rate": 8.527777777777777e-06,
"loss": 0.3734,
"step": 11000
},
{
"epoch": 97.35,
"eval_loss": 0.6888979077339172,
"eval_runtime": 55.289,
"eval_samples_per_second": 30.802,
"eval_steps_per_second": 0.977,
"eval_wer": 0.4319815668202765,
"step": 11000
},
{
"epoch": 98.23,
"learning_rate": 5.749999999999999e-06,
"loss": 0.3635,
"step": 11100
},
{
"epoch": 99.12,
"learning_rate": 2.9999999999999997e-06,
"loss": 0.3775,
"step": 11200
},
{
"epoch": 100.0,
"learning_rate": 2.222222222222222e-07,
"loss": 0.3737,
"step": 11300
},
{
"epoch": 100.0,
"step": 11300,
"total_flos": 4.438667922365353e+19,
"train_loss": 0.9041624231254105,
"train_runtime": 18573.3759,
"train_samples_per_second": 19.388,
"train_steps_per_second": 0.608
}
],
"max_steps": 11300,
"num_train_epochs": 100,
"total_flos": 4.438667922365353e+19,
"trial_name": null,
"trial_params": null
}