whisper-small-pashto / trainer_state.json
ihanif's picture
End of training
f2e60d8
raw
history blame
32.5 kB
{
"best_metric": 63.10532687651331,
"best_model_checkpoint": "./checkpoint-5100",
"epoch": 742.8571428571429,
"global_step": 5200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.57,
"learning_rate": 1.32e-07,
"loss": 2.8512,
"step": 25
},
{
"epoch": 7.14,
"learning_rate": 2.8199999999999996e-07,
"loss": 2.7082,
"step": 50
},
{
"epoch": 10.71,
"learning_rate": 2.9305263157894735e-07,
"loss": 2.3515,
"step": 75
},
{
"epoch": 14.29,
"learning_rate": 2.851578947368421e-07,
"loss": 2.0871,
"step": 100
},
{
"epoch": 14.29,
"eval_loss": 2.0101583003997803,
"eval_runtime": 720.7636,
"eval_samples_per_second": 0.71,
"eval_steps_per_second": 0.044,
"eval_wer": 230.27391041162227,
"step": 100
},
{
"epoch": 17.86,
"learning_rate": 2.7726315789473684e-07,
"loss": 1.8622,
"step": 125
},
{
"epoch": 21.43,
"learning_rate": 2.693684210526316e-07,
"loss": 1.7104,
"step": 150
},
{
"epoch": 25.0,
"learning_rate": 2.614736842105263e-07,
"loss": 1.5736,
"step": 175
},
{
"epoch": 28.57,
"learning_rate": 2.53578947368421e-07,
"loss": 1.465,
"step": 200
},
{
"epoch": 28.57,
"eval_loss": 1.4968725442886353,
"eval_runtime": 628.5425,
"eval_samples_per_second": 0.815,
"eval_steps_per_second": 0.051,
"eval_wer": 137.24273607748182,
"step": 200
},
{
"epoch": 32.14,
"learning_rate": 2.4568421052631577e-07,
"loss": 1.3669,
"step": 225
},
{
"epoch": 35.71,
"learning_rate": 2.3778947368421054e-07,
"loss": 1.2898,
"step": 250
},
{
"epoch": 39.29,
"learning_rate": 2.2989473684210523e-07,
"loss": 1.2205,
"step": 275
},
{
"epoch": 42.86,
"learning_rate": 2.2199999999999998e-07,
"loss": 1.1617,
"step": 300
},
{
"epoch": 42.86,
"eval_loss": 1.2715740203857422,
"eval_runtime": 499.3378,
"eval_samples_per_second": 1.025,
"eval_steps_per_second": 0.064,
"eval_wer": 76.32415254237289,
"step": 300
},
{
"epoch": 46.43,
"learning_rate": 2.1410526315789472e-07,
"loss": 1.1091,
"step": 325
},
{
"epoch": 50.0,
"learning_rate": 2.0621052631578947e-07,
"loss": 1.0738,
"step": 350
},
{
"epoch": 53.57,
"learning_rate": 1.9831578947368419e-07,
"loss": 1.033,
"step": 375
},
{
"epoch": 57.14,
"learning_rate": 1.9042105263157893e-07,
"loss": 1.0019,
"step": 400
},
{
"epoch": 57.14,
"eval_loss": 1.16450834274292,
"eval_runtime": 480.3701,
"eval_samples_per_second": 1.066,
"eval_steps_per_second": 0.067,
"eval_wer": 71.37560532687651,
"step": 400
},
{
"epoch": 60.71,
"learning_rate": 1.8252631578947368e-07,
"loss": 0.9712,
"step": 425
},
{
"epoch": 64.29,
"learning_rate": 1.7463157894736842e-07,
"loss": 0.9437,
"step": 450
},
{
"epoch": 67.86,
"learning_rate": 1.6673684210526314e-07,
"loss": 0.9215,
"step": 475
},
{
"epoch": 71.43,
"learning_rate": 1.588421052631579e-07,
"loss": 0.9052,
"step": 500
},
{
"epoch": 71.43,
"eval_loss": 1.1051170825958252,
"eval_runtime": 486.3374,
"eval_samples_per_second": 1.053,
"eval_steps_per_second": 0.066,
"eval_wer": 69.78662227602905,
"step": 500
},
{
"epoch": 75.0,
"learning_rate": 1.5094736842105263e-07,
"loss": 0.8773,
"step": 525
},
{
"epoch": 78.57,
"learning_rate": 1.4305263157894735e-07,
"loss": 0.8643,
"step": 550
},
{
"epoch": 82.14,
"learning_rate": 1.351578947368421e-07,
"loss": 0.8449,
"step": 575
},
{
"epoch": 85.71,
"learning_rate": 1.2726315789473684e-07,
"loss": 0.8334,
"step": 600
},
{
"epoch": 85.71,
"eval_loss": 1.0691100358963013,
"eval_runtime": 478.3463,
"eval_samples_per_second": 1.07,
"eval_steps_per_second": 0.067,
"eval_wer": 68.26573849878935,
"step": 600
},
{
"epoch": 89.29,
"learning_rate": 1.1936842105263156e-07,
"loss": 0.8132,
"step": 625
},
{
"epoch": 92.86,
"learning_rate": 1.1147368421052631e-07,
"loss": 0.8058,
"step": 650
},
{
"epoch": 96.43,
"learning_rate": 1.0357894736842104e-07,
"loss": 0.7913,
"step": 675
},
{
"epoch": 100.0,
"learning_rate": 9.568421052631579e-08,
"loss": 0.7838,
"step": 700
},
{
"epoch": 100.0,
"eval_loss": 1.0482958555221558,
"eval_runtime": 478.3861,
"eval_samples_per_second": 1.07,
"eval_steps_per_second": 0.067,
"eval_wer": 67.16858353510897,
"step": 700
},
{
"epoch": 103.57,
"learning_rate": 8.778947368421052e-08,
"loss": 0.7768,
"step": 725
},
{
"epoch": 107.14,
"learning_rate": 7.989473684210526e-08,
"loss": 0.7673,
"step": 750
},
{
"epoch": 110.71,
"learning_rate": 7.2e-08,
"loss": 0.7643,
"step": 775
},
{
"epoch": 114.29,
"learning_rate": 6.410526315789473e-08,
"loss": 0.7539,
"step": 800
},
{
"epoch": 114.29,
"eval_loss": 1.0362622737884521,
"eval_runtime": 484.8883,
"eval_samples_per_second": 1.056,
"eval_steps_per_second": 0.066,
"eval_wer": 66.41949152542372,
"step": 800
},
{
"epoch": 117.86,
"learning_rate": 5.621052631578947e-08,
"loss": 0.7527,
"step": 825
},
{
"epoch": 121.43,
"learning_rate": 4.8315789473684206e-08,
"loss": 0.7441,
"step": 850
},
{
"epoch": 125.0,
"learning_rate": 4.0421052631578945e-08,
"loss": 0.7417,
"step": 875
},
{
"epoch": 128.57,
"learning_rate": 3.2526315789473684e-08,
"loss": 0.7377,
"step": 900
},
{
"epoch": 128.57,
"eval_loss": 1.0297424793243408,
"eval_runtime": 471.9816,
"eval_samples_per_second": 1.085,
"eval_steps_per_second": 0.068,
"eval_wer": 66.20006053268766,
"step": 900
},
{
"epoch": 132.14,
"learning_rate": 2.463157894736842e-08,
"loss": 0.7387,
"step": 925
},
{
"epoch": 135.71,
"learning_rate": 1.673684210526316e-08,
"loss": 0.7329,
"step": 950
},
{
"epoch": 139.29,
"learning_rate": 8.842105263157895e-09,
"loss": 0.7312,
"step": 975
},
{
"epoch": 142.86,
"learning_rate": 9.473684210526316e-10,
"loss": 0.7325,
"step": 1000
},
{
"epoch": 142.86,
"eval_loss": 1.0276601314544678,
"eval_runtime": 477.2948,
"eval_samples_per_second": 1.073,
"eval_steps_per_second": 0.067,
"eval_wer": 66.00332929782083,
"step": 1000
},
{
"epoch": 146.43,
"learning_rate": 1.5046153846153844e-07,
"loss": 0.7238,
"step": 1025
},
{
"epoch": 150.0,
"learning_rate": 1.466153846153846e-07,
"loss": 0.7163,
"step": 1050
},
{
"epoch": 153.57,
"learning_rate": 1.4276923076923076e-07,
"loss": 0.7028,
"step": 1075
},
{
"epoch": 157.14,
"learning_rate": 1.389230769230769e-07,
"loss": 0.6952,
"step": 1100
},
{
"epoch": 157.14,
"eval_loss": 1.0121634006500244,
"eval_runtime": 471.3724,
"eval_samples_per_second": 1.086,
"eval_steps_per_second": 0.068,
"eval_wer": 65.05750605326877,
"step": 1100
},
{
"epoch": 160.71,
"learning_rate": 1.3507692307692308e-07,
"loss": 0.6843,
"step": 1125
},
{
"epoch": 164.29,
"learning_rate": 1.3123076923076923e-07,
"loss": 0.6699,
"step": 1150
},
{
"epoch": 167.86,
"learning_rate": 1.2738461538461538e-07,
"loss": 0.6671,
"step": 1175
},
{
"epoch": 171.43,
"learning_rate": 1.2353846153846153e-07,
"loss": 0.6531,
"step": 1200
},
{
"epoch": 171.43,
"eval_loss": 1.0014406442642212,
"eval_runtime": 475.4519,
"eval_samples_per_second": 1.077,
"eval_steps_per_second": 0.067,
"eval_wer": 64.42191283292978,
"step": 1200
},
{
"epoch": 175.0,
"learning_rate": 1.1969230769230767e-07,
"loss": 0.6487,
"step": 1225
},
{
"epoch": 178.57,
"learning_rate": 1.1584615384615385e-07,
"loss": 0.6369,
"step": 1250
},
{
"epoch": 182.14,
"learning_rate": 1.12e-07,
"loss": 0.6336,
"step": 1275
},
{
"epoch": 185.71,
"learning_rate": 1.0815384615384614e-07,
"loss": 0.6189,
"step": 1300
},
{
"epoch": 185.71,
"eval_loss": 0.9944669008255005,
"eval_runtime": 470.4039,
"eval_samples_per_second": 1.088,
"eval_steps_per_second": 0.068,
"eval_wer": 63.79388619854721,
"step": 1300
},
{
"epoch": 189.29,
"learning_rate": 1.043076923076923e-07,
"loss": 0.6213,
"step": 1325
},
{
"epoch": 192.86,
"learning_rate": 1.0046153846153845e-07,
"loss": 0.608,
"step": 1350
},
{
"epoch": 196.43,
"learning_rate": 9.66153846153846e-08,
"loss": 0.6029,
"step": 1375
},
{
"epoch": 200.0,
"learning_rate": 9.276923076923078e-08,
"loss": 0.5993,
"step": 1400
},
{
"epoch": 200.0,
"eval_loss": 0.9895604252815247,
"eval_runtime": 473.2431,
"eval_samples_per_second": 1.082,
"eval_steps_per_second": 0.068,
"eval_wer": 63.35502421307506,
"step": 1400
},
{
"epoch": 203.57,
"learning_rate": 8.892307692307692e-08,
"loss": 0.593,
"step": 1425
},
{
"epoch": 207.14,
"learning_rate": 8.507692307692307e-08,
"loss": 0.5817,
"step": 1450
},
{
"epoch": 210.71,
"learning_rate": 8.123076923076922e-08,
"loss": 0.5782,
"step": 1475
},
{
"epoch": 214.29,
"learning_rate": 7.738461538461538e-08,
"loss": 0.5757,
"step": 1500
},
{
"epoch": 214.29,
"eval_loss": 0.9864457845687866,
"eval_runtime": 474.414,
"eval_samples_per_second": 1.079,
"eval_steps_per_second": 0.067,
"eval_wer": 63.22639225181598,
"step": 1500
},
{
"epoch": 217.86,
"learning_rate": 7.353846153846153e-08,
"loss": 0.5706,
"step": 1525
},
{
"epoch": 221.43,
"learning_rate": 6.969230769230769e-08,
"loss": 0.5624,
"step": 1550
},
{
"epoch": 225.0,
"learning_rate": 6.584615384615385e-08,
"loss": 0.5638,
"step": 1575
},
{
"epoch": 228.57,
"learning_rate": 6.2e-08,
"loss": 0.5601,
"step": 1600
},
{
"epoch": 228.57,
"eval_loss": 0.9844600558280945,
"eval_runtime": 478.7212,
"eval_samples_per_second": 1.07,
"eval_steps_per_second": 0.067,
"eval_wer": 62.916162227602904,
"step": 1600
},
{
"epoch": 232.14,
"learning_rate": 5.815384615384615e-08,
"loss": 0.5537,
"step": 1625
},
{
"epoch": 235.71,
"learning_rate": 5.430769230769231e-08,
"loss": 0.5488,
"step": 1650
},
{
"epoch": 239.29,
"learning_rate": 5.0461538461538456e-08,
"loss": 0.5479,
"step": 1675
},
{
"epoch": 242.86,
"learning_rate": 4.661538461538461e-08,
"loss": 0.5482,
"step": 1700
},
{
"epoch": 242.86,
"eval_loss": 0.9833234548568726,
"eval_runtime": 488.3079,
"eval_samples_per_second": 1.049,
"eval_steps_per_second": 0.066,
"eval_wer": 62.817796610169495,
"step": 1700
},
{
"epoch": 246.43,
"learning_rate": 4.2769230769230765e-08,
"loss": 0.5441,
"step": 1725
},
{
"epoch": 250.0,
"learning_rate": 3.892307692307692e-08,
"loss": 0.54,
"step": 1750
},
{
"epoch": 253.57,
"learning_rate": 3.5076923076923074e-08,
"loss": 0.538,
"step": 1775
},
{
"epoch": 257.14,
"learning_rate": 3.123076923076923e-08,
"loss": 0.5382,
"step": 1800
},
{
"epoch": 257.14,
"eval_loss": 0.9826769828796387,
"eval_runtime": 486.3686,
"eval_samples_per_second": 1.053,
"eval_steps_per_second": 0.066,
"eval_wer": 62.84049636803874,
"step": 1800
},
{
"epoch": 260.71,
"learning_rate": 2.7384615384615387e-08,
"loss": 0.5343,
"step": 1825
},
{
"epoch": 264.29,
"learning_rate": 2.3538461538461535e-08,
"loss": 0.5313,
"step": 1850
},
{
"epoch": 267.86,
"learning_rate": 1.9692307692307693e-08,
"loss": 0.5318,
"step": 1875
},
{
"epoch": 271.43,
"learning_rate": 1.5846153846153844e-08,
"loss": 0.5325,
"step": 1900
},
{
"epoch": 271.43,
"eval_loss": 0.9823360443115234,
"eval_runtime": 483.6609,
"eval_samples_per_second": 1.059,
"eval_steps_per_second": 0.066,
"eval_wer": 62.76483050847458,
"step": 1900
},
{
"epoch": 275.0,
"learning_rate": 1.2e-08,
"loss": 0.529,
"step": 1925
},
{
"epoch": 278.57,
"learning_rate": 8.153846153846154e-09,
"loss": 0.5294,
"step": 1950
},
{
"epoch": 282.14,
"learning_rate": 4.307692307692307e-09,
"loss": 0.525,
"step": 1975
},
{
"epoch": 285.71,
"learning_rate": 4.615384615384615e-10,
"loss": 0.5287,
"step": 2000
},
{
"epoch": 285.71,
"eval_loss": 0.9822061061859131,
"eval_runtime": 484.044,
"eval_samples_per_second": 1.058,
"eval_steps_per_second": 0.066,
"eval_wer": 62.817796610169495,
"step": 2000
},
{
"epoch": 289.29,
"learning_rate": 1.9853333333333334e-07,
"loss": 0.523,
"step": 2025
},
{
"epoch": 292.86,
"learning_rate": 1.9686666666666667e-07,
"loss": 0.5133,
"step": 2050
},
{
"epoch": 296.43,
"learning_rate": 1.9519999999999997e-07,
"loss": 0.4999,
"step": 2075
},
{
"epoch": 300.0,
"learning_rate": 1.935333333333333e-07,
"loss": 0.4924,
"step": 2100
},
{
"epoch": 303.57,
"learning_rate": 1.9186666666666666e-07,
"loss": 0.4855,
"step": 2125
},
{
"epoch": 307.14,
"learning_rate": 1.902e-07,
"loss": 0.4701,
"step": 2150
},
{
"epoch": 310.71,
"learning_rate": 1.8853333333333333e-07,
"loss": 0.4601,
"step": 2175
},
{
"epoch": 314.29,
"learning_rate": 1.8686666666666669e-07,
"loss": 0.4525,
"step": 2200
},
{
"epoch": 317.86,
"learning_rate": 1.852e-07,
"loss": 0.4448,
"step": 2225
},
{
"epoch": 321.43,
"learning_rate": 1.8353333333333332e-07,
"loss": 0.4364,
"step": 2250
},
{
"epoch": 325.0,
"learning_rate": 1.8186666666666665e-07,
"loss": 0.4232,
"step": 2275
},
{
"epoch": 328.57,
"learning_rate": 1.8019999999999999e-07,
"loss": 0.4163,
"step": 2300
},
{
"epoch": 332.14,
"learning_rate": 1.7853333333333334e-07,
"loss": 0.4089,
"step": 2325
},
{
"epoch": 335.71,
"learning_rate": 1.7686666666666668e-07,
"loss": 0.4031,
"step": 2350
},
{
"epoch": 339.29,
"learning_rate": 1.7519999999999998e-07,
"loss": 0.3887,
"step": 2375
},
{
"epoch": 342.86,
"learning_rate": 1.735333333333333e-07,
"loss": 0.3826,
"step": 2400
},
{
"epoch": 346.43,
"learning_rate": 1.7186666666666667e-07,
"loss": 0.3766,
"step": 2425
},
{
"epoch": 350.0,
"learning_rate": 1.702e-07,
"loss": 0.3647,
"step": 2450
},
{
"epoch": 353.57,
"learning_rate": 1.6853333333333333e-07,
"loss": 0.3601,
"step": 2475
},
{
"epoch": 357.14,
"learning_rate": 1.6686666666666664e-07,
"loss": 0.3494,
"step": 2500
},
{
"epoch": 357.14,
"eval_loss": 1.0025562047958374,
"eval_runtime": 489.174,
"eval_samples_per_second": 1.047,
"eval_steps_per_second": 0.065,
"eval_wer": 61.61470944309927,
"step": 2500
},
{
"epoch": 360.71,
"learning_rate": 1.652e-07,
"loss": 0.3448,
"step": 2525
},
{
"epoch": 364.29,
"learning_rate": 1.6353333333333333e-07,
"loss": 0.3367,
"step": 2550
},
{
"epoch": 367.86,
"learning_rate": 1.6186666666666666e-07,
"loss": 0.3308,
"step": 2575
},
{
"epoch": 371.43,
"learning_rate": 1.602e-07,
"loss": 0.3226,
"step": 2600
},
{
"epoch": 375.0,
"learning_rate": 1.5853333333333335e-07,
"loss": 0.3165,
"step": 2625
},
{
"epoch": 378.57,
"learning_rate": 1.5686666666666666e-07,
"loss": 0.3099,
"step": 2650
},
{
"epoch": 382.14,
"learning_rate": 1.552e-07,
"loss": 0.3021,
"step": 2675
},
{
"epoch": 385.71,
"learning_rate": 1.5353333333333332e-07,
"loss": 0.2964,
"step": 2700
},
{
"epoch": 389.29,
"learning_rate": 1.5186666666666668e-07,
"loss": 0.2901,
"step": 2725
},
{
"epoch": 392.86,
"learning_rate": 1.502e-07,
"loss": 0.284,
"step": 2750
},
{
"epoch": 396.43,
"learning_rate": 1.4853333333333334e-07,
"loss": 0.279,
"step": 2775
},
{
"epoch": 400.0,
"learning_rate": 1.4686666666666667e-07,
"loss": 0.2715,
"step": 2800
},
{
"epoch": 403.57,
"learning_rate": 1.4519999999999998e-07,
"loss": 0.2646,
"step": 2825
},
{
"epoch": 407.14,
"learning_rate": 1.4353333333333333e-07,
"loss": 0.2606,
"step": 2850
},
{
"epoch": 410.71,
"learning_rate": 1.4186666666666667e-07,
"loss": 0.2564,
"step": 2875
},
{
"epoch": 414.29,
"learning_rate": 1.402e-07,
"loss": 0.2486,
"step": 2900
},
{
"epoch": 417.86,
"learning_rate": 1.3853333333333333e-07,
"loss": 0.2463,
"step": 2925
},
{
"epoch": 421.43,
"learning_rate": 1.3686666666666666e-07,
"loss": 0.239,
"step": 2950
},
{
"epoch": 425.0,
"learning_rate": 1.352e-07,
"loss": 0.2341,
"step": 2975
},
{
"epoch": 428.57,
"learning_rate": 1.3353333333333332e-07,
"loss": 0.2287,
"step": 3000
},
{
"epoch": 428.57,
"eval_loss": 1.0533033609390259,
"eval_runtime": 465.8233,
"eval_samples_per_second": 1.099,
"eval_steps_per_second": 0.069,
"eval_wer": 61.516343825665864,
"step": 3000
},
{
"epoch": 432.14,
"learning_rate": 1.3186666666666666e-07,
"loss": 0.2231,
"step": 3025
},
{
"epoch": 435.71,
"learning_rate": 1.302e-07,
"loss": 0.2225,
"step": 3050
},
{
"epoch": 439.29,
"learning_rate": 1.2853333333333332e-07,
"loss": 0.216,
"step": 3075
},
{
"epoch": 442.86,
"learning_rate": 1.2686666666666665e-07,
"loss": 0.2099,
"step": 3100
},
{
"epoch": 446.43,
"learning_rate": 1.2519999999999998e-07,
"loss": 0.2053,
"step": 3125
},
{
"epoch": 450.0,
"learning_rate": 1.2353333333333334e-07,
"loss": 0.2034,
"step": 3150
},
{
"epoch": 453.57,
"learning_rate": 1.2186666666666665e-07,
"loss": 0.1979,
"step": 3175
},
{
"epoch": 457.14,
"learning_rate": 1.202e-07,
"loss": 0.1938,
"step": 3200
},
{
"epoch": 460.71,
"learning_rate": 1.1853333333333334e-07,
"loss": 0.19,
"step": 3225
},
{
"epoch": 464.29,
"learning_rate": 1.1686666666666665e-07,
"loss": 0.1877,
"step": 3250
},
{
"epoch": 467.86,
"learning_rate": 1.152e-07,
"loss": 0.1814,
"step": 3275
},
{
"epoch": 471.43,
"learning_rate": 1.1353333333333333e-07,
"loss": 0.1785,
"step": 3300
},
{
"epoch": 475.0,
"learning_rate": 1.1186666666666666e-07,
"loss": 0.1738,
"step": 3325
},
{
"epoch": 478.57,
"learning_rate": 1.102e-07,
"loss": 0.1716,
"step": 3350
},
{
"epoch": 482.14,
"learning_rate": 1.0853333333333333e-07,
"loss": 0.1666,
"step": 3375
},
{
"epoch": 485.71,
"learning_rate": 1.0686666666666666e-07,
"loss": 0.1663,
"step": 3400
},
{
"epoch": 489.29,
"learning_rate": 1.052e-07,
"loss": 0.1611,
"step": 3425
},
{
"epoch": 492.86,
"learning_rate": 1.0353333333333332e-07,
"loss": 0.158,
"step": 3450
},
{
"epoch": 496.43,
"learning_rate": 1.0186666666666667e-07,
"loss": 0.1556,
"step": 3475
},
{
"epoch": 500.0,
"learning_rate": 1.002e-07,
"loss": 0.1525,
"step": 3500
},
{
"epoch": 500.0,
"eval_loss": 1.1040765047073364,
"eval_runtime": 467.1311,
"eval_samples_per_second": 1.096,
"eval_steps_per_second": 0.069,
"eval_wer": 62.05357142857143,
"step": 3500
},
{
"epoch": 503.57,
"learning_rate": 9.853333333333333e-08,
"loss": 0.1499,
"step": 3525
},
{
"epoch": 507.14,
"learning_rate": 9.686666666666666e-08,
"loss": 0.1474,
"step": 3550
},
{
"epoch": 510.71,
"learning_rate": 9.52e-08,
"loss": 0.1429,
"step": 3575
},
{
"epoch": 514.29,
"learning_rate": 9.353333333333332e-08,
"loss": 0.1407,
"step": 3600
},
{
"epoch": 517.86,
"learning_rate": 9.186666666666667e-08,
"loss": 0.1394,
"step": 3625
},
{
"epoch": 521.43,
"learning_rate": 9.02e-08,
"loss": 0.136,
"step": 3650
},
{
"epoch": 525.0,
"learning_rate": 8.853333333333333e-08,
"loss": 0.1347,
"step": 3675
},
{
"epoch": 528.57,
"learning_rate": 8.686666666666666e-08,
"loss": 0.1307,
"step": 3700
},
{
"epoch": 532.14,
"learning_rate": 8.519999999999998e-08,
"loss": 0.1297,
"step": 3725
},
{
"epoch": 535.71,
"learning_rate": 8.353333333333333e-08,
"loss": 0.1282,
"step": 3750
},
{
"epoch": 539.29,
"learning_rate": 8.186666666666667e-08,
"loss": 0.1247,
"step": 3775
},
{
"epoch": 542.86,
"learning_rate": 8.019999999999999e-08,
"loss": 0.1225,
"step": 3800
},
{
"epoch": 546.43,
"learning_rate": 7.853333333333334e-08,
"loss": 0.1206,
"step": 3825
},
{
"epoch": 550.0,
"learning_rate": 7.686666666666667e-08,
"loss": 0.1197,
"step": 3850
},
{
"epoch": 553.57,
"learning_rate": 7.519999999999999e-08,
"loss": 0.117,
"step": 3875
},
{
"epoch": 557.14,
"learning_rate": 7.353333333333333e-08,
"loss": 0.1154,
"step": 3900
},
{
"epoch": 560.71,
"learning_rate": 7.186666666666666e-08,
"loss": 0.1128,
"step": 3925
},
{
"epoch": 564.29,
"learning_rate": 7.02e-08,
"loss": 0.1133,
"step": 3950
},
{
"epoch": 567.86,
"learning_rate": 6.853333333333334e-08,
"loss": 0.1091,
"step": 3975
},
{
"epoch": 571.43,
"learning_rate": 6.686666666666666e-08,
"loss": 0.1089,
"step": 4000
},
{
"epoch": 571.43,
"eval_loss": 1.1451234817504883,
"eval_runtime": 481.8401,
"eval_samples_per_second": 1.063,
"eval_steps_per_second": 0.066,
"eval_wer": 62.50756658595642,
"step": 4000
},
{
"epoch": 575.0,
"learning_rate": 6.519999999999999e-08,
"loss": 0.1069,
"step": 4025
},
{
"epoch": 578.57,
"learning_rate": 6.353333333333333e-08,
"loss": 0.1055,
"step": 4050
},
{
"epoch": 582.14,
"learning_rate": 6.186666666666666e-08,
"loss": 0.1043,
"step": 4075
},
{
"epoch": 585.71,
"learning_rate": 6.02e-08,
"loss": 0.1031,
"step": 4100
},
{
"epoch": 589.29,
"learning_rate": 5.8533333333333335e-08,
"loss": 0.1021,
"step": 4125
},
{
"epoch": 592.86,
"learning_rate": 5.6866666666666666e-08,
"loss": 0.1,
"step": 4150
},
{
"epoch": 596.43,
"learning_rate": 5.52e-08,
"loss": 0.0987,
"step": 4175
},
{
"epoch": 600.0,
"learning_rate": 5.353333333333333e-08,
"loss": 0.0984,
"step": 4200
},
{
"epoch": 603.57,
"learning_rate": 5.186666666666667e-08,
"loss": 0.0959,
"step": 4225
},
{
"epoch": 607.14,
"learning_rate": 5.02e-08,
"loss": 0.0957,
"step": 4250
},
{
"epoch": 610.71,
"learning_rate": 4.853333333333333e-08,
"loss": 0.094,
"step": 4275
},
{
"epoch": 614.29,
"learning_rate": 4.686666666666667e-08,
"loss": 0.0931,
"step": 4300
},
{
"epoch": 617.86,
"learning_rate": 4.52e-08,
"loss": 0.0945,
"step": 4325
},
{
"epoch": 621.43,
"learning_rate": 4.353333333333333e-08,
"loss": 0.0909,
"step": 4350
},
{
"epoch": 625.0,
"learning_rate": 4.1866666666666664e-08,
"loss": 0.0909,
"step": 4375
},
{
"epoch": 628.57,
"learning_rate": 4.02e-08,
"loss": 0.0885,
"step": 4400
},
{
"epoch": 632.14,
"learning_rate": 3.8533333333333334e-08,
"loss": 0.0893,
"step": 4425
},
{
"epoch": 635.71,
"learning_rate": 3.6866666666666666e-08,
"loss": 0.089,
"step": 4450
},
{
"epoch": 639.29,
"learning_rate": 3.52e-08,
"loss": 0.0875,
"step": 4475
},
{
"epoch": 642.86,
"learning_rate": 3.3533333333333336e-08,
"loss": 0.0871,
"step": 4500
},
{
"epoch": 642.86,
"eval_loss": 1.1704117059707642,
"eval_runtime": 470.4344,
"eval_samples_per_second": 1.088,
"eval_steps_per_second": 0.068,
"eval_wer": 62.93129539951574,
"step": 4500
},
{
"epoch": 646.43,
"learning_rate": 3.186666666666666e-08,
"loss": 0.0866,
"step": 4525
},
{
"epoch": 650.0,
"learning_rate": 3.02e-08,
"loss": 0.0849,
"step": 4550
},
{
"epoch": 653.57,
"learning_rate": 2.853333333333333e-08,
"loss": 0.0851,
"step": 4575
},
{
"epoch": 657.14,
"learning_rate": 2.6866666666666666e-08,
"loss": 0.0837,
"step": 4600
},
{
"epoch": 660.71,
"learning_rate": 2.52e-08,
"loss": 0.0841,
"step": 4625
},
{
"epoch": 664.29,
"learning_rate": 2.3533333333333332e-08,
"loss": 0.0836,
"step": 4650
},
{
"epoch": 667.86,
"learning_rate": 2.1866666666666667e-08,
"loss": 0.0826,
"step": 4675
},
{
"epoch": 671.43,
"learning_rate": 2.02e-08,
"loss": 0.0826,
"step": 4700
},
{
"epoch": 675.0,
"learning_rate": 1.8533333333333334e-08,
"loss": 0.0822,
"step": 4725
},
{
"epoch": 678.57,
"learning_rate": 1.6866666666666665e-08,
"loss": 0.0823,
"step": 4750
},
{
"epoch": 682.14,
"learning_rate": 1.52e-08,
"loss": 0.0818,
"step": 4775
},
{
"epoch": 685.71,
"learning_rate": 1.3533333333333332e-08,
"loss": 0.0804,
"step": 4800
},
{
"epoch": 689.29,
"learning_rate": 1.1866666666666665e-08,
"loss": 0.0808,
"step": 4825
},
{
"epoch": 692.86,
"learning_rate": 1.02e-08,
"loss": 0.0806,
"step": 4850
},
{
"epoch": 696.43,
"learning_rate": 8.533333333333334e-09,
"loss": 0.0817,
"step": 4875
},
{
"epoch": 700.0,
"learning_rate": 6.866666666666666e-09,
"loss": 0.0805,
"step": 4900
},
{
"epoch": 703.57,
"learning_rate": 5.1999999999999994e-09,
"loss": 0.0797,
"step": 4925
},
{
"epoch": 707.14,
"learning_rate": 3.533333333333333e-09,
"loss": 0.0802,
"step": 4950
},
{
"epoch": 710.71,
"learning_rate": 1.8666666666666664e-09,
"loss": 0.08,
"step": 4975
},
{
"epoch": 714.29,
"learning_rate": 1.9999999999999998e-10,
"loss": 0.0797,
"step": 5000
},
{
"epoch": 714.29,
"eval_loss": 1.1790930032730103,
"eval_runtime": 463.678,
"eval_samples_per_second": 1.104,
"eval_steps_per_second": 0.069,
"eval_wer": 63.16585956416465,
"step": 5000
},
{
"epoch": 717.86,
"learning_rate": 1.0289017341040463e-08,
"loss": 0.0796,
"step": 5025
},
{
"epoch": 721.43,
"learning_rate": 8.84393063583815e-09,
"loss": 0.0804,
"step": 5050
},
{
"epoch": 725.0,
"learning_rate": 7.398843930635838e-09,
"loss": 0.0799,
"step": 5075
},
{
"epoch": 728.57,
"learning_rate": 5.953757225433526e-09,
"loss": 0.0799,
"step": 5100
},
{
"epoch": 728.57,
"eval_loss": 1.1799688339233398,
"eval_runtime": 463.8166,
"eval_samples_per_second": 1.104,
"eval_steps_per_second": 0.069,
"eval_wer": 63.10532687651331,
"step": 5100
},
{
"epoch": 732.14,
"learning_rate": 4.508670520231213e-09,
"loss": 0.079,
"step": 5125
},
{
"epoch": 735.71,
"learning_rate": 3.0635838150289017e-09,
"loss": 0.0791,
"step": 5150
},
{
"epoch": 739.29,
"learning_rate": 1.6184971098265895e-09,
"loss": 0.0789,
"step": 5175
},
{
"epoch": 742.86,
"learning_rate": 1.7341040462427745e-10,
"loss": 0.0791,
"step": 5200
},
{
"epoch": 742.86,
"eval_loss": 1.1803033351898193,
"eval_runtime": 463.7133,
"eval_samples_per_second": 1.104,
"eval_steps_per_second": 0.069,
"eval_wer": 63.112893462469735,
"step": 5200
},
{
"epoch": 742.86,
"step": 5200,
"total_flos": 9.240100498685952e+19,
"train_loss": 0.0030569748695080095,
"train_runtime": 1690.7628,
"train_samples_per_second": 196.834,
"train_steps_per_second": 3.076
}
],
"max_steps": 5200,
"num_train_epochs": 743,
"total_flos": 9.240100498685952e+19,
"trial_name": null,
"trial_params": null
}