{ "best_metric": 63.10532687651331, "best_model_checkpoint": "./checkpoint-5100", "epoch": 742.8571428571429, "global_step": 5200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.57, "learning_rate": 1.32e-07, "loss": 2.8512, "step": 25 }, { "epoch": 7.14, "learning_rate": 2.8199999999999996e-07, "loss": 2.7082, "step": 50 }, { "epoch": 10.71, "learning_rate": 2.9305263157894735e-07, "loss": 2.3515, "step": 75 }, { "epoch": 14.29, "learning_rate": 2.851578947368421e-07, "loss": 2.0871, "step": 100 }, { "epoch": 14.29, "eval_loss": 2.0101583003997803, "eval_runtime": 720.7636, "eval_samples_per_second": 0.71, "eval_steps_per_second": 0.044, "eval_wer": 230.27391041162227, "step": 100 }, { "epoch": 17.86, "learning_rate": 2.7726315789473684e-07, "loss": 1.8622, "step": 125 }, { "epoch": 21.43, "learning_rate": 2.693684210526316e-07, "loss": 1.7104, "step": 150 }, { "epoch": 25.0, "learning_rate": 2.614736842105263e-07, "loss": 1.5736, "step": 175 }, { "epoch": 28.57, "learning_rate": 2.53578947368421e-07, "loss": 1.465, "step": 200 }, { "epoch": 28.57, "eval_loss": 1.4968725442886353, "eval_runtime": 628.5425, "eval_samples_per_second": 0.815, "eval_steps_per_second": 0.051, "eval_wer": 137.24273607748182, "step": 200 }, { "epoch": 32.14, "learning_rate": 2.4568421052631577e-07, "loss": 1.3669, "step": 225 }, { "epoch": 35.71, "learning_rate": 2.3778947368421054e-07, "loss": 1.2898, "step": 250 }, { "epoch": 39.29, "learning_rate": 2.2989473684210523e-07, "loss": 1.2205, "step": 275 }, { "epoch": 42.86, "learning_rate": 2.2199999999999998e-07, "loss": 1.1617, "step": 300 }, { "epoch": 42.86, "eval_loss": 1.2715740203857422, "eval_runtime": 499.3378, "eval_samples_per_second": 1.025, "eval_steps_per_second": 0.064, "eval_wer": 76.32415254237289, "step": 300 }, { "epoch": 46.43, "learning_rate": 2.1410526315789472e-07, "loss": 1.1091, "step": 325 }, { "epoch": 50.0, "learning_rate": 2.0621052631578947e-07, "loss": 1.0738, "step": 350 }, { "epoch": 53.57, "learning_rate": 1.9831578947368419e-07, "loss": 1.033, "step": 375 }, { "epoch": 57.14, "learning_rate": 1.9042105263157893e-07, "loss": 1.0019, "step": 400 }, { "epoch": 57.14, "eval_loss": 1.16450834274292, "eval_runtime": 480.3701, "eval_samples_per_second": 1.066, "eval_steps_per_second": 0.067, "eval_wer": 71.37560532687651, "step": 400 }, { "epoch": 60.71, "learning_rate": 1.8252631578947368e-07, "loss": 0.9712, "step": 425 }, { "epoch": 64.29, "learning_rate": 1.7463157894736842e-07, "loss": 0.9437, "step": 450 }, { "epoch": 67.86, "learning_rate": 1.6673684210526314e-07, "loss": 0.9215, "step": 475 }, { "epoch": 71.43, "learning_rate": 1.588421052631579e-07, "loss": 0.9052, "step": 500 }, { "epoch": 71.43, "eval_loss": 1.1051170825958252, "eval_runtime": 486.3374, "eval_samples_per_second": 1.053, "eval_steps_per_second": 0.066, "eval_wer": 69.78662227602905, "step": 500 }, { "epoch": 75.0, "learning_rate": 1.5094736842105263e-07, "loss": 0.8773, "step": 525 }, { "epoch": 78.57, "learning_rate": 1.4305263157894735e-07, "loss": 0.8643, "step": 550 }, { "epoch": 82.14, "learning_rate": 1.351578947368421e-07, "loss": 0.8449, "step": 575 }, { "epoch": 85.71, "learning_rate": 1.2726315789473684e-07, "loss": 0.8334, "step": 600 }, { "epoch": 85.71, "eval_loss": 1.0691100358963013, "eval_runtime": 478.3463, "eval_samples_per_second": 1.07, "eval_steps_per_second": 0.067, "eval_wer": 68.26573849878935, "step": 600 }, { "epoch": 89.29, "learning_rate": 1.1936842105263156e-07, "loss": 0.8132, "step": 625 }, { "epoch": 92.86, "learning_rate": 1.1147368421052631e-07, "loss": 0.8058, "step": 650 }, { "epoch": 96.43, "learning_rate": 1.0357894736842104e-07, "loss": 0.7913, "step": 675 }, { "epoch": 100.0, "learning_rate": 9.568421052631579e-08, "loss": 0.7838, "step": 700 }, { "epoch": 100.0, "eval_loss": 1.0482958555221558, "eval_runtime": 478.3861, "eval_samples_per_second": 1.07, "eval_steps_per_second": 0.067, "eval_wer": 67.16858353510897, "step": 700 }, { "epoch": 103.57, "learning_rate": 8.778947368421052e-08, "loss": 0.7768, "step": 725 }, { "epoch": 107.14, "learning_rate": 7.989473684210526e-08, "loss": 0.7673, "step": 750 }, { "epoch": 110.71, "learning_rate": 7.2e-08, "loss": 0.7643, "step": 775 }, { "epoch": 114.29, "learning_rate": 6.410526315789473e-08, "loss": 0.7539, "step": 800 }, { "epoch": 114.29, "eval_loss": 1.0362622737884521, "eval_runtime": 484.8883, "eval_samples_per_second": 1.056, "eval_steps_per_second": 0.066, "eval_wer": 66.41949152542372, "step": 800 }, { "epoch": 117.86, "learning_rate": 5.621052631578947e-08, "loss": 0.7527, "step": 825 }, { "epoch": 121.43, "learning_rate": 4.8315789473684206e-08, "loss": 0.7441, "step": 850 }, { "epoch": 125.0, "learning_rate": 4.0421052631578945e-08, "loss": 0.7417, "step": 875 }, { "epoch": 128.57, "learning_rate": 3.2526315789473684e-08, "loss": 0.7377, "step": 900 }, { "epoch": 128.57, "eval_loss": 1.0297424793243408, "eval_runtime": 471.9816, "eval_samples_per_second": 1.085, "eval_steps_per_second": 0.068, "eval_wer": 66.20006053268766, "step": 900 }, { "epoch": 132.14, "learning_rate": 2.463157894736842e-08, "loss": 0.7387, "step": 925 }, { "epoch": 135.71, "learning_rate": 1.673684210526316e-08, "loss": 0.7329, "step": 950 }, { "epoch": 139.29, "learning_rate": 8.842105263157895e-09, "loss": 0.7312, "step": 975 }, { "epoch": 142.86, "learning_rate": 9.473684210526316e-10, "loss": 0.7325, "step": 1000 }, { "epoch": 142.86, "eval_loss": 1.0276601314544678, "eval_runtime": 477.2948, "eval_samples_per_second": 1.073, "eval_steps_per_second": 0.067, "eval_wer": 66.00332929782083, "step": 1000 }, { "epoch": 146.43, "learning_rate": 1.5046153846153844e-07, "loss": 0.7238, "step": 1025 }, { "epoch": 150.0, "learning_rate": 1.466153846153846e-07, "loss": 0.7163, "step": 1050 }, { "epoch": 153.57, "learning_rate": 1.4276923076923076e-07, "loss": 0.7028, "step": 1075 }, { "epoch": 157.14, "learning_rate": 1.389230769230769e-07, "loss": 0.6952, "step": 1100 }, { "epoch": 157.14, "eval_loss": 1.0121634006500244, "eval_runtime": 471.3724, "eval_samples_per_second": 1.086, "eval_steps_per_second": 0.068, "eval_wer": 65.05750605326877, "step": 1100 }, { "epoch": 160.71, "learning_rate": 1.3507692307692308e-07, "loss": 0.6843, "step": 1125 }, { "epoch": 164.29, "learning_rate": 1.3123076923076923e-07, "loss": 0.6699, "step": 1150 }, { "epoch": 167.86, "learning_rate": 1.2738461538461538e-07, "loss": 0.6671, "step": 1175 }, { "epoch": 171.43, "learning_rate": 1.2353846153846153e-07, "loss": 0.6531, "step": 1200 }, { "epoch": 171.43, "eval_loss": 1.0014406442642212, "eval_runtime": 475.4519, "eval_samples_per_second": 1.077, "eval_steps_per_second": 0.067, "eval_wer": 64.42191283292978, "step": 1200 }, { "epoch": 175.0, "learning_rate": 1.1969230769230767e-07, "loss": 0.6487, "step": 1225 }, { "epoch": 178.57, "learning_rate": 1.1584615384615385e-07, "loss": 0.6369, "step": 1250 }, { "epoch": 182.14, "learning_rate": 1.12e-07, "loss": 0.6336, "step": 1275 }, { "epoch": 185.71, "learning_rate": 1.0815384615384614e-07, "loss": 0.6189, "step": 1300 }, { "epoch": 185.71, "eval_loss": 0.9944669008255005, "eval_runtime": 470.4039, "eval_samples_per_second": 1.088, "eval_steps_per_second": 0.068, "eval_wer": 63.79388619854721, "step": 1300 }, { "epoch": 189.29, "learning_rate": 1.043076923076923e-07, "loss": 0.6213, "step": 1325 }, { "epoch": 192.86, "learning_rate": 1.0046153846153845e-07, "loss": 0.608, "step": 1350 }, { "epoch": 196.43, "learning_rate": 9.66153846153846e-08, "loss": 0.6029, "step": 1375 }, { "epoch": 200.0, "learning_rate": 9.276923076923078e-08, "loss": 0.5993, "step": 1400 }, { "epoch": 200.0, "eval_loss": 0.9895604252815247, "eval_runtime": 473.2431, "eval_samples_per_second": 1.082, "eval_steps_per_second": 0.068, "eval_wer": 63.35502421307506, "step": 1400 }, { "epoch": 203.57, "learning_rate": 8.892307692307692e-08, "loss": 0.593, "step": 1425 }, { "epoch": 207.14, "learning_rate": 8.507692307692307e-08, "loss": 0.5817, "step": 1450 }, { "epoch": 210.71, "learning_rate": 8.123076923076922e-08, "loss": 0.5782, "step": 1475 }, { "epoch": 214.29, "learning_rate": 7.738461538461538e-08, "loss": 0.5757, "step": 1500 }, { "epoch": 214.29, "eval_loss": 0.9864457845687866, "eval_runtime": 474.414, "eval_samples_per_second": 1.079, "eval_steps_per_second": 0.067, "eval_wer": 63.22639225181598, "step": 1500 }, { "epoch": 217.86, "learning_rate": 7.353846153846153e-08, "loss": 0.5706, "step": 1525 }, { "epoch": 221.43, "learning_rate": 6.969230769230769e-08, "loss": 0.5624, "step": 1550 }, { "epoch": 225.0, "learning_rate": 6.584615384615385e-08, "loss": 0.5638, "step": 1575 }, { "epoch": 228.57, "learning_rate": 6.2e-08, "loss": 0.5601, "step": 1600 }, { "epoch": 228.57, "eval_loss": 0.9844600558280945, "eval_runtime": 478.7212, "eval_samples_per_second": 1.07, "eval_steps_per_second": 0.067, "eval_wer": 62.916162227602904, "step": 1600 }, { "epoch": 232.14, "learning_rate": 5.815384615384615e-08, "loss": 0.5537, "step": 1625 }, { "epoch": 235.71, "learning_rate": 5.430769230769231e-08, "loss": 0.5488, "step": 1650 }, { "epoch": 239.29, "learning_rate": 5.0461538461538456e-08, "loss": 0.5479, "step": 1675 }, { "epoch": 242.86, "learning_rate": 4.661538461538461e-08, "loss": 0.5482, "step": 1700 }, { "epoch": 242.86, "eval_loss": 0.9833234548568726, "eval_runtime": 488.3079, "eval_samples_per_second": 1.049, "eval_steps_per_second": 0.066, "eval_wer": 62.817796610169495, "step": 1700 }, { "epoch": 246.43, "learning_rate": 4.2769230769230765e-08, "loss": 0.5441, "step": 1725 }, { "epoch": 250.0, "learning_rate": 3.892307692307692e-08, "loss": 0.54, "step": 1750 }, { "epoch": 253.57, "learning_rate": 3.5076923076923074e-08, "loss": 0.538, "step": 1775 }, { "epoch": 257.14, "learning_rate": 3.123076923076923e-08, "loss": 0.5382, "step": 1800 }, { "epoch": 257.14, "eval_loss": 0.9826769828796387, "eval_runtime": 486.3686, "eval_samples_per_second": 1.053, "eval_steps_per_second": 0.066, "eval_wer": 62.84049636803874, "step": 1800 }, { "epoch": 260.71, "learning_rate": 2.7384615384615387e-08, "loss": 0.5343, "step": 1825 }, { "epoch": 264.29, "learning_rate": 2.3538461538461535e-08, "loss": 0.5313, "step": 1850 }, { "epoch": 267.86, "learning_rate": 1.9692307692307693e-08, "loss": 0.5318, "step": 1875 }, { "epoch": 271.43, "learning_rate": 1.5846153846153844e-08, "loss": 0.5325, "step": 1900 }, { "epoch": 271.43, "eval_loss": 0.9823360443115234, "eval_runtime": 483.6609, "eval_samples_per_second": 1.059, "eval_steps_per_second": 0.066, "eval_wer": 62.76483050847458, "step": 1900 }, { "epoch": 275.0, "learning_rate": 1.2e-08, "loss": 0.529, "step": 1925 }, { "epoch": 278.57, "learning_rate": 8.153846153846154e-09, "loss": 0.5294, "step": 1950 }, { "epoch": 282.14, "learning_rate": 4.307692307692307e-09, "loss": 0.525, "step": 1975 }, { "epoch": 285.71, "learning_rate": 4.615384615384615e-10, "loss": 0.5287, "step": 2000 }, { "epoch": 285.71, "eval_loss": 0.9822061061859131, "eval_runtime": 484.044, "eval_samples_per_second": 1.058, "eval_steps_per_second": 0.066, "eval_wer": 62.817796610169495, "step": 2000 }, { "epoch": 289.29, "learning_rate": 1.9853333333333334e-07, "loss": 0.523, "step": 2025 }, { "epoch": 292.86, "learning_rate": 1.9686666666666667e-07, "loss": 0.5133, "step": 2050 }, { "epoch": 296.43, "learning_rate": 1.9519999999999997e-07, "loss": 0.4999, "step": 2075 }, { "epoch": 300.0, "learning_rate": 1.935333333333333e-07, "loss": 0.4924, "step": 2100 }, { "epoch": 303.57, "learning_rate": 1.9186666666666666e-07, "loss": 0.4855, "step": 2125 }, { "epoch": 307.14, "learning_rate": 1.902e-07, "loss": 0.4701, "step": 2150 }, { "epoch": 310.71, "learning_rate": 1.8853333333333333e-07, "loss": 0.4601, "step": 2175 }, { "epoch": 314.29, "learning_rate": 1.8686666666666669e-07, "loss": 0.4525, "step": 2200 }, { "epoch": 317.86, "learning_rate": 1.852e-07, "loss": 0.4448, "step": 2225 }, { "epoch": 321.43, "learning_rate": 1.8353333333333332e-07, "loss": 0.4364, "step": 2250 }, { "epoch": 325.0, "learning_rate": 1.8186666666666665e-07, "loss": 0.4232, "step": 2275 }, { "epoch": 328.57, "learning_rate": 1.8019999999999999e-07, "loss": 0.4163, "step": 2300 }, { "epoch": 332.14, "learning_rate": 1.7853333333333334e-07, "loss": 0.4089, "step": 2325 }, { "epoch": 335.71, "learning_rate": 1.7686666666666668e-07, "loss": 0.4031, "step": 2350 }, { "epoch": 339.29, "learning_rate": 1.7519999999999998e-07, "loss": 0.3887, "step": 2375 }, { "epoch": 342.86, "learning_rate": 1.735333333333333e-07, "loss": 0.3826, "step": 2400 }, { "epoch": 346.43, "learning_rate": 1.7186666666666667e-07, "loss": 0.3766, "step": 2425 }, { "epoch": 350.0, "learning_rate": 1.702e-07, "loss": 0.3647, "step": 2450 }, { "epoch": 353.57, "learning_rate": 1.6853333333333333e-07, "loss": 0.3601, "step": 2475 }, { "epoch": 357.14, "learning_rate": 1.6686666666666664e-07, "loss": 0.3494, "step": 2500 }, { "epoch": 357.14, "eval_loss": 1.0025562047958374, "eval_runtime": 489.174, "eval_samples_per_second": 1.047, "eval_steps_per_second": 0.065, "eval_wer": 61.61470944309927, "step": 2500 }, { "epoch": 360.71, "learning_rate": 1.652e-07, "loss": 0.3448, "step": 2525 }, { "epoch": 364.29, "learning_rate": 1.6353333333333333e-07, "loss": 0.3367, "step": 2550 }, { "epoch": 367.86, "learning_rate": 1.6186666666666666e-07, "loss": 0.3308, "step": 2575 }, { "epoch": 371.43, "learning_rate": 1.602e-07, "loss": 0.3226, "step": 2600 }, { "epoch": 375.0, "learning_rate": 1.5853333333333335e-07, "loss": 0.3165, "step": 2625 }, { "epoch": 378.57, "learning_rate": 1.5686666666666666e-07, "loss": 0.3099, "step": 2650 }, { "epoch": 382.14, "learning_rate": 1.552e-07, "loss": 0.3021, "step": 2675 }, { "epoch": 385.71, "learning_rate": 1.5353333333333332e-07, "loss": 0.2964, "step": 2700 }, { "epoch": 389.29, "learning_rate": 1.5186666666666668e-07, "loss": 0.2901, "step": 2725 }, { "epoch": 392.86, "learning_rate": 1.502e-07, "loss": 0.284, "step": 2750 }, { "epoch": 396.43, "learning_rate": 1.4853333333333334e-07, "loss": 0.279, "step": 2775 }, { "epoch": 400.0, "learning_rate": 1.4686666666666667e-07, "loss": 0.2715, "step": 2800 }, { "epoch": 403.57, "learning_rate": 1.4519999999999998e-07, "loss": 0.2646, "step": 2825 }, { "epoch": 407.14, "learning_rate": 1.4353333333333333e-07, "loss": 0.2606, "step": 2850 }, { "epoch": 410.71, "learning_rate": 1.4186666666666667e-07, "loss": 0.2564, "step": 2875 }, { "epoch": 414.29, "learning_rate": 1.402e-07, "loss": 0.2486, "step": 2900 }, { "epoch": 417.86, "learning_rate": 1.3853333333333333e-07, "loss": 0.2463, "step": 2925 }, { "epoch": 421.43, "learning_rate": 1.3686666666666666e-07, "loss": 0.239, "step": 2950 }, { "epoch": 425.0, "learning_rate": 1.352e-07, "loss": 0.2341, "step": 2975 }, { "epoch": 428.57, "learning_rate": 1.3353333333333332e-07, "loss": 0.2287, "step": 3000 }, { "epoch": 428.57, "eval_loss": 1.0533033609390259, "eval_runtime": 465.8233, "eval_samples_per_second": 1.099, "eval_steps_per_second": 0.069, "eval_wer": 61.516343825665864, "step": 3000 }, { "epoch": 432.14, "learning_rate": 1.3186666666666666e-07, "loss": 0.2231, "step": 3025 }, { "epoch": 435.71, "learning_rate": 1.302e-07, "loss": 0.2225, "step": 3050 }, { "epoch": 439.29, "learning_rate": 1.2853333333333332e-07, "loss": 0.216, "step": 3075 }, { "epoch": 442.86, "learning_rate": 1.2686666666666665e-07, "loss": 0.2099, "step": 3100 }, { "epoch": 446.43, "learning_rate": 1.2519999999999998e-07, "loss": 0.2053, "step": 3125 }, { "epoch": 450.0, "learning_rate": 1.2353333333333334e-07, "loss": 0.2034, "step": 3150 }, { "epoch": 453.57, "learning_rate": 1.2186666666666665e-07, "loss": 0.1979, "step": 3175 }, { "epoch": 457.14, "learning_rate": 1.202e-07, "loss": 0.1938, "step": 3200 }, { "epoch": 460.71, "learning_rate": 1.1853333333333334e-07, "loss": 0.19, "step": 3225 }, { "epoch": 464.29, "learning_rate": 1.1686666666666665e-07, "loss": 0.1877, "step": 3250 }, { "epoch": 467.86, "learning_rate": 1.152e-07, "loss": 0.1814, "step": 3275 }, { "epoch": 471.43, "learning_rate": 1.1353333333333333e-07, "loss": 0.1785, "step": 3300 }, { "epoch": 475.0, "learning_rate": 1.1186666666666666e-07, "loss": 0.1738, "step": 3325 }, { "epoch": 478.57, "learning_rate": 1.102e-07, "loss": 0.1716, "step": 3350 }, { "epoch": 482.14, "learning_rate": 1.0853333333333333e-07, "loss": 0.1666, "step": 3375 }, { "epoch": 485.71, "learning_rate": 1.0686666666666666e-07, "loss": 0.1663, "step": 3400 }, { "epoch": 489.29, "learning_rate": 1.052e-07, "loss": 0.1611, "step": 3425 }, { "epoch": 492.86, "learning_rate": 1.0353333333333332e-07, "loss": 0.158, "step": 3450 }, { "epoch": 496.43, "learning_rate": 1.0186666666666667e-07, "loss": 0.1556, "step": 3475 }, { "epoch": 500.0, "learning_rate": 1.002e-07, "loss": 0.1525, "step": 3500 }, { "epoch": 500.0, "eval_loss": 1.1040765047073364, "eval_runtime": 467.1311, "eval_samples_per_second": 1.096, "eval_steps_per_second": 0.069, "eval_wer": 62.05357142857143, "step": 3500 }, { "epoch": 503.57, "learning_rate": 9.853333333333333e-08, "loss": 0.1499, "step": 3525 }, { "epoch": 507.14, "learning_rate": 9.686666666666666e-08, "loss": 0.1474, "step": 3550 }, { "epoch": 510.71, "learning_rate": 9.52e-08, "loss": 0.1429, "step": 3575 }, { "epoch": 514.29, "learning_rate": 9.353333333333332e-08, "loss": 0.1407, "step": 3600 }, { "epoch": 517.86, "learning_rate": 9.186666666666667e-08, "loss": 0.1394, "step": 3625 }, { "epoch": 521.43, "learning_rate": 9.02e-08, "loss": 0.136, "step": 3650 }, { "epoch": 525.0, "learning_rate": 8.853333333333333e-08, "loss": 0.1347, "step": 3675 }, { "epoch": 528.57, "learning_rate": 8.686666666666666e-08, "loss": 0.1307, "step": 3700 }, { "epoch": 532.14, "learning_rate": 8.519999999999998e-08, "loss": 0.1297, "step": 3725 }, { "epoch": 535.71, "learning_rate": 8.353333333333333e-08, "loss": 0.1282, "step": 3750 }, { "epoch": 539.29, "learning_rate": 8.186666666666667e-08, "loss": 0.1247, "step": 3775 }, { "epoch": 542.86, "learning_rate": 8.019999999999999e-08, "loss": 0.1225, "step": 3800 }, { "epoch": 546.43, "learning_rate": 7.853333333333334e-08, "loss": 0.1206, "step": 3825 }, { "epoch": 550.0, "learning_rate": 7.686666666666667e-08, "loss": 0.1197, "step": 3850 }, { "epoch": 553.57, "learning_rate": 7.519999999999999e-08, "loss": 0.117, "step": 3875 }, { "epoch": 557.14, "learning_rate": 7.353333333333333e-08, "loss": 0.1154, "step": 3900 }, { "epoch": 560.71, "learning_rate": 7.186666666666666e-08, "loss": 0.1128, "step": 3925 }, { "epoch": 564.29, "learning_rate": 7.02e-08, "loss": 0.1133, "step": 3950 }, { "epoch": 567.86, "learning_rate": 6.853333333333334e-08, "loss": 0.1091, "step": 3975 }, { "epoch": 571.43, "learning_rate": 6.686666666666666e-08, "loss": 0.1089, "step": 4000 }, { "epoch": 571.43, "eval_loss": 1.1451234817504883, "eval_runtime": 481.8401, "eval_samples_per_second": 1.063, "eval_steps_per_second": 0.066, "eval_wer": 62.50756658595642, "step": 4000 }, { "epoch": 575.0, "learning_rate": 6.519999999999999e-08, "loss": 0.1069, "step": 4025 }, { "epoch": 578.57, "learning_rate": 6.353333333333333e-08, "loss": 0.1055, "step": 4050 }, { "epoch": 582.14, "learning_rate": 6.186666666666666e-08, "loss": 0.1043, "step": 4075 }, { "epoch": 585.71, "learning_rate": 6.02e-08, "loss": 0.1031, "step": 4100 }, { "epoch": 589.29, "learning_rate": 5.8533333333333335e-08, "loss": 0.1021, "step": 4125 }, { "epoch": 592.86, "learning_rate": 5.6866666666666666e-08, "loss": 0.1, "step": 4150 }, { "epoch": 596.43, "learning_rate": 5.52e-08, "loss": 0.0987, "step": 4175 }, { "epoch": 600.0, "learning_rate": 5.353333333333333e-08, "loss": 0.0984, "step": 4200 }, { "epoch": 603.57, "learning_rate": 5.186666666666667e-08, "loss": 0.0959, "step": 4225 }, { "epoch": 607.14, "learning_rate": 5.02e-08, "loss": 0.0957, "step": 4250 }, { "epoch": 610.71, "learning_rate": 4.853333333333333e-08, "loss": 0.094, "step": 4275 }, { "epoch": 614.29, "learning_rate": 4.686666666666667e-08, "loss": 0.0931, "step": 4300 }, { "epoch": 617.86, "learning_rate": 4.52e-08, "loss": 0.0945, "step": 4325 }, { "epoch": 621.43, "learning_rate": 4.353333333333333e-08, "loss": 0.0909, "step": 4350 }, { "epoch": 625.0, "learning_rate": 4.1866666666666664e-08, "loss": 0.0909, "step": 4375 }, { "epoch": 628.57, "learning_rate": 4.02e-08, "loss": 0.0885, "step": 4400 }, { "epoch": 632.14, "learning_rate": 3.8533333333333334e-08, "loss": 0.0893, "step": 4425 }, { "epoch": 635.71, "learning_rate": 3.6866666666666666e-08, "loss": 0.089, "step": 4450 }, { "epoch": 639.29, "learning_rate": 3.52e-08, "loss": 0.0875, "step": 4475 }, { "epoch": 642.86, "learning_rate": 3.3533333333333336e-08, "loss": 0.0871, "step": 4500 }, { "epoch": 642.86, "eval_loss": 1.1704117059707642, "eval_runtime": 470.4344, "eval_samples_per_second": 1.088, "eval_steps_per_second": 0.068, "eval_wer": 62.93129539951574, "step": 4500 }, { "epoch": 646.43, "learning_rate": 3.186666666666666e-08, "loss": 0.0866, "step": 4525 }, { "epoch": 650.0, "learning_rate": 3.02e-08, "loss": 0.0849, "step": 4550 }, { "epoch": 653.57, "learning_rate": 2.853333333333333e-08, "loss": 0.0851, "step": 4575 }, { "epoch": 657.14, "learning_rate": 2.6866666666666666e-08, "loss": 0.0837, "step": 4600 }, { "epoch": 660.71, "learning_rate": 2.52e-08, "loss": 0.0841, "step": 4625 }, { "epoch": 664.29, "learning_rate": 2.3533333333333332e-08, "loss": 0.0836, "step": 4650 }, { "epoch": 667.86, "learning_rate": 2.1866666666666667e-08, "loss": 0.0826, "step": 4675 }, { "epoch": 671.43, "learning_rate": 2.02e-08, "loss": 0.0826, "step": 4700 }, { "epoch": 675.0, "learning_rate": 1.8533333333333334e-08, "loss": 0.0822, "step": 4725 }, { "epoch": 678.57, "learning_rate": 1.6866666666666665e-08, "loss": 0.0823, "step": 4750 }, { "epoch": 682.14, "learning_rate": 1.52e-08, "loss": 0.0818, "step": 4775 }, { "epoch": 685.71, "learning_rate": 1.3533333333333332e-08, "loss": 0.0804, "step": 4800 }, { "epoch": 689.29, "learning_rate": 1.1866666666666665e-08, "loss": 0.0808, "step": 4825 }, { "epoch": 692.86, "learning_rate": 1.02e-08, "loss": 0.0806, "step": 4850 }, { "epoch": 696.43, "learning_rate": 8.533333333333334e-09, "loss": 0.0817, "step": 4875 }, { "epoch": 700.0, "learning_rate": 6.866666666666666e-09, "loss": 0.0805, "step": 4900 }, { "epoch": 703.57, "learning_rate": 5.1999999999999994e-09, "loss": 0.0797, "step": 4925 }, { "epoch": 707.14, "learning_rate": 3.533333333333333e-09, "loss": 0.0802, "step": 4950 }, { "epoch": 710.71, "learning_rate": 1.8666666666666664e-09, "loss": 0.08, "step": 4975 }, { "epoch": 714.29, "learning_rate": 1.9999999999999998e-10, "loss": 0.0797, "step": 5000 }, { "epoch": 714.29, "eval_loss": 1.1790930032730103, "eval_runtime": 463.678, "eval_samples_per_second": 1.104, "eval_steps_per_second": 0.069, "eval_wer": 63.16585956416465, "step": 5000 }, { "epoch": 717.86, "learning_rate": 1.0289017341040463e-08, "loss": 0.0796, "step": 5025 }, { "epoch": 721.43, "learning_rate": 8.84393063583815e-09, "loss": 0.0804, "step": 5050 }, { "epoch": 725.0, "learning_rate": 7.398843930635838e-09, "loss": 0.0799, "step": 5075 }, { "epoch": 728.57, "learning_rate": 5.953757225433526e-09, "loss": 0.0799, "step": 5100 }, { "epoch": 728.57, "eval_loss": 1.1799688339233398, "eval_runtime": 463.8166, "eval_samples_per_second": 1.104, "eval_steps_per_second": 0.069, "eval_wer": 63.10532687651331, "step": 5100 }, { "epoch": 732.14, "learning_rate": 4.508670520231213e-09, "loss": 0.079, "step": 5125 }, { "epoch": 735.71, "learning_rate": 3.0635838150289017e-09, "loss": 0.0791, "step": 5150 }, { "epoch": 739.29, "learning_rate": 1.6184971098265895e-09, "loss": 0.0789, "step": 5175 }, { "epoch": 742.86, "learning_rate": 1.7341040462427745e-10, "loss": 0.0791, "step": 5200 }, { "epoch": 742.86, "eval_loss": 1.1803033351898193, "eval_runtime": 463.7133, "eval_samples_per_second": 1.104, "eval_steps_per_second": 0.069, "eval_wer": 63.112893462469735, "step": 5200 }, { "epoch": 742.86, "step": 5200, "total_flos": 9.240100498685952e+19, "train_loss": 0.0030569748695080095, "train_runtime": 1690.7628, "train_samples_per_second": 196.834, "train_steps_per_second": 3.076 } ], "max_steps": 5200, "num_train_epochs": 743, "total_flos": 9.240100498685952e+19, "trial_name": null, "trial_params": null }