|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.8311170212765957, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.999, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7998, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.6289, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1e-05, |
|
"loss": 0.5845, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.948979591836737e-06, |
|
"loss": 0.5564, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.89795918367347e-06, |
|
"loss": 0.5448, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.846938775510205e-06, |
|
"loss": 0.5098, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.795918367346939e-06, |
|
"loss": 0.5203, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.744897959183674e-06, |
|
"loss": 0.5095, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.693877551020408e-06, |
|
"loss": 0.5024, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 0.507320761680603, |
|
"eval_runtime": 183.288, |
|
"eval_samples_per_second": 5.412, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.2948083939915912, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.642857142857144e-06, |
|
"loss": 0.4942, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.591836734693878e-06, |
|
"loss": 0.4981, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.540816326530612e-06, |
|
"loss": 0.4824, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.489795918367348e-06, |
|
"loss": 0.48, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.438775510204082e-06, |
|
"loss": 0.4599, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.387755102040818e-06, |
|
"loss": 0.4482, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.336734693877552e-06, |
|
"loss": 0.4674, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.285714285714288e-06, |
|
"loss": 0.4648, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.234693877551022e-06, |
|
"loss": 0.4611, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.183673469387756e-06, |
|
"loss": 0.4684, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 0.46387979388237, |
|
"eval_runtime": 182.1295, |
|
"eval_samples_per_second": 5.447, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.2783982515604453, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.13265306122449e-06, |
|
"loss": 0.4524, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.081632653061225e-06, |
|
"loss": 0.4511, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.03061224489796e-06, |
|
"loss": 0.4479, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.979591836734695e-06, |
|
"loss": 0.4444, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 0.4537, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.877551020408163e-06, |
|
"loss": 0.4349, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.826530612244899e-06, |
|
"loss": 0.4395, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.775510204081633e-06, |
|
"loss": 0.4236, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.724489795918369e-06, |
|
"loss": 0.428, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.673469387755103e-06, |
|
"loss": 0.4246, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 0.43956783413887024, |
|
"eval_runtime": 182.7895, |
|
"eval_samples_per_second": 5.427, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.27576818358615324, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.622448979591837e-06, |
|
"loss": 0.4315, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.431, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.520408163265307e-06, |
|
"loss": 0.4164, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.469387755102042e-06, |
|
"loss": 0.4197, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.418367346938776e-06, |
|
"loss": 0.4328, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.36734693877551e-06, |
|
"loss": 0.4136, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.316326530612246e-06, |
|
"loss": 0.4275, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.26530612244898e-06, |
|
"loss": 0.4084, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.214285714285714e-06, |
|
"loss": 0.4173, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.16326530612245e-06, |
|
"loss": 0.4132, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 0.4221729636192322, |
|
"eval_runtime": 183.8667, |
|
"eval_samples_per_second": 5.395, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.26639625122705635, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.112244897959184e-06, |
|
"loss": 0.4238, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.06122448979592e-06, |
|
"loss": 0.4153, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.010204081632654e-06, |
|
"loss": 0.4125, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.959183673469388e-06, |
|
"loss": 0.4161, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.908163265306124e-06, |
|
"loss": 0.4055, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.857142857142858e-06, |
|
"loss": 0.4028, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.806122448979593e-06, |
|
"loss": 0.4051, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.755102040816327e-06, |
|
"loss": 0.4036, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.704081632653061e-06, |
|
"loss": 0.4018, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.653061224489796e-06, |
|
"loss": 0.4021, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 0.4100790321826935, |
|
"eval_runtime": 182.7493, |
|
"eval_samples_per_second": 5.428, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.26334018632735084, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.602040816326531e-06, |
|
"loss": 0.4036, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.551020408163265e-06, |
|
"loss": 0.3895, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.4034, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.448979591836736e-06, |
|
"loss": 0.4014, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.39795918367347e-06, |
|
"loss": 0.4036, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.346938775510205e-06, |
|
"loss": 0.3913, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.295918367346939e-06, |
|
"loss": 0.3865, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.244897959183675e-06, |
|
"loss": 0.3969, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.193877551020409e-06, |
|
"loss": 0.3928, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.3871, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 0.39817574620246887, |
|
"eval_runtime": 183.3354, |
|
"eval_samples_per_second": 5.411, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.26191402270748826, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.091836734693878e-06, |
|
"loss": 0.4017, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.0408163265306125e-06, |
|
"loss": 0.4, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.989795918367348e-06, |
|
"loss": 0.3988, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.938775510204082e-06, |
|
"loss": 0.3863, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.887755102040817e-06, |
|
"loss": 0.381, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.836734693877551e-06, |
|
"loss": 0.373, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.785714285714287e-06, |
|
"loss": 0.3968, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.734693877551021e-06, |
|
"loss": 0.3876, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.683673469387756e-06, |
|
"loss": 0.3899, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.63265306122449e-06, |
|
"loss": 0.3813, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.3895135223865509, |
|
"eval_runtime": 182.7821, |
|
"eval_samples_per_second": 5.427, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.25772813987516435, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.581632653061225e-06, |
|
"loss": 0.3816, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.530612244897959e-06, |
|
"loss": 0.3805, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.4795918367346946e-06, |
|
"loss": 0.3901, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.4285714285714295e-06, |
|
"loss": 0.3785, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.3775510204081635e-06, |
|
"loss": 0.3803, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.326530612244899e-06, |
|
"loss": 0.3803, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.275510204081633e-06, |
|
"loss": 0.3767, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.224489795918368e-06, |
|
"loss": 0.3728, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.173469387755102e-06, |
|
"loss": 0.3747, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.122448979591837e-06, |
|
"loss": 0.3878, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 0.38268980383872986, |
|
"eval_runtime": 182.9118, |
|
"eval_samples_per_second": 5.423, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.2533014761719546, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.071428571428571e-06, |
|
"loss": 0.3725, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.020408163265307e-06, |
|
"loss": 0.3761, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.969387755102042e-06, |
|
"loss": 0.3851, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.918367346938776e-06, |
|
"loss": 0.3764, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.867346938775511e-06, |
|
"loss": 0.3737, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.816326530612246e-06, |
|
"loss": 0.3724, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.7653061224489805e-06, |
|
"loss": 0.3837, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.3666, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.663265306122449e-06, |
|
"loss": 0.3748, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.6122448979591834e-06, |
|
"loss": 0.3704, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 0.3770383894443512, |
|
"eval_runtime": 183.5258, |
|
"eval_samples_per_second": 5.405, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.2533385193828601, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.561224489795919e-06, |
|
"loss": 0.3625, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.510204081632653e-06, |
|
"loss": 0.3609, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.459183673469388e-06, |
|
"loss": 0.3715, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.408163265306123e-06, |
|
"loss": 0.3676, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.357142857142857e-06, |
|
"loss": 0.3703, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.306122448979593e-06, |
|
"loss": 0.3606, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.255102040816327e-06, |
|
"loss": 0.3691, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.204081632653062e-06, |
|
"loss": 0.3776, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.153061224489796e-06, |
|
"loss": 0.3652, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.1020408163265315e-06, |
|
"loss": 0.3516, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 0.3713986277580261, |
|
"eval_runtime": 182.6295, |
|
"eval_samples_per_second": 5.432, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.25398677557370675, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.0510204081632655e-06, |
|
"loss": 0.3715, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3557, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.948979591836735e-06, |
|
"loss": 0.3676, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.897959183673469e-06, |
|
"loss": 0.3661, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.846938775510204e-06, |
|
"loss": 0.3649, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.795918367346939e-06, |
|
"loss": 0.3525, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.744897959183674e-06, |
|
"loss": 0.3715, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.693877551020409e-06, |
|
"loss": 0.3639, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.642857142857144e-06, |
|
"loss": 0.3619, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.591836734693878e-06, |
|
"loss": 0.3792, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 0.3675037920475006, |
|
"eval_runtime": 183.0666, |
|
"eval_samples_per_second": 5.419, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.24952306865959142, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.540816326530613e-06, |
|
"loss": 0.3601, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.489795918367348e-06, |
|
"loss": 0.363, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.438775510204082e-06, |
|
"loss": 0.3729, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.3877551020408165e-06, |
|
"loss": 0.365, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.336734693877551e-06, |
|
"loss": 0.356, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.3539, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.234693877551021e-06, |
|
"loss": 0.3608, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.183673469387755e-06, |
|
"loss": 0.361, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.13265306122449e-06, |
|
"loss": 0.3486, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.081632653061225e-06, |
|
"loss": 0.3476, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 0.36362290382385254, |
|
"eval_runtime": 182.4755, |
|
"eval_samples_per_second": 5.436, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.24563353151451167, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.03061224489796e-06, |
|
"loss": 0.3764, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.979591836734694e-06, |
|
"loss": 0.3542, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.928571428571429e-06, |
|
"loss": 0.3615, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.877551020408164e-06, |
|
"loss": 0.3655, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.826530612244898e-06, |
|
"loss": 0.3709, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.7755102040816327e-06, |
|
"loss": 0.3613, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.724489795918368e-06, |
|
"loss": 0.3608, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.6734693877551024e-06, |
|
"loss": 0.353, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.6224489795918373e-06, |
|
"loss": 0.354, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"loss": 0.3522, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 0.36105236411094666, |
|
"eval_runtime": 182.9956, |
|
"eval_samples_per_second": 5.421, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.24617065807264174, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.5204081632653062e-06, |
|
"loss": 0.3526, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.469387755102041e-06, |
|
"loss": 0.3578, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.4183673469387756e-06, |
|
"loss": 0.3519, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.3673469387755105e-06, |
|
"loss": 0.3487, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.316326530612245e-06, |
|
"loss": 0.354, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.2653061224489794e-06, |
|
"loss": 0.3605, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.2142857142857147e-06, |
|
"loss": 0.3644, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.1632653061224496e-06, |
|
"loss": 0.3592, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.112244897959184e-06, |
|
"loss": 0.3502, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.0612244897959185e-06, |
|
"loss": 0.3545, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 0.35600587725639343, |
|
"eval_runtime": 183.8697, |
|
"eval_samples_per_second": 5.395, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.243985108629216, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.0102040816326534e-06, |
|
"loss": 0.3603, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.959183673469388e-06, |
|
"loss": 0.3518, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.908163265306123e-06, |
|
"loss": 0.3635, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.352, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.8061224489795917e-06, |
|
"loss": 0.3506, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.7551020408163266e-06, |
|
"loss": 0.3449, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.7040816326530615e-06, |
|
"loss": 0.3591, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.6530612244897964e-06, |
|
"loss": 0.3499, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.602040816326531e-06, |
|
"loss": 0.3546, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.5510204081632657e-06, |
|
"loss": 0.3426, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 0.3543338179588318, |
|
"eval_runtime": 184.2454, |
|
"eval_samples_per_second": 5.384, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.24635587412716933, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.354, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.4489795918367347e-06, |
|
"loss": 0.3416, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.3979591836734696e-06, |
|
"loss": 0.355, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.3469387755102044e-06, |
|
"loss": 0.355, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.295918367346939e-06, |
|
"loss": 0.3428, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.244897959183674e-06, |
|
"loss": 0.35, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.1938775510204083e-06, |
|
"loss": 0.3511, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.1428571428571427e-06, |
|
"loss": 0.3477, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.0918367346938776e-06, |
|
"loss": 0.3494, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.0408163265306125e-06, |
|
"loss": 0.3437, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 0.3523597717285156, |
|
"eval_runtime": 184.0518, |
|
"eval_samples_per_second": 5.39, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.24635587412716933, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.989795918367347e-06, |
|
"loss": 0.35, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.938775510204082e-06, |
|
"loss": 0.339, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8877551020408163e-06, |
|
"loss": 0.3504, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8367346938775512e-06, |
|
"loss": 0.3401, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7857142857142859e-06, |
|
"loss": 0.3424, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7346938775510206e-06, |
|
"loss": 0.3567, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.6836734693877552e-06, |
|
"loss": 0.3622, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6326530612244897e-06, |
|
"loss": 0.3425, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5816326530612248e-06, |
|
"loss": 0.3546, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5306122448979593e-06, |
|
"loss": 0.3562, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 0.3507314622402191, |
|
"eval_runtime": 182.7316, |
|
"eval_samples_per_second": 5.429, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.2451519697727399, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.479591836734694e-06, |
|
"loss": 0.3458, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.3528, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3775510204081633e-06, |
|
"loss": 0.3499, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3265306122448982e-06, |
|
"loss": 0.3419, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.2755102040816329e-06, |
|
"loss": 0.3436, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.2244897959183673e-06, |
|
"loss": 0.3436, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.1734693877551022e-06, |
|
"loss": 0.3528, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.122448979591837e-06, |
|
"loss": 0.3397, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.0714285714285714e-06, |
|
"loss": 0.3449, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.0224489795918368e-06, |
|
"loss": 0.3555, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.34912917017936707, |
|
"eval_runtime": 183.2099, |
|
"eval_samples_per_second": 5.415, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.24263303143116446, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.714285714285715e-07, |
|
"loss": 0.3449, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.204081632653062e-07, |
|
"loss": 0.3393, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.693877551020409e-07, |
|
"loss": 0.3454, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.183673469387755e-07, |
|
"loss": 0.3411, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.673469387755103e-07, |
|
"loss": 0.3416, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.16326530612245e-07, |
|
"loss": 0.3298, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.653061224489797e-07, |
|
"loss": 0.3472, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.142857142857143e-07, |
|
"loss": 0.3519, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5.632653061224491e-07, |
|
"loss": 0.3387, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5.122448979591837e-07, |
|
"loss": 0.3397, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.34834620356559753, |
|
"eval_runtime": 183.7297, |
|
"eval_samples_per_second": 5.399, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.24191068881850677, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.612244897959184e-07, |
|
"loss": 0.3428, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.102040816326531e-07, |
|
"loss": 0.3493, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.591836734693878e-07, |
|
"loss": 0.3368, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.0816326530612243e-07, |
|
"loss": 0.3547, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.5714285714285716e-07, |
|
"loss": 0.3426, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.0612244897959186e-07, |
|
"loss": 0.3496, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5510204081632654e-07, |
|
"loss": 0.3479, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0408163265306123e-07, |
|
"loss": 0.3472, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.306122448979593e-08, |
|
"loss": 0.3397, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.0408163265306126e-09, |
|
"loss": 0.3516, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.34790730476379395, |
|
"eval_runtime": 184.5434, |
|
"eval_samples_per_second": 5.375, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.2426515530366172, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"step": 5000, |
|
"total_flos": 9.23473281024e+19, |
|
"train_loss": 0.38958581829071043, |
|
"train_runtime": 37554.5281, |
|
"train_samples_per_second": 8.521, |
|
"train_steps_per_second": 0.133 |
|
} |
|
], |
|
"max_steps": 5000, |
|
"num_train_epochs": 1, |
|
"total_flos": 9.23473281024e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|