{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 60.0,
  "global_step": 7440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.81,
      "learning_rate": 1.3172043010752688e-05,
      "loss": 13.9663,
      "step": 100
    },
    {
      "epoch": 0.81,
      "eval_loss": 15.575762748718262,
      "eval_runtime": 98.9721,
      "eval_samples_per_second": 18.632,
      "eval_steps_per_second": 2.334,
      "eval_wer": 1.0,
      "step": 100
    },
    {
      "epoch": 1.61,
      "learning_rate": 2.6344086021505376e-05,
      "loss": 6.7623,
      "step": 200
    },
    {
      "epoch": 1.61,
      "eval_loss": 3.259247064590454,
      "eval_runtime": 99.4059,
      "eval_samples_per_second": 18.55,
      "eval_steps_per_second": 2.324,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 2.42,
      "learning_rate": 3.978494623655914e-05,
      "loss": 3.0242,
      "step": 300
    },
    {
      "epoch": 2.42,
      "eval_loss": 2.9369125366210938,
      "eval_runtime": 98.1481,
      "eval_samples_per_second": 18.788,
      "eval_steps_per_second": 2.354,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 3.23,
      "learning_rate": 5.32258064516129e-05,
      "loss": 2.8806,
      "step": 400
    },
    {
      "epoch": 3.23,
      "eval_loss": 2.8887438774108887,
      "eval_runtime": 97.5774,
      "eval_samples_per_second": 18.898,
      "eval_steps_per_second": 2.367,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 4.03,
      "learning_rate": 6.666666666666667e-05,
      "loss": 2.8344,
      "step": 500
    },
    {
      "epoch": 4.03,
      "eval_loss": 2.8125975131988525,
      "eval_runtime": 97.5647,
      "eval_samples_per_second": 18.9,
      "eval_steps_per_second": 2.368,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 4.84,
      "learning_rate": 8.010752688172043e-05,
      "loss": 2.7158,
      "step": 600
    },
    {
      "epoch": 4.84,
      "eval_loss": 2.304689884185791,
      "eval_runtime": 97.2319,
      "eval_samples_per_second": 18.965,
      "eval_steps_per_second": 2.376,
      "eval_wer": 1.003542793212754,
      "step": 600
    },
    {
      "epoch": 5.65,
      "learning_rate": 9.35483870967742e-05,
      "loss": 1.7164,
      "step": 700
    },
    {
      "epoch": 5.65,
      "eval_loss": 0.7104876041412354,
      "eval_runtime": 96.4932,
      "eval_samples_per_second": 19.11,
      "eval_steps_per_second": 2.394,
      "eval_wer": 0.7594629871340668,
      "step": 700
    },
    {
      "epoch": 6.45,
      "learning_rate": 0.0001,
      "loss": 1.0757,
      "step": 800
    },
    {
      "epoch": 6.45,
      "eval_loss": 0.5160450339317322,
      "eval_runtime": 97.694,
      "eval_samples_per_second": 18.875,
      "eval_steps_per_second": 2.365,
      "eval_wer": 0.6501025545403692,
      "step": 800
    },
    {
      "epoch": 7.26,
      "learning_rate": 0.0001,
      "loss": 0.9208,
      "step": 900
    },
    {
      "epoch": 7.26,
      "eval_loss": 0.43188726902008057,
      "eval_runtime": 101.9111,
      "eval_samples_per_second": 18.094,
      "eval_steps_per_second": 2.267,
      "eval_wer": 0.5842811859034123,
      "step": 900
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.0001,
      "loss": 0.8012,
      "step": 1000
    },
    {
      "epoch": 8.06,
      "eval_loss": 0.39437660574913025,
      "eval_runtime": 97.2792,
      "eval_samples_per_second": 18.956,
      "eval_steps_per_second": 2.375,
      "eval_wer": 0.5456833861644602,
      "step": 1000
    },
    {
      "epoch": 8.87,
      "learning_rate": 0.0001,
      "loss": 0.7334,
      "step": 1100
    },
    {
      "epoch": 8.87,
      "eval_loss": 0.36806735396385193,
      "eval_runtime": 97.7448,
      "eval_samples_per_second": 18.865,
      "eval_steps_per_second": 2.363,
      "eval_wer": 0.5120268506432967,
      "step": 1100
    },
    {
      "epoch": 9.68,
      "learning_rate": 0.0001,
      "loss": 0.6839,
      "step": 1200
    },
    {
      "epoch": 9.68,
      "eval_loss": 0.35295596718788147,
      "eval_runtime": 104.7228,
      "eval_samples_per_second": 17.608,
      "eval_steps_per_second": 2.206,
      "eval_wer": 0.49822860339362296,
      "step": 1200
    },
    {
      "epoch": 10.48,
      "learning_rate": 0.0001,
      "loss": 0.6599,
      "step": 1300
    },
    {
      "epoch": 10.48,
      "eval_loss": 0.34405317902565,
      "eval_runtime": 100.6994,
      "eval_samples_per_second": 18.312,
      "eval_steps_per_second": 2.294,
      "eval_wer": 0.49561812418422524,
      "step": 1300
    },
    {
      "epoch": 11.29,
      "learning_rate": 0.0001,
      "loss": 0.6101,
      "step": 1400
    },
    {
      "epoch": 11.29,
      "eval_loss": 0.3226686716079712,
      "eval_runtime": 97.8943,
      "eval_samples_per_second": 18.837,
      "eval_steps_per_second": 2.36,
      "eval_wer": 0.4828454223382435,
      "step": 1400
    },
    {
      "epoch": 12.1,
      "learning_rate": 0.0001,
      "loss": 0.5844,
      "step": 1500
    },
    {
      "epoch": 12.1,
      "eval_loss": 0.34125906229019165,
      "eval_runtime": 98.6933,
      "eval_samples_per_second": 18.684,
      "eval_steps_per_second": 2.341,
      "eval_wer": 0.47445459630803655,
      "step": 1500
    },
    {
      "epoch": 12.9,
      "learning_rate": 0.0001,
      "loss": 0.5754,
      "step": 1600
    },
    {
      "epoch": 12.9,
      "eval_loss": 0.33336642384529114,
      "eval_runtime": 99.0007,
      "eval_samples_per_second": 18.626,
      "eval_steps_per_second": 2.333,
      "eval_wer": 0.46140220026104795,
      "step": 1600
    },
    {
      "epoch": 13.71,
      "learning_rate": 0.0001,
      "loss": 0.5611,
      "step": 1700
    },
    {
      "epoch": 13.71,
      "eval_loss": 0.30597391724586487,
      "eval_runtime": 101.2827,
      "eval_samples_per_second": 18.206,
      "eval_steps_per_second": 2.281,
      "eval_wer": 0.4672757784821928,
      "step": 1700
    },
    {
      "epoch": 14.52,
      "learning_rate": 0.0001,
      "loss": 0.5476,
      "step": 1800
    },
    {
      "epoch": 14.52,
      "eval_loss": 0.3194342851638794,
      "eval_runtime": 105.7302,
      "eval_samples_per_second": 17.441,
      "eval_steps_per_second": 2.185,
      "eval_wer": 0.44881596121573747,
      "step": 1800
    },
    {
      "epoch": 15.32,
      "learning_rate": 0.0001,
      "loss": 0.526,
      "step": 1900
    },
    {
      "epoch": 15.32,
      "eval_loss": 0.30019786953926086,
      "eval_runtime": 96.396,
      "eval_samples_per_second": 19.129,
      "eval_steps_per_second": 2.396,
      "eval_wer": 0.4449934738019765,
      "step": 1900
    },
    {
      "epoch": 16.13,
      "learning_rate": 0.0001,
      "loss": 0.5267,
      "step": 2000
    },
    {
      "epoch": 16.13,
      "eval_loss": 0.3249629735946655,
      "eval_runtime": 105.1317,
      "eval_samples_per_second": 17.54,
      "eval_steps_per_second": 2.197,
      "eval_wer": 0.43660264777176955,
      "step": 2000
    },
    {
      "epoch": 16.94,
      "learning_rate": 0.0001,
      "loss": 0.49,
      "step": 2100
    },
    {
      "epoch": 16.94,
      "eval_loss": 0.2999746799468994,
      "eval_runtime": 97.027,
      "eval_samples_per_second": 19.005,
      "eval_steps_per_second": 2.381,
      "eval_wer": 0.4337124743613649,
      "step": 2100
    },
    {
      "epoch": 17.74,
      "learning_rate": 0.0001,
      "loss": 0.4945,
      "step": 2200
    },
    {
      "epoch": 17.74,
      "eval_loss": 0.30671271681785583,
      "eval_runtime": 96.8855,
      "eval_samples_per_second": 19.033,
      "eval_steps_per_second": 2.384,
      "eval_wer": 0.43100876375163155,
      "step": 2200
    },
    {
      "epoch": 18.55,
      "learning_rate": 0.0001,
      "loss": 0.485,
      "step": 2300
    },
    {
      "epoch": 18.55,
      "eval_loss": 0.3134038746356964,
      "eval_runtime": 98.1951,
      "eval_samples_per_second": 18.779,
      "eval_steps_per_second": 2.352,
      "eval_wer": 0.43706880477344767,
      "step": 2300
    },
    {
      "epoch": 19.35,
      "learning_rate": 0.0001,
      "loss": 0.486,
      "step": 2400
    },
    {
      "epoch": 19.35,
      "eval_loss": 0.3182927072048187,
      "eval_runtime": 100.6349,
      "eval_samples_per_second": 18.324,
      "eval_steps_per_second": 2.295,
      "eval_wer": 0.43044937534961775,
      "step": 2400
    },
    {
      "epoch": 20.16,
      "learning_rate": 0.0001,
      "loss": 0.4617,
      "step": 2500
    },
    {
      "epoch": 20.16,
      "eval_loss": 0.3004864752292633,
      "eval_runtime": 102.1006,
      "eval_samples_per_second": 18.061,
      "eval_steps_per_second": 2.262,
      "eval_wer": 0.4238299459257878,
      "step": 2500
    },
    {
      "epoch": 20.97,
      "learning_rate": 0.0001,
      "loss": 0.465,
      "step": 2600
    },
    {
      "epoch": 20.97,
      "eval_loss": 0.313532292842865,
      "eval_runtime": 101.2873,
      "eval_samples_per_second": 18.206,
      "eval_steps_per_second": 2.281,
      "eval_wer": 0.4311019951519672,
      "step": 2600
    },
    {
      "epoch": 21.77,
      "learning_rate": 0.0001,
      "loss": 0.4414,
      "step": 2700
    },
    {
      "epoch": 21.77,
      "eval_loss": 0.3118414580821991,
      "eval_runtime": 97.4069,
      "eval_samples_per_second": 18.931,
      "eval_steps_per_second": 2.371,
      "eval_wer": 0.4256013425321648,
      "step": 2700
    },
    {
      "epoch": 22.58,
      "learning_rate": 0.0001,
      "loss": 0.4424,
      "step": 2800
    },
    {
      "epoch": 22.58,
      "eval_loss": 0.3086116313934326,
      "eval_runtime": 97.4809,
      "eval_samples_per_second": 18.917,
      "eval_steps_per_second": 2.37,
      "eval_wer": 0.4160917396979303,
      "step": 2800
    },
    {
      "epoch": 23.39,
      "learning_rate": 0.0001,
      "loss": 0.4352,
      "step": 2900
    },
    {
      "epoch": 23.39,
      "eval_loss": 0.3213385045528412,
      "eval_runtime": 96.5991,
      "eval_samples_per_second": 19.089,
      "eval_steps_per_second": 2.391,
      "eval_wer": 0.41879545030766363,
      "step": 2900
    },
    {
      "epoch": 24.19,
      "learning_rate": 0.0001,
      "loss": 0.4324,
      "step": 3000
    },
    {
      "epoch": 24.19,
      "eval_loss": 0.3011772036552429,
      "eval_runtime": 97.1915,
      "eval_samples_per_second": 18.973,
      "eval_steps_per_second": 2.377,
      "eval_wer": 0.4236434831251165,
      "step": 3000
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.0001,
      "loss": 0.4246,
      "step": 3100
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.30194172263145447,
      "eval_runtime": 100.0292,
      "eval_samples_per_second": 18.435,
      "eval_steps_per_second": 2.309,
      "eval_wer": 0.4165578966996084,
      "step": 3100
    },
    {
      "epoch": 25.81,
      "learning_rate": 0.0001,
      "loss": 0.4132,
      "step": 3200
    },
    {
      "epoch": 25.81,
      "eval_loss": 0.299437552690506,
      "eval_runtime": 97.7952,
      "eval_samples_per_second": 18.856,
      "eval_steps_per_second": 2.362,
      "eval_wer": 0.4160917396979303,
      "step": 3200
    },
    {
      "epoch": 26.61,
      "learning_rate": 0.0001,
      "loss": 0.4074,
      "step": 3300
    },
    {
      "epoch": 26.61,
      "eval_loss": 0.31787610054016113,
      "eval_runtime": 97.3925,
      "eval_samples_per_second": 18.934,
      "eval_steps_per_second": 2.372,
      "eval_wer": 0.41795636770464295,
      "step": 3300
    },
    {
      "epoch": 27.42,
      "learning_rate": 0.0001,
      "loss": 0.4031,
      "step": 3400
    },
    {
      "epoch": 27.42,
      "eval_loss": 0.296748548746109,
      "eval_runtime": 101.7762,
      "eval_samples_per_second": 18.118,
      "eval_steps_per_second": 2.27,
      "eval_wer": 0.40760768226738764,
      "step": 3400
    },
    {
      "epoch": 28.23,
      "learning_rate": 0.0001,
      "loss": 0.4082,
      "step": 3500
    },
    {
      "epoch": 28.23,
      "eval_loss": 0.31329259276390076,
      "eval_runtime": 102.7944,
      "eval_samples_per_second": 17.939,
      "eval_steps_per_second": 2.247,
      "eval_wer": 0.41310833488719,
      "step": 3500
    },
    {
      "epoch": 29.03,
      "learning_rate": 0.0001,
      "loss": 0.3912,
      "step": 3600
    },
    {
      "epoch": 29.03,
      "eval_loss": 0.3259478509426117,
      "eval_runtime": 98.5616,
      "eval_samples_per_second": 18.709,
      "eval_steps_per_second": 2.344,
      "eval_wer": 0.41133693828081297,
      "step": 3600
    },
    {
      "epoch": 29.84,
      "learning_rate": 0.0001,
      "loss": 0.3922,
      "step": 3700
    },
    {
      "epoch": 29.84,
      "eval_loss": 0.32406386733055115,
      "eval_runtime": 96.8005,
      "eval_samples_per_second": 19.049,
      "eval_steps_per_second": 2.386,
      "eval_wer": 0.41226925228416933,
      "step": 3700
    },
    {
      "epoch": 30.65,
      "learning_rate": 9.795698924731184e-05,
      "loss": 0.3851,
      "step": 3800
    },
    {
      "epoch": 30.65,
      "eval_loss": 0.29852330684661865,
      "eval_runtime": 105.0804,
      "eval_samples_per_second": 17.548,
      "eval_steps_per_second": 2.198,
      "eval_wer": 0.41133693828081297,
      "step": 3800
    },
    {
      "epoch": 31.45,
      "learning_rate": 9.526881720430108e-05,
      "loss": 0.3768,
      "step": 3900
    },
    {
      "epoch": 31.45,
      "eval_loss": 0.3223101794719696,
      "eval_runtime": 97.6458,
      "eval_samples_per_second": 18.885,
      "eval_steps_per_second": 2.366,
      "eval_wer": 0.41040462427745666,
      "step": 3900
    },
    {
      "epoch": 32.26,
      "learning_rate": 9.258064516129033e-05,
      "loss": 0.3728,
      "step": 4000
    },
    {
      "epoch": 32.26,
      "eval_loss": 0.3212699890136719,
      "eval_runtime": 100.4336,
      "eval_samples_per_second": 18.36,
      "eval_steps_per_second": 2.3,
      "eval_wer": 0.40770091366772326,
      "step": 4000
    },
    {
      "epoch": 33.06,
      "learning_rate": 8.989247311827958e-05,
      "loss": 0.3646,
      "step": 4100
    },
    {
      "epoch": 33.06,
      "eval_loss": 0.31770506501197815,
      "eval_runtime": 97.2462,
      "eval_samples_per_second": 18.962,
      "eval_steps_per_second": 2.375,
      "eval_wer": 0.4116166324818199,
      "step": 4100
    },
    {
      "epoch": 33.87,
      "learning_rate": 8.720430107526883e-05,
      "loss": 0.3681,
      "step": 4200
    },
    {
      "epoch": 33.87,
      "eval_loss": 0.3010982871055603,
      "eval_runtime": 99.148,
      "eval_samples_per_second": 18.598,
      "eval_steps_per_second": 2.33,
      "eval_wer": 0.4105910870781279,
      "step": 4200
    },
    {
      "epoch": 34.68,
      "learning_rate": 8.451612903225808e-05,
      "loss": 0.3634,
      "step": 4300
    },
    {
      "epoch": 34.68,
      "eval_loss": 0.3075733780860901,
      "eval_runtime": 99.8799,
      "eval_samples_per_second": 18.462,
      "eval_steps_per_second": 2.313,
      "eval_wer": 0.4069550624650382,
      "step": 4300
    },
    {
      "epoch": 35.48,
      "learning_rate": 8.182795698924732e-05,
      "loss": 0.3471,
      "step": 4400
    },
    {
      "epoch": 35.48,
      "eval_loss": 0.31304171681404114,
      "eval_runtime": 99.6768,
      "eval_samples_per_second": 18.5,
      "eval_steps_per_second": 2.317,
      "eval_wer": 0.40266641804959913,
      "step": 4400
    },
    {
      "epoch": 36.29,
      "learning_rate": 7.913978494623657e-05,
      "loss": 0.3398,
      "step": 4500
    },
    {
      "epoch": 36.29,
      "eval_loss": 0.306076318025589,
      "eval_runtime": 103.5936,
      "eval_samples_per_second": 17.8,
      "eval_steps_per_second": 2.23,
      "eval_wer": 0.4037851948536267,
      "step": 4500
    },
    {
      "epoch": 37.1,
      "learning_rate": 7.645161290322582e-05,
      "loss": 0.3394,
      "step": 4600
    },
    {
      "epoch": 37.1,
      "eval_loss": 0.31049424409866333,
      "eval_runtime": 98.5424,
      "eval_samples_per_second": 18.713,
      "eval_steps_per_second": 2.344,
      "eval_wer": 0.4037851948536267,
      "step": 4600
    },
    {
      "epoch": 37.9,
      "learning_rate": 7.376344086021507e-05,
      "loss": 0.331,
      "step": 4700
    },
    {
      "epoch": 37.9,
      "eval_loss": 0.3191221058368683,
      "eval_runtime": 102.0886,
      "eval_samples_per_second": 18.063,
      "eval_steps_per_second": 2.263,
      "eval_wer": 0.3992168562371807,
      "step": 4700
    },
    {
      "epoch": 38.71,
      "learning_rate": 7.10752688172043e-05,
      "loss": 0.3221,
      "step": 4800
    },
    {
      "epoch": 38.71,
      "eval_loss": 0.3119024336338043,
      "eval_runtime": 99.689,
      "eval_samples_per_second": 18.498,
      "eval_steps_per_second": 2.317,
      "eval_wer": 0.39958978183852323,
      "step": 4800
    },
    {
      "epoch": 39.52,
      "learning_rate": 6.838709677419355e-05,
      "loss": 0.3207,
      "step": 4900
    },
    {
      "epoch": 39.52,
      "eval_loss": 0.3276441991329193,
      "eval_runtime": 96.7884,
      "eval_samples_per_second": 19.052,
      "eval_steps_per_second": 2.387,
      "eval_wer": 0.40313257505127725,
      "step": 4900
    },
    {
      "epoch": 40.32,
      "learning_rate": 6.56989247311828e-05,
      "loss": 0.3186,
      "step": 5000
    },
    {
      "epoch": 40.32,
      "eval_loss": 0.3335840106010437,
      "eval_runtime": 98.1942,
      "eval_samples_per_second": 18.779,
      "eval_steps_per_second": 2.352,
      "eval_wer": 0.4032258064516129,
      "step": 5000
    },
    {
      "epoch": 41.13,
      "learning_rate": 6.301075268817205e-05,
      "loss": 0.3152,
      "step": 5100
    },
    {
      "epoch": 41.13,
      "eval_loss": 0.3129470944404602,
      "eval_runtime": 99.396,
      "eval_samples_per_second": 18.552,
      "eval_steps_per_second": 2.324,
      "eval_wer": 0.3941823606190565,
      "step": 5100
    },
    {
      "epoch": 41.94,
      "learning_rate": 6.0322580645161295e-05,
      "loss": 0.3093,
      "step": 5200
    },
    {
      "epoch": 41.94,
      "eval_loss": 0.31502029299736023,
      "eval_runtime": 97.1686,
      "eval_samples_per_second": 18.977,
      "eval_steps_per_second": 2.377,
      "eval_wer": 0.3930635838150289,
      "step": 5200
    },
    {
      "epoch": 42.74,
      "learning_rate": 5.763440860215054e-05,
      "loss": 0.2949,
      "step": 5300
    },
    {
      "epoch": 42.74,
      "eval_loss": 0.3235335052013397,
      "eval_runtime": 102.899,
      "eval_samples_per_second": 17.92,
      "eval_steps_per_second": 2.245,
      "eval_wer": 0.3953011374230841,
      "step": 5300
    },
    {
      "epoch": 43.55,
      "learning_rate": 5.494623655913979e-05,
      "loss": 0.3013,
      "step": 5400
    },
    {
      "epoch": 43.55,
      "eval_loss": 0.3291964530944824,
      "eval_runtime": 97.4704,
      "eval_samples_per_second": 18.919,
      "eval_steps_per_second": 2.37,
      "eval_wer": 0.3934365094163714,
      "step": 5400
    },
    {
      "epoch": 44.35,
      "learning_rate": 5.225806451612903e-05,
      "loss": 0.3092,
      "step": 5500
    },
    {
      "epoch": 44.35,
      "eval_loss": 0.3066520392894745,
      "eval_runtime": 96.9166,
      "eval_samples_per_second": 19.027,
      "eval_steps_per_second": 2.383,
      "eval_wer": 0.38616446019019207,
      "step": 5500
    },
    {
      "epoch": 45.16,
      "learning_rate": 4.956989247311828e-05,
      "loss": 0.2938,
      "step": 5600
    },
    {
      "epoch": 45.16,
      "eval_loss": 0.32365530729293823,
      "eval_runtime": 98.8953,
      "eval_samples_per_second": 18.646,
      "eval_steps_per_second": 2.336,
      "eval_wer": 0.38756293119522656,
      "step": 5600
    },
    {
      "epoch": 45.97,
      "learning_rate": 4.688172043010753e-05,
      "loss": 0.2985,
      "step": 5700
    },
    {
      "epoch": 45.97,
      "eval_loss": 0.3358381390571594,
      "eval_runtime": 96.7245,
      "eval_samples_per_second": 19.064,
      "eval_steps_per_second": 2.388,
      "eval_wer": 0.3876561625955622,
      "step": 5700
    },
    {
      "epoch": 46.77,
      "learning_rate": 4.4193548387096775e-05,
      "loss": 0.2943,
      "step": 5800
    },
    {
      "epoch": 46.77,
      "eval_loss": 0.3283197283744812,
      "eval_runtime": 98.5499,
      "eval_samples_per_second": 18.711,
      "eval_steps_per_second": 2.344,
      "eval_wer": 0.3928771210143576,
      "step": 5800
    },
    {
      "epoch": 47.58,
      "learning_rate": 4.1505376344086025e-05,
      "loss": 0.2754,
      "step": 5900
    },
    {
      "epoch": 47.58,
      "eval_loss": 0.33124613761901855,
      "eval_runtime": 97.2557,
      "eval_samples_per_second": 18.96,
      "eval_steps_per_second": 2.375,
      "eval_wer": 0.38952079060227485,
      "step": 5900
    },
    {
      "epoch": 48.39,
      "learning_rate": 3.881720430107527e-05,
      "loss": 0.2906,
      "step": 6000
    },
    {
      "epoch": 48.39,
      "eval_loss": 0.33012691140174866,
      "eval_runtime": 99.7418,
      "eval_samples_per_second": 18.488,
      "eval_steps_per_second": 2.316,
      "eval_wer": 0.39045310460563115,
      "step": 6000
    },
    {
      "epoch": 49.19,
      "learning_rate": 3.612903225806452e-05,
      "loss": 0.2751,
      "step": 6100
    },
    {
      "epoch": 49.19,
      "eval_loss": 0.3261188566684723,
      "eval_runtime": 98.7276,
      "eval_samples_per_second": 18.678,
      "eval_steps_per_second": 2.34,
      "eval_wer": 0.3873764683945553,
      "step": 6100
    },
    {
      "epoch": 50.0,
      "learning_rate": 3.344086021505377e-05,
      "loss": 0.2825,
      "step": 6200
    },
    {
      "epoch": 50.0,
      "eval_loss": 0.3322080671787262,
      "eval_runtime": 97.0188,
      "eval_samples_per_second": 19.007,
      "eval_steps_per_second": 2.381,
      "eval_wer": 0.38700354279321275,
      "step": 6200
    },
    {
      "epoch": 50.81,
      "learning_rate": 3.075268817204301e-05,
      "loss": 0.2727,
      "step": 6300
    },
    {
      "epoch": 50.81,
      "eval_loss": 0.3448057770729065,
      "eval_runtime": 98.9754,
      "eval_samples_per_second": 18.631,
      "eval_steps_per_second": 2.334,
      "eval_wer": 0.3928771210143576,
      "step": 6300
    },
    {
      "epoch": 51.61,
      "learning_rate": 2.806451612903226e-05,
      "loss": 0.2896,
      "step": 6400
    },
    {
      "epoch": 51.61,
      "eval_loss": 0.33276790380477905,
      "eval_runtime": 96.9108,
      "eval_samples_per_second": 19.028,
      "eval_steps_per_second": 2.384,
      "eval_wer": 0.38858847659891854,
      "step": 6400
    },
    {
      "epoch": 52.42,
      "learning_rate": 2.537634408602151e-05,
      "loss": 0.2753,
      "step": 6500
    },
    {
      "epoch": 52.42,
      "eval_loss": 0.32856670022010803,
      "eval_runtime": 105.68,
      "eval_samples_per_second": 17.449,
      "eval_steps_per_second": 2.186,
      "eval_wer": 0.3876561625955622,
      "step": 6500
    },
    {
      "epoch": 53.23,
      "learning_rate": 2.268817204301075e-05,
      "loss": 0.271,
      "step": 6600
    },
    {
      "epoch": 53.23,
      "eval_loss": 0.3287724256515503,
      "eval_runtime": 100.7427,
      "eval_samples_per_second": 18.304,
      "eval_steps_per_second": 2.293,
      "eval_wer": 0.3876561625955622,
      "step": 6600
    },
    {
      "epoch": 54.03,
      "learning_rate": 2e-05,
      "loss": 0.2664,
      "step": 6700
    },
    {
      "epoch": 54.03,
      "eval_loss": 0.3290824890136719,
      "eval_runtime": 98.5577,
      "eval_samples_per_second": 18.71,
      "eval_steps_per_second": 2.344,
      "eval_wer": 0.3893343278016036,
      "step": 6700
    },
    {
      "epoch": 54.84,
      "learning_rate": 1.7311827956989248e-05,
      "loss": 0.2491,
      "step": 6800
    },
    {
      "epoch": 54.84,
      "eval_loss": 0.33568552136421204,
      "eval_runtime": 103.2565,
      "eval_samples_per_second": 17.858,
      "eval_steps_per_second": 2.237,
      "eval_wer": 0.388215550997576,
      "step": 6800
    },
    {
      "epoch": 55.65,
      "learning_rate": 1.4623655913978495e-05,
      "loss": 0.2569,
      "step": 6900
    },
    {
      "epoch": 55.65,
      "eval_loss": 0.3253258466720581,
      "eval_runtime": 104.0214,
      "eval_samples_per_second": 17.727,
      "eval_steps_per_second": 2.221,
      "eval_wer": 0.3876561625955622,
      "step": 6900
    },
    {
      "epoch": 56.45,
      "learning_rate": 1.1935483870967743e-05,
      "loss": 0.2568,
      "step": 7000
    },
    {
      "epoch": 56.45,
      "eval_loss": 0.3199854791164398,
      "eval_runtime": 103.944,
      "eval_samples_per_second": 17.74,
      "eval_steps_per_second": 2.222,
      "eval_wer": 0.3868170799925415,
      "step": 7000
    },
    {
      "epoch": 57.26,
      "learning_rate": 9.24731182795699e-06,
      "loss": 0.2668,
      "step": 7100
    },
    {
      "epoch": 57.26,
      "eval_loss": 0.32368558645248413,
      "eval_runtime": 103.5633,
      "eval_samples_per_second": 17.806,
      "eval_steps_per_second": 2.231,
      "eval_wer": 0.3862576915905277,
      "step": 7100
    },
    {
      "epoch": 58.06,
      "learning_rate": 6.586021505376344e-06,
      "loss": 0.253,
      "step": 7200
    },
    {
      "epoch": 58.06,
      "eval_loss": 0.3248105049133301,
      "eval_runtime": 103.7822,
      "eval_samples_per_second": 17.768,
      "eval_steps_per_second": 2.226,
      "eval_wer": 0.38504568338616446,
      "step": 7200
    },
    {
      "epoch": 58.87,
      "learning_rate": 3.8978494623655915e-06,
      "loss": 0.2621,
      "step": 7300
    },
    {
      "epoch": 58.87,
      "eval_loss": 0.3238443434238434,
      "eval_runtime": 97.9464,
      "eval_samples_per_second": 18.827,
      "eval_steps_per_second": 2.358,
      "eval_wer": 0.38448629498415066,
      "step": 7300
    },
    {
      "epoch": 59.68,
      "learning_rate": 1.2096774193548388e-06,
      "loss": 0.2571,
      "step": 7400
    },
    {
      "epoch": 59.68,
      "eval_loss": 0.3273279666900635,
      "eval_runtime": 101.5169,
      "eval_samples_per_second": 18.164,
      "eval_steps_per_second": 2.275,
      "eval_wer": 0.3852321461868357,
      "step": 7400
    },
    {
      "epoch": 60.0,
      "step": 7440,
      "total_flos": 2.7388066347434852e+19,
      "train_loss": 0.8272683016715511,
      "train_runtime": 19767.0967,
      "train_samples_per_second": 12.035,
      "train_steps_per_second": 0.376
    }
  ],
  "max_steps": 7440,
  "num_train_epochs": 60,
  "total_flos": 2.7388066347434852e+19,
  "trial_name": null,
  "trial_params": null
}
|