|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.906542056074766, |
|
"global_step": 3200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.7e-06, |
|
"loss": 9.9876, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 4.772322177886963, |
|
"eval_runtime": 43.7501, |
|
"eval_samples_per_second": 19.497, |
|
"eval_steps_per_second": 0.617, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.97e-05, |
|
"loss": 3.6526, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 3.8356621265411377, |
|
"eval_runtime": 43.2583, |
|
"eval_samples_per_second": 19.719, |
|
"eval_steps_per_second": 0.624, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.96e-05, |
|
"loss": 3.1832, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 3.42309832572937, |
|
"eval_runtime": 43.8662, |
|
"eval_samples_per_second": 19.446, |
|
"eval_steps_per_second": 0.616, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.960000000000001e-05, |
|
"loss": 3.0394, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 3.35982608795166, |
|
"eval_runtime": 43.5204, |
|
"eval_samples_per_second": 19.6, |
|
"eval_steps_per_second": 0.62, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 4.96e-05, |
|
"loss": 2.9964, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_loss": 3.0924816131591797, |
|
"eval_runtime": 42.6866, |
|
"eval_samples_per_second": 19.983, |
|
"eval_steps_per_second": 0.633, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 5.96e-05, |
|
"loss": 2.9881, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 3.0199828147888184, |
|
"eval_runtime": 44.2234, |
|
"eval_samples_per_second": 19.288, |
|
"eval_steps_per_second": 0.611, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 6.96e-05, |
|
"loss": 2.9678, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"eval_loss": 3.0672409534454346, |
|
"eval_runtime": 44.282, |
|
"eval_samples_per_second": 19.263, |
|
"eval_steps_per_second": 0.61, |
|
"eval_wer": 1.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 7.960000000000001e-05, |
|
"loss": 2.7541, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_loss": 2.397754669189453, |
|
"eval_runtime": 43.7165, |
|
"eval_samples_per_second": 19.512, |
|
"eval_steps_per_second": 0.618, |
|
"eval_wer": 0.9788053949903661, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 8.960000000000001e-05, |
|
"loss": 1.876, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"eval_loss": 1.5307629108428955, |
|
"eval_runtime": 43.4064, |
|
"eval_samples_per_second": 19.651, |
|
"eval_steps_per_second": 0.622, |
|
"eval_wer": 0.7509098694069792, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 9.960000000000001e-05, |
|
"loss": 1.4334, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"eval_loss": 1.435613751411438, |
|
"eval_runtime": 43.9507, |
|
"eval_samples_per_second": 19.408, |
|
"eval_steps_per_second": 0.614, |
|
"eval_wer": 0.620316848640548, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 9.565610859728508e-05, |
|
"loss": 1.223, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"eval_loss": 1.0539987087249756, |
|
"eval_runtime": 43.8182, |
|
"eval_samples_per_second": 19.467, |
|
"eval_steps_per_second": 0.616, |
|
"eval_wer": 0.5885249411260972, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 9.113122171945702e-05, |
|
"loss": 1.0139, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"eval_loss": 0.9824701547622681, |
|
"eval_runtime": 43.7265, |
|
"eval_samples_per_second": 19.508, |
|
"eval_steps_per_second": 0.617, |
|
"eval_wer": 0.4964675658317277, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 8.660633484162897e-05, |
|
"loss": 0.9349, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"eval_loss": 0.981073796749115, |
|
"eval_runtime": 43.357, |
|
"eval_samples_per_second": 19.674, |
|
"eval_steps_per_second": 0.623, |
|
"eval_wer": 0.4774138300149861, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"learning_rate": 8.20814479638009e-05, |
|
"loss": 0.8395, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"eval_loss": 0.9597522616386414, |
|
"eval_runtime": 44.326, |
|
"eval_samples_per_second": 19.244, |
|
"eval_steps_per_second": 0.609, |
|
"eval_wer": 0.44166131449368445, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 7.755656108597285e-05, |
|
"loss": 0.8417, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"eval_loss": 0.9240782856941223, |
|
"eval_runtime": 44.9894, |
|
"eval_samples_per_second": 18.96, |
|
"eval_steps_per_second": 0.6, |
|
"eval_wer": 0.43513166345536286, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 7.30316742081448e-05, |
|
"loss": 0.7091, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"eval_loss": 0.9342209100723267, |
|
"eval_runtime": 43.4211, |
|
"eval_samples_per_second": 19.645, |
|
"eval_steps_per_second": 0.622, |
|
"eval_wer": 0.4319203596660244, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 6.850678733031674e-05, |
|
"loss": 0.6622, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"eval_loss": 0.895724892616272, |
|
"eval_runtime": 43.4542, |
|
"eval_samples_per_second": 19.63, |
|
"eval_steps_per_second": 0.621, |
|
"eval_wer": 0.41629201455791054, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"learning_rate": 6.398190045248869e-05, |
|
"loss": 0.6762, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"eval_loss": 0.9993765354156494, |
|
"eval_runtime": 44.1607, |
|
"eval_samples_per_second": 19.316, |
|
"eval_steps_per_second": 0.611, |
|
"eval_wer": 0.40023549561121813, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 5.945701357466064e-05, |
|
"loss": 0.6325, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"eval_loss": 0.9341434240341187, |
|
"eval_runtime": 42.9623, |
|
"eval_samples_per_second": 19.855, |
|
"eval_steps_per_second": 0.628, |
|
"eval_wer": 0.41168914579319205, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"learning_rate": 5.4932126696832586e-05, |
|
"loss": 0.5829, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"eval_loss": 1.0207712650299072, |
|
"eval_runtime": 42.8374, |
|
"eval_samples_per_second": 19.913, |
|
"eval_steps_per_second": 0.63, |
|
"eval_wer": 0.3980946264183258, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"learning_rate": 5.0407239819004526e-05, |
|
"loss": 0.5598, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"eval_loss": 1.0339125394821167, |
|
"eval_runtime": 42.5183, |
|
"eval_samples_per_second": 20.062, |
|
"eval_steps_per_second": 0.635, |
|
"eval_wer": 0.39359880111325196, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 20.56, |
|
"learning_rate": 4.588235294117647e-05, |
|
"loss": 0.5637, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 20.56, |
|
"eval_loss": 0.9660681486129761, |
|
"eval_runtime": 44.6662, |
|
"eval_samples_per_second": 19.097, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.399165061014772, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 21.5, |
|
"learning_rate": 4.1357466063348414e-05, |
|
"loss": 0.5396, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 21.5, |
|
"eval_loss": 1.05593740940094, |
|
"eval_runtime": 44.132, |
|
"eval_samples_per_second": 19.328, |
|
"eval_steps_per_second": 0.612, |
|
"eval_wer": 0.3923142795975166, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 22.43, |
|
"learning_rate": 3.683257918552037e-05, |
|
"loss": 0.5001, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 22.43, |
|
"eval_loss": 0.9710575938224792, |
|
"eval_runtime": 43.7084, |
|
"eval_samples_per_second": 19.516, |
|
"eval_steps_per_second": 0.618, |
|
"eval_wer": 0.38589167201883967, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"learning_rate": 3.230769230769231e-05, |
|
"loss": 0.4797, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"eval_loss": 0.9755498766899109, |
|
"eval_runtime": 43.0821, |
|
"eval_samples_per_second": 19.799, |
|
"eval_steps_per_second": 0.627, |
|
"eval_wer": 0.3775422821665596, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"learning_rate": 2.7782805429864255e-05, |
|
"loss": 0.4993, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"eval_loss": 0.9937364459037781, |
|
"eval_runtime": 43.0983, |
|
"eval_samples_per_second": 19.792, |
|
"eval_steps_per_second": 0.626, |
|
"eval_wer": 0.37315350032113037, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 25.23, |
|
"learning_rate": 2.3257918552036202e-05, |
|
"loss": 0.4728, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 25.23, |
|
"eval_loss": 1.028990387916565, |
|
"eval_runtime": 44.5142, |
|
"eval_samples_per_second": 19.162, |
|
"eval_steps_per_second": 0.607, |
|
"eval_wer": 0.37229715264397345, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 26.17, |
|
"learning_rate": 1.8733031674208146e-05, |
|
"loss": 0.4479, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 26.17, |
|
"eval_loss": 0.9995871186256409, |
|
"eval_runtime": 43.2227, |
|
"eval_samples_per_second": 19.735, |
|
"eval_steps_per_second": 0.625, |
|
"eval_wer": 0.36897880539499034, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"learning_rate": 1.4208144796380091e-05, |
|
"loss": 0.4675, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"eval_loss": 1.0029648542404175, |
|
"eval_runtime": 42.9456, |
|
"eval_samples_per_second": 19.862, |
|
"eval_steps_per_second": 0.629, |
|
"eval_wer": 0.37101263112823807, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 28.04, |
|
"learning_rate": 9.683257918552037e-06, |
|
"loss": 0.4454, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 28.04, |
|
"eval_loss": 1.0169341564178467, |
|
"eval_runtime": 43.9547, |
|
"eval_samples_per_second": 19.406, |
|
"eval_steps_per_second": 0.614, |
|
"eval_wer": 0.37037037037037035, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"learning_rate": 5.1583710407239815e-06, |
|
"loss": 0.4473, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"eval_loss": 1.0175889730453491, |
|
"eval_runtime": 43.8487, |
|
"eval_samples_per_second": 19.453, |
|
"eval_steps_per_second": 0.616, |
|
"eval_wer": 0.3707985442089488, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 29.91, |
|
"learning_rate": 6.334841628959276e-07, |
|
"loss": 0.4005, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 29.91, |
|
"eval_loss": 1.0105210542678833, |
|
"eval_runtime": 42.6213, |
|
"eval_samples_per_second": 20.013, |
|
"eval_steps_per_second": 0.633, |
|
"eval_wer": 0.36951402269321343, |
|
"step": 3200 |
|
} |
|
], |
|
"max_steps": 3210, |
|
"num_train_epochs": 30, |
|
"total_flos": 4.853367218401499e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|