|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 256.4102564102564, |
|
"global_step": 20000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.0005, |
|
"loss": 1.9105, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_loss": 0.1622474640607834, |
|
"eval_runtime": 72.1023, |
|
"eval_samples_per_second": 25.755, |
|
"eval_steps_per_second": 1.082, |
|
"eval_wer": 0.15307930881701373, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.0004871794871794872, |
|
"loss": 0.1119, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"eval_loss": 0.09705421328544617, |
|
"eval_runtime": 74.0065, |
|
"eval_samples_per_second": 25.092, |
|
"eval_steps_per_second": 1.054, |
|
"eval_wer": 0.09361984935755427, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.00047435897435897434, |
|
"loss": 0.0614, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"eval_loss": 0.10023126006126404, |
|
"eval_runtime": 74.336, |
|
"eval_samples_per_second": 24.981, |
|
"eval_steps_per_second": 1.049, |
|
"eval_wer": 0.09831634913602126, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0004615384615384616, |
|
"loss": 0.044, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"eval_loss": 0.10109349340200424, |
|
"eval_runtime": 68.958, |
|
"eval_samples_per_second": 26.929, |
|
"eval_steps_per_second": 1.131, |
|
"eval_wer": 0.09286663712893221, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0004487179487179487, |
|
"loss": 0.0366, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"eval_loss": 0.09319411963224411, |
|
"eval_runtime": 67.9099, |
|
"eval_samples_per_second": 27.345, |
|
"eval_steps_per_second": 1.149, |
|
"eval_wer": 0.08280903854674347, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0004358974358974359, |
|
"loss": 0.0315, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"eval_loss": 0.09261373430490494, |
|
"eval_runtime": 68.2657, |
|
"eval_samples_per_second": 27.203, |
|
"eval_steps_per_second": 1.143, |
|
"eval_wer": 0.08803721754541427, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0004230769230769231, |
|
"loss": 0.0297, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"eval_loss": 0.09722220152616501, |
|
"eval_runtime": 69.3764, |
|
"eval_samples_per_second": 26.767, |
|
"eval_steps_per_second": 1.124, |
|
"eval_wer": 0.08821444395214886, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.00041025641025641023, |
|
"loss": 0.0216, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"eval_loss": 0.09113188087940216, |
|
"eval_runtime": 68.0225, |
|
"eval_samples_per_second": 27.3, |
|
"eval_steps_per_second": 1.147, |
|
"eval_wer": 0.07735932653965441, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.0003974358974358974, |
|
"loss": 0.0211, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"eval_loss": 0.09818430244922638, |
|
"eval_runtime": 70.5517, |
|
"eval_samples_per_second": 26.321, |
|
"eval_steps_per_second": 1.106, |
|
"eval_wer": 0.08910057598582188, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 64.1, |
|
"learning_rate": 0.00038461538461538467, |
|
"loss": 0.0187, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 64.1, |
|
"eval_loss": 0.10086847096681595, |
|
"eval_runtime": 68.8313, |
|
"eval_samples_per_second": 26.979, |
|
"eval_steps_per_second": 1.133, |
|
"eval_wer": 0.08626495347806823, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 70.51, |
|
"learning_rate": 0.0003717948717948718, |
|
"loss": 0.02, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 70.51, |
|
"eval_loss": 0.09532515704631805, |
|
"eval_runtime": 65.875, |
|
"eval_samples_per_second": 28.19, |
|
"eval_steps_per_second": 1.184, |
|
"eval_wer": 0.08520159503766062, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"learning_rate": 0.000358974358974359, |
|
"loss": 0.0163, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"eval_loss": 0.1028498187661171, |
|
"eval_runtime": 62.6651, |
|
"eval_samples_per_second": 29.634, |
|
"eval_steps_per_second": 1.245, |
|
"eval_wer": 0.08041648205582631, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 0.00034615384615384613, |
|
"loss": 0.0128, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"eval_loss": 0.09295257925987244, |
|
"eval_runtime": 68.7425, |
|
"eval_samples_per_second": 27.014, |
|
"eval_steps_per_second": 1.135, |
|
"eval_wer": 0.08555604785112982, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 89.74, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.0127, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 89.74, |
|
"eval_loss": 0.08917286247015, |
|
"eval_runtime": 65.0895, |
|
"eval_samples_per_second": 28.53, |
|
"eval_steps_per_second": 1.198, |
|
"eval_wer": 0.06756756756756757, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 96.15, |
|
"learning_rate": 0.00032051282051282057, |
|
"loss": 0.0116, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 96.15, |
|
"eval_loss": 0.08566667139530182, |
|
"eval_runtime": 62.4843, |
|
"eval_samples_per_second": 29.719, |
|
"eval_steps_per_second": 1.248, |
|
"eval_wer": 0.07527691626052282, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 102.56, |
|
"learning_rate": 0.0003076923076923077, |
|
"loss": 0.0139, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 102.56, |
|
"eval_loss": 0.10782884061336517, |
|
"eval_runtime": 62.9655, |
|
"eval_samples_per_second": 29.492, |
|
"eval_steps_per_second": 1.239, |
|
"eval_wer": 0.04811696942844484, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 108.97, |
|
"learning_rate": 0.0002948717948717949, |
|
"loss": 0.0107, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 108.97, |
|
"eval_loss": 0.09546608477830887, |
|
"eval_runtime": 64.1669, |
|
"eval_samples_per_second": 28.94, |
|
"eval_steps_per_second": 1.216, |
|
"eval_wer": 0.06827647319450598, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 115.38, |
|
"learning_rate": 0.0002820769230769231, |
|
"loss": 0.0096, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 115.38, |
|
"eval_loss": 0.08463115245103836, |
|
"eval_runtime": 63.761, |
|
"eval_samples_per_second": 29.124, |
|
"eval_steps_per_second": 1.223, |
|
"eval_wer": 0.06973859105006645, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 121.79, |
|
"learning_rate": 0.0002692564102564103, |
|
"loss": 0.0089, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 121.79, |
|
"eval_loss": 0.08535169810056686, |
|
"eval_runtime": 63.7326, |
|
"eval_samples_per_second": 29.137, |
|
"eval_steps_per_second": 1.224, |
|
"eval_wer": 0.06752326096588392, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 128.21, |
|
"learning_rate": 0.00025643589743589747, |
|
"loss": 0.0084, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 128.21, |
|
"eval_loss": 0.08750651776790619, |
|
"eval_runtime": 61.61, |
|
"eval_samples_per_second": 30.141, |
|
"eval_steps_per_second": 1.266, |
|
"eval_wer": 0.07793531236154187, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 134.62, |
|
"learning_rate": 0.00024361538461538463, |
|
"loss": 0.0074, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 134.62, |
|
"eval_loss": 0.08403545618057251, |
|
"eval_runtime": 60.7541, |
|
"eval_samples_per_second": 30.566, |
|
"eval_steps_per_second": 1.284, |
|
"eval_wer": 0.0770048737261852, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 141.03, |
|
"learning_rate": 0.00023082051282051282, |
|
"loss": 0.0061, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 141.03, |
|
"eval_loss": 0.09034867584705353, |
|
"eval_runtime": 63.3565, |
|
"eval_samples_per_second": 29.31, |
|
"eval_steps_per_second": 1.231, |
|
"eval_wer": 0.07540983606557378, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 147.44, |
|
"learning_rate": 0.000218, |
|
"loss": 0.0076, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 147.44, |
|
"eval_loss": 0.08722745627164841, |
|
"eval_runtime": 63.711, |
|
"eval_samples_per_second": 29.147, |
|
"eval_steps_per_second": 1.224, |
|
"eval_wer": 0.0769162605228179, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"learning_rate": 0.00020517948717948718, |
|
"loss": 0.0069, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"eval_loss": 0.08911967277526855, |
|
"eval_runtime": 61.1855, |
|
"eval_samples_per_second": 30.35, |
|
"eval_steps_per_second": 1.275, |
|
"eval_wer": 0.07722640673460346, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 160.26, |
|
"learning_rate": 0.00019235897435897437, |
|
"loss": 0.0061, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 160.26, |
|
"eval_loss": 0.09709189832210541, |
|
"eval_runtime": 63.0238, |
|
"eval_samples_per_second": 29.465, |
|
"eval_steps_per_second": 1.238, |
|
"eval_wer": 0.07735932653965441, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"learning_rate": 0.00017953846153846153, |
|
"loss": 0.0049, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"eval_loss": 0.09844444692134857, |
|
"eval_runtime": 61.9554, |
|
"eval_samples_per_second": 29.973, |
|
"eval_steps_per_second": 1.259, |
|
"eval_wer": 0.07261852015950376, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 173.08, |
|
"learning_rate": 0.00016671794871794872, |
|
"loss": 0.0045, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 173.08, |
|
"eval_loss": 0.09517823159694672, |
|
"eval_runtime": 61.492, |
|
"eval_samples_per_second": 30.199, |
|
"eval_steps_per_second": 1.268, |
|
"eval_wer": 0.07651750110766505, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 179.49, |
|
"learning_rate": 0.0001538974358974359, |
|
"loss": 0.0039, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 179.49, |
|
"eval_loss": 0.10154110193252563, |
|
"eval_runtime": 61.18, |
|
"eval_samples_per_second": 30.353, |
|
"eval_steps_per_second": 1.275, |
|
"eval_wer": 0.07620735489587949, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 185.9, |
|
"learning_rate": 0.00014107692307692307, |
|
"loss": 0.0031, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 185.9, |
|
"eval_loss": 0.09374968707561493, |
|
"eval_runtime": 61.1162, |
|
"eval_samples_per_second": 30.385, |
|
"eval_steps_per_second": 1.276, |
|
"eval_wer": 0.07124501550731059, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 192.31, |
|
"learning_rate": 0.00012825641025641026, |
|
"loss": 0.0032, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 192.31, |
|
"eval_loss": 0.09821684658527374, |
|
"eval_runtime": 60.073, |
|
"eval_samples_per_second": 30.912, |
|
"eval_steps_per_second": 1.298, |
|
"eval_wer": 0.06353566681435534, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 198.72, |
|
"learning_rate": 0.00011546153846153847, |
|
"loss": 0.0028, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 198.72, |
|
"eval_loss": 0.098084457218647, |
|
"eval_runtime": 61.9773, |
|
"eval_samples_per_second": 29.963, |
|
"eval_steps_per_second": 1.259, |
|
"eval_wer": 0.07434647762516615, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 205.13, |
|
"learning_rate": 0.00010266666666666668, |
|
"loss": 0.0024, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 205.13, |
|
"eval_loss": 0.10191462188959122, |
|
"eval_runtime": 65.8946, |
|
"eval_samples_per_second": 28.181, |
|
"eval_steps_per_second": 1.184, |
|
"eval_wer": 0.07120070890562694, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 211.54, |
|
"learning_rate": 8.987179487179488e-05, |
|
"loss": 0.0024, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 211.54, |
|
"eval_loss": 0.09566177427768707, |
|
"eval_runtime": 64.1104, |
|
"eval_samples_per_second": 28.966, |
|
"eval_steps_per_second": 1.217, |
|
"eval_wer": 0.07323881258307488, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 217.95, |
|
"learning_rate": 7.705128205128205e-05, |
|
"loss": 0.002, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 217.95, |
|
"eval_loss": 0.09413377195596695, |
|
"eval_runtime": 61.0383, |
|
"eval_samples_per_second": 30.424, |
|
"eval_steps_per_second": 1.278, |
|
"eval_wer": 0.07315019937970757, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 224.36, |
|
"learning_rate": 6.423076923076924e-05, |
|
"loss": 0.0015, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 224.36, |
|
"eval_loss": 0.10090558975934982, |
|
"eval_runtime": 61.85, |
|
"eval_samples_per_second": 30.024, |
|
"eval_steps_per_second": 1.261, |
|
"eval_wer": 0.07173238812583074, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"learning_rate": 5.1410256410256414e-05, |
|
"loss": 0.0017, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"eval_loss": 0.09554142504930496, |
|
"eval_runtime": 60.313, |
|
"eval_samples_per_second": 30.789, |
|
"eval_steps_per_second": 1.293, |
|
"eval_wer": 0.07301727957465662, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 237.18, |
|
"learning_rate": 3.858974358974359e-05, |
|
"loss": 0.0013, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 237.18, |
|
"eval_loss": 0.09889042377471924, |
|
"eval_runtime": 67.9718, |
|
"eval_samples_per_second": 27.32, |
|
"eval_steps_per_second": 1.148, |
|
"eval_wer": 0.07315019937970757, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 243.59, |
|
"learning_rate": 2.576923076923077e-05, |
|
"loss": 0.0013, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 243.59, |
|
"eval_loss": 0.09670563787221909, |
|
"eval_runtime": 66.758, |
|
"eval_samples_per_second": 27.817, |
|
"eval_steps_per_second": 1.168, |
|
"eval_wer": 0.07377049180327869, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 1.2948717948717948e-05, |
|
"loss": 0.0011, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"eval_loss": 0.0980001762509346, |
|
"eval_runtime": 66.2994, |
|
"eval_samples_per_second": 28.009, |
|
"eval_steps_per_second": 1.176, |
|
"eval_wer": 0.07341603898980949, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 256.41, |
|
"learning_rate": 1.282051282051282e-07, |
|
"loss": 0.0008, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 256.41, |
|
"eval_loss": 0.09882301837205887, |
|
"eval_runtime": 65.0891, |
|
"eval_samples_per_second": 28.53, |
|
"eval_steps_per_second": 1.198, |
|
"eval_wer": 0.07359326539654408, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 256.41, |
|
"step": 20000, |
|
"total_flos": 2.406420737737408e+20, |
|
"train_loss": 0.06207249406576157, |
|
"train_runtime": 86281.9422, |
|
"train_samples_per_second": 22.253, |
|
"train_steps_per_second": 0.232 |
|
} |
|
], |
|
"max_steps": 20000, |
|
"num_train_epochs": 257, |
|
"total_flos": 2.406420737737408e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|