{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 100, "global_step": 7020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.43, "eval_loss": Infinity, "eval_runtime": 60.2276, "eval_samples_per_second": 12.42, "eval_steps_per_second": 1.561, "eval_wer": 1.258039732191856, "step": 100 }, { "epoch": 0.85, "eval_loss": Infinity, "eval_runtime": 58.2497, "eval_samples_per_second": 12.841, "eval_steps_per_second": 1.614, "eval_wer": 1.0, "step": 200 }, { "epoch": 1.28, "eval_loss": Infinity, "eval_runtime": 58.0059, "eval_samples_per_second": 12.895, "eval_steps_per_second": 1.621, "eval_wer": 1.0, "step": 300 }, { "epoch": 1.71, "eval_loss": Infinity, "eval_runtime": 57.8536, "eval_samples_per_second": 12.929, "eval_steps_per_second": 1.625, "eval_wer": 1.0, "step": 400 }, { "epoch": 2.14, "learning_rate": 7.807999999999999e-06, "loss": 12.36, "step": 500 }, { "epoch": 2.14, "eval_loss": Infinity, "eval_runtime": 59.1436, "eval_samples_per_second": 12.647, "eval_steps_per_second": 1.589, "eval_wer": 1.0, "step": 500 }, { "epoch": 2.56, "eval_loss": Infinity, "eval_runtime": 57.8973, "eval_samples_per_second": 12.919, "eval_steps_per_second": 1.624, "eval_wer": 1.0, "step": 600 }, { "epoch": 2.99, "eval_loss": Infinity, "eval_runtime": 57.5316, "eval_samples_per_second": 13.002, "eval_steps_per_second": 1.634, "eval_wer": 1.0, "step": 700 }, { "epoch": 3.42, "eval_loss": Infinity, "eval_runtime": 58.6719, "eval_samples_per_second": 12.749, "eval_steps_per_second": 1.602, "eval_wer": 1.0, "step": 800 }, { "epoch": 3.85, "eval_loss": Infinity, "eval_runtime": 58.8307, "eval_samples_per_second": 12.714, "eval_steps_per_second": 1.598, "eval_wer": 1.0, "step": 900 }, { "epoch": 4.27, "learning_rate": 7.414723926380368e-06, "loss": 8.4674, "step": 1000 }, { "epoch": 4.27, "eval_loss": Infinity, "eval_runtime": 58.475, "eval_samples_per_second": 12.792, "eval_steps_per_second": 1.608, "eval_wer": 1.0, "step": 1000 }, { "epoch": 4.7, "eval_loss": Infinity, "eval_runtime": 54.8849, "eval_samples_per_second": 13.629, "eval_steps_per_second": 1.713, "eval_wer": 1.0, "step": 1100 }, { "epoch": 5.13, "eval_loss": Infinity, "eval_runtime": 55.2282, "eval_samples_per_second": 13.544, "eval_steps_per_second": 1.702, "eval_wer": 1.0, "step": 1200 }, { "epoch": 5.56, "eval_loss": Infinity, "eval_runtime": 56.5087, "eval_samples_per_second": 13.237, "eval_steps_per_second": 1.663, "eval_wer": 1.0, "step": 1300 }, { "epoch": 5.98, "eval_loss": Infinity, "eval_runtime": 54.6135, "eval_samples_per_second": 13.696, "eval_steps_per_second": 1.721, "eval_wer": 1.0, "step": 1400 }, { "epoch": 6.41, "learning_rate": 6.813496932515337e-06, "loss": 6.9866, "step": 1500 }, { "epoch": 6.41, "eval_loss": Infinity, "eval_runtime": 54.3306, "eval_samples_per_second": 13.768, "eval_steps_per_second": 1.73, "eval_wer": 1.0, "step": 1500 }, { "epoch": 6.84, "eval_loss": Infinity, "eval_runtime": 54.6515, "eval_samples_per_second": 13.687, "eval_steps_per_second": 1.72, "eval_wer": 1.0, "step": 1600 }, { "epoch": 7.26, "eval_loss": Infinity, "eval_runtime": 54.747, "eval_samples_per_second": 13.663, "eval_steps_per_second": 1.717, "eval_wer": 1.0, "step": 1700 }, { "epoch": 7.69, "eval_loss": Infinity, "eval_runtime": 54.4355, "eval_samples_per_second": 13.741, "eval_steps_per_second": 1.727, "eval_wer": 1.0, "step": 1800 }, { "epoch": 8.12, "eval_loss": Infinity, "eval_runtime": 54.7161, "eval_samples_per_second": 13.671, "eval_steps_per_second": 1.718, "eval_wer": 1.0, "step": 1900 }, { "epoch": 8.55, "learning_rate": 6.213496932515337e-06, "loss": 6.8089, "step": 2000 }, { "epoch": 8.55, "eval_loss": Infinity, "eval_runtime": 54.3228, "eval_samples_per_second": 13.77, "eval_steps_per_second": 1.73, "eval_wer": 1.0, "step": 2000 }, { "epoch": 8.97, "eval_loss": Infinity, "eval_runtime": 54.3156, "eval_samples_per_second": 13.771, "eval_steps_per_second": 1.731, "eval_wer": 1.0, "step": 2100 }, { "epoch": 9.4, "eval_loss": Infinity, "eval_runtime": 53.9347, "eval_samples_per_second": 13.869, "eval_steps_per_second": 1.743, "eval_wer": 1.0, "step": 2200 }, { "epoch": 9.83, "eval_loss": Infinity, "eval_runtime": 53.9961, "eval_samples_per_second": 13.853, "eval_steps_per_second": 1.741, "eval_wer": 1.0, "step": 2300 }, { "epoch": 10.26, "eval_loss": Infinity, "eval_runtime": 54.7565, "eval_samples_per_second": 13.66, "eval_steps_per_second": 1.717, "eval_wer": 1.0, "step": 2400 }, { "epoch": 10.68, "learning_rate": 5.613496932515337e-06, "loss": 6.7847, "step": 2500 }, { "epoch": 10.68, "eval_loss": Infinity, "eval_runtime": 55.3504, "eval_samples_per_second": 13.514, "eval_steps_per_second": 1.698, "eval_wer": 1.0, "step": 2500 }, { "epoch": 11.11, "eval_loss": Infinity, "eval_runtime": 54.5118, "eval_samples_per_second": 13.722, "eval_steps_per_second": 1.724, "eval_wer": 1.0, "step": 2600 }, { "epoch": 11.54, "eval_loss": Infinity, "eval_runtime": 55.1125, "eval_samples_per_second": 13.572, "eval_steps_per_second": 1.706, "eval_wer": 1.0, "step": 2700 }, { "epoch": 11.97, "eval_loss": Infinity, "eval_runtime": 53.9713, "eval_samples_per_second": 13.859, "eval_steps_per_second": 1.742, "eval_wer": 1.0, "step": 2800 }, { "epoch": 12.39, "eval_loss": Infinity, "eval_runtime": 53.933, "eval_samples_per_second": 13.869, "eval_steps_per_second": 1.743, "eval_wer": 1.0, "step": 2900 }, { "epoch": 12.82, "learning_rate": 5.0122699386503064e-06, "loss": 6.7941, "step": 3000 }, { "epoch": 12.82, "eval_loss": Infinity, "eval_runtime": 53.7915, "eval_samples_per_second": 13.906, "eval_steps_per_second": 1.747, "eval_wer": 1.0, "step": 3000 }, { "epoch": 13.25, "eval_loss": Infinity, "eval_runtime": 54.0722, "eval_samples_per_second": 13.833, "eval_steps_per_second": 1.738, "eval_wer": 1.0, "step": 3100 }, { "epoch": 13.68, "eval_loss": Infinity, "eval_runtime": 53.6653, "eval_samples_per_second": 13.938, "eval_steps_per_second": 1.752, "eval_wer": 1.0, "step": 3200 }, { "epoch": 14.1, "eval_loss": Infinity, "eval_runtime": 53.481, "eval_samples_per_second": 13.986, "eval_steps_per_second": 1.758, "eval_wer": 1.0, "step": 3300 }, { "epoch": 14.53, "eval_loss": Infinity, "eval_runtime": 54.2383, "eval_samples_per_second": 13.791, "eval_steps_per_second": 1.733, "eval_wer": 1.0, "step": 3400 }, { "epoch": 14.96, "learning_rate": 4.412269938650307e-06, "loss": 6.7956, "step": 3500 }, { "epoch": 14.96, "eval_loss": Infinity, "eval_runtime": 54.7211, "eval_samples_per_second": 13.669, "eval_steps_per_second": 1.718, "eval_wer": 1.0, "step": 3500 }, { "epoch": 15.38, "eval_loss": Infinity, "eval_runtime": 54.1823, "eval_samples_per_second": 13.805, "eval_steps_per_second": 1.735, "eval_wer": 1.0, "step": 3600 }, { "epoch": 15.81, "eval_loss": Infinity, "eval_runtime": 54.1656, "eval_samples_per_second": 13.81, "eval_steps_per_second": 1.735, "eval_wer": 1.0, "step": 3700 }, { "epoch": 16.24, "eval_loss": Infinity, "eval_runtime": 54.5557, "eval_samples_per_second": 13.711, "eval_steps_per_second": 1.723, "eval_wer": 1.0, "step": 3800 }, { "epoch": 16.67, "eval_loss": Infinity, "eval_runtime": 54.9313, "eval_samples_per_second": 13.617, "eval_steps_per_second": 1.711, "eval_wer": 1.0, "step": 3900 }, { "epoch": 17.09, "learning_rate": 3.811042944785276e-06, "loss": 6.8102, "step": 4000 }, { "epoch": 17.09, "eval_loss": Infinity, "eval_runtime": 53.5372, "eval_samples_per_second": 13.972, "eval_steps_per_second": 1.756, "eval_wer": 1.0, "step": 4000 }, { "epoch": 17.52, "eval_loss": Infinity, "eval_runtime": 53.8907, "eval_samples_per_second": 13.88, "eval_steps_per_second": 1.744, "eval_wer": 1.0, "step": 4100 }, { "epoch": 17.95, "eval_loss": Infinity, "eval_runtime": 54.237, "eval_samples_per_second": 13.791, "eval_steps_per_second": 1.733, "eval_wer": 1.0, "step": 4200 }, { "epoch": 18.38, "eval_loss": Infinity, "eval_runtime": 55.2609, "eval_samples_per_second": 13.536, "eval_steps_per_second": 1.701, "eval_wer": 1.0, "step": 4300 }, { "epoch": 18.8, "eval_loss": Infinity, "eval_runtime": 54.5797, "eval_samples_per_second": 13.705, "eval_steps_per_second": 1.722, "eval_wer": 1.0, "step": 4400 }, { "epoch": 19.23, "learning_rate": 3.211042944785276e-06, "loss": 6.7761, "step": 4500 }, { "epoch": 19.23, "eval_loss": Infinity, "eval_runtime": 54.9682, "eval_samples_per_second": 13.608, "eval_steps_per_second": 1.71, "eval_wer": 1.0, "step": 4500 }, { "epoch": 19.66, "eval_loss": Infinity, "eval_runtime": 54.7806, "eval_samples_per_second": 13.654, "eval_steps_per_second": 1.716, "eval_wer": 1.0, "step": 4600 }, { "epoch": 20.09, "eval_loss": Infinity, "eval_runtime": 53.3191, "eval_samples_per_second": 14.029, "eval_steps_per_second": 1.763, "eval_wer": 1.0, "step": 4700 }, { "epoch": 20.51, "eval_loss": Infinity, "eval_runtime": 54.1748, "eval_samples_per_second": 13.807, "eval_steps_per_second": 1.735, "eval_wer": 1.0, "step": 4800 }, { "epoch": 20.94, "eval_loss": Infinity, "eval_runtime": 53.7395, "eval_samples_per_second": 13.919, "eval_steps_per_second": 1.749, "eval_wer": 1.0, "step": 4900 }, { "epoch": 21.37, "learning_rate": 2.6085889570552147e-06, "loss": 6.8063, "step": 5000 }, { "epoch": 21.37, "eval_loss": Infinity, "eval_runtime": 54.3884, "eval_samples_per_second": 13.753, "eval_steps_per_second": 1.728, "eval_wer": 1.0, "step": 5000 }, { "epoch": 21.79, "eval_loss": Infinity, "eval_runtime": 54.0413, "eval_samples_per_second": 13.841, "eval_steps_per_second": 1.739, "eval_wer": 1.0, "step": 5100 }, { "epoch": 22.22, "eval_loss": Infinity, "eval_runtime": 53.5209, "eval_samples_per_second": 13.976, "eval_steps_per_second": 1.756, "eval_wer": 1.0, "step": 5200 }, { "epoch": 22.65, "eval_loss": Infinity, "eval_runtime": 53.6053, "eval_samples_per_second": 13.954, "eval_steps_per_second": 1.754, "eval_wer": 1.0, "step": 5300 }, { "epoch": 23.08, "eval_loss": Infinity, "eval_runtime": 53.6011, "eval_samples_per_second": 13.955, "eval_steps_per_second": 1.754, "eval_wer": 1.0, "step": 5400 }, { "epoch": 23.5, "learning_rate": 2.0085889570552145e-06, "loss": 6.7934, "step": 5500 }, { "epoch": 23.5, "eval_loss": Infinity, "eval_runtime": 53.8555, "eval_samples_per_second": 13.889, "eval_steps_per_second": 1.745, "eval_wer": 1.0, "step": 5500 }, { "epoch": 23.93, "eval_loss": Infinity, "eval_runtime": 53.8589, "eval_samples_per_second": 13.888, "eval_steps_per_second": 1.745, "eval_wer": 1.0, "step": 5600 }, { "epoch": 24.36, "eval_loss": Infinity, "eval_runtime": 53.7804, "eval_samples_per_second": 13.908, "eval_steps_per_second": 1.748, "eval_wer": 1.0, "step": 5700 }, { "epoch": 24.79, "eval_loss": Infinity, "eval_runtime": 53.7894, "eval_samples_per_second": 13.906, "eval_steps_per_second": 1.748, "eval_wer": 1.0, "step": 5800 }, { "epoch": 25.21, "eval_loss": Infinity, "eval_runtime": 54.0321, "eval_samples_per_second": 13.844, "eval_steps_per_second": 1.74, "eval_wer": 1.0, "step": 5900 }, { "epoch": 25.64, "learning_rate": 1.4085889570552147e-06, "loss": 6.7819, "step": 6000 }, { "epoch": 25.64, "eval_loss": Infinity, "eval_runtime": 53.8026, "eval_samples_per_second": 13.903, "eval_steps_per_second": 1.747, "eval_wer": 1.0, "step": 6000 }, { "epoch": 26.07, "eval_loss": Infinity, "eval_runtime": 57.7929, "eval_samples_per_second": 12.943, "eval_steps_per_second": 1.626, "eval_wer": 1.0, "step": 6100 }, { "epoch": 26.5, "eval_loss": Infinity, "eval_runtime": 55.4788, "eval_samples_per_second": 13.483, "eval_steps_per_second": 1.694, "eval_wer": 1.0, "step": 6200 }, { "epoch": 26.92, "eval_loss": Infinity, "eval_runtime": 55.3669, "eval_samples_per_second": 13.51, "eval_steps_per_second": 1.698, "eval_wer": 1.0, "step": 6300 }, { "epoch": 27.35, "eval_loss": Infinity, "eval_runtime": 54.9557, "eval_samples_per_second": 13.611, "eval_steps_per_second": 1.71, "eval_wer": 1.0, "step": 6400 }, { "epoch": 27.78, "learning_rate": 8.07361963190184e-07, "loss": 6.8278, "step": 6500 }, { "epoch": 27.78, "eval_loss": Infinity, "eval_runtime": 54.648, "eval_samples_per_second": 13.688, "eval_steps_per_second": 1.72, "eval_wer": 1.0, "step": 6500 }, { "epoch": 28.21, "eval_loss": Infinity, "eval_runtime": 54.7948, "eval_samples_per_second": 13.651, "eval_steps_per_second": 1.715, "eval_wer": 1.0, "step": 6600 }, { "epoch": 28.63, "eval_loss": Infinity, "eval_runtime": 54.9921, "eval_samples_per_second": 13.602, "eval_steps_per_second": 1.709, "eval_wer": 1.0, "step": 6700 }, { "epoch": 29.06, "eval_loss": NaN, "eval_runtime": 54.8536, "eval_samples_per_second": 13.636, "eval_steps_per_second": 1.714, "eval_wer": 1.0, "step": 6800 }, { "epoch": 29.49, "eval_loss": NaN, "eval_runtime": 54.728, "eval_samples_per_second": 13.668, "eval_steps_per_second": 1.718, "eval_wer": 1.0, "step": 6900 }, { "epoch": 29.91, "learning_rate": 2.282208588957055e-07, "loss": 6.7427, "step": 7000 }, { "epoch": 29.91, "eval_loss": NaN, "eval_runtime": 54.4734, "eval_samples_per_second": 13.731, "eval_steps_per_second": 1.726, "eval_wer": 1.0, "step": 7000 }, { "epoch": 30.0, "step": 7020, "total_flos": 8.526629534148013e+18, "train_loss": 7.303107645844462, "train_runtime": 22559.2753, "train_samples_per_second": 9.943, "train_steps_per_second": 0.311 } ], "logging_steps": 500, "max_steps": 7020, "num_train_epochs": 30, "save_steps": 500, "total_flos": 8.526629534148013e+18, "trial_name": null, "trial_params": null }