|
{ |
|
"best_metric": 0.16280439496040344, |
|
"best_model_checkpoint": "./results/checkpoint-500", |
|
"epoch": 2.4038461538461537, |
|
"eval_steps": 20, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09615384615384616, |
|
"grad_norm": 1.8630799055099487, |
|
"learning_rate": 1.9615384615384617e-05, |
|
"loss": 0.4117, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09615384615384616, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.29503169655799866, |
|
"eval_runtime": 2.9188, |
|
"eval_samples_per_second": 31.863, |
|
"eval_steps_per_second": 8.223, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 9.186605453491211, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.3727, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"eval_accuracy": 0.8709677419354839, |
|
"eval_loss": 0.20473997294902802, |
|
"eval_runtime": 2.8638, |
|
"eval_samples_per_second": 32.475, |
|
"eval_steps_per_second": 8.381, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.28846153846153844, |
|
"grad_norm": 8.3997802734375, |
|
"learning_rate": 1.8846153846153846e-05, |
|
"loss": 0.2863, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28846153846153844, |
|
"eval_accuracy": 0.956989247311828, |
|
"eval_loss": 0.2754175662994385, |
|
"eval_runtime": 2.7512, |
|
"eval_samples_per_second": 33.803, |
|
"eval_steps_per_second": 8.723, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 0.0750838965177536, |
|
"learning_rate": 1.8461538461538465e-05, |
|
"loss": 0.2698, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"eval_accuracy": 0.946236559139785, |
|
"eval_loss": 0.16919000446796417, |
|
"eval_runtime": 2.6759, |
|
"eval_samples_per_second": 34.754, |
|
"eval_steps_per_second": 8.969, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.4807692307692308, |
|
"grad_norm": 0.0878710225224495, |
|
"learning_rate": 1.807692307692308e-05, |
|
"loss": 0.1232, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4807692307692308, |
|
"eval_accuracy": 0.946236559139785, |
|
"eval_loss": 0.16618885099887848, |
|
"eval_runtime": 2.6422, |
|
"eval_samples_per_second": 35.197, |
|
"eval_steps_per_second": 9.083, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 0.038383062928915024, |
|
"learning_rate": 1.7692307692307694e-05, |
|
"loss": 0.0806, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"eval_accuracy": 0.946236559139785, |
|
"eval_loss": 0.15975366532802582, |
|
"eval_runtime": 2.6652, |
|
"eval_samples_per_second": 34.894, |
|
"eval_steps_per_second": 9.005, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6730769230769231, |
|
"grad_norm": 0.022622136399149895, |
|
"learning_rate": 1.730769230769231e-05, |
|
"loss": 0.2571, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6730769230769231, |
|
"eval_accuracy": 0.978494623655914, |
|
"eval_loss": 0.0726698786020279, |
|
"eval_runtime": 2.6685, |
|
"eval_samples_per_second": 34.85, |
|
"eval_steps_per_second": 8.994, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 9.369976043701172, |
|
"learning_rate": 1.6923076923076924e-05, |
|
"loss": 0.1374, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"eval_accuracy": 0.978494623655914, |
|
"eval_loss": 0.0701598972082138, |
|
"eval_runtime": 2.6859, |
|
"eval_samples_per_second": 34.626, |
|
"eval_steps_per_second": 8.936, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8653846153846154, |
|
"grad_norm": 7.6264119148254395, |
|
"learning_rate": 1.653846153846154e-05, |
|
"loss": 0.165, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8653846153846154, |
|
"eval_accuracy": 0.956989247311828, |
|
"eval_loss": 0.08651256561279297, |
|
"eval_runtime": 2.7028, |
|
"eval_samples_per_second": 34.409, |
|
"eval_steps_per_second": 8.88, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 0.03415932506322861, |
|
"learning_rate": 1.6153846153846154e-05, |
|
"loss": 0.528, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"eval_accuracy": 0.967741935483871, |
|
"eval_loss": 0.12613914906978607, |
|
"eval_runtime": 2.7168, |
|
"eval_samples_per_second": 34.231, |
|
"eval_steps_per_second": 8.834, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0576923076923077, |
|
"grad_norm": 0.17008128762245178, |
|
"learning_rate": 1.576923076923077e-05, |
|
"loss": 0.1023, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0576923076923077, |
|
"eval_accuracy": 0.9139784946236559, |
|
"eval_loss": 0.37878894805908203, |
|
"eval_runtime": 2.7414, |
|
"eval_samples_per_second": 33.924, |
|
"eval_steps_per_second": 8.755, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.1538461538461537, |
|
"grad_norm": 0.010861682705581188, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 0.0278, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.1538461538461537, |
|
"eval_accuracy": 0.946236559139785, |
|
"eval_loss": 0.1893940567970276, |
|
"eval_runtime": 2.7091, |
|
"eval_samples_per_second": 34.329, |
|
"eval_steps_per_second": 8.859, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.017718007788062096, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.1642, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.9139784946236559, |
|
"eval_loss": 0.3861010670661926, |
|
"eval_runtime": 2.6955, |
|
"eval_samples_per_second": 34.503, |
|
"eval_steps_per_second": 8.904, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.3461538461538463, |
|
"grad_norm": 0.021516259759664536, |
|
"learning_rate": 1.4615384615384615e-05, |
|
"loss": 0.1108, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.3461538461538463, |
|
"eval_accuracy": 0.9354838709677419, |
|
"eval_loss": 0.20804111659526825, |
|
"eval_runtime": 2.9167, |
|
"eval_samples_per_second": 31.886, |
|
"eval_steps_per_second": 8.229, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.4423076923076923, |
|
"grad_norm": 0.013441790826618671, |
|
"learning_rate": 1.4230769230769232e-05, |
|
"loss": 0.1514, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.4423076923076923, |
|
"eval_accuracy": 0.967741935483871, |
|
"eval_loss": 0.15365849435329437, |
|
"eval_runtime": 2.8225, |
|
"eval_samples_per_second": 32.95, |
|
"eval_steps_per_second": 8.503, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 0.02140916883945465, |
|
"learning_rate": 1.3846153846153847e-05, |
|
"loss": 0.0025, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"eval_accuracy": 0.946236559139785, |
|
"eval_loss": 0.30473944544792175, |
|
"eval_runtime": 2.6926, |
|
"eval_samples_per_second": 34.539, |
|
"eval_steps_per_second": 8.913, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.6346153846153846, |
|
"grad_norm": 0.011240499094128609, |
|
"learning_rate": 1.3461538461538463e-05, |
|
"loss": 0.1124, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.6346153846153846, |
|
"eval_accuracy": 0.9354838709677419, |
|
"eval_loss": 0.24811075627803802, |
|
"eval_runtime": 2.6909, |
|
"eval_samples_per_second": 34.561, |
|
"eval_steps_per_second": 8.919, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.7307692307692308, |
|
"grad_norm": 0.009658828377723694, |
|
"learning_rate": 1.3076923076923078e-05, |
|
"loss": 0.3081, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.7307692307692308, |
|
"eval_accuracy": 0.978494623655914, |
|
"eval_loss": 0.1271650344133377, |
|
"eval_runtime": 2.6952, |
|
"eval_samples_per_second": 34.505, |
|
"eval_steps_per_second": 8.905, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.8269230769230769, |
|
"grad_norm": 48.972293853759766, |
|
"learning_rate": 1.2692307692307693e-05, |
|
"loss": 0.0764, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.8269230769230769, |
|
"eval_accuracy": 0.989247311827957, |
|
"eval_loss": 0.0935346931219101, |
|
"eval_runtime": 2.7195, |
|
"eval_samples_per_second": 34.198, |
|
"eval_steps_per_second": 8.825, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"grad_norm": 0.01087904442101717, |
|
"learning_rate": 1.230769230769231e-05, |
|
"loss": 0.0923, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"eval_accuracy": 0.9354838709677419, |
|
"eval_loss": 0.23266802728176117, |
|
"eval_runtime": 2.7195, |
|
"eval_samples_per_second": 34.197, |
|
"eval_steps_per_second": 8.825, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.019230769230769, |
|
"grad_norm": 19.043825149536133, |
|
"learning_rate": 1.1923076923076925e-05, |
|
"loss": 0.137, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.019230769230769, |
|
"eval_accuracy": 0.956989247311828, |
|
"eval_loss": 0.16817408800125122, |
|
"eval_runtime": 2.7341, |
|
"eval_samples_per_second": 34.015, |
|
"eval_steps_per_second": 8.778, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.1153846153846154, |
|
"grad_norm": 0.036368228495121, |
|
"learning_rate": 1.1538461538461538e-05, |
|
"loss": 0.0796, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.1153846153846154, |
|
"eval_accuracy": 0.956989247311828, |
|
"eval_loss": 0.16539430618286133, |
|
"eval_runtime": 2.7265, |
|
"eval_samples_per_second": 34.11, |
|
"eval_steps_per_second": 8.803, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.2115384615384617, |
|
"grad_norm": 0.03690864145755768, |
|
"learning_rate": 1.1153846153846154e-05, |
|
"loss": 0.0089, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.2115384615384617, |
|
"eval_accuracy": 0.956989247311828, |
|
"eval_loss": 0.2199721783399582, |
|
"eval_runtime": 2.7071, |
|
"eval_samples_per_second": 34.354, |
|
"eval_steps_per_second": 8.865, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 0.011880586855113506, |
|
"learning_rate": 1.076923076923077e-05, |
|
"loss": 0.1514, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"eval_accuracy": 0.956989247311828, |
|
"eval_loss": 0.20590856671333313, |
|
"eval_runtime": 2.697, |
|
"eval_samples_per_second": 34.482, |
|
"eval_steps_per_second": 8.899, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.4038461538461537, |
|
"grad_norm": 0.008282246068120003, |
|
"learning_rate": 1.0384615384615386e-05, |
|
"loss": 0.0044, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4038461538461537, |
|
"eval_accuracy": 0.956989247311828, |
|
"eval_loss": 0.16280439496040344, |
|
"eval_runtime": 2.7133, |
|
"eval_samples_per_second": 34.275, |
|
"eval_steps_per_second": 8.845, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1040, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 480317453780160.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|