adapters-opt-bf16-QLORA-super_glue-boolq/trainer_state-opt-fp16-QLORA-super_glue-boolq-sequence_classification.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.984,
  "eval_steps": 1,
  "global_step": 62,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.032,
      "grad_norm": 8.5625,
      "learning_rate": 2.5e-05,
      "loss": 0.733,
      "step": 1
    },
    {
      "epoch": 0.032,
      "eval_accuracy": 0.6,
      "eval_loss": 0.7043750286102295,
      "eval_runtime": 5.8309,
      "eval_samples_per_second": 42.875,
      "eval_steps_per_second": 5.488,
      "step": 1
    },
    {
      "epoch": 0.064,
      "grad_norm": 8.3125,
      "learning_rate": 5e-05,
      "loss": 0.7766,
      "step": 2
    },
    {
      "epoch": 0.064,
      "eval_accuracy": 0.58,
      "eval_loss": 0.6986504197120667,
      "eval_runtime": 5.9246,
      "eval_samples_per_second": 42.197,
      "eval_steps_per_second": 5.401,
      "step": 2
    },
    {
      "epoch": 0.096,
      "grad_norm": 8.75,
      "learning_rate": 4.9166666666666665e-05,
      "loss": 0.8106,
      "step": 3
    },
    {
      "epoch": 0.096,
      "eval_accuracy": 0.588,
      "eval_loss": 0.696652352809906,
      "eval_runtime": 5.8737,
      "eval_samples_per_second": 42.562,
      "eval_steps_per_second": 5.448,
      "step": 3
    },
    {
      "epoch": 0.128,
      "grad_norm": 11.625,
      "learning_rate": 4.8333333333333334e-05,
      "loss": 0.8419,
      "step": 4
    },
    {
      "epoch": 0.128,
      "eval_accuracy": 0.556,
      "eval_loss": 0.7022871375083923,
      "eval_runtime": 5.9244,
      "eval_samples_per_second": 42.198,
      "eval_steps_per_second": 5.401,
      "step": 4
    },
    {
      "epoch": 0.16,
      "grad_norm": 11.625,
      "learning_rate": 4.75e-05,
      "loss": 0.6818,
      "step": 5
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.544,
      "eval_loss": 0.7068164348602295,
      "eval_runtime": 5.9214,
      "eval_samples_per_second": 42.22,
      "eval_steps_per_second": 5.404,
      "step": 5
    },
    {
      "epoch": 0.192,
      "grad_norm": 7.84375,
      "learning_rate": 4.666666666666667e-05,
      "loss": 0.6394,
      "step": 6
    },
    {
      "epoch": 0.192,
      "eval_accuracy": 0.548,
      "eval_loss": 0.7022500038146973,
      "eval_runtime": 5.9245,
      "eval_samples_per_second": 42.198,
      "eval_steps_per_second": 5.401,
      "step": 6
    },
    {
      "epoch": 0.224,
      "grad_norm": 11.4375,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 0.7657,
      "step": 7
    },
    {
      "epoch": 0.224,
      "eval_accuracy": 0.552,
      "eval_loss": 0.6960136890411377,
      "eval_runtime": 5.9227,
      "eval_samples_per_second": 42.21,
      "eval_steps_per_second": 5.403,
      "step": 7
    },
    {
      "epoch": 0.256,
      "grad_norm": 11.4375,
      "learning_rate": 4.5e-05,
      "loss": 0.6863,
      "step": 8
    },
    {
      "epoch": 0.256,
      "eval_accuracy": 0.58,
      "eval_loss": 0.6908359527587891,
      "eval_runtime": 5.9191,
      "eval_samples_per_second": 42.236,
      "eval_steps_per_second": 5.406,
      "step": 8
    },
    {
      "epoch": 0.288,
      "grad_norm": 9.4375,
      "learning_rate": 4.4166666666666665e-05,
      "loss": 0.6082,
      "step": 9
    },
    {
      "epoch": 0.288,
      "eval_accuracy": 0.584,
      "eval_loss": 0.6917402148246765,
      "eval_runtime": 5.9192,
      "eval_samples_per_second": 42.235,
      "eval_steps_per_second": 5.406,
      "step": 9
    },
    {
      "epoch": 0.32,
      "grad_norm": 13.125,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 0.7559,
      "step": 10
    },
    {
      "epoch": 0.32,
      "eval_accuracy": 0.604,
      "eval_loss": 0.697378933429718,
      "eval_runtime": 5.9209,
      "eval_samples_per_second": 42.223,
      "eval_steps_per_second": 5.405,
      "step": 10
    },
    {
      "epoch": 0.352,
      "grad_norm": 8.0,
      "learning_rate": 4.25e-05,
      "loss": 0.6119,
      "step": 11
    },
    {
      "epoch": 0.352,
      "eval_accuracy": 0.608,
      "eval_loss": 0.7080517411231995,
      "eval_runtime": 5.9254,
      "eval_samples_per_second": 42.192,
      "eval_steps_per_second": 5.401,
      "step": 11
    },
    {
      "epoch": 0.384,
      "grad_norm": 7.5625,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.7329,
      "step": 12
    },
    {
      "epoch": 0.384,
      "eval_accuracy": 0.608,
      "eval_loss": 0.7140927910804749,
      "eval_runtime": 5.9201,
      "eval_samples_per_second": 42.229,
      "eval_steps_per_second": 5.405,
      "step": 12
    },
    {
      "epoch": 0.416,
      "grad_norm": 12.3125,
      "learning_rate": 4.0833333333333334e-05,
      "loss": 0.5271,
      "step": 13
    },
    {
      "epoch": 0.416,
      "eval_accuracy": 0.604,
      "eval_loss": 0.7197051048278809,
      "eval_runtime": 5.922,
      "eval_samples_per_second": 42.216,
      "eval_steps_per_second": 5.404,
      "step": 13
    },
    {
      "epoch": 0.448,
      "grad_norm": 5.875,
      "learning_rate": 4e-05,
      "loss": 0.7666,
      "step": 14
    },
    {
      "epoch": 0.448,
      "eval_accuracy": 0.604,
      "eval_loss": 0.7194697260856628,
      "eval_runtime": 5.9186,
      "eval_samples_per_second": 42.24,
      "eval_steps_per_second": 5.407,
      "step": 14
    },
    {
      "epoch": 0.48,
      "grad_norm": 13.75,
      "learning_rate": 3.9166666666666665e-05,
      "loss": 0.7799,
      "step": 15
    },
    {
      "epoch": 0.48,
      "eval_accuracy": 0.604,
      "eval_loss": 0.7156533002853394,
      "eval_runtime": 5.9165,
      "eval_samples_per_second": 42.255,
      "eval_steps_per_second": 5.409,
      "step": 15
    },
    {
      "epoch": 0.512,
      "grad_norm": 6.40625,
      "learning_rate": 3.8333333333333334e-05,
      "loss": 0.6629,
      "step": 16
    },
    {
      "epoch": 0.512,
      "eval_accuracy": 0.604,
      "eval_loss": 0.7086992263793945,
      "eval_runtime": 5.9244,
      "eval_samples_per_second": 42.198,
      "eval_steps_per_second": 5.401,
      "step": 16
    },
    {
      "epoch": 0.544,
      "grad_norm": 5.25,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.6303,
      "step": 17
    },
    {
      "epoch": 0.544,
      "eval_accuracy": 0.608,
      "eval_loss": 0.7018144726753235,
      "eval_runtime": 5.9242,
      "eval_samples_per_second": 42.2,
      "eval_steps_per_second": 5.402,
      "step": 17
    },
    {
      "epoch": 0.576,
      "grad_norm": 14.25,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 0.9019,
      "step": 18
    },
    {
      "epoch": 0.576,
      "eval_accuracy": 0.608,
      "eval_loss": 0.6947382688522339,
      "eval_runtime": 5.9231,
      "eval_samples_per_second": 42.208,
      "eval_steps_per_second": 5.403,
      "step": 18
    },
    {
      "epoch": 0.608,
      "grad_norm": 5.6875,
      "learning_rate": 3.5833333333333335e-05,
      "loss": 0.6538,
      "step": 19
    },
    {
      "epoch": 0.608,
      "eval_accuracy": 0.6,
      "eval_loss": 0.6891113519668579,
      "eval_runtime": 5.9253,
      "eval_samples_per_second": 42.192,
      "eval_steps_per_second": 5.401,
      "step": 19
    },
    {
      "epoch": 0.64,
      "grad_norm": 6.90625,
      "learning_rate": 3.5e-05,
      "loss": 0.653,
      "step": 20
    },
    {
      "epoch": 0.64,
      "eval_accuracy": 0.592,
      "eval_loss": 0.6882695555686951,
      "eval_runtime": 5.9204,
      "eval_samples_per_second": 42.227,
      "eval_steps_per_second": 5.405,
      "step": 20
    },
    {
      "epoch": 0.672,
      "grad_norm": 12.1875,
      "learning_rate": 3.4166666666666666e-05,
      "loss": 0.6106,
      "step": 21
    },
    {
      "epoch": 0.672,
      "eval_accuracy": 0.592,
      "eval_loss": 0.6871914267539978,
      "eval_runtime": 5.9251,
      "eval_samples_per_second": 42.194,
      "eval_steps_per_second": 5.401,
      "step": 21
    },
    {
      "epoch": 0.704,
      "grad_norm": 7.28125,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.6786,
      "step": 22
    },
    {
      "epoch": 0.704,
      "eval_accuracy": 0.592,
      "eval_loss": 0.6855449080467224,
      "eval_runtime": 5.9259,
      "eval_samples_per_second": 42.187,
      "eval_steps_per_second": 5.4,
      "step": 22
    },
    {
      "epoch": 0.736,
      "grad_norm": 6.875,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.6258,
      "step": 23
    },
    {
      "epoch": 0.736,
      "eval_accuracy": 0.596,
      "eval_loss": 0.6861640810966492,
      "eval_runtime": 5.9226,
      "eval_samples_per_second": 42.211,
      "eval_steps_per_second": 5.403,
      "step": 23
    },
    {
      "epoch": 0.768,
      "grad_norm": 4.28125,
      "learning_rate": 3.1666666666666666e-05,
      "loss": 0.6652,
      "step": 24
    },
    {
      "epoch": 0.768,
      "eval_accuracy": 0.596,
      "eval_loss": 0.6845800876617432,
      "eval_runtime": 5.9223,
      "eval_samples_per_second": 42.214,
      "eval_steps_per_second": 5.403,
      "step": 24
    },
    {
      "epoch": 0.8,
      "grad_norm": 5.84375,
      "learning_rate": 3.0833333333333335e-05,
      "loss": 0.6385,
      "step": 25
    },
    {
      "epoch": 0.8,
      "eval_accuracy": 0.584,
      "eval_loss": 0.6855800747871399,
      "eval_runtime": 5.9257,
      "eval_samples_per_second": 42.189,
      "eval_steps_per_second": 5.4,
      "step": 25
    },
    {
      "epoch": 0.832,
      "grad_norm": 7.28125,
      "learning_rate": 3e-05,
      "loss": 0.6714,
      "step": 26
    },
    {
      "epoch": 0.832,
      "eval_accuracy": 0.588,
      "eval_loss": 0.6856074333190918,
      "eval_runtime": 5.9225,
      "eval_samples_per_second": 42.212,
      "eval_steps_per_second": 5.403,
      "step": 26
    },
    {
      "epoch": 0.864,
      "grad_norm": 8.0625,
      "learning_rate": 2.916666666666667e-05,
      "loss": 0.6958,
      "step": 27
    },
    {
      "epoch": 0.864,
      "eval_accuracy": 0.584,
      "eval_loss": 0.6856620907783508,
      "eval_runtime": 5.9226,
      "eval_samples_per_second": 42.211,
      "eval_steps_per_second": 5.403,
      "step": 27
    },
    {
      "epoch": 0.896,
      "grad_norm": 9.375,
      "learning_rate": 2.8333333333333335e-05,
      "loss": 0.8685,
      "step": 28
    },
    {
      "epoch": 0.896,
      "eval_accuracy": 0.588,
      "eval_loss": 0.6862812638282776,
      "eval_runtime": 5.9202,
      "eval_samples_per_second": 42.228,
      "eval_steps_per_second": 5.405,
      "step": 28
    },
    {
      "epoch": 0.928,
      "grad_norm": 9.5625,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.7247,
      "step": 29
    },
    {
      "epoch": 0.928,
      "eval_accuracy": 0.592,
      "eval_loss": 0.6854003667831421,
      "eval_runtime": 5.9258,
      "eval_samples_per_second": 42.188,
      "eval_steps_per_second": 5.4,
      "step": 29
    },
    {
      "epoch": 0.96,
      "grad_norm": 5.96875,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 0.6199,
      "step": 30
    },
    {
      "epoch": 0.96,
      "eval_accuracy": 0.592,
      "eval_loss": 0.6848945021629333,
      "eval_runtime": 5.9245,
      "eval_samples_per_second": 42.197,
      "eval_steps_per_second": 5.401,
      "step": 30
    },
    {
      "epoch": 0.992,
      "grad_norm": 5.34375,
      "learning_rate": 2.5833333333333336e-05,
      "loss": 0.6491,
      "step": 31
    },
    {
      "epoch": 0.992,
      "eval_accuracy": 0.592,
      "eval_loss": 0.6858437657356262,
      "eval_runtime": 5.9198,
      "eval_samples_per_second": 42.231,
      "eval_steps_per_second": 5.406,
      "step": 31
    },
    {
      "epoch": 1.024,
      "grad_norm": 5.3125,
      "learning_rate": 2.5e-05,
      "loss": 0.6674,
      "step": 32
    },
    {
      "epoch": 1.024,
      "eval_accuracy": 0.592,
      "eval_loss": 0.6850195527076721,
      "eval_runtime": 5.9233,
      "eval_samples_per_second": 42.206,
      "eval_steps_per_second": 5.402,
      "step": 32
    },
    {
      "epoch": 1.056,
      "grad_norm": 5.1875,
      "learning_rate": 2.4166666666666667e-05,
      "loss": 0.7468,
      "step": 33
    },
    {
      "epoch": 1.056,
      "eval_accuracy": 0.596,
      "eval_loss": 0.6846699118614197,
      "eval_runtime": 5.928,
      "eval_samples_per_second": 42.173,
      "eval_steps_per_second": 5.398,
      "step": 33
    },
    {
      "epoch": 1.088,
      "grad_norm": 11.6875,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 0.7459,
      "step": 34
    },
    {
      "epoch": 1.088,
      "eval_accuracy": 0.6,
      "eval_loss": 0.6840722560882568,
      "eval_runtime": 5.9285,
      "eval_samples_per_second": 42.169,
      "eval_steps_per_second": 5.398,
      "step": 34
    },
    {
      "epoch": 1.12,
      "grad_norm": 6.875,
      "learning_rate": 2.25e-05,
      "loss": 0.6869,
      "step": 35
    },
    {
      "epoch": 1.12,
      "eval_accuracy": 0.596,
      "eval_loss": 0.6836249828338623,
      "eval_runtime": 5.9248,
      "eval_samples_per_second": 42.195,
      "eval_steps_per_second": 5.401,
      "step": 35
    },
    {
      "epoch": 1.152,
      "grad_norm": 4.6875,
      "learning_rate": 2.1666666666666667e-05,
      "loss": 0.6937,
      "step": 36
    },
    {
      "epoch": 1.152,
      "eval_accuracy": 0.592,
      "eval_loss": 0.683789074420929,
      "eval_runtime": 5.9264,
      "eval_samples_per_second": 42.184,
      "eval_steps_per_second": 5.4,
      "step": 36
    },
    {
      "epoch": 1.184,
      "grad_norm": 7.0625,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.6686,
      "step": 37
    },
    {
      "epoch": 1.184,
      "eval_accuracy": 0.588,
      "eval_loss": 0.684499979019165,
      "eval_runtime": 5.9211,
      "eval_samples_per_second": 42.222,
      "eval_steps_per_second": 5.404,
      "step": 37
    },
    {
      "epoch": 1.216,
      "grad_norm": 7.15625,
      "learning_rate": 2e-05,
      "loss": 0.6833,
      "step": 38
    },
    {
      "epoch": 1.216,
      "eval_accuracy": 0.584,
      "eval_loss": 0.6837460994720459,
      "eval_runtime": 5.9186,
      "eval_samples_per_second": 42.24,
      "eval_steps_per_second": 5.407,
      "step": 38
    },
    {
      "epoch": 1.248,
      "grad_norm": 3.890625,
      "learning_rate": 1.9166666666666667e-05,
      "loss": 0.7075,
      "step": 39
    },
    {
      "epoch": 1.248,
      "eval_accuracy": 0.576,
      "eval_loss": 0.6840898394584656,
      "eval_runtime": 5.9275,
      "eval_samples_per_second": 42.176,
      "eval_steps_per_second": 5.399,
      "step": 39
    },
    {
      "epoch": 1.28,
      "grad_norm": 8.375,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 0.6724,
      "step": 40
    },
    {
      "epoch": 1.28,
      "eval_accuracy": 0.568,
      "eval_loss": 0.6857500076293945,
      "eval_runtime": 5.9354,
      "eval_samples_per_second": 42.12,
      "eval_steps_per_second": 5.391,
      "step": 40
    },
    {
      "epoch": 1.312,
      "grad_norm": 12.0,
      "learning_rate": 1.75e-05,
      "loss": 0.7046,
      "step": 41
    },
    {
      "epoch": 1.312,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6855937242507935,
      "eval_runtime": 5.9205,
      "eval_samples_per_second": 42.226,
      "eval_steps_per_second": 5.405,
      "step": 41
    },
    {
      "epoch": 1.3439999999999999,
      "grad_norm": 5.03125,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.6974,
      "step": 42
    },
    {
      "epoch": 1.3439999999999999,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6870351433753967,
      "eval_runtime": 5.9614,
      "eval_samples_per_second": 41.937,
      "eval_steps_per_second": 5.368,
      "step": 42
    },
    {
      "epoch": 1.376,
      "grad_norm": 4.03125,
      "learning_rate": 1.5833333333333333e-05,
      "loss": 0.6517,
      "step": 43
    },
    {
      "epoch": 1.376,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6863359212875366,
      "eval_runtime": 6.1355,
      "eval_samples_per_second": 40.747,
      "eval_steps_per_second": 5.216,
      "step": 43
    },
    {
      "epoch": 1.408,
      "grad_norm": 11.875,
      "learning_rate": 1.5e-05,
      "loss": 0.6137,
      "step": 44
    },
    {
      "epoch": 1.408,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6860390901565552,
      "eval_runtime": 6.039,
      "eval_samples_per_second": 41.398,
      "eval_steps_per_second": 5.299,
      "step": 44
    },
    {
      "epoch": 1.44,
      "grad_norm": 7.3125,
      "learning_rate": 1.4166666666666668e-05,
      "loss": 0.6235,
      "step": 45
    },
    {
      "epoch": 1.44,
      "eval_accuracy": 0.576,
      "eval_loss": 0.6860234141349792,
      "eval_runtime": 5.9313,
      "eval_samples_per_second": 42.149,
      "eval_steps_per_second": 5.395,
      "step": 45
    },
    {
      "epoch": 1.472,
      "grad_norm": 5.5,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.6591,
      "step": 46
    },
    {
      "epoch": 1.472,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6854648590087891,
      "eval_runtime": 5.9399,
      "eval_samples_per_second": 42.088,
      "eval_steps_per_second": 5.387,
      "step": 46
    },
    {
      "epoch": 1.504,
      "grad_norm": 4.65625,
      "learning_rate": 1.25e-05,
      "loss": 0.6518,
      "step": 47
    },
    {
      "epoch": 1.504,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6862617135047913,
      "eval_runtime": 5.9388,
      "eval_samples_per_second": 42.096,
      "eval_steps_per_second": 5.388,
      "step": 47
    },
    {
      "epoch": 1.536,
      "grad_norm": 5.875,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 0.7213,
      "step": 48
    },
    {
      "epoch": 1.536,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6868203282356262,
      "eval_runtime": 5.9401,
      "eval_samples_per_second": 42.087,
      "eval_steps_per_second": 5.387,
      "step": 48
    },
    {
      "epoch": 1.568,
      "grad_norm": 3.484375,
      "learning_rate": 1.0833333333333334e-05,
      "loss": 0.6219,
      "step": 49
    },
    {
      "epoch": 1.568,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6863906383514404,
      "eval_runtime": 5.9235,
      "eval_samples_per_second": 42.205,
      "eval_steps_per_second": 5.402,
      "step": 49
    },
    {
      "epoch": 1.6,
      "grad_norm": 11.25,
      "learning_rate": 1e-05,
      "loss": 0.6309,
      "step": 50
    },
    {
      "epoch": 1.6,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6866523623466492,
      "eval_runtime": 5.9289,
      "eval_samples_per_second": 42.166,
      "eval_steps_per_second": 5.397,
      "step": 50
    },
    {
      "epoch": 1.6320000000000001,
      "grad_norm": 4.96875,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.6311,
      "step": 51
    },
    {
      "epoch": 1.6320000000000001,
      "eval_accuracy": 0.576,
      "eval_loss": 0.6857929825782776,
      "eval_runtime": 5.9373,
      "eval_samples_per_second": 42.106,
      "eval_steps_per_second": 5.39,
      "step": 51
    },
    {
      "epoch": 1.6640000000000001,
      "grad_norm": 5.8125,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.7101,
      "step": 52
    },
    {
      "epoch": 1.6640000000000001,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6864804625511169,
      "eval_runtime": 5.9361,
      "eval_samples_per_second": 42.115,
      "eval_steps_per_second": 5.391,
      "step": 52
    },
    {
      "epoch": 1.696,
      "grad_norm": 5.0,
      "learning_rate": 7.5e-06,
      "loss": 0.6605,
      "step": 53
    },
    {
      "epoch": 1.696,
      "eval_accuracy": 0.568,
      "eval_loss": 0.686718761920929,
      "eval_runtime": 5.8868,
      "eval_samples_per_second": 42.468,
      "eval_steps_per_second": 5.436,
      "step": 53
    },
    {
      "epoch": 1.728,
      "grad_norm": 6.9375,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.7161,
      "step": 54
    },
    {
      "epoch": 1.728,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6871640682220459,
      "eval_runtime": 5.8848,
      "eval_samples_per_second": 42.482,
      "eval_steps_per_second": 5.438,
      "step": 54
    },
    {
      "epoch": 1.76,
      "grad_norm": 5.4375,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.7532,
      "step": 55
    },
    {
      "epoch": 1.76,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6864922046661377,
      "eval_runtime": 5.9431,
      "eval_samples_per_second": 42.065,
      "eval_steps_per_second": 5.384,
      "step": 55
    },
    {
      "epoch": 1.792,
      "grad_norm": 4.8125,
      "learning_rate": 5e-06,
      "loss": 0.7025,
      "step": 56
    },
    {
      "epoch": 1.792,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6855624914169312,
      "eval_runtime": 5.9353,
      "eval_samples_per_second": 42.121,
      "eval_steps_per_second": 5.391,
      "step": 56
    },
    {
      "epoch": 1.8239999999999998,
      "grad_norm": 9.0625,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.631,
      "step": 57
    },
    {
      "epoch": 1.8239999999999998,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6870039105415344,
      "eval_runtime": 5.8895,
      "eval_samples_per_second": 42.448,
      "eval_steps_per_second": 5.433,
      "step": 57
    },
    {
      "epoch": 1.8559999999999999,
      "grad_norm": 5.46875,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.6203,
      "step": 58
    },
    {
      "epoch": 1.8559999999999999,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6862148642539978,
      "eval_runtime": 5.9398,
      "eval_samples_per_second": 42.089,
      "eval_steps_per_second": 5.387,
      "step": 58
    },
    {
      "epoch": 1.888,
      "grad_norm": 4.53125,
      "learning_rate": 2.5e-06,
      "loss": 0.6924,
      "step": 59
    },
    {
      "epoch": 1.888,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6864296793937683,
      "eval_runtime": 5.9454,
      "eval_samples_per_second": 42.049,
      "eval_steps_per_second": 5.382,
      "step": 59
    },
    {
      "epoch": 1.92,
      "grad_norm": 7.59375,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.7316,
      "step": 60
    },
    {
      "epoch": 1.92,
      "eval_accuracy": 0.576,
      "eval_loss": 0.6849257946014404,
      "eval_runtime": 5.9302,
      "eval_samples_per_second": 42.157,
      "eval_steps_per_second": 5.396,
      "step": 60
    },
    {
      "epoch": 1.952,
      "grad_norm": 7.125,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.6077,
      "step": 61
    },
    {
      "epoch": 1.952,
      "eval_accuracy": 0.572,
      "eval_loss": 0.6858437657356262,
      "eval_runtime": 5.9452,
      "eval_samples_per_second": 42.051,
      "eval_steps_per_second": 5.383,
      "step": 61
    },
    {
      "epoch": 1.984,
      "grad_norm": 9.625,
      "learning_rate": 0.0,
      "loss": 0.5587,
      "step": 62
    },
    {
      "epoch": 1.984,
      "eval_accuracy": 0.576,
      "eval_loss": 0.6857460737228394,
      "eval_runtime": 5.9442,
      "eval_samples_per_second": 42.058,
      "eval_steps_per_second": 5.383,
      "step": 62
    },
    {
      "epoch": 1.984,
      "step": 62,
      "total_flos": 1.3708912637247488e+16,
      "train_loss": 0.6859967631678427,
      "train_runtime": 502.3798,
      "train_samples_per_second": 3.981,
      "train_steps_per_second": 0.123
    }
  ],
  "logging_steps": 1,
  "max_steps": 62,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3708912637247488e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}