adapters-opt-bf16-QLORA-super_glue-cb
/
trainer_state-opt-fp16-QLORA-super_glue-cb-sequence_classification.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 10.0, | |
"eval_steps": 1, | |
"global_step": 40, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.25, | |
"grad_norm": 22.375, | |
"learning_rate": 2.5e-05, | |
"loss": 1.3311, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.25, | |
"eval_accuracy": 0.3548387096774194, | |
"eval_f1": 0.2145748987854251, | |
"eval_loss": 1.257245421409607, | |
"eval_runtime": 1.4238, | |
"eval_samples_per_second": 43.545, | |
"eval_steps_per_second": 2.809, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.5, | |
"grad_norm": 23.125, | |
"learning_rate": 5e-05, | |
"loss": 1.3826, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.5, | |
"eval_accuracy": 0.3709677419354839, | |
"eval_f1": 0.18930041152263377, | |
"eval_loss": 1.2128275632858276, | |
"eval_runtime": 1.4194, | |
"eval_samples_per_second": 43.681, | |
"eval_steps_per_second": 2.818, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.75, | |
"grad_norm": 21.375, | |
"learning_rate": 4.868421052631579e-05, | |
"loss": 1.1411, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.75, | |
"eval_accuracy": 0.43548387096774194, | |
"eval_f1": 0.20689655172413793, | |
"eval_loss": 1.1111390590667725, | |
"eval_runtime": 1.3732, | |
"eval_samples_per_second": 45.15, | |
"eval_steps_per_second": 2.913, | |
"step": 3 | |
}, | |
{ | |
"epoch": 1.0, | |
"grad_norm": 19.125, | |
"learning_rate": 4.736842105263158e-05, | |
"loss": 1.0967, | |
"step": 4 | |
}, | |
{ | |
"epoch": 1.0, | |
"eval_accuracy": 0.45161290322580644, | |
"eval_f1": 0.20740740740740746, | |
"eval_loss": 1.0505292415618896, | |
"eval_runtime": 1.4197, | |
"eval_samples_per_second": 43.671, | |
"eval_steps_per_second": 2.817, | |
"step": 4 | |
}, | |
{ | |
"epoch": 1.25, | |
"grad_norm": 16.75, | |
"learning_rate": 4.605263157894737e-05, | |
"loss": 1.0499, | |
"step": 5 | |
}, | |
{ | |
"epoch": 1.25, | |
"eval_accuracy": 0.46774193548387094, | |
"eval_f1": 0.23196004993757802, | |
"eval_loss": 1.023846983909607, | |
"eval_runtime": 1.4198, | |
"eval_samples_per_second": 43.669, | |
"eval_steps_per_second": 2.817, | |
"step": 5 | |
}, | |
{ | |
"epoch": 1.5, | |
"grad_norm": 13.25, | |
"learning_rate": 4.473684210526316e-05, | |
"loss": 0.9663, | |
"step": 6 | |
}, | |
{ | |
"epoch": 1.5, | |
"eval_accuracy": 0.46774193548387094, | |
"eval_f1": 0.23196004993757802, | |
"eval_loss": 1.0119235515594482, | |
"eval_runtime": 1.4209, | |
"eval_samples_per_second": 43.633, | |
"eval_steps_per_second": 2.815, | |
"step": 6 | |
}, | |
{ | |
"epoch": 1.75, | |
"grad_norm": 14.625, | |
"learning_rate": 4.342105263157895e-05, | |
"loss": 1.0378, | |
"step": 7 | |
}, | |
{ | |
"epoch": 1.75, | |
"eval_accuracy": 0.46774193548387094, | |
"eval_f1": 0.21245421245421245, | |
"eval_loss": 1.0040794610977173, | |
"eval_runtime": 1.4206, | |
"eval_samples_per_second": 43.643, | |
"eval_steps_per_second": 2.816, | |
"step": 7 | |
}, | |
{ | |
"epoch": 2.0, | |
"grad_norm": 15.125, | |
"learning_rate": 4.210526315789474e-05, | |
"loss": 0.9995, | |
"step": 8 | |
}, | |
{ | |
"epoch": 2.0, | |
"eval_accuracy": 0.46774193548387094, | |
"eval_f1": 0.23196004993757802, | |
"eval_loss": 0.9963457584381104, | |
"eval_runtime": 1.4208, | |
"eval_samples_per_second": 43.638, | |
"eval_steps_per_second": 2.815, | |
"step": 8 | |
}, | |
{ | |
"epoch": 2.25, | |
"grad_norm": 6.09375, | |
"learning_rate": 4.078947368421053e-05, | |
"loss": 0.8568, | |
"step": 9 | |
}, | |
{ | |
"epoch": 2.25, | |
"eval_accuracy": 0.46774193548387094, | |
"eval_f1": 0.21245421245421245, | |
"eval_loss": 0.9923135042190552, | |
"eval_runtime": 1.4203, | |
"eval_samples_per_second": 43.652, | |
"eval_steps_per_second": 2.816, | |
"step": 9 | |
}, | |
{ | |
"epoch": 2.5, | |
"grad_norm": 8.6875, | |
"learning_rate": 3.9473684210526316e-05, | |
"loss": 0.9505, | |
"step": 10 | |
}, | |
{ | |
"epoch": 2.5, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.25513196480938416, | |
"eval_loss": 0.9838079810142517, | |
"eval_runtime": 1.4208, | |
"eval_samples_per_second": 43.636, | |
"eval_steps_per_second": 2.815, | |
"step": 10 | |
}, | |
{ | |
"epoch": 2.75, | |
"grad_norm": 12.0, | |
"learning_rate": 3.815789473684211e-05, | |
"loss": 1.0523, | |
"step": 11 | |
}, | |
{ | |
"epoch": 2.75, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.26990838618745594, | |
"eval_loss": 0.9715851545333862, | |
"eval_runtime": 1.4206, | |
"eval_samples_per_second": 43.644, | |
"eval_steps_per_second": 2.816, | |
"step": 11 | |
}, | |
{ | |
"epoch": 3.0, | |
"grad_norm": 9.25, | |
"learning_rate": 3.6842105263157895e-05, | |
"loss": 0.8034, | |
"step": 12 | |
}, | |
{ | |
"epoch": 3.0, | |
"eval_accuracy": 0.5, | |
"eval_f1": 0.3108974358974359, | |
"eval_loss": 0.9636703133583069, | |
"eval_runtime": 1.4203, | |
"eval_samples_per_second": 43.653, | |
"eval_steps_per_second": 2.816, | |
"step": 12 | |
}, | |
{ | |
"epoch": 3.25, | |
"grad_norm": 7.15625, | |
"learning_rate": 3.5526315789473684e-05, | |
"loss": 0.8381, | |
"step": 13 | |
}, | |
{ | |
"epoch": 3.25, | |
"eval_accuracy": 0.5, | |
"eval_f1": 0.3108974358974359, | |
"eval_loss": 0.9614336490631104, | |
"eval_runtime": 1.4203, | |
"eval_samples_per_second": 43.652, | |
"eval_steps_per_second": 2.816, | |
"step": 13 | |
}, | |
{ | |
"epoch": 3.5, | |
"grad_norm": 5.65625, | |
"learning_rate": 3.421052631578947e-05, | |
"loss": 0.9831, | |
"step": 14 | |
}, | |
{ | |
"epoch": 3.5, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9595750570297241, | |
"eval_runtime": 1.3701, | |
"eval_samples_per_second": 45.253, | |
"eval_steps_per_second": 2.92, | |
"step": 14 | |
}, | |
{ | |
"epoch": 3.75, | |
"grad_norm": 4.21875, | |
"learning_rate": 3.289473684210527e-05, | |
"loss": 0.7901, | |
"step": 15 | |
}, | |
{ | |
"epoch": 3.75, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9619613289833069, | |
"eval_runtime": 1.4199, | |
"eval_samples_per_second": 43.667, | |
"eval_steps_per_second": 2.817, | |
"step": 15 | |
}, | |
{ | |
"epoch": 4.0, | |
"grad_norm": 6.65625, | |
"learning_rate": 3.157894736842105e-05, | |
"loss": 0.7296, | |
"step": 16 | |
}, | |
{ | |
"epoch": 4.0, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9658597111701965, | |
"eval_runtime": 1.4204, | |
"eval_samples_per_second": 43.65, | |
"eval_steps_per_second": 2.816, | |
"step": 16 | |
}, | |
{ | |
"epoch": 4.25, | |
"grad_norm": 8.25, | |
"learning_rate": 3.0263157894736844e-05, | |
"loss": 0.7682, | |
"step": 17 | |
}, | |
{ | |
"epoch": 4.25, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9644736051559448, | |
"eval_runtime": 1.4189, | |
"eval_samples_per_second": 43.695, | |
"eval_steps_per_second": 2.819, | |
"step": 17 | |
}, | |
{ | |
"epoch": 4.5, | |
"grad_norm": 6.71875, | |
"learning_rate": 2.8947368421052634e-05, | |
"loss": 0.88, | |
"step": 18 | |
}, | |
{ | |
"epoch": 4.5, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9645444750785828, | |
"eval_runtime": 1.419, | |
"eval_samples_per_second": 43.694, | |
"eval_steps_per_second": 2.819, | |
"step": 18 | |
}, | |
{ | |
"epoch": 4.75, | |
"grad_norm": 3.921875, | |
"learning_rate": 2.7631578947368426e-05, | |
"loss": 0.8078, | |
"step": 19 | |
}, | |
{ | |
"epoch": 4.75, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.961181640625, | |
"eval_runtime": 1.4194, | |
"eval_samples_per_second": 43.679, | |
"eval_steps_per_second": 2.818, | |
"step": 19 | |
}, | |
{ | |
"epoch": 5.0, | |
"grad_norm": 5.09375, | |
"learning_rate": 2.6315789473684212e-05, | |
"loss": 0.7689, | |
"step": 20 | |
}, | |
{ | |
"epoch": 5.0, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9596459269523621, | |
"eval_runtime": 1.419, | |
"eval_samples_per_second": 43.692, | |
"eval_steps_per_second": 2.819, | |
"step": 20 | |
}, | |
{ | |
"epoch": 5.25, | |
"grad_norm": 10.0, | |
"learning_rate": 2.5e-05, | |
"loss": 1.0543, | |
"step": 21 | |
}, | |
{ | |
"epoch": 5.25, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9581180810928345, | |
"eval_runtime": 1.4191, | |
"eval_samples_per_second": 43.691, | |
"eval_steps_per_second": 2.819, | |
"step": 21 | |
}, | |
{ | |
"epoch": 5.5, | |
"grad_norm": 3.515625, | |
"learning_rate": 2.368421052631579e-05, | |
"loss": 0.7845, | |
"step": 22 | |
}, | |
{ | |
"epoch": 5.5, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9573659300804138, | |
"eval_runtime": 1.4193, | |
"eval_samples_per_second": 43.684, | |
"eval_steps_per_second": 2.818, | |
"step": 22 | |
}, | |
{ | |
"epoch": 5.75, | |
"grad_norm": 2.765625, | |
"learning_rate": 2.236842105263158e-05, | |
"loss": 0.7907, | |
"step": 23 | |
}, | |
{ | |
"epoch": 5.75, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9584291577339172, | |
"eval_runtime": 1.4194, | |
"eval_samples_per_second": 43.681, | |
"eval_steps_per_second": 2.818, | |
"step": 23 | |
}, | |
{ | |
"epoch": 6.0, | |
"grad_norm": 3.15625, | |
"learning_rate": 2.105263157894737e-05, | |
"loss": 0.7345, | |
"step": 24 | |
}, | |
{ | |
"epoch": 6.0, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9615005850791931, | |
"eval_runtime": 1.4194, | |
"eval_samples_per_second": 43.679, | |
"eval_steps_per_second": 2.818, | |
"step": 24 | |
}, | |
{ | |
"epoch": 6.25, | |
"grad_norm": 4.25, | |
"learning_rate": 1.9736842105263158e-05, | |
"loss": 0.7753, | |
"step": 25 | |
}, | |
{ | |
"epoch": 6.25, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.965796709060669, | |
"eval_runtime": 1.4191, | |
"eval_samples_per_second": 43.689, | |
"eval_steps_per_second": 2.819, | |
"step": 25 | |
}, | |
{ | |
"epoch": 6.5, | |
"grad_norm": 4.3125, | |
"learning_rate": 1.8421052631578947e-05, | |
"loss": 0.7508, | |
"step": 26 | |
}, | |
{ | |
"epoch": 6.5, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9699903726577759, | |
"eval_runtime": 1.4192, | |
"eval_samples_per_second": 43.688, | |
"eval_steps_per_second": 2.819, | |
"step": 26 | |
}, | |
{ | |
"epoch": 6.75, | |
"grad_norm": 3.359375, | |
"learning_rate": 1.7105263157894737e-05, | |
"loss": 0.7477, | |
"step": 27 | |
}, | |
{ | |
"epoch": 6.75, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9734280705451965, | |
"eval_runtime": 1.4193, | |
"eval_samples_per_second": 43.684, | |
"eval_steps_per_second": 2.818, | |
"step": 27 | |
}, | |
{ | |
"epoch": 7.0, | |
"grad_norm": 4.71875, | |
"learning_rate": 1.5789473684210526e-05, | |
"loss": 0.9474, | |
"step": 28 | |
}, | |
{ | |
"epoch": 7.0, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9768341779708862, | |
"eval_runtime": 1.4192, | |
"eval_samples_per_second": 43.687, | |
"eval_steps_per_second": 2.819, | |
"step": 28 | |
}, | |
{ | |
"epoch": 7.25, | |
"grad_norm": 5.125, | |
"learning_rate": 1.4473684210526317e-05, | |
"loss": 0.9033, | |
"step": 29 | |
}, | |
{ | |
"epoch": 7.25, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9821304678916931, | |
"eval_runtime": 1.4191, | |
"eval_samples_per_second": 43.691, | |
"eval_steps_per_second": 2.819, | |
"step": 29 | |
}, | |
{ | |
"epoch": 7.5, | |
"grad_norm": 3.859375, | |
"learning_rate": 1.3157894736842106e-05, | |
"loss": 0.7329, | |
"step": 30 | |
}, | |
{ | |
"epoch": 7.5, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9849578142166138, | |
"eval_runtime": 1.4195, | |
"eval_samples_per_second": 43.677, | |
"eval_steps_per_second": 2.818, | |
"step": 30 | |
}, | |
{ | |
"epoch": 7.75, | |
"grad_norm": 3.625, | |
"learning_rate": 1.1842105263157895e-05, | |
"loss": 0.7054, | |
"step": 31 | |
}, | |
{ | |
"epoch": 7.75, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9871038794517517, | |
"eval_runtime": 1.4195, | |
"eval_samples_per_second": 43.677, | |
"eval_steps_per_second": 2.818, | |
"step": 31 | |
}, | |
{ | |
"epoch": 8.0, | |
"grad_norm": 9.6875, | |
"learning_rate": 1.0526315789473684e-05, | |
"loss": 0.9037, | |
"step": 32 | |
}, | |
{ | |
"epoch": 8.0, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.986328125, | |
"eval_runtime": 1.3698, | |
"eval_samples_per_second": 45.261, | |
"eval_steps_per_second": 2.92, | |
"step": 32 | |
}, | |
{ | |
"epoch": 8.25, | |
"grad_norm": 4.0, | |
"learning_rate": 9.210526315789474e-06, | |
"loss": 0.8046, | |
"step": 33 | |
}, | |
{ | |
"epoch": 8.25, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9895294904708862, | |
"eval_runtime": 1.421, | |
"eval_samples_per_second": 43.632, | |
"eval_steps_per_second": 2.815, | |
"step": 33 | |
}, | |
{ | |
"epoch": 8.5, | |
"grad_norm": 2.359375, | |
"learning_rate": 7.894736842105263e-06, | |
"loss": 0.7161, | |
"step": 34 | |
}, | |
{ | |
"epoch": 8.5, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9918173551559448, | |
"eval_runtime": 1.3694, | |
"eval_samples_per_second": 45.274, | |
"eval_steps_per_second": 2.921, | |
"step": 34 | |
}, | |
{ | |
"epoch": 8.75, | |
"grad_norm": 7.8125, | |
"learning_rate": 6.578947368421053e-06, | |
"loss": 0.8239, | |
"step": 35 | |
}, | |
{ | |
"epoch": 8.75, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9916952848434448, | |
"eval_runtime": 1.4202, | |
"eval_samples_per_second": 43.657, | |
"eval_steps_per_second": 2.817, | |
"step": 35 | |
}, | |
{ | |
"epoch": 9.0, | |
"grad_norm": 4.375, | |
"learning_rate": 5.263157894736842e-06, | |
"loss": 0.8111, | |
"step": 36 | |
}, | |
{ | |
"epoch": 9.0, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9914905428886414, | |
"eval_runtime": 1.4189, | |
"eval_samples_per_second": 43.695, | |
"eval_steps_per_second": 2.819, | |
"step": 36 | |
}, | |
{ | |
"epoch": 9.25, | |
"grad_norm": 3.78125, | |
"learning_rate": 3.9473684210526315e-06, | |
"loss": 0.9011, | |
"step": 37 | |
}, | |
{ | |
"epoch": 9.25, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9921599626541138, | |
"eval_runtime": 1.4188, | |
"eval_samples_per_second": 43.7, | |
"eval_steps_per_second": 2.819, | |
"step": 37 | |
}, | |
{ | |
"epoch": 9.5, | |
"grad_norm": 2.90625, | |
"learning_rate": 2.631578947368421e-06, | |
"loss": 0.8858, | |
"step": 38 | |
}, | |
{ | |
"epoch": 9.5, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9920575618743896, | |
"eval_runtime": 1.4203, | |
"eval_samples_per_second": 43.653, | |
"eval_steps_per_second": 2.816, | |
"step": 38 | |
}, | |
{ | |
"epoch": 9.75, | |
"grad_norm": 3.765625, | |
"learning_rate": 1.3157894736842106e-06, | |
"loss": 0.822, | |
"step": 39 | |
}, | |
{ | |
"epoch": 9.75, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9910297989845276, | |
"eval_runtime": 1.4201, | |
"eval_samples_per_second": 43.658, | |
"eval_steps_per_second": 2.817, | |
"step": 39 | |
}, | |
{ | |
"epoch": 10.0, | |
"grad_norm": 3.203125, | |
"learning_rate": 0.0, | |
"loss": 0.7812, | |
"step": 40 | |
}, | |
{ | |
"epoch": 10.0, | |
"eval_accuracy": 0.4838709677419355, | |
"eval_f1": 0.293480615118042, | |
"eval_loss": 0.9894664883613586, | |
"eval_runtime": 1.4198, | |
"eval_samples_per_second": 43.668, | |
"eval_steps_per_second": 2.817, | |
"step": 40 | |
}, | |
{ | |
"epoch": 10.0, | |
"step": 40, | |
"total_flos": 1.4174270733156352e+16, | |
"train_loss": 0.8902546644210816, | |
"train_runtime": 176.2791, | |
"train_samples_per_second": 13.842, | |
"train_steps_per_second": 0.227 | |
} | |
], | |
"logging_steps": 1, | |
"max_steps": 40, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 10, | |
"save_steps": 500, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": false, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 1.4174270733156352e+16, | |
"train_batch_size": 4, | |
"trial_name": null, | |
"trial_params": null | |
} | |