|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 24404, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 2.032832145690918, |
|
"eval_runtime": 62.2478, |
|
"eval_samples_per_second": 82.541, |
|
"eval_steps_per_second": 3.454, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 1.4367562532424927, |
|
"eval_runtime": 63.0927, |
|
"eval_samples_per_second": 81.436, |
|
"eval_steps_per_second": 3.408, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 1.2640273571014404, |
|
"eval_runtime": 61.4349, |
|
"eval_samples_per_second": 83.633, |
|
"eval_steps_per_second": 3.5, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 1.173414707183838, |
|
"eval_runtime": 64.7986, |
|
"eval_samples_per_second": 79.292, |
|
"eval_steps_per_second": 3.318, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.897557777413539e-05, |
|
"loss": 2.7729, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 1.1171976327896118, |
|
"eval_runtime": 62.4514, |
|
"eval_samples_per_second": 82.272, |
|
"eval_steps_per_second": 3.443, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 1.0716122388839722, |
|
"eval_runtime": 62.3679, |
|
"eval_samples_per_second": 82.382, |
|
"eval_steps_per_second": 3.447, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 1.0408052206039429, |
|
"eval_runtime": 62.3699, |
|
"eval_samples_per_second": 82.379, |
|
"eval_steps_per_second": 3.447, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 1.0002268552780151, |
|
"eval_runtime": 60.9857, |
|
"eval_samples_per_second": 84.249, |
|
"eval_steps_per_second": 3.525, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 0.9761040806770325, |
|
"eval_runtime": 63.3879, |
|
"eval_samples_per_second": 81.057, |
|
"eval_steps_per_second": 3.392, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.795115554827078e-05, |
|
"loss": 1.1066, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 0.9439008831977844, |
|
"eval_runtime": 60.9997, |
|
"eval_samples_per_second": 84.23, |
|
"eval_steps_per_second": 3.525, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 0.921099841594696, |
|
"eval_runtime": 61.9171, |
|
"eval_samples_per_second": 82.982, |
|
"eval_steps_per_second": 3.472, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 0.89990234375, |
|
"eval_runtime": 66.001, |
|
"eval_samples_per_second": 77.847, |
|
"eval_steps_per_second": 3.258, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 0.879767656326294, |
|
"eval_runtime": 65.3826, |
|
"eval_samples_per_second": 78.584, |
|
"eval_steps_per_second": 3.288, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 0.8532700538635254, |
|
"eval_runtime": 66.1288, |
|
"eval_samples_per_second": 77.697, |
|
"eval_steps_per_second": 3.251, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.692673332240616e-05, |
|
"loss": 0.9358, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 0.8349987864494324, |
|
"eval_runtime": 64.1308, |
|
"eval_samples_per_second": 80.118, |
|
"eval_steps_per_second": 3.353, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4388867263526664 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.1525091302583525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4388867263526664 |
|
}, |
|
"eval_loss": 0.8146479725837708, |
|
"eval_runtime": 65.0686, |
|
"eval_samples_per_second": 78.963, |
|
"eval_steps_per_second": 3.304, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4534838458544181 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.21174904672848718 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4534838458544181 |
|
}, |
|
"eval_loss": 0.7984741926193237, |
|
"eval_runtime": 64.3594, |
|
"eval_samples_per_second": 79.833, |
|
"eval_steps_per_second": 3.341, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4560140132347217 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.24178290688684612 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4560140132347217 |
|
}, |
|
"eval_loss": 0.7780113816261292, |
|
"eval_runtime": 66.3431, |
|
"eval_samples_per_second": 77.446, |
|
"eval_steps_per_second": 3.241, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.46321525885558584 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.24751285219162994 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4632152588555859 |
|
}, |
|
"eval_loss": 0.7548134326934814, |
|
"eval_runtime": 65.2338, |
|
"eval_samples_per_second": 78.763, |
|
"eval_steps_per_second": 3.296, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.590231109654155e-05, |
|
"loss": 0.8298, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.46924873491630986 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.2464816382147299 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.46924873491630986 |
|
}, |
|
"eval_loss": 0.7355437874794006, |
|
"eval_runtime": 66.2105, |
|
"eval_samples_per_second": 77.601, |
|
"eval_steps_per_second": 3.247, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.43557804593226934 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.24962525654931594 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.43557804593226934 |
|
}, |
|
"eval_loss": 0.7379868626594543, |
|
"eval_runtime": 66.543, |
|
"eval_samples_per_second": 77.213, |
|
"eval_steps_per_second": 3.231, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.46555079797586607 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.2992369280869709 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.46555079797586607 |
|
}, |
|
"eval_loss": 0.7291679978370667, |
|
"eval_runtime": 65.5998, |
|
"eval_samples_per_second": 78.323, |
|
"eval_steps_per_second": 3.277, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.49202024133904243 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3141049709544147 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.49202024133904243 |
|
}, |
|
"eval_loss": 0.6944931149482727, |
|
"eval_runtime": 65.8926, |
|
"eval_samples_per_second": 77.975, |
|
"eval_steps_per_second": 3.263, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.502724795640327 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3701433983343189 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.502724795640327 |
|
}, |
|
"eval_loss": 0.6872398257255554, |
|
"eval_runtime": 64.1033, |
|
"eval_samples_per_second": 80.152, |
|
"eval_steps_per_second": 3.354, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.4877888870676944e-05, |
|
"loss": 0.7424, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.49883223043985986 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.37174762565152936 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.49883223043985986 |
|
}, |
|
"eval_loss": 0.6769503951072693, |
|
"eval_runtime": 65.8998, |
|
"eval_samples_per_second": 77.967, |
|
"eval_steps_per_second": 3.263, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.503308680420397 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3680796226372295 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.503308680420397 |
|
}, |
|
"eval_loss": 0.6723325848579407, |
|
"eval_runtime": 64.9642, |
|
"eval_samples_per_second": 79.09, |
|
"eval_steps_per_second": 3.31, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5101206695212145 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.37785590629881227 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5101206695212145 |
|
}, |
|
"eval_loss": 0.6622567772865295, |
|
"eval_runtime": 67.697, |
|
"eval_samples_per_second": 75.897, |
|
"eval_steps_per_second": 3.176, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5058388478007007 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.37480207390602494 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5058388478007007 |
|
}, |
|
"eval_loss": 0.6581109166145325, |
|
"eval_runtime": 65.1127, |
|
"eval_samples_per_second": 78.909, |
|
"eval_steps_per_second": 3.302, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5081743869209809 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.35256174805106155 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5081743869209809 |
|
}, |
|
"eval_loss": 0.6534283757209778, |
|
"eval_runtime": 66.7769, |
|
"eval_samples_per_second": 76.943, |
|
"eval_steps_per_second": 3.22, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.385346664481233e-05, |
|
"loss": 0.6892, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.4706111327364733 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.372103581749401 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.4706111327364733 |
|
}, |
|
"eval_loss": 0.6582987308502197, |
|
"eval_runtime": 65.0583, |
|
"eval_samples_per_second": 78.975, |
|
"eval_steps_per_second": 3.305, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5044764499805372 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.37031807042249487 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5044764499805372 |
|
}, |
|
"eval_loss": 0.6426356434822083, |
|
"eval_runtime": 65.2533, |
|
"eval_samples_per_second": 78.739, |
|
"eval_steps_per_second": 3.295, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5058388478007007 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.38279117822149433 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5058388478007007 |
|
}, |
|
"eval_loss": 0.6383734941482544, |
|
"eval_runtime": 66.5881, |
|
"eval_samples_per_second": 77.161, |
|
"eval_steps_per_second": 3.229, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.49727520435967304 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.352874833319086 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.49727520435967304 |
|
}, |
|
"eval_loss": 0.6387777328491211, |
|
"eval_runtime": 65.3478, |
|
"eval_samples_per_second": 78.625, |
|
"eval_steps_per_second": 3.29, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.494550408719346 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3715541356743288 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.494550408719346 |
|
}, |
|
"eval_loss": 0.6494720578193665, |
|
"eval_runtime": 65.2886, |
|
"eval_samples_per_second": 78.697, |
|
"eval_steps_per_second": 3.293, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.282904441894772e-05, |
|
"loss": 0.6377, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.506033476060724 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.38420420926210797 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.506033476060724 |
|
}, |
|
"eval_loss": 0.6222317218780518, |
|
"eval_runtime": 64.502, |
|
"eval_samples_per_second": 79.656, |
|
"eval_steps_per_second": 3.333, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5064227325807708 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3885026107563052 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5064227325807708 |
|
}, |
|
"eval_loss": 0.6216300129890442, |
|
"eval_runtime": 65.7001, |
|
"eval_samples_per_second": 78.204, |
|
"eval_steps_per_second": 3.272, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5105099260412612 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.35318161489739225 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5105099260412612 |
|
}, |
|
"eval_loss": 0.6235440373420715, |
|
"eval_runtime": 65.6641, |
|
"eval_samples_per_second": 78.247, |
|
"eval_steps_per_second": 3.274, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5062281043207474 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.37238647965373683 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5062281043207474 |
|
}, |
|
"eval_loss": 0.6188907623291016, |
|
"eval_runtime": 65.7244, |
|
"eval_samples_per_second": 78.175, |
|
"eval_steps_per_second": 3.271, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.49980537173997663 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.38651240866909087 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.49980537173997663 |
|
}, |
|
"eval_loss": 0.6195840239524841, |
|
"eval_runtime": 65.3321, |
|
"eval_samples_per_second": 78.644, |
|
"eval_steps_per_second": 3.291, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.18046221930831e-05, |
|
"loss": 0.6149, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5075905021409108 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.37502556996910846 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5075905021409108 |
|
}, |
|
"eval_loss": 0.6072418093681335, |
|
"eval_runtime": 67.6156, |
|
"eval_samples_per_second": 75.988, |
|
"eval_steps_per_second": 3.18, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5145971195017517 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.38615080427173776 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5145971195017517 |
|
}, |
|
"eval_loss": 0.6034330725669861, |
|
"eval_runtime": 65.7798, |
|
"eval_samples_per_second": 78.109, |
|
"eval_steps_per_second": 3.268, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5165434021019852 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3732389735757712 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5165434021019852 |
|
}, |
|
"eval_loss": 0.6042677760124207, |
|
"eval_runtime": 65.3392, |
|
"eval_samples_per_second": 78.636, |
|
"eval_steps_per_second": 3.291, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5029194239003504 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.38007459621604844 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5029194239003504 |
|
}, |
|
"eval_loss": 0.6064484119415283, |
|
"eval_runtime": 64.1308, |
|
"eval_samples_per_second": 80.117, |
|
"eval_steps_per_second": 3.353, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5095367847411444 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3946719741195423 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5095367847411444 |
|
}, |
|
"eval_loss": 0.5933734774589539, |
|
"eval_runtime": 65.6434, |
|
"eval_samples_per_second": 78.271, |
|
"eval_steps_per_second": 3.275, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.078019996721849e-05, |
|
"loss": 0.5971, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5167380303620086 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.37773378306197386 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5167380303620086 |
|
}, |
|
"eval_loss": 0.5919108390808105, |
|
"eval_runtime": 63.9287, |
|
"eval_samples_per_second": 80.371, |
|
"eval_steps_per_second": 3.363, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5179057999221487 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.38732144074331154 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5179057999221487 |
|
}, |
|
"eval_loss": 0.6021246910095215, |
|
"eval_runtime": 67.8323, |
|
"eval_samples_per_second": 75.746, |
|
"eval_steps_per_second": 3.17, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.522966134682756 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.38578529254016203 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.522966134682756 |
|
}, |
|
"eval_loss": 0.5902037620544434, |
|
"eval_runtime": 65.8164, |
|
"eval_samples_per_second": 78.066, |
|
"eval_steps_per_second": 3.267, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5052549630206306 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3881510615594995 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5052549630206306 |
|
}, |
|
"eval_loss": 0.6161624789237976, |
|
"eval_runtime": 66.7087, |
|
"eval_samples_per_second": 77.021, |
|
"eval_steps_per_second": 3.223, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5223822499026859 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3790400980761349 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5223822499026859 |
|
}, |
|
"eval_loss": 0.5835235714912415, |
|
"eval_runtime": 67.5779, |
|
"eval_samples_per_second": 76.031, |
|
"eval_steps_per_second": 3.182, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.975577774135388e-05, |
|
"loss": 0.5745, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5241339042428961 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.38287080754019076 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5241339042428961 |
|
}, |
|
"eval_loss": 0.5865485668182373, |
|
"eval_runtime": 67.9881, |
|
"eval_samples_per_second": 75.572, |
|
"eval_steps_per_second": 3.162, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.514791747761775 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3932066438681785 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.514791747761775 |
|
}, |
|
"eval_loss": 0.582242488861084, |
|
"eval_runtime": 65.5261, |
|
"eval_samples_per_second": 78.412, |
|
"eval_steps_per_second": 3.281, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5266640716231997 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3948337517276894 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5266640716231997 |
|
}, |
|
"eval_loss": 0.5757012963294983, |
|
"eval_runtime": 66.1312, |
|
"eval_samples_per_second": 77.694, |
|
"eval_steps_per_second": 3.251, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5192681977423121 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.38846154581250647 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5192681977423121 |
|
}, |
|
"eval_loss": 0.580342173576355, |
|
"eval_runtime": 66.5531, |
|
"eval_samples_per_second": 77.202, |
|
"eval_steps_per_second": 3.231, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5278318411833398 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3917793049565631 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5278318411833398 |
|
}, |
|
"eval_loss": 0.5737766623497009, |
|
"eval_runtime": 68.6161, |
|
"eval_samples_per_second": 74.88, |
|
"eval_steps_per_second": 3.133, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8731355515489266e-05, |
|
"loss": 0.5605, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5299727520435967 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4000572874943932 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5299727520435967 |
|
}, |
|
"eval_loss": 0.5729739665985107, |
|
"eval_runtime": 65.0276, |
|
"eval_samples_per_second": 79.013, |
|
"eval_steps_per_second": 3.306, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5266640716231997 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3871595405264875 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5266640716231997 |
|
}, |
|
"eval_loss": 0.5737924575805664, |
|
"eval_runtime": 67.4714, |
|
"eval_samples_per_second": 76.151, |
|
"eval_steps_per_second": 3.187, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5278318411833398 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3912636643655603 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5278318411833398 |
|
}, |
|
"eval_loss": 0.5748183131217957, |
|
"eval_runtime": 66.8019, |
|
"eval_samples_per_second": 76.914, |
|
"eval_steps_per_second": 3.218, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5212144803425457 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3655456567099176 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5212144803425457 |
|
}, |
|
"eval_loss": 0.5782448649406433, |
|
"eval_runtime": 65.8052, |
|
"eval_samples_per_second": 78.079, |
|
"eval_steps_per_second": 3.267, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5124562086414948 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3896713760808098 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5124562086414948 |
|
}, |
|
"eval_loss": 0.5811282396316528, |
|
"eval_runtime": 65.8997, |
|
"eval_samples_per_second": 77.967, |
|
"eval_steps_per_second": 3.263, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7706933289624654e-05, |
|
"loss": 0.553, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5241339042428961 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3927065112402536 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5241339042428961 |
|
}, |
|
"eval_loss": 0.5662025809288025, |
|
"eval_runtime": 65.5268, |
|
"eval_samples_per_second": 78.411, |
|
"eval_steps_per_second": 3.281, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5161541455819385 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3924416598143773 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5161541455819385 |
|
}, |
|
"eval_loss": 0.5739487409591675, |
|
"eval_runtime": 65.309, |
|
"eval_samples_per_second": 78.672, |
|
"eval_steps_per_second": 3.292, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5190735694822888 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.39288818567242456 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5190735694822888 |
|
}, |
|
"eval_loss": 0.5729976892471313, |
|
"eval_runtime": 65.1321, |
|
"eval_samples_per_second": 78.886, |
|
"eval_steps_per_second": 3.301, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5128454651615415 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3945008643429393 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5128454651615415 |
|
}, |
|
"eval_loss": 0.5891692042350769, |
|
"eval_runtime": 66.1687, |
|
"eval_samples_per_second": 77.65, |
|
"eval_steps_per_second": 3.249, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5179057999221487 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.40357563770843996 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5179057999221487 |
|
}, |
|
"eval_loss": 0.5842686891555786, |
|
"eval_runtime": 64.3545, |
|
"eval_samples_per_second": 79.839, |
|
"eval_steps_per_second": 3.341, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.6682511063760036e-05, |
|
"loss": 0.5254, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5219929933826392 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.39726071722075873 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5219929933826392 |
|
}, |
|
"eval_loss": 0.5762518048286438, |
|
"eval_runtime": 66.0739, |
|
"eval_samples_per_second": 77.761, |
|
"eval_steps_per_second": 3.254, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5184896847022188 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3883086023616811 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5184896847022188 |
|
}, |
|
"eval_loss": 0.5790498852729797, |
|
"eval_runtime": 65.3688, |
|
"eval_samples_per_second": 78.6, |
|
"eval_steps_per_second": 3.289, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5260801868431296 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.39154498796599124 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5260801868431296 |
|
}, |
|
"eval_loss": 0.5696905851364136, |
|
"eval_runtime": 65.5045, |
|
"eval_samples_per_second": 78.437, |
|
"eval_steps_per_second": 3.282, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5171272868820552 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.39755561359688063 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5171272868820552 |
|
}, |
|
"eval_loss": 0.5722245573997498, |
|
"eval_runtime": 65.5097, |
|
"eval_samples_per_second": 78.431, |
|
"eval_steps_per_second": 3.282, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5231607629427792 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3999224263889678 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5231607629427792 |
|
}, |
|
"eval_loss": 0.5762615203857422, |
|
"eval_runtime": 67.2536, |
|
"eval_samples_per_second": 76.397, |
|
"eval_steps_per_second": 3.197, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.565808883789543e-05, |
|
"loss": 0.5282, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5239392759828727 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.40076468903176177 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5239392759828727 |
|
}, |
|
"eval_loss": 0.5675185918807983, |
|
"eval_runtime": 65.953, |
|
"eval_samples_per_second": 77.904, |
|
"eval_steps_per_second": 3.26, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5291942390035033 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3977209221647911 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5291942390035033 |
|
}, |
|
"eval_loss": 0.578584611415863, |
|
"eval_runtime": 66.5476, |
|
"eval_samples_per_second": 77.208, |
|
"eval_steps_per_second": 3.231, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5286103542234333 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3972971376121283 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5286103542234333 |
|
}, |
|
"eval_loss": 0.5829901099205017, |
|
"eval_runtime": 64.4078, |
|
"eval_samples_per_second": 79.773, |
|
"eval_steps_per_second": 3.338, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5332814324639937 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4020714398207229 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5332814324639937 |
|
}, |
|
"eval_loss": 0.5793033242225647, |
|
"eval_runtime": 66.9158, |
|
"eval_samples_per_second": 76.783, |
|
"eval_steps_per_second": 3.213, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5251070455430128 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.40305870728099247 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5251070455430128 |
|
}, |
|
"eval_loss": 0.5735189318656921, |
|
"eval_runtime": 64.7839, |
|
"eval_samples_per_second": 79.31, |
|
"eval_steps_per_second": 3.319, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.463366661203082e-05, |
|
"loss": 0.5098, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5309458933437136 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.40404686699618486 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5309458933437136 |
|
}, |
|
"eval_loss": 0.5664647221565247, |
|
"eval_runtime": 65.2418, |
|
"eval_samples_per_second": 78.753, |
|
"eval_steps_per_second": 3.295, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5227715064227326 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.3988976622706162 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5227715064227326 |
|
}, |
|
"eval_loss": 0.5650814175605774, |
|
"eval_runtime": 65.9036, |
|
"eval_samples_per_second": 77.962, |
|
"eval_steps_per_second": 3.262, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5266640716231997 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4099005096569608 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5266640716231997 |
|
}, |
|
"eval_loss": 0.5657761096954346, |
|
"eval_runtime": 64.8206, |
|
"eval_samples_per_second": 79.265, |
|
"eval_steps_per_second": 3.317, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5278318411833398 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.40307633583050173 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5278318411833398 |
|
}, |
|
"eval_loss": 0.5601173043251038, |
|
"eval_runtime": 66.1454, |
|
"eval_samples_per_second": 77.677, |
|
"eval_steps_per_second": 3.25, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5313351498637602 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.40205694039231865 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5313351498637602 |
|
}, |
|
"eval_loss": 0.5627759099006653, |
|
"eval_runtime": 64.9362, |
|
"eval_samples_per_second": 79.124, |
|
"eval_steps_per_second": 3.311, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.360924438616621e-05, |
|
"loss": 0.5085, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5293888672635266 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4100384148965112 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5293888672635266 |
|
}, |
|
"eval_loss": 0.5625594854354858, |
|
"eval_runtime": 65.2812, |
|
"eval_samples_per_second": 78.706, |
|
"eval_steps_per_second": 3.293, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5307512650836902 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4056927310816246 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5307512650836902 |
|
}, |
|
"eval_loss": 0.562485933303833, |
|
"eval_runtime": 65.2185, |
|
"eval_samples_per_second": 78.781, |
|
"eval_steps_per_second": 3.297, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5354223433242506 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4044048936522055 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5354223433242506 |
|
}, |
|
"eval_loss": 0.5521498918533325, |
|
"eval_runtime": 65.3339, |
|
"eval_samples_per_second": 78.642, |
|
"eval_steps_per_second": 3.291, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5330868042039704 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.40764293642642907 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5330868042039704 |
|
}, |
|
"eval_loss": 0.5571908950805664, |
|
"eval_runtime": 64.614, |
|
"eval_samples_per_second": 79.518, |
|
"eval_steps_per_second": 3.327, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5344492020241339 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4168299787554328 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5344492020241339 |
|
}, |
|
"eval_loss": 0.5639694333076477, |
|
"eval_runtime": 65.7823, |
|
"eval_samples_per_second": 78.106, |
|
"eval_steps_per_second": 3.268, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.2584822160301595e-05, |
|
"loss": 0.5066, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5342545737641106 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.40719472411169355 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5342545737641106 |
|
}, |
|
"eval_loss": 0.5575660467147827, |
|
"eval_runtime": 65.0464, |
|
"eval_samples_per_second": 78.99, |
|
"eval_steps_per_second": 3.305, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5256909303230829 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4166295902681743 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5256909303230829 |
|
}, |
|
"eval_loss": 0.5562366843223572, |
|
"eval_runtime": 66.5062, |
|
"eval_samples_per_second": 77.256, |
|
"eval_steps_per_second": 3.233, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5206305955624757 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4178763398872619 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5206305955624757 |
|
}, |
|
"eval_loss": 0.5629637837409973, |
|
"eval_runtime": 65.1119, |
|
"eval_samples_per_second": 78.91, |
|
"eval_steps_per_second": 3.302, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5194628260023355 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4240614359972321 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5194628260023355 |
|
}, |
|
"eval_loss": 0.5645840764045715, |
|
"eval_runtime": 65.7329, |
|
"eval_samples_per_second": 78.165, |
|
"eval_steps_per_second": 3.271, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5270533281432463 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4232811654810201 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5270533281432463 |
|
}, |
|
"eval_loss": 0.5628061294555664, |
|
"eval_runtime": 66.012, |
|
"eval_samples_per_second": 77.834, |
|
"eval_steps_per_second": 3.257, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.156039993443698e-05, |
|
"loss": 0.5043, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5192681977423121 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4062380982170455 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5192681977423121 |
|
}, |
|
"eval_loss": 0.5618172883987427, |
|
"eval_runtime": 64.6788, |
|
"eval_samples_per_second": 79.439, |
|
"eval_steps_per_second": 3.324, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5210198520825223 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.39500190437884825 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5210198520825223 |
|
}, |
|
"eval_loss": 0.5575444102287292, |
|
"eval_runtime": 66.1613, |
|
"eval_samples_per_second": 77.659, |
|
"eval_steps_per_second": 3.25, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5253016738030362 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43826705554260276 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5253016738030362 |
|
}, |
|
"eval_loss": 0.55515456199646, |
|
"eval_runtime": 64.9282, |
|
"eval_samples_per_second": 79.134, |
|
"eval_steps_per_second": 3.311, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5313351498637602 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4106274369569952 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5313351498637602 |
|
}, |
|
"eval_loss": 0.569816529750824, |
|
"eval_runtime": 66.2885, |
|
"eval_samples_per_second": 77.51, |
|
"eval_steps_per_second": 3.243, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.522966134682756 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43811143120426327 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.522966134682756 |
|
}, |
|
"eval_loss": 0.5568819642066956, |
|
"eval_runtime": 65.3832, |
|
"eval_samples_per_second": 78.583, |
|
"eval_steps_per_second": 3.288, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.0535977708572365e-05, |
|
"loss": 0.5051, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5336706889840405 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.40680316881280776 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5336706889840405 |
|
}, |
|
"eval_loss": 0.5624731779098511, |
|
"eval_runtime": 64.3008, |
|
"eval_samples_per_second": 79.906, |
|
"eval_steps_per_second": 3.344, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5151810042818217 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4369801679839114 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5151810042818217 |
|
}, |
|
"eval_loss": 0.5694777369499207, |
|
"eval_runtime": 65.4516, |
|
"eval_samples_per_second": 78.501, |
|
"eval_steps_per_second": 3.285, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5268586998832231 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4132459851165683 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5268586998832231 |
|
}, |
|
"eval_loss": 0.56379634141922, |
|
"eval_runtime": 64.9267, |
|
"eval_samples_per_second": 79.135, |
|
"eval_steps_per_second": 3.311, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.526274815103153 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.40248592881859246 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.526274815103153 |
|
}, |
|
"eval_loss": 0.5575631856918335, |
|
"eval_runtime": 67.2319, |
|
"eval_samples_per_second": 76.422, |
|
"eval_steps_per_second": 3.198, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5258855585831063 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4075480681122011 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5258855585831063 |
|
}, |
|
"eval_loss": 0.5513472557067871, |
|
"eval_runtime": 66.4523, |
|
"eval_samples_per_second": 77.319, |
|
"eval_steps_per_second": 3.235, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.9511555482707753e-05, |
|
"loss": 0.4944, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5188789412222655 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.41372907313005425 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5188789412222655 |
|
}, |
|
"eval_loss": 0.5596012473106384, |
|
"eval_runtime": 65.824, |
|
"eval_samples_per_second": 78.057, |
|
"eval_steps_per_second": 3.266, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5278318411833398 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4203493324138542 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5278318411833398 |
|
}, |
|
"eval_loss": 0.5546495914459229, |
|
"eval_runtime": 65.4744, |
|
"eval_samples_per_second": 78.473, |
|
"eval_steps_per_second": 3.284, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.535616971584274 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4063341022177307 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.535616971584274 |
|
}, |
|
"eval_loss": 0.5501392483711243, |
|
"eval_runtime": 66.7163, |
|
"eval_samples_per_second": 77.013, |
|
"eval_steps_per_second": 3.223, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5188789412222655 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4212355713439969 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5188789412222655 |
|
}, |
|
"eval_loss": 0.5635867714881897, |
|
"eval_runtime": 66.1134, |
|
"eval_samples_per_second": 77.715, |
|
"eval_steps_per_second": 3.252, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5239392759828727 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4234908453840735 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5239392759828727 |
|
}, |
|
"eval_loss": 0.557228147983551, |
|
"eval_runtime": 64.7253, |
|
"eval_samples_per_second": 79.382, |
|
"eval_steps_per_second": 3.322, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.848713325684314e-05, |
|
"loss": 0.4908, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5251070455430128 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.42864065587210554 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5251070455430128 |
|
}, |
|
"eval_loss": 0.5468738675117493, |
|
"eval_runtime": 66.3227, |
|
"eval_samples_per_second": 77.47, |
|
"eval_steps_per_second": 3.242, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5239392759828727 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.41795817815383196 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5239392759828727 |
|
}, |
|
"eval_loss": 0.5483611226081848, |
|
"eval_runtime": 65.701, |
|
"eval_samples_per_second": 78.203, |
|
"eval_steps_per_second": 3.272, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5214091086025691 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4024775749504264 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5214091086025691 |
|
}, |
|
"eval_loss": 0.5590547323226929, |
|
"eval_runtime": 64.5911, |
|
"eval_samples_per_second": 79.547, |
|
"eval_steps_per_second": 3.329, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5344492020241339 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4370419038340574 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5344492020241339 |
|
}, |
|
"eval_loss": 0.5482434630393982, |
|
"eval_runtime": 65.2112, |
|
"eval_samples_per_second": 78.79, |
|
"eval_steps_per_second": 3.297, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5260801868431296 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4312513940073556 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5260801868431296 |
|
}, |
|
"eval_loss": 0.5548846125602722, |
|
"eval_runtime": 66.5938, |
|
"eval_samples_per_second": 77.154, |
|
"eval_steps_per_second": 3.229, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.7462711030978526e-05, |
|
"loss": 0.4956, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.523550019462826 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.41662322590714945 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.523550019462826 |
|
}, |
|
"eval_loss": 0.5459250211715698, |
|
"eval_runtime": 65.0832, |
|
"eval_samples_per_second": 78.945, |
|
"eval_steps_per_second": 3.303, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5216037368625924 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4343169858642386 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5216037368625924 |
|
}, |
|
"eval_loss": 0.5509054660797119, |
|
"eval_runtime": 66.0147, |
|
"eval_samples_per_second": 77.831, |
|
"eval_steps_per_second": 3.257, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.520241339042429 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4111126597526932 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.520241339042429 |
|
}, |
|
"eval_loss": 0.5683469176292419, |
|
"eval_runtime": 66.7093, |
|
"eval_samples_per_second": 77.021, |
|
"eval_steps_per_second": 3.223, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5219929933826392 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4175089884590279 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5219929933826392 |
|
}, |
|
"eval_loss": 0.556067168712616, |
|
"eval_runtime": 65.0023, |
|
"eval_samples_per_second": 79.043, |
|
"eval_steps_per_second": 3.308, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5325029194239004 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4221971320619319 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5325029194239004 |
|
}, |
|
"eval_loss": 0.5479483008384705, |
|
"eval_runtime": 66.1944, |
|
"eval_samples_per_second": 77.62, |
|
"eval_steps_per_second": 3.248, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.6438288805113915e-05, |
|
"loss": 0.491, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5173219151420786 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.41131088875014477 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5173219151420786 |
|
}, |
|
"eval_loss": 0.5584209561347961, |
|
"eval_runtime": 65.4294, |
|
"eval_samples_per_second": 78.527, |
|
"eval_steps_per_second": 3.286, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5305566368236668 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.42505103683817463 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5305566368236668 |
|
}, |
|
"eval_loss": 0.5507027506828308, |
|
"eval_runtime": 67.0783, |
|
"eval_samples_per_second": 76.597, |
|
"eval_steps_per_second": 3.205, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5284157259634099 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43059965616956497 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5284157259634099 |
|
}, |
|
"eval_loss": 0.5484103560447693, |
|
"eval_runtime": 65.412, |
|
"eval_samples_per_second": 78.548, |
|
"eval_steps_per_second": 3.287, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.523550019462826 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4283240140586777 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.523550019462826 |
|
}, |
|
"eval_loss": 0.5518194437026978, |
|
"eval_runtime": 65.4314, |
|
"eval_samples_per_second": 78.525, |
|
"eval_steps_per_second": 3.286, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5249124172829895 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4202384595807186 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5249124172829895 |
|
}, |
|
"eval_loss": 0.5580205917358398, |
|
"eval_runtime": 64.3287, |
|
"eval_samples_per_second": 79.871, |
|
"eval_steps_per_second": 3.342, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.5413866579249306e-05, |
|
"loss": 0.4882, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5328921759439471 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4168927822368606 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5328921759439471 |
|
}, |
|
"eval_loss": 0.5494405031204224, |
|
"eval_runtime": 66.1472, |
|
"eval_samples_per_second": 77.675, |
|
"eval_steps_per_second": 3.25, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5334760607240171 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4259674463534441 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5334760607240171 |
|
}, |
|
"eval_loss": 0.54230135679245, |
|
"eval_runtime": 65.4715, |
|
"eval_samples_per_second": 78.477, |
|
"eval_steps_per_second": 3.284, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5344492020241339 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43189802609389794 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5344492020241339 |
|
}, |
|
"eval_loss": 0.5440633893013, |
|
"eval_runtime": 64.7322, |
|
"eval_samples_per_second": 79.373, |
|
"eval_steps_per_second": 3.321, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5274425846632931 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43452702916498576 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5274425846632931 |
|
}, |
|
"eval_loss": 0.5580821633338928, |
|
"eval_runtime": 66.4297, |
|
"eval_samples_per_second": 77.345, |
|
"eval_steps_per_second": 3.237, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5219929933826392 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.42868961422727936 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5219929933826392 |
|
}, |
|
"eval_loss": 0.5607529878616333, |
|
"eval_runtime": 64.7983, |
|
"eval_samples_per_second": 79.292, |
|
"eval_steps_per_second": 3.318, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.438944435338469e-05, |
|
"loss": 0.4764, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5309458933437136 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4501164041177361 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5309458933437136 |
|
}, |
|
"eval_loss": 0.5601127743721008, |
|
"eval_runtime": 66.0041, |
|
"eval_samples_per_second": 77.844, |
|
"eval_steps_per_second": 3.257, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.515375632541845 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44296253953069376 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.515375632541845 |
|
}, |
|
"eval_loss": 0.5829929113388062, |
|
"eval_runtime": 65.2358, |
|
"eval_samples_per_second": 78.76, |
|
"eval_steps_per_second": 3.296, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5241339042428961 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4557503585498297 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5241339042428961 |
|
}, |
|
"eval_loss": 0.5616418719291687, |
|
"eval_runtime": 64.7306, |
|
"eval_samples_per_second": 79.375, |
|
"eval_steps_per_second": 3.321, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5227715064227326 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43939555085391224 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5227715064227326 |
|
}, |
|
"eval_loss": 0.5720220804214478, |
|
"eval_runtime": 65.4508, |
|
"eval_samples_per_second": 78.502, |
|
"eval_steps_per_second": 3.285, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5249124172829895 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4453192886564673 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5249124172829895 |
|
}, |
|
"eval_loss": 0.5684590935707092, |
|
"eval_runtime": 64.1447, |
|
"eval_samples_per_second": 80.1, |
|
"eval_steps_per_second": 3.352, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.336502212752008e-05, |
|
"loss": 0.459, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5184896847022188 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45810101040373097 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5184896847022188 |
|
}, |
|
"eval_loss": 0.5632970333099365, |
|
"eval_runtime": 65.4112, |
|
"eval_samples_per_second": 78.549, |
|
"eval_steps_per_second": 3.287, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5301673803036201 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.41550375963718733 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5301673803036201 |
|
}, |
|
"eval_loss": 0.5601311922073364, |
|
"eval_runtime": 65.7981, |
|
"eval_samples_per_second": 78.087, |
|
"eval_steps_per_second": 3.268, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5326975476839237 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4152775382545546 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5326975476839237 |
|
}, |
|
"eval_loss": 0.5632578730583191, |
|
"eval_runtime": 65.4374, |
|
"eval_samples_per_second": 78.518, |
|
"eval_steps_per_second": 3.286, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5225768781627093 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4441168152604288 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5225768781627093 |
|
}, |
|
"eval_loss": 0.5665469765663147, |
|
"eval_runtime": 64.3132, |
|
"eval_samples_per_second": 79.89, |
|
"eval_steps_per_second": 3.343, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5225768781627093 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4189456259580441 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5225768781627093 |
|
}, |
|
"eval_loss": 0.5736687779426575, |
|
"eval_runtime": 65.6996, |
|
"eval_samples_per_second": 78.204, |
|
"eval_steps_per_second": 3.272, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.2340599901655468e-05, |
|
"loss": 0.4557, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5237446477228493 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44732903233612287 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5237446477228493 |
|
}, |
|
"eval_loss": 0.5651576519012451, |
|
"eval_runtime": 66.1178, |
|
"eval_samples_per_second": 77.71, |
|
"eval_steps_per_second": 3.252, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5268586998832231 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4296354631479917 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5268586998832231 |
|
}, |
|
"eval_loss": 0.566527247428894, |
|
"eval_runtime": 65.4163, |
|
"eval_samples_per_second": 78.543, |
|
"eval_steps_per_second": 3.287, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5194628260023355 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4523218163724315 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5194628260023355 |
|
}, |
|
"eval_loss": 0.5746815204620361, |
|
"eval_runtime": 66.5011, |
|
"eval_samples_per_second": 77.262, |
|
"eval_steps_per_second": 3.233, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5241339042428961 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.42963157337372265 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5241339042428961 |
|
}, |
|
"eval_loss": 0.5709651112556458, |
|
"eval_runtime": 64.9469, |
|
"eval_samples_per_second": 79.111, |
|
"eval_steps_per_second": 3.31, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.52899961074348 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4465492291635319 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.52899961074348 |
|
}, |
|
"eval_loss": 0.5652072429656982, |
|
"eval_runtime": 66.3178, |
|
"eval_samples_per_second": 77.475, |
|
"eval_steps_per_second": 3.242, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.1316177675790856e-05, |
|
"loss": 0.4539, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5276372129233164 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4330344585002601 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5276372129233164 |
|
}, |
|
"eval_loss": 0.5652056932449341, |
|
"eval_runtime": 64.4339, |
|
"eval_samples_per_second": 79.741, |
|
"eval_steps_per_second": 3.337, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5274425846632931 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43869182088814185 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5274425846632931 |
|
}, |
|
"eval_loss": 0.5703505277633667, |
|
"eval_runtime": 66.2068, |
|
"eval_samples_per_second": 77.605, |
|
"eval_steps_per_second": 3.247, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5258855585831063 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43879780170912847 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5258855585831063 |
|
}, |
|
"eval_loss": 0.5729069113731384, |
|
"eval_runtime": 65.0139, |
|
"eval_samples_per_second": 79.029, |
|
"eval_steps_per_second": 3.307, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5192681977423121 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4255072429603308 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5192681977423121 |
|
}, |
|
"eval_loss": 0.5682628154754639, |
|
"eval_runtime": 65.9744, |
|
"eval_samples_per_second": 77.879, |
|
"eval_steps_per_second": 3.259, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5264694433631764 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43346825826001506 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5264694433631764 |
|
}, |
|
"eval_loss": 0.5605142712593079, |
|
"eval_runtime": 65.0867, |
|
"eval_samples_per_second": 78.941, |
|
"eval_steps_per_second": 3.303, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.029175544992624e-05, |
|
"loss": 0.459, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5239392759828727 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44786889559299115 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5239392759828727 |
|
}, |
|
"eval_loss": 0.5726383924484253, |
|
"eval_runtime": 66.1098, |
|
"eval_samples_per_second": 77.719, |
|
"eval_steps_per_second": 3.252, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5291942390035033 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.453869134213484 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5291942390035033 |
|
}, |
|
"eval_loss": 0.566052258014679, |
|
"eval_runtime": 65.833, |
|
"eval_samples_per_second": 78.046, |
|
"eval_steps_per_second": 3.266, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5208252238224991 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43278533897219335 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5208252238224991 |
|
}, |
|
"eval_loss": 0.5726243853569031, |
|
"eval_runtime": 66.8914, |
|
"eval_samples_per_second": 76.811, |
|
"eval_steps_per_second": 3.214, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.52958349552355 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43445104819328556 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.52958349552355 |
|
}, |
|
"eval_loss": 0.5641396045684814, |
|
"eval_runtime": 67.2117, |
|
"eval_samples_per_second": 76.445, |
|
"eval_steps_per_second": 3.199, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5206305955624757 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4311693806201584 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5206305955624757 |
|
}, |
|
"eval_loss": 0.5807725787162781, |
|
"eval_runtime": 66.5101, |
|
"eval_samples_per_second": 77.251, |
|
"eval_steps_per_second": 3.233, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.926733322406163e-05, |
|
"loss": 0.4443, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5268586998832231 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4502312972729043 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5268586998832231 |
|
}, |
|
"eval_loss": 0.5696139931678772, |
|
"eval_runtime": 65.9075, |
|
"eval_samples_per_second": 77.958, |
|
"eval_steps_per_second": 3.262, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5282210977033865 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45560500346839616 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5282210977033865 |
|
}, |
|
"eval_loss": 0.5631005167961121, |
|
"eval_runtime": 65.994, |
|
"eval_samples_per_second": 77.856, |
|
"eval_steps_per_second": 3.258, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5200467107824056 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4500935823936061 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5200467107824056 |
|
}, |
|
"eval_loss": 0.5676321983337402, |
|
"eval_runtime": 67.7013, |
|
"eval_samples_per_second": 75.892, |
|
"eval_steps_per_second": 3.176, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5280264694433632 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4410401539944819 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5280264694433632 |
|
}, |
|
"eval_loss": 0.562995195388794, |
|
"eval_runtime": 65.3674, |
|
"eval_samples_per_second": 78.602, |
|
"eval_steps_per_second": 3.289, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5247177890229662 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43980399525374536 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5247177890229662 |
|
}, |
|
"eval_loss": 0.5720946788787842, |
|
"eval_runtime": 66.3575, |
|
"eval_samples_per_second": 77.429, |
|
"eval_steps_per_second": 3.24, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.8242910998197017e-05, |
|
"loss": 0.4542, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5260801868431296 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4401714578365292 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5260801868431296 |
|
}, |
|
"eval_loss": 0.5669940114021301, |
|
"eval_runtime": 65.3675, |
|
"eval_samples_per_second": 78.602, |
|
"eval_steps_per_second": 3.289, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5278318411833398 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43352621547332887 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5278318411833398 |
|
}, |
|
"eval_loss": 0.5640930533409119, |
|
"eval_runtime": 65.5202, |
|
"eval_samples_per_second": 78.419, |
|
"eval_steps_per_second": 3.281, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5264694433631764 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43784802366096 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5264694433631764 |
|
}, |
|
"eval_loss": 0.5642226934432983, |
|
"eval_runtime": 64.9095, |
|
"eval_samples_per_second": 79.156, |
|
"eval_steps_per_second": 3.312, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5315297781237835 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4357287377608377 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5315297781237835 |
|
}, |
|
"eval_loss": 0.5649986863136292, |
|
"eval_runtime": 65.4545, |
|
"eval_samples_per_second": 78.497, |
|
"eval_steps_per_second": 3.285, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5253016738030362 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45053423727995034 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5253016738030362 |
|
}, |
|
"eval_loss": 0.5697636008262634, |
|
"eval_runtime": 66.7839, |
|
"eval_samples_per_second": 76.935, |
|
"eval_steps_per_second": 3.219, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.7218488772332405e-05, |
|
"loss": 0.451, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.532308291163877 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43318094070825297 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.532308291163877 |
|
}, |
|
"eval_loss": 0.5685227513313293, |
|
"eval_runtime": 65.281, |
|
"eval_samples_per_second": 78.706, |
|
"eval_steps_per_second": 3.293, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5346438302841573 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44085402773126725 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5346438302841573 |
|
}, |
|
"eval_loss": 0.5624856352806091, |
|
"eval_runtime": 65.795, |
|
"eval_samples_per_second": 78.091, |
|
"eval_steps_per_second": 3.268, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5165434021019852 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4669507042700165 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5165434021019852 |
|
}, |
|
"eval_loss": 0.5654544830322266, |
|
"eval_runtime": 64.6982, |
|
"eval_samples_per_second": 79.415, |
|
"eval_steps_per_second": 3.323, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5321136629038536 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.43233762699260603 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5321136629038536 |
|
}, |
|
"eval_loss": 0.5595969557762146, |
|
"eval_runtime": 65.444, |
|
"eval_samples_per_second": 78.51, |
|
"eval_steps_per_second": 3.285, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5138186064616582 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44383270041686657 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5138186064616582 |
|
}, |
|
"eval_loss": 0.5683060884475708, |
|
"eval_runtime": 64.9405, |
|
"eval_samples_per_second": 79.119, |
|
"eval_steps_per_second": 3.311, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.6194066546467794e-05, |
|
"loss": 0.4526, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5217983651226158 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44043820237213194 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5217983651226158 |
|
}, |
|
"eval_loss": 0.5779083967208862, |
|
"eval_runtime": 66.9943, |
|
"eval_samples_per_second": 76.693, |
|
"eval_steps_per_second": 3.209, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5284157259634099 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4418742015293833 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5284157259634099 |
|
}, |
|
"eval_loss": 0.5650832056999207, |
|
"eval_runtime": 65.1199, |
|
"eval_samples_per_second": 78.901, |
|
"eval_steps_per_second": 3.302, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5313351498637602 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4478480562913326 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5313351498637602 |
|
}, |
|
"eval_loss": 0.5622133016586304, |
|
"eval_runtime": 65.2808, |
|
"eval_samples_per_second": 78.706, |
|
"eval_steps_per_second": 3.293, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5338653172440638 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44680022458953056 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5338653172440638 |
|
}, |
|
"eval_loss": 0.5588511228561401, |
|
"eval_runtime": 65.8894, |
|
"eval_samples_per_second": 77.979, |
|
"eval_steps_per_second": 3.263, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5305566368236668 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4609241185016919 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5305566368236668 |
|
}, |
|
"eval_loss": 0.5588091015815735, |
|
"eval_runtime": 66.5578, |
|
"eval_samples_per_second": 77.196, |
|
"eval_steps_per_second": 3.23, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.516964432060318e-05, |
|
"loss": 0.4489, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5328921759439471 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4330697830804149 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5328921759439471 |
|
}, |
|
"eval_loss": 0.5675057768821716, |
|
"eval_runtime": 64.7586, |
|
"eval_samples_per_second": 79.341, |
|
"eval_steps_per_second": 3.32, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5223822499026859 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45172351795604215 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5223822499026859 |
|
}, |
|
"eval_loss": 0.5728496313095093, |
|
"eval_runtime": 65.6773, |
|
"eval_samples_per_second": 78.231, |
|
"eval_steps_per_second": 3.274, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5336706889840405 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4601728614564582 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5336706889840405 |
|
}, |
|
"eval_loss": 0.5668734908103943, |
|
"eval_runtime": 65.4933, |
|
"eval_samples_per_second": 78.451, |
|
"eval_steps_per_second": 3.283, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5186843129622422 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.424912443220669 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5186843129622422 |
|
}, |
|
"eval_loss": 0.5832124948501587, |
|
"eval_runtime": 66.0442, |
|
"eval_samples_per_second": 77.796, |
|
"eval_steps_per_second": 3.255, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5214091086025691 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4560620803234329 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5214091086025691 |
|
}, |
|
"eval_loss": 0.570393443107605, |
|
"eval_runtime": 66.6267, |
|
"eval_samples_per_second": 77.116, |
|
"eval_steps_per_second": 3.227, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.414522209473857e-05, |
|
"loss": 0.4453, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5251070455430128 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4507490275513904 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5251070455430128 |
|
}, |
|
"eval_loss": 0.5699160695075989, |
|
"eval_runtime": 65.6551, |
|
"eval_samples_per_second": 78.257, |
|
"eval_steps_per_second": 3.275, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5216037368625924 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4390779841776778 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5216037368625924 |
|
}, |
|
"eval_loss": 0.5724750757217407, |
|
"eval_runtime": 65.0855, |
|
"eval_samples_per_second": 78.942, |
|
"eval_steps_per_second": 3.303, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.523550019462826 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4348358288094709 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.523550019462826 |
|
}, |
|
"eval_loss": 0.5676676034927368, |
|
"eval_runtime": 65.9415, |
|
"eval_samples_per_second": 77.918, |
|
"eval_steps_per_second": 3.26, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.52958349552355 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4412835917222539 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.52958349552355 |
|
}, |
|
"eval_loss": 0.5665853023529053, |
|
"eval_runtime": 64.4843, |
|
"eval_samples_per_second": 79.678, |
|
"eval_steps_per_second": 3.334, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5305566368236668 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4501012112552316 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5305566368236668 |
|
}, |
|
"eval_loss": 0.5652035474777222, |
|
"eval_runtime": 65.7573, |
|
"eval_samples_per_second": 78.136, |
|
"eval_steps_per_second": 3.27, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.3120799868873956e-05, |
|
"loss": 0.4419, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5305566368236668 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4349986006914519 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5305566368236668 |
|
}, |
|
"eval_loss": 0.5659220814704895, |
|
"eval_runtime": 64.2429, |
|
"eval_samples_per_second": 79.978, |
|
"eval_steps_per_second": 3.347, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5303620085636435 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4502960442686812 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5303620085636435 |
|
}, |
|
"eval_loss": 0.5597621202468872, |
|
"eval_runtime": 65.7239, |
|
"eval_samples_per_second": 78.176, |
|
"eval_steps_per_second": 3.271, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5348384585441807 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4535827940910189 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5348384585441807 |
|
}, |
|
"eval_loss": 0.5543330311775208, |
|
"eval_runtime": 66.2534, |
|
"eval_samples_per_second": 77.551, |
|
"eval_steps_per_second": 3.245, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.522966134682756 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4555867940063284 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.522966134682756 |
|
}, |
|
"eval_loss": 0.5589267015457153, |
|
"eval_runtime": 65.6099, |
|
"eval_samples_per_second": 78.311, |
|
"eval_steps_per_second": 3.277, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5270533281432463 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4552882900544635 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5270533281432463 |
|
}, |
|
"eval_loss": 0.5679383873939514, |
|
"eval_runtime": 64.2272, |
|
"eval_samples_per_second": 79.997, |
|
"eval_steps_per_second": 3.347, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.2096377643009343e-05, |
|
"loss": 0.4465, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5284157259634099 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44697535042783487 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5284157259634099 |
|
}, |
|
"eval_loss": 0.5660849213600159, |
|
"eval_runtime": 65.7257, |
|
"eval_samples_per_second": 78.173, |
|
"eval_steps_per_second": 3.271, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.526274815103153 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45912227016242724 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.526274815103153 |
|
}, |
|
"eval_loss": 0.5786118507385254, |
|
"eval_runtime": 65.7514, |
|
"eval_samples_per_second": 78.143, |
|
"eval_steps_per_second": 3.27, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5233553912028026 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4514329459423202 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5233553912028026 |
|
}, |
|
"eval_loss": 0.5863333344459534, |
|
"eval_runtime": 65.1687, |
|
"eval_samples_per_second": 78.842, |
|
"eval_steps_per_second": 3.299, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5268586998832231 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4562521468447126 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5268586998832231 |
|
}, |
|
"eval_loss": 0.5805368423461914, |
|
"eval_runtime": 66.5366, |
|
"eval_samples_per_second": 77.221, |
|
"eval_steps_per_second": 3.231, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5325029194239004 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45956515670564957 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5325029194239004 |
|
}, |
|
"eval_loss": 0.5740306377410889, |
|
"eval_runtime": 64.6401, |
|
"eval_samples_per_second": 79.486, |
|
"eval_steps_per_second": 3.326, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.107195541714473e-05, |
|
"loss": 0.4239, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5282210977033865 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45915033212245415 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5282210977033865 |
|
}, |
|
"eval_loss": 0.5756375789642334, |
|
"eval_runtime": 68.6905, |
|
"eval_samples_per_second": 74.799, |
|
"eval_steps_per_second": 3.13, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5245231607629428 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4461544721075864 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5245231607629428 |
|
}, |
|
"eval_loss": 0.582427442073822, |
|
"eval_runtime": 66.4502, |
|
"eval_samples_per_second": 77.321, |
|
"eval_steps_per_second": 3.236, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5216037368625924 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4586287753791167 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5216037368625924 |
|
}, |
|
"eval_loss": 0.5848153233528137, |
|
"eval_runtime": 66.8407, |
|
"eval_samples_per_second": 76.869, |
|
"eval_steps_per_second": 3.217, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5243285325029194 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44899785496859856 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5243285325029194 |
|
}, |
|
"eval_loss": 0.5789693593978882, |
|
"eval_runtime": 64.6355, |
|
"eval_samples_per_second": 79.492, |
|
"eval_steps_per_second": 3.326, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5307512650836902 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45259529720591607 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5307512650836902 |
|
}, |
|
"eval_loss": 0.5765287280082703, |
|
"eval_runtime": 65.9843, |
|
"eval_samples_per_second": 77.867, |
|
"eval_steps_per_second": 3.258, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.004753319128012e-05, |
|
"loss": 0.4262, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5237446477228493 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4596084810630643 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5237446477228493 |
|
}, |
|
"eval_loss": 0.5859604477882385, |
|
"eval_runtime": 64.6675, |
|
"eval_samples_per_second": 79.453, |
|
"eval_steps_per_second": 3.325, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5227715064227326 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.46148605295712625 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5227715064227326 |
|
}, |
|
"eval_loss": 0.5810762047767639, |
|
"eval_runtime": 64.6184, |
|
"eval_samples_per_second": 79.513, |
|
"eval_steps_per_second": 3.327, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5245231607629428 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45539993700221526 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5245231607629428 |
|
}, |
|
"eval_loss": 0.5829537510871887, |
|
"eval_runtime": 65.5451, |
|
"eval_samples_per_second": 78.389, |
|
"eval_steps_per_second": 3.28, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5256909303230829 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4484784466484443 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5256909303230829 |
|
}, |
|
"eval_loss": 0.5800737738609314, |
|
"eval_runtime": 65.0145, |
|
"eval_samples_per_second": 79.028, |
|
"eval_steps_per_second": 3.307, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5266640716231997 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45752780514666963 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5266640716231997 |
|
}, |
|
"eval_loss": 0.577346682548523, |
|
"eval_runtime": 66.4954, |
|
"eval_samples_per_second": 77.268, |
|
"eval_steps_per_second": 3.233, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 9.023110965415506e-06, |
|
"loss": 0.4264, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5303620085636435 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45750719642409793 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5303620085636435 |
|
}, |
|
"eval_loss": 0.5826326608657837, |
|
"eval_runtime": 66.7123, |
|
"eval_samples_per_second": 77.017, |
|
"eval_steps_per_second": 3.223, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5276372129233164 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4493522486957304 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5276372129233164 |
|
}, |
|
"eval_loss": 0.5857098698616028, |
|
"eval_runtime": 68.7242, |
|
"eval_samples_per_second": 74.763, |
|
"eval_steps_per_second": 3.128, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5293888672635266 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4554299299346091 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5293888672635266 |
|
}, |
|
"eval_loss": 0.582249104976654, |
|
"eval_runtime": 66.5201, |
|
"eval_samples_per_second": 77.24, |
|
"eval_steps_per_second": 3.232, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5249124172829895 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4618692825892151 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5249124172829895 |
|
}, |
|
"eval_loss": 0.5820056200027466, |
|
"eval_runtime": 67.0656, |
|
"eval_samples_per_second": 76.612, |
|
"eval_steps_per_second": 3.206, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5278318411833398 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.46204183748606525 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5278318411833398 |
|
}, |
|
"eval_loss": 0.5718916654586792, |
|
"eval_runtime": 66.261, |
|
"eval_samples_per_second": 77.542, |
|
"eval_steps_per_second": 3.245, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 7.998688739550894e-06, |
|
"loss": 0.4208, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5241339042428961 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45599482745719977 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5241339042428961 |
|
}, |
|
"eval_loss": 0.5849379301071167, |
|
"eval_runtime": 66.4821, |
|
"eval_samples_per_second": 77.284, |
|
"eval_steps_per_second": 3.234, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5237446477228493 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4555559080742948 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5237446477228493 |
|
}, |
|
"eval_loss": 0.5843728184700012, |
|
"eval_runtime": 64.7647, |
|
"eval_samples_per_second": 79.333, |
|
"eval_steps_per_second": 3.32, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5243285325029194 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45771013752228895 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5243285325029194 |
|
}, |
|
"eval_loss": 0.5808063745498657, |
|
"eval_runtime": 65.6904, |
|
"eval_samples_per_second": 78.215, |
|
"eval_steps_per_second": 3.273, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5260801868431296 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4631605032772811 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5260801868431296 |
|
}, |
|
"eval_loss": 0.5888592004776001, |
|
"eval_runtime": 64.5737, |
|
"eval_samples_per_second": 79.568, |
|
"eval_steps_per_second": 3.33, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5266640716231997 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44799630877745866 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5266640716231997 |
|
}, |
|
"eval_loss": 0.588912308216095, |
|
"eval_runtime": 66.1245, |
|
"eval_samples_per_second": 77.702, |
|
"eval_steps_per_second": 3.251, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 6.974266513686281e-06, |
|
"loss": 0.4228, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5264694433631764 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.46067694317610275 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5264694433631764 |
|
}, |
|
"eval_loss": 0.5871345400810242, |
|
"eval_runtime": 66.1371, |
|
"eval_samples_per_second": 77.687, |
|
"eval_steps_per_second": 3.251, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5299727520435967 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45858742112683054 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5299727520435967 |
|
}, |
|
"eval_loss": 0.5821002721786499, |
|
"eval_runtime": 64.6289, |
|
"eval_samples_per_second": 79.5, |
|
"eval_steps_per_second": 3.327, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5276372129233164 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.44706482899459093 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5276372129233164 |
|
}, |
|
"eval_loss": 0.5892929434776306, |
|
"eval_runtime": 65.9065, |
|
"eval_samples_per_second": 77.959, |
|
"eval_steps_per_second": 3.262, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5278318411833398 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4531243508380409 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5278318411833398 |
|
}, |
|
"eval_loss": 0.5871041417121887, |
|
"eval_runtime": 64.9299, |
|
"eval_samples_per_second": 79.131, |
|
"eval_steps_per_second": 3.311, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5291942390035033 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4544963505218613 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5291942390035033 |
|
}, |
|
"eval_loss": 0.581632137298584, |
|
"eval_runtime": 66.5734, |
|
"eval_samples_per_second": 77.178, |
|
"eval_steps_per_second": 3.23, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 5.949844287821669e-06, |
|
"loss": 0.4146, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.526274815103153 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.46000446310082077 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.526274815103153 |
|
}, |
|
"eval_loss": 0.5873268842697144, |
|
"eval_runtime": 66.2757, |
|
"eval_samples_per_second": 77.525, |
|
"eval_steps_per_second": 3.244, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5293888672635266 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4466423631159167 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5293888672635266 |
|
}, |
|
"eval_loss": 0.5862780809402466, |
|
"eval_runtime": 65.0169, |
|
"eval_samples_per_second": 79.026, |
|
"eval_steps_per_second": 3.307, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5274425846632931 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4553183199766613 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5274425846632931 |
|
}, |
|
"eval_loss": 0.5865354537963867, |
|
"eval_runtime": 66.6836, |
|
"eval_samples_per_second": 77.05, |
|
"eval_steps_per_second": 3.224, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.526274815103153 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4584618360909396 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.526274815103153 |
|
}, |
|
"eval_loss": 0.5862194299697876, |
|
"eval_runtime": 65.2945, |
|
"eval_samples_per_second": 78.69, |
|
"eval_steps_per_second": 3.293, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5253016738030362 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4556988660685002 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5253016738030362 |
|
}, |
|
"eval_loss": 0.5815604329109192, |
|
"eval_runtime": 66.8701, |
|
"eval_samples_per_second": 76.836, |
|
"eval_steps_per_second": 3.215, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 4.925422061957056e-06, |
|
"loss": 0.4179, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.52899961074348 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4502397089365151 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.52899961074348 |
|
}, |
|
"eval_loss": 0.5844454169273376, |
|
"eval_runtime": 64.4681, |
|
"eval_samples_per_second": 79.698, |
|
"eval_steps_per_second": 3.335, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5293888672635266 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45103878090924954 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5293888672635266 |
|
}, |
|
"eval_loss": 0.5837833881378174, |
|
"eval_runtime": 64.5629, |
|
"eval_samples_per_second": 79.581, |
|
"eval_steps_per_second": 3.33, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5264694433631764 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4605815902638898 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5264694433631764 |
|
}, |
|
"eval_loss": 0.5830559730529785, |
|
"eval_runtime": 65.8104, |
|
"eval_samples_per_second": 78.073, |
|
"eval_steps_per_second": 3.267, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.52958349552355 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45065967020468145 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.52958349552355 |
|
}, |
|
"eval_loss": 0.5816081166267395, |
|
"eval_runtime": 65.6528, |
|
"eval_samples_per_second": 78.26, |
|
"eval_steps_per_second": 3.275, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5293888672635266 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45309891731169716 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5293888672635266 |
|
}, |
|
"eval_loss": 0.5825657844543457, |
|
"eval_runtime": 65.9972, |
|
"eval_samples_per_second": 77.852, |
|
"eval_steps_per_second": 3.258, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.900999836092445e-06, |
|
"loss": 0.4259, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5303620085636435 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4533884487481862 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5303620085636435 |
|
}, |
|
"eval_loss": 0.5796229839324951, |
|
"eval_runtime": 64.285, |
|
"eval_samples_per_second": 79.925, |
|
"eval_steps_per_second": 3.344, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5305566368236668 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4515523588029331 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5305566368236668 |
|
}, |
|
"eval_loss": 0.5833083391189575, |
|
"eval_runtime": 65.2335, |
|
"eval_samples_per_second": 78.763, |
|
"eval_steps_per_second": 3.296, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5260801868431296 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4593256681180433 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5260801868431296 |
|
}, |
|
"eval_loss": 0.578184187412262, |
|
"eval_runtime": 66.0227, |
|
"eval_samples_per_second": 77.822, |
|
"eval_steps_per_second": 3.256, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5268586998832231 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4603837774249291 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5268586998832231 |
|
}, |
|
"eval_loss": 0.5812641382217407, |
|
"eval_runtime": 65.1177, |
|
"eval_samples_per_second": 78.903, |
|
"eval_steps_per_second": 3.302, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5319190346438303 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45997737731014476 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5319190346438303 |
|
}, |
|
"eval_loss": 0.5770907402038574, |
|
"eval_runtime": 65.3117, |
|
"eval_samples_per_second": 78.669, |
|
"eval_steps_per_second": 3.292, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.8765776102278315e-06, |
|
"loss": 0.4274, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5247177890229662 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45907215035166693 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5247177890229662 |
|
}, |
|
"eval_loss": 0.5833043456077576, |
|
"eval_runtime": 65.7031, |
|
"eval_samples_per_second": 78.2, |
|
"eval_steps_per_second": 3.272, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.526274815103153 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4541259526342647 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.526274815103153 |
|
}, |
|
"eval_loss": 0.5847244262695312, |
|
"eval_runtime": 65.8232, |
|
"eval_samples_per_second": 78.058, |
|
"eval_steps_per_second": 3.266, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5266640716231997 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.451974998095672 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5266640716231997 |
|
}, |
|
"eval_loss": 0.5817099809646606, |
|
"eval_runtime": 64.7736, |
|
"eval_samples_per_second": 79.322, |
|
"eval_steps_per_second": 3.319, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5288049824834566 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.46024834620814586 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5288049824834566 |
|
}, |
|
"eval_loss": 0.5799014568328857, |
|
"eval_runtime": 65.921, |
|
"eval_samples_per_second": 77.942, |
|
"eval_steps_per_second": 3.261, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5276372129233164 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4581019267498323 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5276372129233164 |
|
}, |
|
"eval_loss": 0.5810489058494568, |
|
"eval_runtime": 65.3442, |
|
"eval_samples_per_second": 78.63, |
|
"eval_steps_per_second": 3.29, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.8521553843632193e-06, |
|
"loss": 0.4289, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5272479564032697 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4541508997429681 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5272479564032697 |
|
}, |
|
"eval_loss": 0.5799488425254822, |
|
"eval_runtime": 65.7671, |
|
"eval_samples_per_second": 78.124, |
|
"eval_steps_per_second": 3.269, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5274425846632931 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4574637003228606 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5274425846632931 |
|
}, |
|
"eval_loss": 0.5793448686599731, |
|
"eval_runtime": 65.0934, |
|
"eval_samples_per_second": 78.933, |
|
"eval_steps_per_second": 3.303, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5276372129233164 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45638935381995627 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5276372129233164 |
|
}, |
|
"eval_loss": 0.5791721940040588, |
|
"eval_runtime": 64.3159, |
|
"eval_samples_per_second": 79.887, |
|
"eval_steps_per_second": 3.343, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5268586998832231 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45473822883034853 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5268586998832231 |
|
}, |
|
"eval_loss": 0.5804610848426819, |
|
"eval_runtime": 66.2146, |
|
"eval_samples_per_second": 77.596, |
|
"eval_steps_per_second": 3.247, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5270533281432463 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45519902851058563 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5270533281432463 |
|
}, |
|
"eval_loss": 0.5822835564613342, |
|
"eval_runtime": 65.6357, |
|
"eval_samples_per_second": 78.281, |
|
"eval_steps_per_second": 3.276, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 8.277331584986067e-07, |
|
"loss": 0.4174, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5278318411833398 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4562786712755399 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5278318411833398 |
|
}, |
|
"eval_loss": 0.5810161828994751, |
|
"eval_runtime": 65.4574, |
|
"eval_samples_per_second": 78.494, |
|
"eval_steps_per_second": 3.285, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5280264694433632 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.45638550607459255 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5280264694433632 |
|
}, |
|
"eval_loss": 0.5815566182136536, |
|
"eval_runtime": 64.709, |
|
"eval_samples_per_second": 79.402, |
|
"eval_steps_per_second": 3.323, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5276372129233164 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.456888857673732 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5276372129233164 |
|
}, |
|
"eval_loss": 0.5815967321395874, |
|
"eval_runtime": 65.864, |
|
"eval_samples_per_second": 78.009, |
|
"eval_steps_per_second": 3.264, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5274425846632931 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.456560066253126 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5274425846632931 |
|
}, |
|
"eval_loss": 0.5813802480697632, |
|
"eval_runtime": 65.7676, |
|
"eval_samples_per_second": 78.124, |
|
"eval_steps_per_second": 3.269, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_label_accuracy": { |
|
"accuracy": 0.5276372129233164 |
|
}, |
|
"eval_label_f1_macro": { |
|
"f1": 0.4561525816310299 |
|
}, |
|
"eval_label_f1_micro": { |
|
"f1": 0.5276372129233164 |
|
}, |
|
"eval_loss": 0.5812935829162598, |
|
"eval_runtime": 66.0073, |
|
"eval_samples_per_second": 77.84, |
|
"eval_steps_per_second": 3.257, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 24404, |
|
"total_flos": 5.079375568585728e+16, |
|
"train_loss": 0.5641878431614374, |
|
"train_runtime": 33021.6046, |
|
"train_samples_per_second": 11.824, |
|
"train_steps_per_second": 0.739 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 24404, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 5.079375568585728e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|