stance-detection-t5 / trainer_state.json
Alex Spangher
updated
4ce7ed9
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 100,
"global_step": 24404,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 2.032832145690918,
"eval_runtime": 62.2478,
"eval_samples_per_second": 82.541,
"eval_steps_per_second": 3.454,
"step": 100
},
{
"epoch": 0.03,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 1.4367562532424927,
"eval_runtime": 63.0927,
"eval_samples_per_second": 81.436,
"eval_steps_per_second": 3.408,
"step": 200
},
{
"epoch": 0.05,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 1.2640273571014404,
"eval_runtime": 61.4349,
"eval_samples_per_second": 83.633,
"eval_steps_per_second": 3.5,
"step": 300
},
{
"epoch": 0.07,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 1.173414707183838,
"eval_runtime": 64.7986,
"eval_samples_per_second": 79.292,
"eval_steps_per_second": 3.318,
"step": 400
},
{
"epoch": 0.08,
"learning_rate": 4.897557777413539e-05,
"loss": 2.7729,
"step": 500
},
{
"epoch": 0.08,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 1.1171976327896118,
"eval_runtime": 62.4514,
"eval_samples_per_second": 82.272,
"eval_steps_per_second": 3.443,
"step": 500
},
{
"epoch": 0.1,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 1.0716122388839722,
"eval_runtime": 62.3679,
"eval_samples_per_second": 82.382,
"eval_steps_per_second": 3.447,
"step": 600
},
{
"epoch": 0.11,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 1.0408052206039429,
"eval_runtime": 62.3699,
"eval_samples_per_second": 82.379,
"eval_steps_per_second": 3.447,
"step": 700
},
{
"epoch": 0.13,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 1.0002268552780151,
"eval_runtime": 60.9857,
"eval_samples_per_second": 84.249,
"eval_steps_per_second": 3.525,
"step": 800
},
{
"epoch": 0.15,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 0.9761040806770325,
"eval_runtime": 63.3879,
"eval_samples_per_second": 81.057,
"eval_steps_per_second": 3.392,
"step": 900
},
{
"epoch": 0.16,
"learning_rate": 4.795115554827078e-05,
"loss": 1.1066,
"step": 1000
},
{
"epoch": 0.16,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 0.9439008831977844,
"eval_runtime": 60.9997,
"eval_samples_per_second": 84.23,
"eval_steps_per_second": 3.525,
"step": 1000
},
{
"epoch": 0.18,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 0.921099841594696,
"eval_runtime": 61.9171,
"eval_samples_per_second": 82.982,
"eval_steps_per_second": 3.472,
"step": 1100
},
{
"epoch": 0.2,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 0.89990234375,
"eval_runtime": 66.001,
"eval_samples_per_second": 77.847,
"eval_steps_per_second": 3.258,
"step": 1200
},
{
"epoch": 0.21,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 0.879767656326294,
"eval_runtime": 65.3826,
"eval_samples_per_second": 78.584,
"eval_steps_per_second": 3.288,
"step": 1300
},
{
"epoch": 0.23,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 0.8532700538635254,
"eval_runtime": 66.1288,
"eval_samples_per_second": 77.697,
"eval_steps_per_second": 3.251,
"step": 1400
},
{
"epoch": 0.25,
"learning_rate": 4.692673332240616e-05,
"loss": 0.9358,
"step": 1500
},
{
"epoch": 0.25,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 0.8349987864494324,
"eval_runtime": 64.1308,
"eval_samples_per_second": 80.118,
"eval_steps_per_second": 3.353,
"step": 1500
},
{
"epoch": 0.26,
"eval_label_accuracy": {
"accuracy": 0.4388867263526664
},
"eval_label_f1_macro": {
"f1": 0.1525091302583525
},
"eval_label_f1_micro": {
"f1": 0.4388867263526664
},
"eval_loss": 0.8146479725837708,
"eval_runtime": 65.0686,
"eval_samples_per_second": 78.963,
"eval_steps_per_second": 3.304,
"step": 1600
},
{
"epoch": 0.28,
"eval_label_accuracy": {
"accuracy": 0.4534838458544181
},
"eval_label_f1_macro": {
"f1": 0.21174904672848718
},
"eval_label_f1_micro": {
"f1": 0.4534838458544181
},
"eval_loss": 0.7984741926193237,
"eval_runtime": 64.3594,
"eval_samples_per_second": 79.833,
"eval_steps_per_second": 3.341,
"step": 1700
},
{
"epoch": 0.3,
"eval_label_accuracy": {
"accuracy": 0.4560140132347217
},
"eval_label_f1_macro": {
"f1": 0.24178290688684612
},
"eval_label_f1_micro": {
"f1": 0.4560140132347217
},
"eval_loss": 0.7780113816261292,
"eval_runtime": 66.3431,
"eval_samples_per_second": 77.446,
"eval_steps_per_second": 3.241,
"step": 1800
},
{
"epoch": 0.31,
"eval_label_accuracy": {
"accuracy": 0.46321525885558584
},
"eval_label_f1_macro": {
"f1": 0.24751285219162994
},
"eval_label_f1_micro": {
"f1": 0.4632152588555859
},
"eval_loss": 0.7548134326934814,
"eval_runtime": 65.2338,
"eval_samples_per_second": 78.763,
"eval_steps_per_second": 3.296,
"step": 1900
},
{
"epoch": 0.33,
"learning_rate": 4.590231109654155e-05,
"loss": 0.8298,
"step": 2000
},
{
"epoch": 0.33,
"eval_label_accuracy": {
"accuracy": 0.46924873491630986
},
"eval_label_f1_macro": {
"f1": 0.2464816382147299
},
"eval_label_f1_micro": {
"f1": 0.46924873491630986
},
"eval_loss": 0.7355437874794006,
"eval_runtime": 66.2105,
"eval_samples_per_second": 77.601,
"eval_steps_per_second": 3.247,
"step": 2000
},
{
"epoch": 0.34,
"eval_label_accuracy": {
"accuracy": 0.43557804593226934
},
"eval_label_f1_macro": {
"f1": 0.24962525654931594
},
"eval_label_f1_micro": {
"f1": 0.43557804593226934
},
"eval_loss": 0.7379868626594543,
"eval_runtime": 66.543,
"eval_samples_per_second": 77.213,
"eval_steps_per_second": 3.231,
"step": 2100
},
{
"epoch": 0.36,
"eval_label_accuracy": {
"accuracy": 0.46555079797586607
},
"eval_label_f1_macro": {
"f1": 0.2992369280869709
},
"eval_label_f1_micro": {
"f1": 0.46555079797586607
},
"eval_loss": 0.7291679978370667,
"eval_runtime": 65.5998,
"eval_samples_per_second": 78.323,
"eval_steps_per_second": 3.277,
"step": 2200
},
{
"epoch": 0.38,
"eval_label_accuracy": {
"accuracy": 0.49202024133904243
},
"eval_label_f1_macro": {
"f1": 0.3141049709544147
},
"eval_label_f1_micro": {
"f1": 0.49202024133904243
},
"eval_loss": 0.6944931149482727,
"eval_runtime": 65.8926,
"eval_samples_per_second": 77.975,
"eval_steps_per_second": 3.263,
"step": 2300
},
{
"epoch": 0.39,
"eval_label_accuracy": {
"accuracy": 0.502724795640327
},
"eval_label_f1_macro": {
"f1": 0.3701433983343189
},
"eval_label_f1_micro": {
"f1": 0.502724795640327
},
"eval_loss": 0.6872398257255554,
"eval_runtime": 64.1033,
"eval_samples_per_second": 80.152,
"eval_steps_per_second": 3.354,
"step": 2400
},
{
"epoch": 0.41,
"learning_rate": 4.4877888870676944e-05,
"loss": 0.7424,
"step": 2500
},
{
"epoch": 0.41,
"eval_label_accuracy": {
"accuracy": 0.49883223043985986
},
"eval_label_f1_macro": {
"f1": 0.37174762565152936
},
"eval_label_f1_micro": {
"f1": 0.49883223043985986
},
"eval_loss": 0.6769503951072693,
"eval_runtime": 65.8998,
"eval_samples_per_second": 77.967,
"eval_steps_per_second": 3.263,
"step": 2500
},
{
"epoch": 0.43,
"eval_label_accuracy": {
"accuracy": 0.503308680420397
},
"eval_label_f1_macro": {
"f1": 0.3680796226372295
},
"eval_label_f1_micro": {
"f1": 0.503308680420397
},
"eval_loss": 0.6723325848579407,
"eval_runtime": 64.9642,
"eval_samples_per_second": 79.09,
"eval_steps_per_second": 3.31,
"step": 2600
},
{
"epoch": 0.44,
"eval_label_accuracy": {
"accuracy": 0.5101206695212145
},
"eval_label_f1_macro": {
"f1": 0.37785590629881227
},
"eval_label_f1_micro": {
"f1": 0.5101206695212145
},
"eval_loss": 0.6622567772865295,
"eval_runtime": 67.697,
"eval_samples_per_second": 75.897,
"eval_steps_per_second": 3.176,
"step": 2700
},
{
"epoch": 0.46,
"eval_label_accuracy": {
"accuracy": 0.5058388478007007
},
"eval_label_f1_macro": {
"f1": 0.37480207390602494
},
"eval_label_f1_micro": {
"f1": 0.5058388478007007
},
"eval_loss": 0.6581109166145325,
"eval_runtime": 65.1127,
"eval_samples_per_second": 78.909,
"eval_steps_per_second": 3.302,
"step": 2800
},
{
"epoch": 0.48,
"eval_label_accuracy": {
"accuracy": 0.5081743869209809
},
"eval_label_f1_macro": {
"f1": 0.35256174805106155
},
"eval_label_f1_micro": {
"f1": 0.5081743869209809
},
"eval_loss": 0.6534283757209778,
"eval_runtime": 66.7769,
"eval_samples_per_second": 76.943,
"eval_steps_per_second": 3.22,
"step": 2900
},
{
"epoch": 0.49,
"learning_rate": 4.385346664481233e-05,
"loss": 0.6892,
"step": 3000
},
{
"epoch": 0.49,
"eval_label_accuracy": {
"accuracy": 0.4706111327364733
},
"eval_label_f1_macro": {
"f1": 0.372103581749401
},
"eval_label_f1_micro": {
"f1": 0.4706111327364733
},
"eval_loss": 0.6582987308502197,
"eval_runtime": 65.0583,
"eval_samples_per_second": 78.975,
"eval_steps_per_second": 3.305,
"step": 3000
},
{
"epoch": 0.51,
"eval_label_accuracy": {
"accuracy": 0.5044764499805372
},
"eval_label_f1_macro": {
"f1": 0.37031807042249487
},
"eval_label_f1_micro": {
"f1": 0.5044764499805372
},
"eval_loss": 0.6426356434822083,
"eval_runtime": 65.2533,
"eval_samples_per_second": 78.739,
"eval_steps_per_second": 3.295,
"step": 3100
},
{
"epoch": 0.52,
"eval_label_accuracy": {
"accuracy": 0.5058388478007007
},
"eval_label_f1_macro": {
"f1": 0.38279117822149433
},
"eval_label_f1_micro": {
"f1": 0.5058388478007007
},
"eval_loss": 0.6383734941482544,
"eval_runtime": 66.5881,
"eval_samples_per_second": 77.161,
"eval_steps_per_second": 3.229,
"step": 3200
},
{
"epoch": 0.54,
"eval_label_accuracy": {
"accuracy": 0.49727520435967304
},
"eval_label_f1_macro": {
"f1": 0.352874833319086
},
"eval_label_f1_micro": {
"f1": 0.49727520435967304
},
"eval_loss": 0.6387777328491211,
"eval_runtime": 65.3478,
"eval_samples_per_second": 78.625,
"eval_steps_per_second": 3.29,
"step": 3300
},
{
"epoch": 0.56,
"eval_label_accuracy": {
"accuracy": 0.494550408719346
},
"eval_label_f1_macro": {
"f1": 0.3715541356743288
},
"eval_label_f1_micro": {
"f1": 0.494550408719346
},
"eval_loss": 0.6494720578193665,
"eval_runtime": 65.2886,
"eval_samples_per_second": 78.697,
"eval_steps_per_second": 3.293,
"step": 3400
},
{
"epoch": 0.57,
"learning_rate": 4.282904441894772e-05,
"loss": 0.6377,
"step": 3500
},
{
"epoch": 0.57,
"eval_label_accuracy": {
"accuracy": 0.506033476060724
},
"eval_label_f1_macro": {
"f1": 0.38420420926210797
},
"eval_label_f1_micro": {
"f1": 0.506033476060724
},
"eval_loss": 0.6222317218780518,
"eval_runtime": 64.502,
"eval_samples_per_second": 79.656,
"eval_steps_per_second": 3.333,
"step": 3500
},
{
"epoch": 0.59,
"eval_label_accuracy": {
"accuracy": 0.5064227325807708
},
"eval_label_f1_macro": {
"f1": 0.3885026107563052
},
"eval_label_f1_micro": {
"f1": 0.5064227325807708
},
"eval_loss": 0.6216300129890442,
"eval_runtime": 65.7001,
"eval_samples_per_second": 78.204,
"eval_steps_per_second": 3.272,
"step": 3600
},
{
"epoch": 0.61,
"eval_label_accuracy": {
"accuracy": 0.5105099260412612
},
"eval_label_f1_macro": {
"f1": 0.35318161489739225
},
"eval_label_f1_micro": {
"f1": 0.5105099260412612
},
"eval_loss": 0.6235440373420715,
"eval_runtime": 65.6641,
"eval_samples_per_second": 78.247,
"eval_steps_per_second": 3.274,
"step": 3700
},
{
"epoch": 0.62,
"eval_label_accuracy": {
"accuracy": 0.5062281043207474
},
"eval_label_f1_macro": {
"f1": 0.37238647965373683
},
"eval_label_f1_micro": {
"f1": 0.5062281043207474
},
"eval_loss": 0.6188907623291016,
"eval_runtime": 65.7244,
"eval_samples_per_second": 78.175,
"eval_steps_per_second": 3.271,
"step": 3800
},
{
"epoch": 0.64,
"eval_label_accuracy": {
"accuracy": 0.49980537173997663
},
"eval_label_f1_macro": {
"f1": 0.38651240866909087
},
"eval_label_f1_micro": {
"f1": 0.49980537173997663
},
"eval_loss": 0.6195840239524841,
"eval_runtime": 65.3321,
"eval_samples_per_second": 78.644,
"eval_steps_per_second": 3.291,
"step": 3900
},
{
"epoch": 0.66,
"learning_rate": 4.18046221930831e-05,
"loss": 0.6149,
"step": 4000
},
{
"epoch": 0.66,
"eval_label_accuracy": {
"accuracy": 0.5075905021409108
},
"eval_label_f1_macro": {
"f1": 0.37502556996910846
},
"eval_label_f1_micro": {
"f1": 0.5075905021409108
},
"eval_loss": 0.6072418093681335,
"eval_runtime": 67.6156,
"eval_samples_per_second": 75.988,
"eval_steps_per_second": 3.18,
"step": 4000
},
{
"epoch": 0.67,
"eval_label_accuracy": {
"accuracy": 0.5145971195017517
},
"eval_label_f1_macro": {
"f1": 0.38615080427173776
},
"eval_label_f1_micro": {
"f1": 0.5145971195017517
},
"eval_loss": 0.6034330725669861,
"eval_runtime": 65.7798,
"eval_samples_per_second": 78.109,
"eval_steps_per_second": 3.268,
"step": 4100
},
{
"epoch": 0.69,
"eval_label_accuracy": {
"accuracy": 0.5165434021019852
},
"eval_label_f1_macro": {
"f1": 0.3732389735757712
},
"eval_label_f1_micro": {
"f1": 0.5165434021019852
},
"eval_loss": 0.6042677760124207,
"eval_runtime": 65.3392,
"eval_samples_per_second": 78.636,
"eval_steps_per_second": 3.291,
"step": 4200
},
{
"epoch": 0.7,
"eval_label_accuracy": {
"accuracy": 0.5029194239003504
},
"eval_label_f1_macro": {
"f1": 0.38007459621604844
},
"eval_label_f1_micro": {
"f1": 0.5029194239003504
},
"eval_loss": 0.6064484119415283,
"eval_runtime": 64.1308,
"eval_samples_per_second": 80.117,
"eval_steps_per_second": 3.353,
"step": 4300
},
{
"epoch": 0.72,
"eval_label_accuracy": {
"accuracy": 0.5095367847411444
},
"eval_label_f1_macro": {
"f1": 0.3946719741195423
},
"eval_label_f1_micro": {
"f1": 0.5095367847411444
},
"eval_loss": 0.5933734774589539,
"eval_runtime": 65.6434,
"eval_samples_per_second": 78.271,
"eval_steps_per_second": 3.275,
"step": 4400
},
{
"epoch": 0.74,
"learning_rate": 4.078019996721849e-05,
"loss": 0.5971,
"step": 4500
},
{
"epoch": 0.74,
"eval_label_accuracy": {
"accuracy": 0.5167380303620086
},
"eval_label_f1_macro": {
"f1": 0.37773378306197386
},
"eval_label_f1_micro": {
"f1": 0.5167380303620086
},
"eval_loss": 0.5919108390808105,
"eval_runtime": 63.9287,
"eval_samples_per_second": 80.371,
"eval_steps_per_second": 3.363,
"step": 4500
},
{
"epoch": 0.75,
"eval_label_accuracy": {
"accuracy": 0.5179057999221487
},
"eval_label_f1_macro": {
"f1": 0.38732144074331154
},
"eval_label_f1_micro": {
"f1": 0.5179057999221487
},
"eval_loss": 0.6021246910095215,
"eval_runtime": 67.8323,
"eval_samples_per_second": 75.746,
"eval_steps_per_second": 3.17,
"step": 4600
},
{
"epoch": 0.77,
"eval_label_accuracy": {
"accuracy": 0.522966134682756
},
"eval_label_f1_macro": {
"f1": 0.38578529254016203
},
"eval_label_f1_micro": {
"f1": 0.522966134682756
},
"eval_loss": 0.5902037620544434,
"eval_runtime": 65.8164,
"eval_samples_per_second": 78.066,
"eval_steps_per_second": 3.267,
"step": 4700
},
{
"epoch": 0.79,
"eval_label_accuracy": {
"accuracy": 0.5052549630206306
},
"eval_label_f1_macro": {
"f1": 0.3881510615594995
},
"eval_label_f1_micro": {
"f1": 0.5052549630206306
},
"eval_loss": 0.6161624789237976,
"eval_runtime": 66.7087,
"eval_samples_per_second": 77.021,
"eval_steps_per_second": 3.223,
"step": 4800
},
{
"epoch": 0.8,
"eval_label_accuracy": {
"accuracy": 0.5223822499026859
},
"eval_label_f1_macro": {
"f1": 0.3790400980761349
},
"eval_label_f1_micro": {
"f1": 0.5223822499026859
},
"eval_loss": 0.5835235714912415,
"eval_runtime": 67.5779,
"eval_samples_per_second": 76.031,
"eval_steps_per_second": 3.182,
"step": 4900
},
{
"epoch": 0.82,
"learning_rate": 3.975577774135388e-05,
"loss": 0.5745,
"step": 5000
},
{
"epoch": 0.82,
"eval_label_accuracy": {
"accuracy": 0.5241339042428961
},
"eval_label_f1_macro": {
"f1": 0.38287080754019076
},
"eval_label_f1_micro": {
"f1": 0.5241339042428961
},
"eval_loss": 0.5865485668182373,
"eval_runtime": 67.9881,
"eval_samples_per_second": 75.572,
"eval_steps_per_second": 3.162,
"step": 5000
},
{
"epoch": 0.84,
"eval_label_accuracy": {
"accuracy": 0.514791747761775
},
"eval_label_f1_macro": {
"f1": 0.3932066438681785
},
"eval_label_f1_micro": {
"f1": 0.514791747761775
},
"eval_loss": 0.582242488861084,
"eval_runtime": 65.5261,
"eval_samples_per_second": 78.412,
"eval_steps_per_second": 3.281,
"step": 5100
},
{
"epoch": 0.85,
"eval_label_accuracy": {
"accuracy": 0.5266640716231997
},
"eval_label_f1_macro": {
"f1": 0.3948337517276894
},
"eval_label_f1_micro": {
"f1": 0.5266640716231997
},
"eval_loss": 0.5757012963294983,
"eval_runtime": 66.1312,
"eval_samples_per_second": 77.694,
"eval_steps_per_second": 3.251,
"step": 5200
},
{
"epoch": 0.87,
"eval_label_accuracy": {
"accuracy": 0.5192681977423121
},
"eval_label_f1_macro": {
"f1": 0.38846154581250647
},
"eval_label_f1_micro": {
"f1": 0.5192681977423121
},
"eval_loss": 0.580342173576355,
"eval_runtime": 66.5531,
"eval_samples_per_second": 77.202,
"eval_steps_per_second": 3.231,
"step": 5300
},
{
"epoch": 0.89,
"eval_label_accuracy": {
"accuracy": 0.5278318411833398
},
"eval_label_f1_macro": {
"f1": 0.3917793049565631
},
"eval_label_f1_micro": {
"f1": 0.5278318411833398
},
"eval_loss": 0.5737766623497009,
"eval_runtime": 68.6161,
"eval_samples_per_second": 74.88,
"eval_steps_per_second": 3.133,
"step": 5400
},
{
"epoch": 0.9,
"learning_rate": 3.8731355515489266e-05,
"loss": 0.5605,
"step": 5500
},
{
"epoch": 0.9,
"eval_label_accuracy": {
"accuracy": 0.5299727520435967
},
"eval_label_f1_macro": {
"f1": 0.4000572874943932
},
"eval_label_f1_micro": {
"f1": 0.5299727520435967
},
"eval_loss": 0.5729739665985107,
"eval_runtime": 65.0276,
"eval_samples_per_second": 79.013,
"eval_steps_per_second": 3.306,
"step": 5500
},
{
"epoch": 0.92,
"eval_label_accuracy": {
"accuracy": 0.5266640716231997
},
"eval_label_f1_macro": {
"f1": 0.3871595405264875
},
"eval_label_f1_micro": {
"f1": 0.5266640716231997
},
"eval_loss": 0.5737924575805664,
"eval_runtime": 67.4714,
"eval_samples_per_second": 76.151,
"eval_steps_per_second": 3.187,
"step": 5600
},
{
"epoch": 0.93,
"eval_label_accuracy": {
"accuracy": 0.5278318411833398
},
"eval_label_f1_macro": {
"f1": 0.3912636643655603
},
"eval_label_f1_micro": {
"f1": 0.5278318411833398
},
"eval_loss": 0.5748183131217957,
"eval_runtime": 66.8019,
"eval_samples_per_second": 76.914,
"eval_steps_per_second": 3.218,
"step": 5700
},
{
"epoch": 0.95,
"eval_label_accuracy": {
"accuracy": 0.5212144803425457
},
"eval_label_f1_macro": {
"f1": 0.3655456567099176
},
"eval_label_f1_micro": {
"f1": 0.5212144803425457
},
"eval_loss": 0.5782448649406433,
"eval_runtime": 65.8052,
"eval_samples_per_second": 78.079,
"eval_steps_per_second": 3.267,
"step": 5800
},
{
"epoch": 0.97,
"eval_label_accuracy": {
"accuracy": 0.5124562086414948
},
"eval_label_f1_macro": {
"f1": 0.3896713760808098
},
"eval_label_f1_micro": {
"f1": 0.5124562086414948
},
"eval_loss": 0.5811282396316528,
"eval_runtime": 65.8997,
"eval_samples_per_second": 77.967,
"eval_steps_per_second": 3.263,
"step": 5900
},
{
"epoch": 0.98,
"learning_rate": 3.7706933289624654e-05,
"loss": 0.553,
"step": 6000
},
{
"epoch": 0.98,
"eval_label_accuracy": {
"accuracy": 0.5241339042428961
},
"eval_label_f1_macro": {
"f1": 0.3927065112402536
},
"eval_label_f1_micro": {
"f1": 0.5241339042428961
},
"eval_loss": 0.5662025809288025,
"eval_runtime": 65.5268,
"eval_samples_per_second": 78.411,
"eval_steps_per_second": 3.281,
"step": 6000
},
{
"epoch": 1.0,
"eval_label_accuracy": {
"accuracy": 0.5161541455819385
},
"eval_label_f1_macro": {
"f1": 0.3924416598143773
},
"eval_label_f1_micro": {
"f1": 0.5161541455819385
},
"eval_loss": 0.5739487409591675,
"eval_runtime": 65.309,
"eval_samples_per_second": 78.672,
"eval_steps_per_second": 3.292,
"step": 6100
},
{
"epoch": 1.02,
"eval_label_accuracy": {
"accuracy": 0.5190735694822888
},
"eval_label_f1_macro": {
"f1": 0.39288818567242456
},
"eval_label_f1_micro": {
"f1": 0.5190735694822888
},
"eval_loss": 0.5729976892471313,
"eval_runtime": 65.1321,
"eval_samples_per_second": 78.886,
"eval_steps_per_second": 3.301,
"step": 6200
},
{
"epoch": 1.03,
"eval_label_accuracy": {
"accuracy": 0.5128454651615415
},
"eval_label_f1_macro": {
"f1": 0.3945008643429393
},
"eval_label_f1_micro": {
"f1": 0.5128454651615415
},
"eval_loss": 0.5891692042350769,
"eval_runtime": 66.1687,
"eval_samples_per_second": 77.65,
"eval_steps_per_second": 3.249,
"step": 6300
},
{
"epoch": 1.05,
"eval_label_accuracy": {
"accuracy": 0.5179057999221487
},
"eval_label_f1_macro": {
"f1": 0.40357563770843996
},
"eval_label_f1_micro": {
"f1": 0.5179057999221487
},
"eval_loss": 0.5842686891555786,
"eval_runtime": 64.3545,
"eval_samples_per_second": 79.839,
"eval_steps_per_second": 3.341,
"step": 6400
},
{
"epoch": 1.07,
"learning_rate": 3.6682511063760036e-05,
"loss": 0.5254,
"step": 6500
},
{
"epoch": 1.07,
"eval_label_accuracy": {
"accuracy": 0.5219929933826392
},
"eval_label_f1_macro": {
"f1": 0.39726071722075873
},
"eval_label_f1_micro": {
"f1": 0.5219929933826392
},
"eval_loss": 0.5762518048286438,
"eval_runtime": 66.0739,
"eval_samples_per_second": 77.761,
"eval_steps_per_second": 3.254,
"step": 6500
},
{
"epoch": 1.08,
"eval_label_accuracy": {
"accuracy": 0.5184896847022188
},
"eval_label_f1_macro": {
"f1": 0.3883086023616811
},
"eval_label_f1_micro": {
"f1": 0.5184896847022188
},
"eval_loss": 0.5790498852729797,
"eval_runtime": 65.3688,
"eval_samples_per_second": 78.6,
"eval_steps_per_second": 3.289,
"step": 6600
},
{
"epoch": 1.1,
"eval_label_accuracy": {
"accuracy": 0.5260801868431296
},
"eval_label_f1_macro": {
"f1": 0.39154498796599124
},
"eval_label_f1_micro": {
"f1": 0.5260801868431296
},
"eval_loss": 0.5696905851364136,
"eval_runtime": 65.5045,
"eval_samples_per_second": 78.437,
"eval_steps_per_second": 3.282,
"step": 6700
},
{
"epoch": 1.11,
"eval_label_accuracy": {
"accuracy": 0.5171272868820552
},
"eval_label_f1_macro": {
"f1": 0.39755561359688063
},
"eval_label_f1_micro": {
"f1": 0.5171272868820552
},
"eval_loss": 0.5722245573997498,
"eval_runtime": 65.5097,
"eval_samples_per_second": 78.431,
"eval_steps_per_second": 3.282,
"step": 6800
},
{
"epoch": 1.13,
"eval_label_accuracy": {
"accuracy": 0.5231607629427792
},
"eval_label_f1_macro": {
"f1": 0.3999224263889678
},
"eval_label_f1_micro": {
"f1": 0.5231607629427792
},
"eval_loss": 0.5762615203857422,
"eval_runtime": 67.2536,
"eval_samples_per_second": 76.397,
"eval_steps_per_second": 3.197,
"step": 6900
},
{
"epoch": 1.15,
"learning_rate": 3.565808883789543e-05,
"loss": 0.5282,
"step": 7000
},
{
"epoch": 1.15,
"eval_label_accuracy": {
"accuracy": 0.5239392759828727
},
"eval_label_f1_macro": {
"f1": 0.40076468903176177
},
"eval_label_f1_micro": {
"f1": 0.5239392759828727
},
"eval_loss": 0.5675185918807983,
"eval_runtime": 65.953,
"eval_samples_per_second": 77.904,
"eval_steps_per_second": 3.26,
"step": 7000
},
{
"epoch": 1.16,
"eval_label_accuracy": {
"accuracy": 0.5291942390035033
},
"eval_label_f1_macro": {
"f1": 0.3977209221647911
},
"eval_label_f1_micro": {
"f1": 0.5291942390035033
},
"eval_loss": 0.578584611415863,
"eval_runtime": 66.5476,
"eval_samples_per_second": 77.208,
"eval_steps_per_second": 3.231,
"step": 7100
},
{
"epoch": 1.18,
"eval_label_accuracy": {
"accuracy": 0.5286103542234333
},
"eval_label_f1_macro": {
"f1": 0.3972971376121283
},
"eval_label_f1_micro": {
"f1": 0.5286103542234333
},
"eval_loss": 0.5829901099205017,
"eval_runtime": 64.4078,
"eval_samples_per_second": 79.773,
"eval_steps_per_second": 3.338,
"step": 7200
},
{
"epoch": 1.2,
"eval_label_accuracy": {
"accuracy": 0.5332814324639937
},
"eval_label_f1_macro": {
"f1": 0.4020714398207229
},
"eval_label_f1_micro": {
"f1": 0.5332814324639937
},
"eval_loss": 0.5793033242225647,
"eval_runtime": 66.9158,
"eval_samples_per_second": 76.783,
"eval_steps_per_second": 3.213,
"step": 7300
},
{
"epoch": 1.21,
"eval_label_accuracy": {
"accuracy": 0.5251070455430128
},
"eval_label_f1_macro": {
"f1": 0.40305870728099247
},
"eval_label_f1_micro": {
"f1": 0.5251070455430128
},
"eval_loss": 0.5735189318656921,
"eval_runtime": 64.7839,
"eval_samples_per_second": 79.31,
"eval_steps_per_second": 3.319,
"step": 7400
},
{
"epoch": 1.23,
"learning_rate": 3.463366661203082e-05,
"loss": 0.5098,
"step": 7500
},
{
"epoch": 1.23,
"eval_label_accuracy": {
"accuracy": 0.5309458933437136
},
"eval_label_f1_macro": {
"f1": 0.40404686699618486
},
"eval_label_f1_micro": {
"f1": 0.5309458933437136
},
"eval_loss": 0.5664647221565247,
"eval_runtime": 65.2418,
"eval_samples_per_second": 78.753,
"eval_steps_per_second": 3.295,
"step": 7500
},
{
"epoch": 1.25,
"eval_label_accuracy": {
"accuracy": 0.5227715064227326
},
"eval_label_f1_macro": {
"f1": 0.3988976622706162
},
"eval_label_f1_micro": {
"f1": 0.5227715064227326
},
"eval_loss": 0.5650814175605774,
"eval_runtime": 65.9036,
"eval_samples_per_second": 77.962,
"eval_steps_per_second": 3.262,
"step": 7600
},
{
"epoch": 1.26,
"eval_label_accuracy": {
"accuracy": 0.5266640716231997
},
"eval_label_f1_macro": {
"f1": 0.4099005096569608
},
"eval_label_f1_micro": {
"f1": 0.5266640716231997
},
"eval_loss": 0.5657761096954346,
"eval_runtime": 64.8206,
"eval_samples_per_second": 79.265,
"eval_steps_per_second": 3.317,
"step": 7700
},
{
"epoch": 1.28,
"eval_label_accuracy": {
"accuracy": 0.5278318411833398
},
"eval_label_f1_macro": {
"f1": 0.40307633583050173
},
"eval_label_f1_micro": {
"f1": 0.5278318411833398
},
"eval_loss": 0.5601173043251038,
"eval_runtime": 66.1454,
"eval_samples_per_second": 77.677,
"eval_steps_per_second": 3.25,
"step": 7800
},
{
"epoch": 1.29,
"eval_label_accuracy": {
"accuracy": 0.5313351498637602
},
"eval_label_f1_macro": {
"f1": 0.40205694039231865
},
"eval_label_f1_micro": {
"f1": 0.5313351498637602
},
"eval_loss": 0.5627759099006653,
"eval_runtime": 64.9362,
"eval_samples_per_second": 79.124,
"eval_steps_per_second": 3.311,
"step": 7900
},
{
"epoch": 1.31,
"learning_rate": 3.360924438616621e-05,
"loss": 0.5085,
"step": 8000
},
{
"epoch": 1.31,
"eval_label_accuracy": {
"accuracy": 0.5293888672635266
},
"eval_label_f1_macro": {
"f1": 0.4100384148965112
},
"eval_label_f1_micro": {
"f1": 0.5293888672635266
},
"eval_loss": 0.5625594854354858,
"eval_runtime": 65.2812,
"eval_samples_per_second": 78.706,
"eval_steps_per_second": 3.293,
"step": 8000
},
{
"epoch": 1.33,
"eval_label_accuracy": {
"accuracy": 0.5307512650836902
},
"eval_label_f1_macro": {
"f1": 0.4056927310816246
},
"eval_label_f1_micro": {
"f1": 0.5307512650836902
},
"eval_loss": 0.562485933303833,
"eval_runtime": 65.2185,
"eval_samples_per_second": 78.781,
"eval_steps_per_second": 3.297,
"step": 8100
},
{
"epoch": 1.34,
"eval_label_accuracy": {
"accuracy": 0.5354223433242506
},
"eval_label_f1_macro": {
"f1": 0.4044048936522055
},
"eval_label_f1_micro": {
"f1": 0.5354223433242506
},
"eval_loss": 0.5521498918533325,
"eval_runtime": 65.3339,
"eval_samples_per_second": 78.642,
"eval_steps_per_second": 3.291,
"step": 8200
},
{
"epoch": 1.36,
"eval_label_accuracy": {
"accuracy": 0.5330868042039704
},
"eval_label_f1_macro": {
"f1": 0.40764293642642907
},
"eval_label_f1_micro": {
"f1": 0.5330868042039704
},
"eval_loss": 0.5571908950805664,
"eval_runtime": 64.614,
"eval_samples_per_second": 79.518,
"eval_steps_per_second": 3.327,
"step": 8300
},
{
"epoch": 1.38,
"eval_label_accuracy": {
"accuracy": 0.5344492020241339
},
"eval_label_f1_macro": {
"f1": 0.4168299787554328
},
"eval_label_f1_micro": {
"f1": 0.5344492020241339
},
"eval_loss": 0.5639694333076477,
"eval_runtime": 65.7823,
"eval_samples_per_second": 78.106,
"eval_steps_per_second": 3.268,
"step": 8400
},
{
"epoch": 1.39,
"learning_rate": 3.2584822160301595e-05,
"loss": 0.5066,
"step": 8500
},
{
"epoch": 1.39,
"eval_label_accuracy": {
"accuracy": 0.5342545737641106
},
"eval_label_f1_macro": {
"f1": 0.40719472411169355
},
"eval_label_f1_micro": {
"f1": 0.5342545737641106
},
"eval_loss": 0.5575660467147827,
"eval_runtime": 65.0464,
"eval_samples_per_second": 78.99,
"eval_steps_per_second": 3.305,
"step": 8500
},
{
"epoch": 1.41,
"eval_label_accuracy": {
"accuracy": 0.5256909303230829
},
"eval_label_f1_macro": {
"f1": 0.4166295902681743
},
"eval_label_f1_micro": {
"f1": 0.5256909303230829
},
"eval_loss": 0.5562366843223572,
"eval_runtime": 66.5062,
"eval_samples_per_second": 77.256,
"eval_steps_per_second": 3.233,
"step": 8600
},
{
"epoch": 1.43,
"eval_label_accuracy": {
"accuracy": 0.5206305955624757
},
"eval_label_f1_macro": {
"f1": 0.4178763398872619
},
"eval_label_f1_micro": {
"f1": 0.5206305955624757
},
"eval_loss": 0.5629637837409973,
"eval_runtime": 65.1119,
"eval_samples_per_second": 78.91,
"eval_steps_per_second": 3.302,
"step": 8700
},
{
"epoch": 1.44,
"eval_label_accuracy": {
"accuracy": 0.5194628260023355
},
"eval_label_f1_macro": {
"f1": 0.4240614359972321
},
"eval_label_f1_micro": {
"f1": 0.5194628260023355
},
"eval_loss": 0.5645840764045715,
"eval_runtime": 65.7329,
"eval_samples_per_second": 78.165,
"eval_steps_per_second": 3.271,
"step": 8800
},
{
"epoch": 1.46,
"eval_label_accuracy": {
"accuracy": 0.5270533281432463
},
"eval_label_f1_macro": {
"f1": 0.4232811654810201
},
"eval_label_f1_micro": {
"f1": 0.5270533281432463
},
"eval_loss": 0.5628061294555664,
"eval_runtime": 66.012,
"eval_samples_per_second": 77.834,
"eval_steps_per_second": 3.257,
"step": 8900
},
{
"epoch": 1.48,
"learning_rate": 3.156039993443698e-05,
"loss": 0.5043,
"step": 9000
},
{
"epoch": 1.48,
"eval_label_accuracy": {
"accuracy": 0.5192681977423121
},
"eval_label_f1_macro": {
"f1": 0.4062380982170455
},
"eval_label_f1_micro": {
"f1": 0.5192681977423121
},
"eval_loss": 0.5618172883987427,
"eval_runtime": 64.6788,
"eval_samples_per_second": 79.439,
"eval_steps_per_second": 3.324,
"step": 9000
},
{
"epoch": 1.49,
"eval_label_accuracy": {
"accuracy": 0.5210198520825223
},
"eval_label_f1_macro": {
"f1": 0.39500190437884825
},
"eval_label_f1_micro": {
"f1": 0.5210198520825223
},
"eval_loss": 0.5575444102287292,
"eval_runtime": 66.1613,
"eval_samples_per_second": 77.659,
"eval_steps_per_second": 3.25,
"step": 9100
},
{
"epoch": 1.51,
"eval_label_accuracy": {
"accuracy": 0.5253016738030362
},
"eval_label_f1_macro": {
"f1": 0.43826705554260276
},
"eval_label_f1_micro": {
"f1": 0.5253016738030362
},
"eval_loss": 0.55515456199646,
"eval_runtime": 64.9282,
"eval_samples_per_second": 79.134,
"eval_steps_per_second": 3.311,
"step": 9200
},
{
"epoch": 1.52,
"eval_label_accuracy": {
"accuracy": 0.5313351498637602
},
"eval_label_f1_macro": {
"f1": 0.4106274369569952
},
"eval_label_f1_micro": {
"f1": 0.5313351498637602
},
"eval_loss": 0.569816529750824,
"eval_runtime": 66.2885,
"eval_samples_per_second": 77.51,
"eval_steps_per_second": 3.243,
"step": 9300
},
{
"epoch": 1.54,
"eval_label_accuracy": {
"accuracy": 0.522966134682756
},
"eval_label_f1_macro": {
"f1": 0.43811143120426327
},
"eval_label_f1_micro": {
"f1": 0.522966134682756
},
"eval_loss": 0.5568819642066956,
"eval_runtime": 65.3832,
"eval_samples_per_second": 78.583,
"eval_steps_per_second": 3.288,
"step": 9400
},
{
"epoch": 1.56,
"learning_rate": 3.0535977708572365e-05,
"loss": 0.5051,
"step": 9500
},
{
"epoch": 1.56,
"eval_label_accuracy": {
"accuracy": 0.5336706889840405
},
"eval_label_f1_macro": {
"f1": 0.40680316881280776
},
"eval_label_f1_micro": {
"f1": 0.5336706889840405
},
"eval_loss": 0.5624731779098511,
"eval_runtime": 64.3008,
"eval_samples_per_second": 79.906,
"eval_steps_per_second": 3.344,
"step": 9500
},
{
"epoch": 1.57,
"eval_label_accuracy": {
"accuracy": 0.5151810042818217
},
"eval_label_f1_macro": {
"f1": 0.4369801679839114
},
"eval_label_f1_micro": {
"f1": 0.5151810042818217
},
"eval_loss": 0.5694777369499207,
"eval_runtime": 65.4516,
"eval_samples_per_second": 78.501,
"eval_steps_per_second": 3.285,
"step": 9600
},
{
"epoch": 1.59,
"eval_label_accuracy": {
"accuracy": 0.5268586998832231
},
"eval_label_f1_macro": {
"f1": 0.4132459851165683
},
"eval_label_f1_micro": {
"f1": 0.5268586998832231
},
"eval_loss": 0.56379634141922,
"eval_runtime": 64.9267,
"eval_samples_per_second": 79.135,
"eval_steps_per_second": 3.311,
"step": 9700
},
{
"epoch": 1.61,
"eval_label_accuracy": {
"accuracy": 0.526274815103153
},
"eval_label_f1_macro": {
"f1": 0.40248592881859246
},
"eval_label_f1_micro": {
"f1": 0.526274815103153
},
"eval_loss": 0.5575631856918335,
"eval_runtime": 67.2319,
"eval_samples_per_second": 76.422,
"eval_steps_per_second": 3.198,
"step": 9800
},
{
"epoch": 1.62,
"eval_label_accuracy": {
"accuracy": 0.5258855585831063
},
"eval_label_f1_macro": {
"f1": 0.4075480681122011
},
"eval_label_f1_micro": {
"f1": 0.5258855585831063
},
"eval_loss": 0.5513472557067871,
"eval_runtime": 66.4523,
"eval_samples_per_second": 77.319,
"eval_steps_per_second": 3.235,
"step": 9900
},
{
"epoch": 1.64,
"learning_rate": 2.9511555482707753e-05,
"loss": 0.4944,
"step": 10000
},
{
"epoch": 1.64,
"eval_label_accuracy": {
"accuracy": 0.5188789412222655
},
"eval_label_f1_macro": {
"f1": 0.41372907313005425
},
"eval_label_f1_micro": {
"f1": 0.5188789412222655
},
"eval_loss": 0.5596012473106384,
"eval_runtime": 65.824,
"eval_samples_per_second": 78.057,
"eval_steps_per_second": 3.266,
"step": 10000
},
{
"epoch": 1.66,
"eval_label_accuracy": {
"accuracy": 0.5278318411833398
},
"eval_label_f1_macro": {
"f1": 0.4203493324138542
},
"eval_label_f1_micro": {
"f1": 0.5278318411833398
},
"eval_loss": 0.5546495914459229,
"eval_runtime": 65.4744,
"eval_samples_per_second": 78.473,
"eval_steps_per_second": 3.284,
"step": 10100
},
{
"epoch": 1.67,
"eval_label_accuracy": {
"accuracy": 0.535616971584274
},
"eval_label_f1_macro": {
"f1": 0.4063341022177307
},
"eval_label_f1_micro": {
"f1": 0.535616971584274
},
"eval_loss": 0.5501392483711243,
"eval_runtime": 66.7163,
"eval_samples_per_second": 77.013,
"eval_steps_per_second": 3.223,
"step": 10200
},
{
"epoch": 1.69,
"eval_label_accuracy": {
"accuracy": 0.5188789412222655
},
"eval_label_f1_macro": {
"f1": 0.4212355713439969
},
"eval_label_f1_micro": {
"f1": 0.5188789412222655
},
"eval_loss": 0.5635867714881897,
"eval_runtime": 66.1134,
"eval_samples_per_second": 77.715,
"eval_steps_per_second": 3.252,
"step": 10300
},
{
"epoch": 1.7,
"eval_label_accuracy": {
"accuracy": 0.5239392759828727
},
"eval_label_f1_macro": {
"f1": 0.4234908453840735
},
"eval_label_f1_micro": {
"f1": 0.5239392759828727
},
"eval_loss": 0.557228147983551,
"eval_runtime": 64.7253,
"eval_samples_per_second": 79.382,
"eval_steps_per_second": 3.322,
"step": 10400
},
{
"epoch": 1.72,
"learning_rate": 2.848713325684314e-05,
"loss": 0.4908,
"step": 10500
},
{
"epoch": 1.72,
"eval_label_accuracy": {
"accuracy": 0.5251070455430128
},
"eval_label_f1_macro": {
"f1": 0.42864065587210554
},
"eval_label_f1_micro": {
"f1": 0.5251070455430128
},
"eval_loss": 0.5468738675117493,
"eval_runtime": 66.3227,
"eval_samples_per_second": 77.47,
"eval_steps_per_second": 3.242,
"step": 10500
},
{
"epoch": 1.74,
"eval_label_accuracy": {
"accuracy": 0.5239392759828727
},
"eval_label_f1_macro": {
"f1": 0.41795817815383196
},
"eval_label_f1_micro": {
"f1": 0.5239392759828727
},
"eval_loss": 0.5483611226081848,
"eval_runtime": 65.701,
"eval_samples_per_second": 78.203,
"eval_steps_per_second": 3.272,
"step": 10600
},
{
"epoch": 1.75,
"eval_label_accuracy": {
"accuracy": 0.5214091086025691
},
"eval_label_f1_macro": {
"f1": 0.4024775749504264
},
"eval_label_f1_micro": {
"f1": 0.5214091086025691
},
"eval_loss": 0.5590547323226929,
"eval_runtime": 64.5911,
"eval_samples_per_second": 79.547,
"eval_steps_per_second": 3.329,
"step": 10700
},
{
"epoch": 1.77,
"eval_label_accuracy": {
"accuracy": 0.5344492020241339
},
"eval_label_f1_macro": {
"f1": 0.4370419038340574
},
"eval_label_f1_micro": {
"f1": 0.5344492020241339
},
"eval_loss": 0.5482434630393982,
"eval_runtime": 65.2112,
"eval_samples_per_second": 78.79,
"eval_steps_per_second": 3.297,
"step": 10800
},
{
"epoch": 1.79,
"eval_label_accuracy": {
"accuracy": 0.5260801868431296
},
"eval_label_f1_macro": {
"f1": 0.4312513940073556
},
"eval_label_f1_micro": {
"f1": 0.5260801868431296
},
"eval_loss": 0.5548846125602722,
"eval_runtime": 66.5938,
"eval_samples_per_second": 77.154,
"eval_steps_per_second": 3.229,
"step": 10900
},
{
"epoch": 1.8,
"learning_rate": 2.7462711030978526e-05,
"loss": 0.4956,
"step": 11000
},
{
"epoch": 1.8,
"eval_label_accuracy": {
"accuracy": 0.523550019462826
},
"eval_label_f1_macro": {
"f1": 0.41662322590714945
},
"eval_label_f1_micro": {
"f1": 0.523550019462826
},
"eval_loss": 0.5459250211715698,
"eval_runtime": 65.0832,
"eval_samples_per_second": 78.945,
"eval_steps_per_second": 3.303,
"step": 11000
},
{
"epoch": 1.82,
"eval_label_accuracy": {
"accuracy": 0.5216037368625924
},
"eval_label_f1_macro": {
"f1": 0.4343169858642386
},
"eval_label_f1_micro": {
"f1": 0.5216037368625924
},
"eval_loss": 0.5509054660797119,
"eval_runtime": 66.0147,
"eval_samples_per_second": 77.831,
"eval_steps_per_second": 3.257,
"step": 11100
},
{
"epoch": 1.84,
"eval_label_accuracy": {
"accuracy": 0.520241339042429
},
"eval_label_f1_macro": {
"f1": 0.4111126597526932
},
"eval_label_f1_micro": {
"f1": 0.520241339042429
},
"eval_loss": 0.5683469176292419,
"eval_runtime": 66.7093,
"eval_samples_per_second": 77.021,
"eval_steps_per_second": 3.223,
"step": 11200
},
{
"epoch": 1.85,
"eval_label_accuracy": {
"accuracy": 0.5219929933826392
},
"eval_label_f1_macro": {
"f1": 0.4175089884590279
},
"eval_label_f1_micro": {
"f1": 0.5219929933826392
},
"eval_loss": 0.556067168712616,
"eval_runtime": 65.0023,
"eval_samples_per_second": 79.043,
"eval_steps_per_second": 3.308,
"step": 11300
},
{
"epoch": 1.87,
"eval_label_accuracy": {
"accuracy": 0.5325029194239004
},
"eval_label_f1_macro": {
"f1": 0.4221971320619319
},
"eval_label_f1_micro": {
"f1": 0.5325029194239004
},
"eval_loss": 0.5479483008384705,
"eval_runtime": 66.1944,
"eval_samples_per_second": 77.62,
"eval_steps_per_second": 3.248,
"step": 11400
},
{
"epoch": 1.88,
"learning_rate": 2.6438288805113915e-05,
"loss": 0.491,
"step": 11500
},
{
"epoch": 1.88,
"eval_label_accuracy": {
"accuracy": 0.5173219151420786
},
"eval_label_f1_macro": {
"f1": 0.41131088875014477
},
"eval_label_f1_micro": {
"f1": 0.5173219151420786
},
"eval_loss": 0.5584209561347961,
"eval_runtime": 65.4294,
"eval_samples_per_second": 78.527,
"eval_steps_per_second": 3.286,
"step": 11500
},
{
"epoch": 1.9,
"eval_label_accuracy": {
"accuracy": 0.5305566368236668
},
"eval_label_f1_macro": {
"f1": 0.42505103683817463
},
"eval_label_f1_micro": {
"f1": 0.5305566368236668
},
"eval_loss": 0.5507027506828308,
"eval_runtime": 67.0783,
"eval_samples_per_second": 76.597,
"eval_steps_per_second": 3.205,
"step": 11600
},
{
"epoch": 1.92,
"eval_label_accuracy": {
"accuracy": 0.5284157259634099
},
"eval_label_f1_macro": {
"f1": 0.43059965616956497
},
"eval_label_f1_micro": {
"f1": 0.5284157259634099
},
"eval_loss": 0.5484103560447693,
"eval_runtime": 65.412,
"eval_samples_per_second": 78.548,
"eval_steps_per_second": 3.287,
"step": 11700
},
{
"epoch": 1.93,
"eval_label_accuracy": {
"accuracy": 0.523550019462826
},
"eval_label_f1_macro": {
"f1": 0.4283240140586777
},
"eval_label_f1_micro": {
"f1": 0.523550019462826
},
"eval_loss": 0.5518194437026978,
"eval_runtime": 65.4314,
"eval_samples_per_second": 78.525,
"eval_steps_per_second": 3.286,
"step": 11800
},
{
"epoch": 1.95,
"eval_label_accuracy": {
"accuracy": 0.5249124172829895
},
"eval_label_f1_macro": {
"f1": 0.4202384595807186
},
"eval_label_f1_micro": {
"f1": 0.5249124172829895
},
"eval_loss": 0.5580205917358398,
"eval_runtime": 64.3287,
"eval_samples_per_second": 79.871,
"eval_steps_per_second": 3.342,
"step": 11900
},
{
"epoch": 1.97,
"learning_rate": 2.5413866579249306e-05,
"loss": 0.4882,
"step": 12000
},
{
"epoch": 1.97,
"eval_label_accuracy": {
"accuracy": 0.5328921759439471
},
"eval_label_f1_macro": {
"f1": 0.4168927822368606
},
"eval_label_f1_micro": {
"f1": 0.5328921759439471
},
"eval_loss": 0.5494405031204224,
"eval_runtime": 66.1472,
"eval_samples_per_second": 77.675,
"eval_steps_per_second": 3.25,
"step": 12000
},
{
"epoch": 1.98,
"eval_label_accuracy": {
"accuracy": 0.5334760607240171
},
"eval_label_f1_macro": {
"f1": 0.4259674463534441
},
"eval_label_f1_micro": {
"f1": 0.5334760607240171
},
"eval_loss": 0.54230135679245,
"eval_runtime": 65.4715,
"eval_samples_per_second": 78.477,
"eval_steps_per_second": 3.284,
"step": 12100
},
{
"epoch": 2.0,
"eval_label_accuracy": {
"accuracy": 0.5344492020241339
},
"eval_label_f1_macro": {
"f1": 0.43189802609389794
},
"eval_label_f1_micro": {
"f1": 0.5344492020241339
},
"eval_loss": 0.5440633893013,
"eval_runtime": 64.7322,
"eval_samples_per_second": 79.373,
"eval_steps_per_second": 3.321,
"step": 12200
},
{
"epoch": 2.02,
"eval_label_accuracy": {
"accuracy": 0.5274425846632931
},
"eval_label_f1_macro": {
"f1": 0.43452702916498576
},
"eval_label_f1_micro": {
"f1": 0.5274425846632931
},
"eval_loss": 0.5580821633338928,
"eval_runtime": 66.4297,
"eval_samples_per_second": 77.345,
"eval_steps_per_second": 3.237,
"step": 12300
},
{
"epoch": 2.03,
"eval_label_accuracy": {
"accuracy": 0.5219929933826392
},
"eval_label_f1_macro": {
"f1": 0.42868961422727936
},
"eval_label_f1_micro": {
"f1": 0.5219929933826392
},
"eval_loss": 0.5607529878616333,
"eval_runtime": 64.7983,
"eval_samples_per_second": 79.292,
"eval_steps_per_second": 3.318,
"step": 12400
},
{
"epoch": 2.05,
"learning_rate": 2.438944435338469e-05,
"loss": 0.4764,
"step": 12500
},
{
"epoch": 2.05,
"eval_label_accuracy": {
"accuracy": 0.5309458933437136
},
"eval_label_f1_macro": {
"f1": 0.4501164041177361
},
"eval_label_f1_micro": {
"f1": 0.5309458933437136
},
"eval_loss": 0.5601127743721008,
"eval_runtime": 66.0041,
"eval_samples_per_second": 77.844,
"eval_steps_per_second": 3.257,
"step": 12500
},
{
"epoch": 2.07,
"eval_label_accuracy": {
"accuracy": 0.515375632541845
},
"eval_label_f1_macro": {
"f1": 0.44296253953069376
},
"eval_label_f1_micro": {
"f1": 0.515375632541845
},
"eval_loss": 0.5829929113388062,
"eval_runtime": 65.2358,
"eval_samples_per_second": 78.76,
"eval_steps_per_second": 3.296,
"step": 12600
},
{
"epoch": 2.08,
"eval_label_accuracy": {
"accuracy": 0.5241339042428961
},
"eval_label_f1_macro": {
"f1": 0.4557503585498297
},
"eval_label_f1_micro": {
"f1": 0.5241339042428961
},
"eval_loss": 0.5616418719291687,
"eval_runtime": 64.7306,
"eval_samples_per_second": 79.375,
"eval_steps_per_second": 3.321,
"step": 12700
},
{
"epoch": 2.1,
"eval_label_accuracy": {
"accuracy": 0.5227715064227326
},
"eval_label_f1_macro": {
"f1": 0.43939555085391224
},
"eval_label_f1_micro": {
"f1": 0.5227715064227326
},
"eval_loss": 0.5720220804214478,
"eval_runtime": 65.4508,
"eval_samples_per_second": 78.502,
"eval_steps_per_second": 3.285,
"step": 12800
},
{
"epoch": 2.11,
"eval_label_accuracy": {
"accuracy": 0.5249124172829895
},
"eval_label_f1_macro": {
"f1": 0.4453192886564673
},
"eval_label_f1_micro": {
"f1": 0.5249124172829895
},
"eval_loss": 0.5684590935707092,
"eval_runtime": 64.1447,
"eval_samples_per_second": 80.1,
"eval_steps_per_second": 3.352,
"step": 12900
},
{
"epoch": 2.13,
"learning_rate": 2.336502212752008e-05,
"loss": 0.459,
"step": 13000
},
{
"epoch": 2.13,
"eval_label_accuracy": {
"accuracy": 0.5184896847022188
},
"eval_label_f1_macro": {
"f1": 0.45810101040373097
},
"eval_label_f1_micro": {
"f1": 0.5184896847022188
},
"eval_loss": 0.5632970333099365,
"eval_runtime": 65.4112,
"eval_samples_per_second": 78.549,
"eval_steps_per_second": 3.287,
"step": 13000
},
{
"epoch": 2.15,
"eval_label_accuracy": {
"accuracy": 0.5301673803036201
},
"eval_label_f1_macro": {
"f1": 0.41550375963718733
},
"eval_label_f1_micro": {
"f1": 0.5301673803036201
},
"eval_loss": 0.5601311922073364,
"eval_runtime": 65.7981,
"eval_samples_per_second": 78.087,
"eval_steps_per_second": 3.268,
"step": 13100
},
{
"epoch": 2.16,
"eval_label_accuracy": {
"accuracy": 0.5326975476839237
},
"eval_label_f1_macro": {
"f1": 0.4152775382545546
},
"eval_label_f1_micro": {
"f1": 0.5326975476839237
},
"eval_loss": 0.5632578730583191,
"eval_runtime": 65.4374,
"eval_samples_per_second": 78.518,
"eval_steps_per_second": 3.286,
"step": 13200
},
{
"epoch": 2.18,
"eval_label_accuracy": {
"accuracy": 0.5225768781627093
},
"eval_label_f1_macro": {
"f1": 0.4441168152604288
},
"eval_label_f1_micro": {
"f1": 0.5225768781627093
},
"eval_loss": 0.5665469765663147,
"eval_runtime": 64.3132,
"eval_samples_per_second": 79.89,
"eval_steps_per_second": 3.343,
"step": 13300
},
{
"epoch": 2.2,
"eval_label_accuracy": {
"accuracy": 0.5225768781627093
},
"eval_label_f1_macro": {
"f1": 0.4189456259580441
},
"eval_label_f1_micro": {
"f1": 0.5225768781627093
},
"eval_loss": 0.5736687779426575,
"eval_runtime": 65.6996,
"eval_samples_per_second": 78.204,
"eval_steps_per_second": 3.272,
"step": 13400
},
{
"epoch": 2.21,
"learning_rate": 2.2340599901655468e-05,
"loss": 0.4557,
"step": 13500
},
{
"epoch": 2.21,
"eval_label_accuracy": {
"accuracy": 0.5237446477228493
},
"eval_label_f1_macro": {
"f1": 0.44732903233612287
},
"eval_label_f1_micro": {
"f1": 0.5237446477228493
},
"eval_loss": 0.5651576519012451,
"eval_runtime": 66.1178,
"eval_samples_per_second": 77.71,
"eval_steps_per_second": 3.252,
"step": 13500
},
{
"epoch": 2.23,
"eval_label_accuracy": {
"accuracy": 0.5268586998832231
},
"eval_label_f1_macro": {
"f1": 0.4296354631479917
},
"eval_label_f1_micro": {
"f1": 0.5268586998832231
},
"eval_loss": 0.566527247428894,
"eval_runtime": 65.4163,
"eval_samples_per_second": 78.543,
"eval_steps_per_second": 3.287,
"step": 13600
},
{
"epoch": 2.25,
"eval_label_accuracy": {
"accuracy": 0.5194628260023355
},
"eval_label_f1_macro": {
"f1": 0.4523218163724315
},
"eval_label_f1_micro": {
"f1": 0.5194628260023355
},
"eval_loss": 0.5746815204620361,
"eval_runtime": 66.5011,
"eval_samples_per_second": 77.262,
"eval_steps_per_second": 3.233,
"step": 13700
},
{
"epoch": 2.26,
"eval_label_accuracy": {
"accuracy": 0.5241339042428961
},
"eval_label_f1_macro": {
"f1": 0.42963157337372265
},
"eval_label_f1_micro": {
"f1": 0.5241339042428961
},
"eval_loss": 0.5709651112556458,
"eval_runtime": 64.9469,
"eval_samples_per_second": 79.111,
"eval_steps_per_second": 3.31,
"step": 13800
},
{
"epoch": 2.28,
"eval_label_accuracy": {
"accuracy": 0.52899961074348
},
"eval_label_f1_macro": {
"f1": 0.4465492291635319
},
"eval_label_f1_micro": {
"f1": 0.52899961074348
},
"eval_loss": 0.5652072429656982,
"eval_runtime": 66.3178,
"eval_samples_per_second": 77.475,
"eval_steps_per_second": 3.242,
"step": 13900
},
{
"epoch": 2.29,
"learning_rate": 2.1316177675790856e-05,
"loss": 0.4539,
"step": 14000
},
{
"epoch": 2.29,
"eval_label_accuracy": {
"accuracy": 0.5276372129233164
},
"eval_label_f1_macro": {
"f1": 0.4330344585002601
},
"eval_label_f1_micro": {
"f1": 0.5276372129233164
},
"eval_loss": 0.5652056932449341,
"eval_runtime": 64.4339,
"eval_samples_per_second": 79.741,
"eval_steps_per_second": 3.337,
"step": 14000
},
{
"epoch": 2.31,
"eval_label_accuracy": {
"accuracy": 0.5274425846632931
},
"eval_label_f1_macro": {
"f1": 0.43869182088814185
},
"eval_label_f1_micro": {
"f1": 0.5274425846632931
},
"eval_loss": 0.5703505277633667,
"eval_runtime": 66.2068,
"eval_samples_per_second": 77.605,
"eval_steps_per_second": 3.247,
"step": 14100
},
{
"epoch": 2.33,
"eval_label_accuracy": {
"accuracy": 0.5258855585831063
},
"eval_label_f1_macro": {
"f1": 0.43879780170912847
},
"eval_label_f1_micro": {
"f1": 0.5258855585831063
},
"eval_loss": 0.5729069113731384,
"eval_runtime": 65.0139,
"eval_samples_per_second": 79.029,
"eval_steps_per_second": 3.307,
"step": 14200
},
{
"epoch": 2.34,
"eval_label_accuracy": {
"accuracy": 0.5192681977423121
},
"eval_label_f1_macro": {
"f1": 0.4255072429603308
},
"eval_label_f1_micro": {
"f1": 0.5192681977423121
},
"eval_loss": 0.5682628154754639,
"eval_runtime": 65.9744,
"eval_samples_per_second": 77.879,
"eval_steps_per_second": 3.259,
"step": 14300
},
{
"epoch": 2.36,
"eval_label_accuracy": {
"accuracy": 0.5264694433631764
},
"eval_label_f1_macro": {
"f1": 0.43346825826001506
},
"eval_label_f1_micro": {
"f1": 0.5264694433631764
},
"eval_loss": 0.5605142712593079,
"eval_runtime": 65.0867,
"eval_samples_per_second": 78.941,
"eval_steps_per_second": 3.303,
"step": 14400
},
{
"epoch": 2.38,
"learning_rate": 2.029175544992624e-05,
"loss": 0.459,
"step": 14500
},
{
"epoch": 2.38,
"eval_label_accuracy": {
"accuracy": 0.5239392759828727
},
"eval_label_f1_macro": {
"f1": 0.44786889559299115
},
"eval_label_f1_micro": {
"f1": 0.5239392759828727
},
"eval_loss": 0.5726383924484253,
"eval_runtime": 66.1098,
"eval_samples_per_second": 77.719,
"eval_steps_per_second": 3.252,
"step": 14500
},
{
"epoch": 2.39,
"eval_label_accuracy": {
"accuracy": 0.5291942390035033
},
"eval_label_f1_macro": {
"f1": 0.453869134213484
},
"eval_label_f1_micro": {
"f1": 0.5291942390035033
},
"eval_loss": 0.566052258014679,
"eval_runtime": 65.833,
"eval_samples_per_second": 78.046,
"eval_steps_per_second": 3.266,
"step": 14600
},
{
"epoch": 2.41,
"eval_label_accuracy": {
"accuracy": 0.5208252238224991
},
"eval_label_f1_macro": {
"f1": 0.43278533897219335
},
"eval_label_f1_micro": {
"f1": 0.5208252238224991
},
"eval_loss": 0.5726243853569031,
"eval_runtime": 66.8914,
"eval_samples_per_second": 76.811,
"eval_steps_per_second": 3.214,
"step": 14700
},
{
"epoch": 2.43,
"eval_label_accuracy": {
"accuracy": 0.52958349552355
},
"eval_label_f1_macro": {
"f1": 0.43445104819328556
},
"eval_label_f1_micro": {
"f1": 0.52958349552355
},
"eval_loss": 0.5641396045684814,
"eval_runtime": 67.2117,
"eval_samples_per_second": 76.445,
"eval_steps_per_second": 3.199,
"step": 14800
},
{
"epoch": 2.44,
"eval_label_accuracy": {
"accuracy": 0.5206305955624757
},
"eval_label_f1_macro": {
"f1": 0.4311693806201584
},
"eval_label_f1_micro": {
"f1": 0.5206305955624757
},
"eval_loss": 0.5807725787162781,
"eval_runtime": 66.5101,
"eval_samples_per_second": 77.251,
"eval_steps_per_second": 3.233,
"step": 14900
},
{
"epoch": 2.46,
"learning_rate": 1.926733322406163e-05,
"loss": 0.4443,
"step": 15000
},
{
"epoch": 2.46,
"eval_label_accuracy": {
"accuracy": 0.5268586998832231
},
"eval_label_f1_macro": {
"f1": 0.4502312972729043
},
"eval_label_f1_micro": {
"f1": 0.5268586998832231
},
"eval_loss": 0.5696139931678772,
"eval_runtime": 65.9075,
"eval_samples_per_second": 77.958,
"eval_steps_per_second": 3.262,
"step": 15000
},
{
"epoch": 2.48,
"eval_label_accuracy": {
"accuracy": 0.5282210977033865
},
"eval_label_f1_macro": {
"f1": 0.45560500346839616
},
"eval_label_f1_micro": {
"f1": 0.5282210977033865
},
"eval_loss": 0.5631005167961121,
"eval_runtime": 65.994,
"eval_samples_per_second": 77.856,
"eval_steps_per_second": 3.258,
"step": 15100
},
{
"epoch": 2.49,
"eval_label_accuracy": {
"accuracy": 0.5200467107824056
},
"eval_label_f1_macro": {
"f1": 0.4500935823936061
},
"eval_label_f1_micro": {
"f1": 0.5200467107824056
},
"eval_loss": 0.5676321983337402,
"eval_runtime": 67.7013,
"eval_samples_per_second": 75.892,
"eval_steps_per_second": 3.176,
"step": 15200
},
{
"epoch": 2.51,
"eval_label_accuracy": {
"accuracy": 0.5280264694433632
},
"eval_label_f1_macro": {
"f1": 0.4410401539944819
},
"eval_label_f1_micro": {
"f1": 0.5280264694433632
},
"eval_loss": 0.562995195388794,
"eval_runtime": 65.3674,
"eval_samples_per_second": 78.602,
"eval_steps_per_second": 3.289,
"step": 15300
},
{
"epoch": 2.52,
"eval_label_accuracy": {
"accuracy": 0.5247177890229662
},
"eval_label_f1_macro": {
"f1": 0.43980399525374536
},
"eval_label_f1_micro": {
"f1": 0.5247177890229662
},
"eval_loss": 0.5720946788787842,
"eval_runtime": 66.3575,
"eval_samples_per_second": 77.429,
"eval_steps_per_second": 3.24,
"step": 15400
},
{
"epoch": 2.54,
"learning_rate": 1.8242910998197017e-05,
"loss": 0.4542,
"step": 15500
},
{
"epoch": 2.54,
"eval_label_accuracy": {
"accuracy": 0.5260801868431296
},
"eval_label_f1_macro": {
"f1": 0.4401714578365292
},
"eval_label_f1_micro": {
"f1": 0.5260801868431296
},
"eval_loss": 0.5669940114021301,
"eval_runtime": 65.3675,
"eval_samples_per_second": 78.602,
"eval_steps_per_second": 3.289,
"step": 15500
},
{
"epoch": 2.56,
"eval_label_accuracy": {
"accuracy": 0.5278318411833398
},
"eval_label_f1_macro": {
"f1": 0.43352621547332887
},
"eval_label_f1_micro": {
"f1": 0.5278318411833398
},
"eval_loss": 0.5640930533409119,
"eval_runtime": 65.5202,
"eval_samples_per_second": 78.419,
"eval_steps_per_second": 3.281,
"step": 15600
},
{
"epoch": 2.57,
"eval_label_accuracy": {
"accuracy": 0.5264694433631764
},
"eval_label_f1_macro": {
"f1": 0.43784802366096
},
"eval_label_f1_micro": {
"f1": 0.5264694433631764
},
"eval_loss": 0.5642226934432983,
"eval_runtime": 64.9095,
"eval_samples_per_second": 79.156,
"eval_steps_per_second": 3.312,
"step": 15700
},
{
"epoch": 2.59,
"eval_label_accuracy": {
"accuracy": 0.5315297781237835
},
"eval_label_f1_macro": {
"f1": 0.4357287377608377
},
"eval_label_f1_micro": {
"f1": 0.5315297781237835
},
"eval_loss": 0.5649986863136292,
"eval_runtime": 65.4545,
"eval_samples_per_second": 78.497,
"eval_steps_per_second": 3.285,
"step": 15800
},
{
"epoch": 2.61,
"eval_label_accuracy": {
"accuracy": 0.5253016738030362
},
"eval_label_f1_macro": {
"f1": 0.45053423727995034
},
"eval_label_f1_micro": {
"f1": 0.5253016738030362
},
"eval_loss": 0.5697636008262634,
"eval_runtime": 66.7839,
"eval_samples_per_second": 76.935,
"eval_steps_per_second": 3.219,
"step": 15900
},
{
"epoch": 2.62,
"learning_rate": 1.7218488772332405e-05,
"loss": 0.451,
"step": 16000
},
{
"epoch": 2.62,
"eval_label_accuracy": {
"accuracy": 0.532308291163877
},
"eval_label_f1_macro": {
"f1": 0.43318094070825297
},
"eval_label_f1_micro": {
"f1": 0.532308291163877
},
"eval_loss": 0.5685227513313293,
"eval_runtime": 65.281,
"eval_samples_per_second": 78.706,
"eval_steps_per_second": 3.293,
"step": 16000
},
{
"epoch": 2.64,
"eval_label_accuracy": {
"accuracy": 0.5346438302841573
},
"eval_label_f1_macro": {
"f1": 0.44085402773126725
},
"eval_label_f1_micro": {
"f1": 0.5346438302841573
},
"eval_loss": 0.5624856352806091,
"eval_runtime": 65.795,
"eval_samples_per_second": 78.091,
"eval_steps_per_second": 3.268,
"step": 16100
},
{
"epoch": 2.66,
"eval_label_accuracy": {
"accuracy": 0.5165434021019852
},
"eval_label_f1_macro": {
"f1": 0.4669507042700165
},
"eval_label_f1_micro": {
"f1": 0.5165434021019852
},
"eval_loss": 0.5654544830322266,
"eval_runtime": 64.6982,
"eval_samples_per_second": 79.415,
"eval_steps_per_second": 3.323,
"step": 16200
},
{
"epoch": 2.67,
"eval_label_accuracy": {
"accuracy": 0.5321136629038536
},
"eval_label_f1_macro": {
"f1": 0.43233762699260603
},
"eval_label_f1_micro": {
"f1": 0.5321136629038536
},
"eval_loss": 0.5595969557762146,
"eval_runtime": 65.444,
"eval_samples_per_second": 78.51,
"eval_steps_per_second": 3.285,
"step": 16300
},
{
"epoch": 2.69,
"eval_label_accuracy": {
"accuracy": 0.5138186064616582
},
"eval_label_f1_macro": {
"f1": 0.44383270041686657
},
"eval_label_f1_micro": {
"f1": 0.5138186064616582
},
"eval_loss": 0.5683060884475708,
"eval_runtime": 64.9405,
"eval_samples_per_second": 79.119,
"eval_steps_per_second": 3.311,
"step": 16400
},
{
"epoch": 2.7,
"learning_rate": 1.6194066546467794e-05,
"loss": 0.4526,
"step": 16500
},
{
"epoch": 2.7,
"eval_label_accuracy": {
"accuracy": 0.5217983651226158
},
"eval_label_f1_macro": {
"f1": 0.44043820237213194
},
"eval_label_f1_micro": {
"f1": 0.5217983651226158
},
"eval_loss": 0.5779083967208862,
"eval_runtime": 66.9943,
"eval_samples_per_second": 76.693,
"eval_steps_per_second": 3.209,
"step": 16500
},
{
"epoch": 2.72,
"eval_label_accuracy": {
"accuracy": 0.5284157259634099
},
"eval_label_f1_macro": {
"f1": 0.4418742015293833
},
"eval_label_f1_micro": {
"f1": 0.5284157259634099
},
"eval_loss": 0.5650832056999207,
"eval_runtime": 65.1199,
"eval_samples_per_second": 78.901,
"eval_steps_per_second": 3.302,
"step": 16600
},
{
"epoch": 2.74,
"eval_label_accuracy": {
"accuracy": 0.5313351498637602
},
"eval_label_f1_macro": {
"f1": 0.4478480562913326
},
"eval_label_f1_micro": {
"f1": 0.5313351498637602
},
"eval_loss": 0.5622133016586304,
"eval_runtime": 65.2808,
"eval_samples_per_second": 78.706,
"eval_steps_per_second": 3.293,
"step": 16700
},
{
"epoch": 2.75,
"eval_label_accuracy": {
"accuracy": 0.5338653172440638
},
"eval_label_f1_macro": {
"f1": 0.44680022458953056
},
"eval_label_f1_micro": {
"f1": 0.5338653172440638
},
"eval_loss": 0.5588511228561401,
"eval_runtime": 65.8894,
"eval_samples_per_second": 77.979,
"eval_steps_per_second": 3.263,
"step": 16800
},
{
"epoch": 2.77,
"eval_label_accuracy": {
"accuracy": 0.5305566368236668
},
"eval_label_f1_macro": {
"f1": 0.4609241185016919
},
"eval_label_f1_micro": {
"f1": 0.5305566368236668
},
"eval_loss": 0.5588091015815735,
"eval_runtime": 66.5578,
"eval_samples_per_second": 77.196,
"eval_steps_per_second": 3.23,
"step": 16900
},
{
"epoch": 2.79,
"learning_rate": 1.516964432060318e-05,
"loss": 0.4489,
"step": 17000
},
{
"epoch": 2.79,
"eval_label_accuracy": {
"accuracy": 0.5328921759439471
},
"eval_label_f1_macro": {
"f1": 0.4330697830804149
},
"eval_label_f1_micro": {
"f1": 0.5328921759439471
},
"eval_loss": 0.5675057768821716,
"eval_runtime": 64.7586,
"eval_samples_per_second": 79.341,
"eval_steps_per_second": 3.32,
"step": 17000
},
{
"epoch": 2.8,
"eval_label_accuracy": {
"accuracy": 0.5223822499026859
},
"eval_label_f1_macro": {
"f1": 0.45172351795604215
},
"eval_label_f1_micro": {
"f1": 0.5223822499026859
},
"eval_loss": 0.5728496313095093,
"eval_runtime": 65.6773,
"eval_samples_per_second": 78.231,
"eval_steps_per_second": 3.274,
"step": 17100
},
{
"epoch": 2.82,
"eval_label_accuracy": {
"accuracy": 0.5336706889840405
},
"eval_label_f1_macro": {
"f1": 0.4601728614564582
},
"eval_label_f1_micro": {
"f1": 0.5336706889840405
},
"eval_loss": 0.5668734908103943,
"eval_runtime": 65.4933,
"eval_samples_per_second": 78.451,
"eval_steps_per_second": 3.283,
"step": 17200
},
{
"epoch": 2.84,
"eval_label_accuracy": {
"accuracy": 0.5186843129622422
},
"eval_label_f1_macro": {
"f1": 0.424912443220669
},
"eval_label_f1_micro": {
"f1": 0.5186843129622422
},
"eval_loss": 0.5832124948501587,
"eval_runtime": 66.0442,
"eval_samples_per_second": 77.796,
"eval_steps_per_second": 3.255,
"step": 17300
},
{
"epoch": 2.85,
"eval_label_accuracy": {
"accuracy": 0.5214091086025691
},
"eval_label_f1_macro": {
"f1": 0.4560620803234329
},
"eval_label_f1_micro": {
"f1": 0.5214091086025691
},
"eval_loss": 0.570393443107605,
"eval_runtime": 66.6267,
"eval_samples_per_second": 77.116,
"eval_steps_per_second": 3.227,
"step": 17400
},
{
"epoch": 2.87,
"learning_rate": 1.414522209473857e-05,
"loss": 0.4453,
"step": 17500
},
{
"epoch": 2.87,
"eval_label_accuracy": {
"accuracy": 0.5251070455430128
},
"eval_label_f1_macro": {
"f1": 0.4507490275513904
},
"eval_label_f1_micro": {
"f1": 0.5251070455430128
},
"eval_loss": 0.5699160695075989,
"eval_runtime": 65.6551,
"eval_samples_per_second": 78.257,
"eval_steps_per_second": 3.275,
"step": 17500
},
{
"epoch": 2.88,
"eval_label_accuracy": {
"accuracy": 0.5216037368625924
},
"eval_label_f1_macro": {
"f1": 0.4390779841776778
},
"eval_label_f1_micro": {
"f1": 0.5216037368625924
},
"eval_loss": 0.5724750757217407,
"eval_runtime": 65.0855,
"eval_samples_per_second": 78.942,
"eval_steps_per_second": 3.303,
"step": 17600
},
{
"epoch": 2.9,
"eval_label_accuracy": {
"accuracy": 0.523550019462826
},
"eval_label_f1_macro": {
"f1": 0.4348358288094709
},
"eval_label_f1_micro": {
"f1": 0.523550019462826
},
"eval_loss": 0.5676676034927368,
"eval_runtime": 65.9415,
"eval_samples_per_second": 77.918,
"eval_steps_per_second": 3.26,
"step": 17700
},
{
"epoch": 2.92,
"eval_label_accuracy": {
"accuracy": 0.52958349552355
},
"eval_label_f1_macro": {
"f1": 0.4412835917222539
},
"eval_label_f1_micro": {
"f1": 0.52958349552355
},
"eval_loss": 0.5665853023529053,
"eval_runtime": 64.4843,
"eval_samples_per_second": 79.678,
"eval_steps_per_second": 3.334,
"step": 17800
},
{
"epoch": 2.93,
"eval_label_accuracy": {
"accuracy": 0.5305566368236668
},
"eval_label_f1_macro": {
"f1": 0.4501012112552316
},
"eval_label_f1_micro": {
"f1": 0.5305566368236668
},
"eval_loss": 0.5652035474777222,
"eval_runtime": 65.7573,
"eval_samples_per_second": 78.136,
"eval_steps_per_second": 3.27,
"step": 17900
},
{
"epoch": 2.95,
"learning_rate": 1.3120799868873956e-05,
"loss": 0.4419,
"step": 18000
},
{
"epoch": 2.95,
"eval_label_accuracy": {
"accuracy": 0.5305566368236668
},
"eval_label_f1_macro": {
"f1": 0.4349986006914519
},
"eval_label_f1_micro": {
"f1": 0.5305566368236668
},
"eval_loss": 0.5659220814704895,
"eval_runtime": 64.2429,
"eval_samples_per_second": 79.978,
"eval_steps_per_second": 3.347,
"step": 18000
},
{
"epoch": 2.97,
"eval_label_accuracy": {
"accuracy": 0.5303620085636435
},
"eval_label_f1_macro": {
"f1": 0.4502960442686812
},
"eval_label_f1_micro": {
"f1": 0.5303620085636435
},
"eval_loss": 0.5597621202468872,
"eval_runtime": 65.7239,
"eval_samples_per_second": 78.176,
"eval_steps_per_second": 3.271,
"step": 18100
},
{
"epoch": 2.98,
"eval_label_accuracy": {
"accuracy": 0.5348384585441807
},
"eval_label_f1_macro": {
"f1": 0.4535827940910189
},
"eval_label_f1_micro": {
"f1": 0.5348384585441807
},
"eval_loss": 0.5543330311775208,
"eval_runtime": 66.2534,
"eval_samples_per_second": 77.551,
"eval_steps_per_second": 3.245,
"step": 18200
},
{
"epoch": 3.0,
"eval_label_accuracy": {
"accuracy": 0.522966134682756
},
"eval_label_f1_macro": {
"f1": 0.4555867940063284
},
"eval_label_f1_micro": {
"f1": 0.522966134682756
},
"eval_loss": 0.5589267015457153,
"eval_runtime": 65.6099,
"eval_samples_per_second": 78.311,
"eval_steps_per_second": 3.277,
"step": 18300
},
{
"epoch": 3.02,
"eval_label_accuracy": {
"accuracy": 0.5270533281432463
},
"eval_label_f1_macro": {
"f1": 0.4552882900544635
},
"eval_label_f1_micro": {
"f1": 0.5270533281432463
},
"eval_loss": 0.5679383873939514,
"eval_runtime": 64.2272,
"eval_samples_per_second": 79.997,
"eval_steps_per_second": 3.347,
"step": 18400
},
{
"epoch": 3.03,
"learning_rate": 1.2096377643009343e-05,
"loss": 0.4465,
"step": 18500
},
{
"epoch": 3.03,
"eval_label_accuracy": {
"accuracy": 0.5284157259634099
},
"eval_label_f1_macro": {
"f1": 0.44697535042783487
},
"eval_label_f1_micro": {
"f1": 0.5284157259634099
},
"eval_loss": 0.5660849213600159,
"eval_runtime": 65.7257,
"eval_samples_per_second": 78.173,
"eval_steps_per_second": 3.271,
"step": 18500
},
{
"epoch": 3.05,
"eval_label_accuracy": {
"accuracy": 0.526274815103153
},
"eval_label_f1_macro": {
"f1": 0.45912227016242724
},
"eval_label_f1_micro": {
"f1": 0.526274815103153
},
"eval_loss": 0.5786118507385254,
"eval_runtime": 65.7514,
"eval_samples_per_second": 78.143,
"eval_steps_per_second": 3.27,
"step": 18600
},
{
"epoch": 3.07,
"eval_label_accuracy": {
"accuracy": 0.5233553912028026
},
"eval_label_f1_macro": {
"f1": 0.4514329459423202
},
"eval_label_f1_micro": {
"f1": 0.5233553912028026
},
"eval_loss": 0.5863333344459534,
"eval_runtime": 65.1687,
"eval_samples_per_second": 78.842,
"eval_steps_per_second": 3.299,
"step": 18700
},
{
"epoch": 3.08,
"eval_label_accuracy": {
"accuracy": 0.5268586998832231
},
"eval_label_f1_macro": {
"f1": 0.4562521468447126
},
"eval_label_f1_micro": {
"f1": 0.5268586998832231
},
"eval_loss": 0.5805368423461914,
"eval_runtime": 66.5366,
"eval_samples_per_second": 77.221,
"eval_steps_per_second": 3.231,
"step": 18800
},
{
"epoch": 3.1,
"eval_label_accuracy": {
"accuracy": 0.5325029194239004
},
"eval_label_f1_macro": {
"f1": 0.45956515670564957
},
"eval_label_f1_micro": {
"f1": 0.5325029194239004
},
"eval_loss": 0.5740306377410889,
"eval_runtime": 64.6401,
"eval_samples_per_second": 79.486,
"eval_steps_per_second": 3.326,
"step": 18900
},
{
"epoch": 3.11,
"learning_rate": 1.107195541714473e-05,
"loss": 0.4239,
"step": 19000
},
{
"epoch": 3.11,
"eval_label_accuracy": {
"accuracy": 0.5282210977033865
},
"eval_label_f1_macro": {
"f1": 0.45915033212245415
},
"eval_label_f1_micro": {
"f1": 0.5282210977033865
},
"eval_loss": 0.5756375789642334,
"eval_runtime": 68.6905,
"eval_samples_per_second": 74.799,
"eval_steps_per_second": 3.13,
"step": 19000
},
{
"epoch": 3.13,
"eval_label_accuracy": {
"accuracy": 0.5245231607629428
},
"eval_label_f1_macro": {
"f1": 0.4461544721075864
},
"eval_label_f1_micro": {
"f1": 0.5245231607629428
},
"eval_loss": 0.582427442073822,
"eval_runtime": 66.4502,
"eval_samples_per_second": 77.321,
"eval_steps_per_second": 3.236,
"step": 19100
},
{
"epoch": 3.15,
"eval_label_accuracy": {
"accuracy": 0.5216037368625924
},
"eval_label_f1_macro": {
"f1": 0.4586287753791167
},
"eval_label_f1_micro": {
"f1": 0.5216037368625924
},
"eval_loss": 0.5848153233528137,
"eval_runtime": 66.8407,
"eval_samples_per_second": 76.869,
"eval_steps_per_second": 3.217,
"step": 19200
},
{
"epoch": 3.16,
"eval_label_accuracy": {
"accuracy": 0.5243285325029194
},
"eval_label_f1_macro": {
"f1": 0.44899785496859856
},
"eval_label_f1_micro": {
"f1": 0.5243285325029194
},
"eval_loss": 0.5789693593978882,
"eval_runtime": 64.6355,
"eval_samples_per_second": 79.492,
"eval_steps_per_second": 3.326,
"step": 19300
},
{
"epoch": 3.18,
"eval_label_accuracy": {
"accuracy": 0.5307512650836902
},
"eval_label_f1_macro": {
"f1": 0.45259529720591607
},
"eval_label_f1_micro": {
"f1": 0.5307512650836902
},
"eval_loss": 0.5765287280082703,
"eval_runtime": 65.9843,
"eval_samples_per_second": 77.867,
"eval_steps_per_second": 3.258,
"step": 19400
},
{
"epoch": 3.2,
"learning_rate": 1.004753319128012e-05,
"loss": 0.4262,
"step": 19500
},
{
"epoch": 3.2,
"eval_label_accuracy": {
"accuracy": 0.5237446477228493
},
"eval_label_f1_macro": {
"f1": 0.4596084810630643
},
"eval_label_f1_micro": {
"f1": 0.5237446477228493
},
"eval_loss": 0.5859604477882385,
"eval_runtime": 64.6675,
"eval_samples_per_second": 79.453,
"eval_steps_per_second": 3.325,
"step": 19500
},
{
"epoch": 3.21,
"eval_label_accuracy": {
"accuracy": 0.5227715064227326
},
"eval_label_f1_macro": {
"f1": 0.46148605295712625
},
"eval_label_f1_micro": {
"f1": 0.5227715064227326
},
"eval_loss": 0.5810762047767639,
"eval_runtime": 64.6184,
"eval_samples_per_second": 79.513,
"eval_steps_per_second": 3.327,
"step": 19600
},
{
"epoch": 3.23,
"eval_label_accuracy": {
"accuracy": 0.5245231607629428
},
"eval_label_f1_macro": {
"f1": 0.45539993700221526
},
"eval_label_f1_micro": {
"f1": 0.5245231607629428
},
"eval_loss": 0.5829537510871887,
"eval_runtime": 65.5451,
"eval_samples_per_second": 78.389,
"eval_steps_per_second": 3.28,
"step": 19700
},
{
"epoch": 3.25,
"eval_label_accuracy": {
"accuracy": 0.5256909303230829
},
"eval_label_f1_macro": {
"f1": 0.4484784466484443
},
"eval_label_f1_micro": {
"f1": 0.5256909303230829
},
"eval_loss": 0.5800737738609314,
"eval_runtime": 65.0145,
"eval_samples_per_second": 79.028,
"eval_steps_per_second": 3.307,
"step": 19800
},
{
"epoch": 3.26,
"eval_label_accuracy": {
"accuracy": 0.5266640716231997
},
"eval_label_f1_macro": {
"f1": 0.45752780514666963
},
"eval_label_f1_micro": {
"f1": 0.5266640716231997
},
"eval_loss": 0.577346682548523,
"eval_runtime": 66.4954,
"eval_samples_per_second": 77.268,
"eval_steps_per_second": 3.233,
"step": 19900
},
{
"epoch": 3.28,
"learning_rate": 9.023110965415506e-06,
"loss": 0.4264,
"step": 20000
},
{
"epoch": 3.28,
"eval_label_accuracy": {
"accuracy": 0.5303620085636435
},
"eval_label_f1_macro": {
"f1": 0.45750719642409793
},
"eval_label_f1_micro": {
"f1": 0.5303620085636435
},
"eval_loss": 0.5826326608657837,
"eval_runtime": 66.7123,
"eval_samples_per_second": 77.017,
"eval_steps_per_second": 3.223,
"step": 20000
},
{
"epoch": 3.29,
"eval_label_accuracy": {
"accuracy": 0.5276372129233164
},
"eval_label_f1_macro": {
"f1": 0.4493522486957304
},
"eval_label_f1_micro": {
"f1": 0.5276372129233164
},
"eval_loss": 0.5857098698616028,
"eval_runtime": 68.7242,
"eval_samples_per_second": 74.763,
"eval_steps_per_second": 3.128,
"step": 20100
},
{
"epoch": 3.31,
"eval_label_accuracy": {
"accuracy": 0.5293888672635266
},
"eval_label_f1_macro": {
"f1": 0.4554299299346091
},
"eval_label_f1_micro": {
"f1": 0.5293888672635266
},
"eval_loss": 0.582249104976654,
"eval_runtime": 66.5201,
"eval_samples_per_second": 77.24,
"eval_steps_per_second": 3.232,
"step": 20200
},
{
"epoch": 3.33,
"eval_label_accuracy": {
"accuracy": 0.5249124172829895
},
"eval_label_f1_macro": {
"f1": 0.4618692825892151
},
"eval_label_f1_micro": {
"f1": 0.5249124172829895
},
"eval_loss": 0.5820056200027466,
"eval_runtime": 67.0656,
"eval_samples_per_second": 76.612,
"eval_steps_per_second": 3.206,
"step": 20300
},
{
"epoch": 3.34,
"eval_label_accuracy": {
"accuracy": 0.5278318411833398
},
"eval_label_f1_macro": {
"f1": 0.46204183748606525
},
"eval_label_f1_micro": {
"f1": 0.5278318411833398
},
"eval_loss": 0.5718916654586792,
"eval_runtime": 66.261,
"eval_samples_per_second": 77.542,
"eval_steps_per_second": 3.245,
"step": 20400
},
{
"epoch": 3.36,
"learning_rate": 7.998688739550894e-06,
"loss": 0.4208,
"step": 20500
},
{
"epoch": 3.36,
"eval_label_accuracy": {
"accuracy": 0.5241339042428961
},
"eval_label_f1_macro": {
"f1": 0.45599482745719977
},
"eval_label_f1_micro": {
"f1": 0.5241339042428961
},
"eval_loss": 0.5849379301071167,
"eval_runtime": 66.4821,
"eval_samples_per_second": 77.284,
"eval_steps_per_second": 3.234,
"step": 20500
},
{
"epoch": 3.38,
"eval_label_accuracy": {
"accuracy": 0.5237446477228493
},
"eval_label_f1_macro": {
"f1": 0.4555559080742948
},
"eval_label_f1_micro": {
"f1": 0.5237446477228493
},
"eval_loss": 0.5843728184700012,
"eval_runtime": 64.7647,
"eval_samples_per_second": 79.333,
"eval_steps_per_second": 3.32,
"step": 20600
},
{
"epoch": 3.39,
"eval_label_accuracy": {
"accuracy": 0.5243285325029194
},
"eval_label_f1_macro": {
"f1": 0.45771013752228895
},
"eval_label_f1_micro": {
"f1": 0.5243285325029194
},
"eval_loss": 0.5808063745498657,
"eval_runtime": 65.6904,
"eval_samples_per_second": 78.215,
"eval_steps_per_second": 3.273,
"step": 20700
},
{
"epoch": 3.41,
"eval_label_accuracy": {
"accuracy": 0.5260801868431296
},
"eval_label_f1_macro": {
"f1": 0.4631605032772811
},
"eval_label_f1_micro": {
"f1": 0.5260801868431296
},
"eval_loss": 0.5888592004776001,
"eval_runtime": 64.5737,
"eval_samples_per_second": 79.568,
"eval_steps_per_second": 3.33,
"step": 20800
},
{
"epoch": 3.43,
"eval_label_accuracy": {
"accuracy": 0.5266640716231997
},
"eval_label_f1_macro": {
"f1": 0.44799630877745866
},
"eval_label_f1_micro": {
"f1": 0.5266640716231997
},
"eval_loss": 0.588912308216095,
"eval_runtime": 66.1245,
"eval_samples_per_second": 77.702,
"eval_steps_per_second": 3.251,
"step": 20900
},
{
"epoch": 3.44,
"learning_rate": 6.974266513686281e-06,
"loss": 0.4228,
"step": 21000
},
{
"epoch": 3.44,
"eval_label_accuracy": {
"accuracy": 0.5264694433631764
},
"eval_label_f1_macro": {
"f1": 0.46067694317610275
},
"eval_label_f1_micro": {
"f1": 0.5264694433631764
},
"eval_loss": 0.5871345400810242,
"eval_runtime": 66.1371,
"eval_samples_per_second": 77.687,
"eval_steps_per_second": 3.251,
"step": 21000
},
{
"epoch": 3.46,
"eval_label_accuracy": {
"accuracy": 0.5299727520435967
},
"eval_label_f1_macro": {
"f1": 0.45858742112683054
},
"eval_label_f1_micro": {
"f1": 0.5299727520435967
},
"eval_loss": 0.5821002721786499,
"eval_runtime": 64.6289,
"eval_samples_per_second": 79.5,
"eval_steps_per_second": 3.327,
"step": 21100
},
{
"epoch": 3.47,
"eval_label_accuracy": {
"accuracy": 0.5276372129233164
},
"eval_label_f1_macro": {
"f1": 0.44706482899459093
},
"eval_label_f1_micro": {
"f1": 0.5276372129233164
},
"eval_loss": 0.5892929434776306,
"eval_runtime": 65.9065,
"eval_samples_per_second": 77.959,
"eval_steps_per_second": 3.262,
"step": 21200
},
{
"epoch": 3.49,
"eval_label_accuracy": {
"accuracy": 0.5278318411833398
},
"eval_label_f1_macro": {
"f1": 0.4531243508380409
},
"eval_label_f1_micro": {
"f1": 0.5278318411833398
},
"eval_loss": 0.5871041417121887,
"eval_runtime": 64.9299,
"eval_samples_per_second": 79.131,
"eval_steps_per_second": 3.311,
"step": 21300
},
{
"epoch": 3.51,
"eval_label_accuracy": {
"accuracy": 0.5291942390035033
},
"eval_label_f1_macro": {
"f1": 0.4544963505218613
},
"eval_label_f1_micro": {
"f1": 0.5291942390035033
},
"eval_loss": 0.581632137298584,
"eval_runtime": 66.5734,
"eval_samples_per_second": 77.178,
"eval_steps_per_second": 3.23,
"step": 21400
},
{
"epoch": 3.52,
"learning_rate": 5.949844287821669e-06,
"loss": 0.4146,
"step": 21500
},
{
"epoch": 3.52,
"eval_label_accuracy": {
"accuracy": 0.526274815103153
},
"eval_label_f1_macro": {
"f1": 0.46000446310082077
},
"eval_label_f1_micro": {
"f1": 0.526274815103153
},
"eval_loss": 0.5873268842697144,
"eval_runtime": 66.2757,
"eval_samples_per_second": 77.525,
"eval_steps_per_second": 3.244,
"step": 21500
},
{
"epoch": 3.54,
"eval_label_accuracy": {
"accuracy": 0.5293888672635266
},
"eval_label_f1_macro": {
"f1": 0.4466423631159167
},
"eval_label_f1_micro": {
"f1": 0.5293888672635266
},
"eval_loss": 0.5862780809402466,
"eval_runtime": 65.0169,
"eval_samples_per_second": 79.026,
"eval_steps_per_second": 3.307,
"step": 21600
},
{
"epoch": 3.56,
"eval_label_accuracy": {
"accuracy": 0.5274425846632931
},
"eval_label_f1_macro": {
"f1": 0.4553183199766613
},
"eval_label_f1_micro": {
"f1": 0.5274425846632931
},
"eval_loss": 0.5865354537963867,
"eval_runtime": 66.6836,
"eval_samples_per_second": 77.05,
"eval_steps_per_second": 3.224,
"step": 21700
},
{
"epoch": 3.57,
"eval_label_accuracy": {
"accuracy": 0.526274815103153
},
"eval_label_f1_macro": {
"f1": 0.4584618360909396
},
"eval_label_f1_micro": {
"f1": 0.526274815103153
},
"eval_loss": 0.5862194299697876,
"eval_runtime": 65.2945,
"eval_samples_per_second": 78.69,
"eval_steps_per_second": 3.293,
"step": 21800
},
{
"epoch": 3.59,
"eval_label_accuracy": {
"accuracy": 0.5253016738030362
},
"eval_label_f1_macro": {
"f1": 0.4556988660685002
},
"eval_label_f1_micro": {
"f1": 0.5253016738030362
},
"eval_loss": 0.5815604329109192,
"eval_runtime": 66.8701,
"eval_samples_per_second": 76.836,
"eval_steps_per_second": 3.215,
"step": 21900
},
{
"epoch": 3.61,
"learning_rate": 4.925422061957056e-06,
"loss": 0.4179,
"step": 22000
},
{
"epoch": 3.61,
"eval_label_accuracy": {
"accuracy": 0.52899961074348
},
"eval_label_f1_macro": {
"f1": 0.4502397089365151
},
"eval_label_f1_micro": {
"f1": 0.52899961074348
},
"eval_loss": 0.5844454169273376,
"eval_runtime": 64.4681,
"eval_samples_per_second": 79.698,
"eval_steps_per_second": 3.335,
"step": 22000
},
{
"epoch": 3.62,
"eval_label_accuracy": {
"accuracy": 0.5293888672635266
},
"eval_label_f1_macro": {
"f1": 0.45103878090924954
},
"eval_label_f1_micro": {
"f1": 0.5293888672635266
},
"eval_loss": 0.5837833881378174,
"eval_runtime": 64.5629,
"eval_samples_per_second": 79.581,
"eval_steps_per_second": 3.33,
"step": 22100
},
{
"epoch": 3.64,
"eval_label_accuracy": {
"accuracy": 0.5264694433631764
},
"eval_label_f1_macro": {
"f1": 0.4605815902638898
},
"eval_label_f1_micro": {
"f1": 0.5264694433631764
},
"eval_loss": 0.5830559730529785,
"eval_runtime": 65.8104,
"eval_samples_per_second": 78.073,
"eval_steps_per_second": 3.267,
"step": 22200
},
{
"epoch": 3.66,
"eval_label_accuracy": {
"accuracy": 0.52958349552355
},
"eval_label_f1_macro": {
"f1": 0.45065967020468145
},
"eval_label_f1_micro": {
"f1": 0.52958349552355
},
"eval_loss": 0.5816081166267395,
"eval_runtime": 65.6528,
"eval_samples_per_second": 78.26,
"eval_steps_per_second": 3.275,
"step": 22300
},
{
"epoch": 3.67,
"eval_label_accuracy": {
"accuracy": 0.5293888672635266
},
"eval_label_f1_macro": {
"f1": 0.45309891731169716
},
"eval_label_f1_micro": {
"f1": 0.5293888672635266
},
"eval_loss": 0.5825657844543457,
"eval_runtime": 65.9972,
"eval_samples_per_second": 77.852,
"eval_steps_per_second": 3.258,
"step": 22400
},
{
"epoch": 3.69,
"learning_rate": 3.900999836092445e-06,
"loss": 0.4259,
"step": 22500
},
{
"epoch": 3.69,
"eval_label_accuracy": {
"accuracy": 0.5303620085636435
},
"eval_label_f1_macro": {
"f1": 0.4533884487481862
},
"eval_label_f1_micro": {
"f1": 0.5303620085636435
},
"eval_loss": 0.5796229839324951,
"eval_runtime": 64.285,
"eval_samples_per_second": 79.925,
"eval_steps_per_second": 3.344,
"step": 22500
},
{
"epoch": 3.7,
"eval_label_accuracy": {
"accuracy": 0.5305566368236668
},
"eval_label_f1_macro": {
"f1": 0.4515523588029331
},
"eval_label_f1_micro": {
"f1": 0.5305566368236668
},
"eval_loss": 0.5833083391189575,
"eval_runtime": 65.2335,
"eval_samples_per_second": 78.763,
"eval_steps_per_second": 3.296,
"step": 22600
},
{
"epoch": 3.72,
"eval_label_accuracy": {
"accuracy": 0.5260801868431296
},
"eval_label_f1_macro": {
"f1": 0.4593256681180433
},
"eval_label_f1_micro": {
"f1": 0.5260801868431296
},
"eval_loss": 0.578184187412262,
"eval_runtime": 66.0227,
"eval_samples_per_second": 77.822,
"eval_steps_per_second": 3.256,
"step": 22700
},
{
"epoch": 3.74,
"eval_label_accuracy": {
"accuracy": 0.5268586998832231
},
"eval_label_f1_macro": {
"f1": 0.4603837774249291
},
"eval_label_f1_micro": {
"f1": 0.5268586998832231
},
"eval_loss": 0.5812641382217407,
"eval_runtime": 65.1177,
"eval_samples_per_second": 78.903,
"eval_steps_per_second": 3.302,
"step": 22800
},
{
"epoch": 3.75,
"eval_label_accuracy": {
"accuracy": 0.5319190346438303
},
"eval_label_f1_macro": {
"f1": 0.45997737731014476
},
"eval_label_f1_micro": {
"f1": 0.5319190346438303
},
"eval_loss": 0.5770907402038574,
"eval_runtime": 65.3117,
"eval_samples_per_second": 78.669,
"eval_steps_per_second": 3.292,
"step": 22900
},
{
"epoch": 3.77,
"learning_rate": 2.8765776102278315e-06,
"loss": 0.4274,
"step": 23000
},
{
"epoch": 3.77,
"eval_label_accuracy": {
"accuracy": 0.5247177890229662
},
"eval_label_f1_macro": {
"f1": 0.45907215035166693
},
"eval_label_f1_micro": {
"f1": 0.5247177890229662
},
"eval_loss": 0.5833043456077576,
"eval_runtime": 65.7031,
"eval_samples_per_second": 78.2,
"eval_steps_per_second": 3.272,
"step": 23000
},
{
"epoch": 3.79,
"eval_label_accuracy": {
"accuracy": 0.526274815103153
},
"eval_label_f1_macro": {
"f1": 0.4541259526342647
},
"eval_label_f1_micro": {
"f1": 0.526274815103153
},
"eval_loss": 0.5847244262695312,
"eval_runtime": 65.8232,
"eval_samples_per_second": 78.058,
"eval_steps_per_second": 3.266,
"step": 23100
},
{
"epoch": 3.8,
"eval_label_accuracy": {
"accuracy": 0.5266640716231997
},
"eval_label_f1_macro": {
"f1": 0.451974998095672
},
"eval_label_f1_micro": {
"f1": 0.5266640716231997
},
"eval_loss": 0.5817099809646606,
"eval_runtime": 64.7736,
"eval_samples_per_second": 79.322,
"eval_steps_per_second": 3.319,
"step": 23200
},
{
"epoch": 3.82,
"eval_label_accuracy": {
"accuracy": 0.5288049824834566
},
"eval_label_f1_macro": {
"f1": 0.46024834620814586
},
"eval_label_f1_micro": {
"f1": 0.5288049824834566
},
"eval_loss": 0.5799014568328857,
"eval_runtime": 65.921,
"eval_samples_per_second": 77.942,
"eval_steps_per_second": 3.261,
"step": 23300
},
{
"epoch": 3.84,
"eval_label_accuracy": {
"accuracy": 0.5276372129233164
},
"eval_label_f1_macro": {
"f1": 0.4581019267498323
},
"eval_label_f1_micro": {
"f1": 0.5276372129233164
},
"eval_loss": 0.5810489058494568,
"eval_runtime": 65.3442,
"eval_samples_per_second": 78.63,
"eval_steps_per_second": 3.29,
"step": 23400
},
{
"epoch": 3.85,
"learning_rate": 1.8521553843632193e-06,
"loss": 0.4289,
"step": 23500
},
{
"epoch": 3.85,
"eval_label_accuracy": {
"accuracy": 0.5272479564032697
},
"eval_label_f1_macro": {
"f1": 0.4541508997429681
},
"eval_label_f1_micro": {
"f1": 0.5272479564032697
},
"eval_loss": 0.5799488425254822,
"eval_runtime": 65.7671,
"eval_samples_per_second": 78.124,
"eval_steps_per_second": 3.269,
"step": 23500
},
{
"epoch": 3.87,
"eval_label_accuracy": {
"accuracy": 0.5274425846632931
},
"eval_label_f1_macro": {
"f1": 0.4574637003228606
},
"eval_label_f1_micro": {
"f1": 0.5274425846632931
},
"eval_loss": 0.5793448686599731,
"eval_runtime": 65.0934,
"eval_samples_per_second": 78.933,
"eval_steps_per_second": 3.303,
"step": 23600
},
{
"epoch": 3.88,
"eval_label_accuracy": {
"accuracy": 0.5276372129233164
},
"eval_label_f1_macro": {
"f1": 0.45638935381995627
},
"eval_label_f1_micro": {
"f1": 0.5276372129233164
},
"eval_loss": 0.5791721940040588,
"eval_runtime": 64.3159,
"eval_samples_per_second": 79.887,
"eval_steps_per_second": 3.343,
"step": 23700
},
{
"epoch": 3.9,
"eval_label_accuracy": {
"accuracy": 0.5268586998832231
},
"eval_label_f1_macro": {
"f1": 0.45473822883034853
},
"eval_label_f1_micro": {
"f1": 0.5268586998832231
},
"eval_loss": 0.5804610848426819,
"eval_runtime": 66.2146,
"eval_samples_per_second": 77.596,
"eval_steps_per_second": 3.247,
"step": 23800
},
{
"epoch": 3.92,
"eval_label_accuracy": {
"accuracy": 0.5270533281432463
},
"eval_label_f1_macro": {
"f1": 0.45519902851058563
},
"eval_label_f1_micro": {
"f1": 0.5270533281432463
},
"eval_loss": 0.5822835564613342,
"eval_runtime": 65.6357,
"eval_samples_per_second": 78.281,
"eval_steps_per_second": 3.276,
"step": 23900
},
{
"epoch": 3.93,
"learning_rate": 8.277331584986067e-07,
"loss": 0.4174,
"step": 24000
},
{
"epoch": 3.93,
"eval_label_accuracy": {
"accuracy": 0.5278318411833398
},
"eval_label_f1_macro": {
"f1": 0.4562786712755399
},
"eval_label_f1_micro": {
"f1": 0.5278318411833398
},
"eval_loss": 0.5810161828994751,
"eval_runtime": 65.4574,
"eval_samples_per_second": 78.494,
"eval_steps_per_second": 3.285,
"step": 24000
},
{
"epoch": 3.95,
"eval_label_accuracy": {
"accuracy": 0.5280264694433632
},
"eval_label_f1_macro": {
"f1": 0.45638550607459255
},
"eval_label_f1_micro": {
"f1": 0.5280264694433632
},
"eval_loss": 0.5815566182136536,
"eval_runtime": 64.709,
"eval_samples_per_second": 79.402,
"eval_steps_per_second": 3.323,
"step": 24100
},
{
"epoch": 3.97,
"eval_label_accuracy": {
"accuracy": 0.5276372129233164
},
"eval_label_f1_macro": {
"f1": 0.456888857673732
},
"eval_label_f1_micro": {
"f1": 0.5276372129233164
},
"eval_loss": 0.5815967321395874,
"eval_runtime": 65.864,
"eval_samples_per_second": 78.009,
"eval_steps_per_second": 3.264,
"step": 24200
},
{
"epoch": 3.98,
"eval_label_accuracy": {
"accuracy": 0.5274425846632931
},
"eval_label_f1_macro": {
"f1": 0.456560066253126
},
"eval_label_f1_micro": {
"f1": 0.5274425846632931
},
"eval_loss": 0.5813802480697632,
"eval_runtime": 65.7676,
"eval_samples_per_second": 78.124,
"eval_steps_per_second": 3.269,
"step": 24300
},
{
"epoch": 4.0,
"eval_label_accuracy": {
"accuracy": 0.5276372129233164
},
"eval_label_f1_macro": {
"f1": 0.4561525816310299
},
"eval_label_f1_micro": {
"f1": 0.5276372129233164
},
"eval_loss": 0.5812935829162598,
"eval_runtime": 66.0073,
"eval_samples_per_second": 77.84,
"eval_steps_per_second": 3.257,
"step": 24400
},
{
"epoch": 4.0,
"step": 24404,
"total_flos": 5.079375568585728e+16,
"train_loss": 0.5641878431614374,
"train_runtime": 33021.6046,
"train_samples_per_second": 11.824,
"train_steps_per_second": 0.739
}
],
"logging_steps": 500,
"max_steps": 24404,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"total_flos": 5.079375568585728e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}