{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 100, "global_step": 24404, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 2.032832145690918, "eval_runtime": 62.2478, "eval_samples_per_second": 82.541, "eval_steps_per_second": 3.454, "step": 100 }, { "epoch": 0.03, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 1.4367562532424927, "eval_runtime": 63.0927, "eval_samples_per_second": 81.436, "eval_steps_per_second": 3.408, "step": 200 }, { "epoch": 0.05, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 1.2640273571014404, "eval_runtime": 61.4349, "eval_samples_per_second": 83.633, "eval_steps_per_second": 3.5, "step": 300 }, { "epoch": 0.07, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 1.173414707183838, "eval_runtime": 64.7986, "eval_samples_per_second": 79.292, "eval_steps_per_second": 3.318, "step": 400 }, { "epoch": 0.08, "learning_rate": 4.897557777413539e-05, "loss": 2.7729, "step": 500 }, { "epoch": 0.08, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 1.1171976327896118, "eval_runtime": 62.4514, "eval_samples_per_second": 82.272, "eval_steps_per_second": 3.443, "step": 500 }, { "epoch": 0.1, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 1.0716122388839722, "eval_runtime": 62.3679, "eval_samples_per_second": 82.382, "eval_steps_per_second": 3.447, "step": 600 }, { "epoch": 0.11, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 1.0408052206039429, "eval_runtime": 62.3699, "eval_samples_per_second": 82.379, "eval_steps_per_second": 3.447, "step": 700 }, { "epoch": 0.13, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 1.0002268552780151, "eval_runtime": 60.9857, "eval_samples_per_second": 84.249, "eval_steps_per_second": 3.525, "step": 800 }, { "epoch": 0.15, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 0.9761040806770325, "eval_runtime": 63.3879, "eval_samples_per_second": 81.057, "eval_steps_per_second": 3.392, "step": 900 }, { "epoch": 0.16, "learning_rate": 4.795115554827078e-05, "loss": 1.1066, "step": 1000 }, { "epoch": 0.16, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 0.9439008831977844, "eval_runtime": 60.9997, "eval_samples_per_second": 84.23, "eval_steps_per_second": 3.525, "step": 1000 }, { "epoch": 0.18, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 0.921099841594696, "eval_runtime": 61.9171, "eval_samples_per_second": 82.982, "eval_steps_per_second": 3.472, "step": 1100 }, { "epoch": 0.2, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 0.89990234375, "eval_runtime": 66.001, "eval_samples_per_second": 77.847, "eval_steps_per_second": 3.258, "step": 1200 }, { "epoch": 0.21, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 0.879767656326294, "eval_runtime": 65.3826, "eval_samples_per_second": 78.584, "eval_steps_per_second": 3.288, "step": 1300 }, { "epoch": 0.23, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 0.8532700538635254, "eval_runtime": 66.1288, "eval_samples_per_second": 77.697, "eval_steps_per_second": 3.251, "step": 1400 }, { "epoch": 0.25, "learning_rate": 4.692673332240616e-05, "loss": 0.9358, "step": 1500 }, { "epoch": 0.25, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 0.8349987864494324, "eval_runtime": 64.1308, "eval_samples_per_second": 80.118, "eval_steps_per_second": 3.353, "step": 1500 }, { "epoch": 0.26, "eval_label_accuracy": { "accuracy": 0.4388867263526664 }, "eval_label_f1_macro": { "f1": 0.1525091302583525 }, "eval_label_f1_micro": { "f1": 0.4388867263526664 }, "eval_loss": 0.8146479725837708, "eval_runtime": 65.0686, "eval_samples_per_second": 78.963, "eval_steps_per_second": 3.304, "step": 1600 }, { "epoch": 0.28, "eval_label_accuracy": { "accuracy": 0.4534838458544181 }, "eval_label_f1_macro": { "f1": 0.21174904672848718 }, "eval_label_f1_micro": { "f1": 0.4534838458544181 }, "eval_loss": 0.7984741926193237, "eval_runtime": 64.3594, "eval_samples_per_second": 79.833, "eval_steps_per_second": 3.341, "step": 1700 }, { "epoch": 0.3, "eval_label_accuracy": { "accuracy": 0.4560140132347217 }, "eval_label_f1_macro": { "f1": 0.24178290688684612 }, "eval_label_f1_micro": { "f1": 0.4560140132347217 }, "eval_loss": 0.7780113816261292, "eval_runtime": 66.3431, "eval_samples_per_second": 77.446, "eval_steps_per_second": 3.241, "step": 1800 }, { "epoch": 0.31, "eval_label_accuracy": { "accuracy": 0.46321525885558584 }, "eval_label_f1_macro": { "f1": 0.24751285219162994 }, "eval_label_f1_micro": { "f1": 0.4632152588555859 }, "eval_loss": 0.7548134326934814, "eval_runtime": 65.2338, "eval_samples_per_second": 78.763, "eval_steps_per_second": 3.296, "step": 1900 }, { "epoch": 0.33, "learning_rate": 4.590231109654155e-05, "loss": 0.8298, "step": 2000 }, { "epoch": 0.33, "eval_label_accuracy": { "accuracy": 0.46924873491630986 }, "eval_label_f1_macro": { "f1": 0.2464816382147299 }, "eval_label_f1_micro": { "f1": 0.46924873491630986 }, "eval_loss": 0.7355437874794006, "eval_runtime": 66.2105, "eval_samples_per_second": 77.601, "eval_steps_per_second": 3.247, "step": 2000 }, { "epoch": 0.34, "eval_label_accuracy": { "accuracy": 0.43557804593226934 }, "eval_label_f1_macro": { "f1": 0.24962525654931594 }, "eval_label_f1_micro": { "f1": 0.43557804593226934 }, "eval_loss": 0.7379868626594543, "eval_runtime": 66.543, "eval_samples_per_second": 77.213, "eval_steps_per_second": 3.231, "step": 2100 }, { "epoch": 0.36, "eval_label_accuracy": { "accuracy": 0.46555079797586607 }, "eval_label_f1_macro": { "f1": 0.2992369280869709 }, "eval_label_f1_micro": { "f1": 0.46555079797586607 }, "eval_loss": 0.7291679978370667, "eval_runtime": 65.5998, "eval_samples_per_second": 78.323, "eval_steps_per_second": 3.277, "step": 2200 }, { "epoch": 0.38, "eval_label_accuracy": { "accuracy": 0.49202024133904243 }, "eval_label_f1_macro": { "f1": 0.3141049709544147 }, "eval_label_f1_micro": { "f1": 0.49202024133904243 }, "eval_loss": 0.6944931149482727, "eval_runtime": 65.8926, "eval_samples_per_second": 77.975, "eval_steps_per_second": 3.263, "step": 2300 }, { "epoch": 0.39, "eval_label_accuracy": { "accuracy": 0.502724795640327 }, "eval_label_f1_macro": { "f1": 0.3701433983343189 }, "eval_label_f1_micro": { "f1": 0.502724795640327 }, "eval_loss": 0.6872398257255554, "eval_runtime": 64.1033, "eval_samples_per_second": 80.152, "eval_steps_per_second": 3.354, "step": 2400 }, { "epoch": 0.41, "learning_rate": 4.4877888870676944e-05, "loss": 0.7424, "step": 2500 }, { "epoch": 0.41, "eval_label_accuracy": { "accuracy": 0.49883223043985986 }, "eval_label_f1_macro": { "f1": 0.37174762565152936 }, "eval_label_f1_micro": { "f1": 0.49883223043985986 }, "eval_loss": 0.6769503951072693, "eval_runtime": 65.8998, "eval_samples_per_second": 77.967, "eval_steps_per_second": 3.263, "step": 2500 }, { "epoch": 0.43, "eval_label_accuracy": { "accuracy": 0.503308680420397 }, "eval_label_f1_macro": { "f1": 0.3680796226372295 }, "eval_label_f1_micro": { "f1": 0.503308680420397 }, "eval_loss": 0.6723325848579407, "eval_runtime": 64.9642, "eval_samples_per_second": 79.09, "eval_steps_per_second": 3.31, "step": 2600 }, { "epoch": 0.44, "eval_label_accuracy": { "accuracy": 0.5101206695212145 }, "eval_label_f1_macro": { "f1": 0.37785590629881227 }, "eval_label_f1_micro": { "f1": 0.5101206695212145 }, "eval_loss": 0.6622567772865295, "eval_runtime": 67.697, "eval_samples_per_second": 75.897, "eval_steps_per_second": 3.176, "step": 2700 }, { "epoch": 0.46, "eval_label_accuracy": { "accuracy": 0.5058388478007007 }, "eval_label_f1_macro": { "f1": 0.37480207390602494 }, "eval_label_f1_micro": { "f1": 0.5058388478007007 }, "eval_loss": 0.6581109166145325, "eval_runtime": 65.1127, "eval_samples_per_second": 78.909, "eval_steps_per_second": 3.302, "step": 2800 }, { "epoch": 0.48, "eval_label_accuracy": { "accuracy": 0.5081743869209809 }, "eval_label_f1_macro": { "f1": 0.35256174805106155 }, "eval_label_f1_micro": { "f1": 0.5081743869209809 }, "eval_loss": 0.6534283757209778, "eval_runtime": 66.7769, "eval_samples_per_second": 76.943, "eval_steps_per_second": 3.22, "step": 2900 }, { "epoch": 0.49, "learning_rate": 4.385346664481233e-05, "loss": 0.6892, "step": 3000 }, { "epoch": 0.49, "eval_label_accuracy": { "accuracy": 0.4706111327364733 }, "eval_label_f1_macro": { "f1": 0.372103581749401 }, "eval_label_f1_micro": { "f1": 0.4706111327364733 }, "eval_loss": 0.6582987308502197, "eval_runtime": 65.0583, "eval_samples_per_second": 78.975, "eval_steps_per_second": 3.305, "step": 3000 }, { "epoch": 0.51, "eval_label_accuracy": { "accuracy": 0.5044764499805372 }, "eval_label_f1_macro": { "f1": 0.37031807042249487 }, "eval_label_f1_micro": { "f1": 0.5044764499805372 }, "eval_loss": 0.6426356434822083, "eval_runtime": 65.2533, "eval_samples_per_second": 78.739, "eval_steps_per_second": 3.295, "step": 3100 }, { "epoch": 0.52, "eval_label_accuracy": { "accuracy": 0.5058388478007007 }, "eval_label_f1_macro": { "f1": 0.38279117822149433 }, "eval_label_f1_micro": { "f1": 0.5058388478007007 }, "eval_loss": 0.6383734941482544, "eval_runtime": 66.5881, "eval_samples_per_second": 77.161, "eval_steps_per_second": 3.229, "step": 3200 }, { "epoch": 0.54, "eval_label_accuracy": { "accuracy": 0.49727520435967304 }, "eval_label_f1_macro": { "f1": 0.352874833319086 }, "eval_label_f1_micro": { "f1": 0.49727520435967304 }, "eval_loss": 0.6387777328491211, "eval_runtime": 65.3478, "eval_samples_per_second": 78.625, "eval_steps_per_second": 3.29, "step": 3300 }, { "epoch": 0.56, "eval_label_accuracy": { "accuracy": 0.494550408719346 }, "eval_label_f1_macro": { "f1": 0.3715541356743288 }, "eval_label_f1_micro": { "f1": 0.494550408719346 }, "eval_loss": 0.6494720578193665, "eval_runtime": 65.2886, "eval_samples_per_second": 78.697, "eval_steps_per_second": 3.293, "step": 3400 }, { "epoch": 0.57, "learning_rate": 4.282904441894772e-05, "loss": 0.6377, "step": 3500 }, { "epoch": 0.57, "eval_label_accuracy": { "accuracy": 0.506033476060724 }, "eval_label_f1_macro": { "f1": 0.38420420926210797 }, "eval_label_f1_micro": { "f1": 0.506033476060724 }, "eval_loss": 0.6222317218780518, "eval_runtime": 64.502, "eval_samples_per_second": 79.656, "eval_steps_per_second": 3.333, "step": 3500 }, { "epoch": 0.59, "eval_label_accuracy": { "accuracy": 0.5064227325807708 }, "eval_label_f1_macro": { "f1": 0.3885026107563052 }, "eval_label_f1_micro": { "f1": 0.5064227325807708 }, "eval_loss": 0.6216300129890442, "eval_runtime": 65.7001, "eval_samples_per_second": 78.204, "eval_steps_per_second": 3.272, "step": 3600 }, { "epoch": 0.61, "eval_label_accuracy": { "accuracy": 0.5105099260412612 }, "eval_label_f1_macro": { "f1": 0.35318161489739225 }, "eval_label_f1_micro": { "f1": 0.5105099260412612 }, "eval_loss": 0.6235440373420715, "eval_runtime": 65.6641, "eval_samples_per_second": 78.247, "eval_steps_per_second": 3.274, "step": 3700 }, { "epoch": 0.62, "eval_label_accuracy": { "accuracy": 0.5062281043207474 }, "eval_label_f1_macro": { "f1": 0.37238647965373683 }, "eval_label_f1_micro": { "f1": 0.5062281043207474 }, "eval_loss": 0.6188907623291016, "eval_runtime": 65.7244, "eval_samples_per_second": 78.175, "eval_steps_per_second": 3.271, "step": 3800 }, { "epoch": 0.64, "eval_label_accuracy": { "accuracy": 0.49980537173997663 }, "eval_label_f1_macro": { "f1": 0.38651240866909087 }, "eval_label_f1_micro": { "f1": 0.49980537173997663 }, "eval_loss": 0.6195840239524841, "eval_runtime": 65.3321, "eval_samples_per_second": 78.644, "eval_steps_per_second": 3.291, "step": 3900 }, { "epoch": 0.66, "learning_rate": 4.18046221930831e-05, "loss": 0.6149, "step": 4000 }, { "epoch": 0.66, "eval_label_accuracy": { "accuracy": 0.5075905021409108 }, "eval_label_f1_macro": { "f1": 0.37502556996910846 }, "eval_label_f1_micro": { "f1": 0.5075905021409108 }, "eval_loss": 0.6072418093681335, "eval_runtime": 67.6156, "eval_samples_per_second": 75.988, "eval_steps_per_second": 3.18, "step": 4000 }, { "epoch": 0.67, "eval_label_accuracy": { "accuracy": 0.5145971195017517 }, "eval_label_f1_macro": { "f1": 0.38615080427173776 }, "eval_label_f1_micro": { "f1": 0.5145971195017517 }, "eval_loss": 0.6034330725669861, "eval_runtime": 65.7798, "eval_samples_per_second": 78.109, "eval_steps_per_second": 3.268, "step": 4100 }, { "epoch": 0.69, "eval_label_accuracy": { "accuracy": 0.5165434021019852 }, "eval_label_f1_macro": { "f1": 0.3732389735757712 }, "eval_label_f1_micro": { "f1": 0.5165434021019852 }, "eval_loss": 0.6042677760124207, "eval_runtime": 65.3392, "eval_samples_per_second": 78.636, "eval_steps_per_second": 3.291, "step": 4200 }, { "epoch": 0.7, "eval_label_accuracy": { "accuracy": 0.5029194239003504 }, "eval_label_f1_macro": { "f1": 0.38007459621604844 }, "eval_label_f1_micro": { "f1": 0.5029194239003504 }, "eval_loss": 0.6064484119415283, "eval_runtime": 64.1308, "eval_samples_per_second": 80.117, "eval_steps_per_second": 3.353, "step": 4300 }, { "epoch": 0.72, "eval_label_accuracy": { "accuracy": 0.5095367847411444 }, "eval_label_f1_macro": { "f1": 0.3946719741195423 }, "eval_label_f1_micro": { "f1": 0.5095367847411444 }, "eval_loss": 0.5933734774589539, "eval_runtime": 65.6434, "eval_samples_per_second": 78.271, "eval_steps_per_second": 3.275, "step": 4400 }, { "epoch": 0.74, "learning_rate": 4.078019996721849e-05, "loss": 0.5971, "step": 4500 }, { "epoch": 0.74, "eval_label_accuracy": { "accuracy": 0.5167380303620086 }, "eval_label_f1_macro": { "f1": 0.37773378306197386 }, "eval_label_f1_micro": { "f1": 0.5167380303620086 }, "eval_loss": 0.5919108390808105, "eval_runtime": 63.9287, "eval_samples_per_second": 80.371, "eval_steps_per_second": 3.363, "step": 4500 }, { "epoch": 0.75, "eval_label_accuracy": { "accuracy": 0.5179057999221487 }, "eval_label_f1_macro": { "f1": 0.38732144074331154 }, "eval_label_f1_micro": { "f1": 0.5179057999221487 }, "eval_loss": 0.6021246910095215, "eval_runtime": 67.8323, "eval_samples_per_second": 75.746, "eval_steps_per_second": 3.17, "step": 4600 }, { "epoch": 0.77, "eval_label_accuracy": { "accuracy": 0.522966134682756 }, "eval_label_f1_macro": { "f1": 0.38578529254016203 }, "eval_label_f1_micro": { "f1": 0.522966134682756 }, "eval_loss": 0.5902037620544434, "eval_runtime": 65.8164, "eval_samples_per_second": 78.066, "eval_steps_per_second": 3.267, "step": 4700 }, { "epoch": 0.79, "eval_label_accuracy": { "accuracy": 0.5052549630206306 }, "eval_label_f1_macro": { "f1": 0.3881510615594995 }, "eval_label_f1_micro": { "f1": 0.5052549630206306 }, "eval_loss": 0.6161624789237976, "eval_runtime": 66.7087, "eval_samples_per_second": 77.021, "eval_steps_per_second": 3.223, "step": 4800 }, { "epoch": 0.8, "eval_label_accuracy": { "accuracy": 0.5223822499026859 }, "eval_label_f1_macro": { "f1": 0.3790400980761349 }, "eval_label_f1_micro": { "f1": 0.5223822499026859 }, "eval_loss": 0.5835235714912415, "eval_runtime": 67.5779, "eval_samples_per_second": 76.031, "eval_steps_per_second": 3.182, "step": 4900 }, { "epoch": 0.82, "learning_rate": 3.975577774135388e-05, "loss": 0.5745, "step": 5000 }, { "epoch": 0.82, "eval_label_accuracy": { "accuracy": 0.5241339042428961 }, "eval_label_f1_macro": { "f1": 0.38287080754019076 }, "eval_label_f1_micro": { "f1": 0.5241339042428961 }, "eval_loss": 0.5865485668182373, "eval_runtime": 67.9881, "eval_samples_per_second": 75.572, "eval_steps_per_second": 3.162, "step": 5000 }, { "epoch": 0.84, "eval_label_accuracy": { "accuracy": 0.514791747761775 }, "eval_label_f1_macro": { "f1": 0.3932066438681785 }, "eval_label_f1_micro": { "f1": 0.514791747761775 }, "eval_loss": 0.582242488861084, "eval_runtime": 65.5261, "eval_samples_per_second": 78.412, "eval_steps_per_second": 3.281, "step": 5100 }, { "epoch": 0.85, "eval_label_accuracy": { "accuracy": 0.5266640716231997 }, "eval_label_f1_macro": { "f1": 0.3948337517276894 }, "eval_label_f1_micro": { "f1": 0.5266640716231997 }, "eval_loss": 0.5757012963294983, "eval_runtime": 66.1312, "eval_samples_per_second": 77.694, "eval_steps_per_second": 3.251, "step": 5200 }, { "epoch": 0.87, "eval_label_accuracy": { "accuracy": 0.5192681977423121 }, "eval_label_f1_macro": { "f1": 0.38846154581250647 }, "eval_label_f1_micro": { "f1": 0.5192681977423121 }, "eval_loss": 0.580342173576355, "eval_runtime": 66.5531, "eval_samples_per_second": 77.202, "eval_steps_per_second": 3.231, "step": 5300 }, { "epoch": 0.89, "eval_label_accuracy": { "accuracy": 0.5278318411833398 }, "eval_label_f1_macro": { "f1": 0.3917793049565631 }, "eval_label_f1_micro": { "f1": 0.5278318411833398 }, "eval_loss": 0.5737766623497009, "eval_runtime": 68.6161, "eval_samples_per_second": 74.88, "eval_steps_per_second": 3.133, "step": 5400 }, { "epoch": 0.9, "learning_rate": 3.8731355515489266e-05, "loss": 0.5605, "step": 5500 }, { "epoch": 0.9, "eval_label_accuracy": { "accuracy": 0.5299727520435967 }, "eval_label_f1_macro": { "f1": 0.4000572874943932 }, "eval_label_f1_micro": { "f1": 0.5299727520435967 }, "eval_loss": 0.5729739665985107, "eval_runtime": 65.0276, "eval_samples_per_second": 79.013, "eval_steps_per_second": 3.306, "step": 5500 }, { "epoch": 0.92, "eval_label_accuracy": { "accuracy": 0.5266640716231997 }, "eval_label_f1_macro": { "f1": 0.3871595405264875 }, "eval_label_f1_micro": { "f1": 0.5266640716231997 }, "eval_loss": 0.5737924575805664, "eval_runtime": 67.4714, "eval_samples_per_second": 76.151, "eval_steps_per_second": 3.187, "step": 5600 }, { "epoch": 0.93, "eval_label_accuracy": { "accuracy": 0.5278318411833398 }, "eval_label_f1_macro": { "f1": 0.3912636643655603 }, "eval_label_f1_micro": { "f1": 0.5278318411833398 }, "eval_loss": 0.5748183131217957, "eval_runtime": 66.8019, "eval_samples_per_second": 76.914, "eval_steps_per_second": 3.218, "step": 5700 }, { "epoch": 0.95, "eval_label_accuracy": { "accuracy": 0.5212144803425457 }, "eval_label_f1_macro": { "f1": 0.3655456567099176 }, "eval_label_f1_micro": { "f1": 0.5212144803425457 }, "eval_loss": 0.5782448649406433, "eval_runtime": 65.8052, "eval_samples_per_second": 78.079, "eval_steps_per_second": 3.267, "step": 5800 }, { "epoch": 0.97, "eval_label_accuracy": { "accuracy": 0.5124562086414948 }, "eval_label_f1_macro": { "f1": 0.3896713760808098 }, "eval_label_f1_micro": { "f1": 0.5124562086414948 }, "eval_loss": 0.5811282396316528, "eval_runtime": 65.8997, "eval_samples_per_second": 77.967, "eval_steps_per_second": 3.263, "step": 5900 }, { "epoch": 0.98, "learning_rate": 3.7706933289624654e-05, "loss": 0.553, "step": 6000 }, { "epoch": 0.98, "eval_label_accuracy": { "accuracy": 0.5241339042428961 }, "eval_label_f1_macro": { "f1": 0.3927065112402536 }, "eval_label_f1_micro": { "f1": 0.5241339042428961 }, "eval_loss": 0.5662025809288025, "eval_runtime": 65.5268, "eval_samples_per_second": 78.411, "eval_steps_per_second": 3.281, "step": 6000 }, { "epoch": 1.0, "eval_label_accuracy": { "accuracy": 0.5161541455819385 }, "eval_label_f1_macro": { "f1": 0.3924416598143773 }, "eval_label_f1_micro": { "f1": 0.5161541455819385 }, "eval_loss": 0.5739487409591675, "eval_runtime": 65.309, "eval_samples_per_second": 78.672, "eval_steps_per_second": 3.292, "step": 6100 }, { "epoch": 1.02, "eval_label_accuracy": { "accuracy": 0.5190735694822888 }, "eval_label_f1_macro": { "f1": 0.39288818567242456 }, "eval_label_f1_micro": { "f1": 0.5190735694822888 }, "eval_loss": 0.5729976892471313, "eval_runtime": 65.1321, "eval_samples_per_second": 78.886, "eval_steps_per_second": 3.301, "step": 6200 }, { "epoch": 1.03, "eval_label_accuracy": { "accuracy": 0.5128454651615415 }, "eval_label_f1_macro": { "f1": 0.3945008643429393 }, "eval_label_f1_micro": { "f1": 0.5128454651615415 }, "eval_loss": 0.5891692042350769, "eval_runtime": 66.1687, "eval_samples_per_second": 77.65, "eval_steps_per_second": 3.249, "step": 6300 }, { "epoch": 1.05, "eval_label_accuracy": { "accuracy": 0.5179057999221487 }, "eval_label_f1_macro": { "f1": 0.40357563770843996 }, "eval_label_f1_micro": { "f1": 0.5179057999221487 }, "eval_loss": 0.5842686891555786, "eval_runtime": 64.3545, "eval_samples_per_second": 79.839, "eval_steps_per_second": 3.341, "step": 6400 }, { "epoch": 1.07, "learning_rate": 3.6682511063760036e-05, "loss": 0.5254, "step": 6500 }, { "epoch": 1.07, "eval_label_accuracy": { "accuracy": 0.5219929933826392 }, "eval_label_f1_macro": { "f1": 0.39726071722075873 }, "eval_label_f1_micro": { "f1": 0.5219929933826392 }, "eval_loss": 0.5762518048286438, "eval_runtime": 66.0739, "eval_samples_per_second": 77.761, "eval_steps_per_second": 3.254, "step": 6500 }, { "epoch": 1.08, "eval_label_accuracy": { "accuracy": 0.5184896847022188 }, "eval_label_f1_macro": { "f1": 0.3883086023616811 }, "eval_label_f1_micro": { "f1": 0.5184896847022188 }, "eval_loss": 0.5790498852729797, "eval_runtime": 65.3688, "eval_samples_per_second": 78.6, "eval_steps_per_second": 3.289, "step": 6600 }, { "epoch": 1.1, "eval_label_accuracy": { "accuracy": 0.5260801868431296 }, "eval_label_f1_macro": { "f1": 0.39154498796599124 }, "eval_label_f1_micro": { "f1": 0.5260801868431296 }, "eval_loss": 0.5696905851364136, "eval_runtime": 65.5045, "eval_samples_per_second": 78.437, "eval_steps_per_second": 3.282, "step": 6700 }, { "epoch": 1.11, "eval_label_accuracy": { "accuracy": 0.5171272868820552 }, "eval_label_f1_macro": { "f1": 0.39755561359688063 }, "eval_label_f1_micro": { "f1": 0.5171272868820552 }, "eval_loss": 0.5722245573997498, "eval_runtime": 65.5097, "eval_samples_per_second": 78.431, "eval_steps_per_second": 3.282, "step": 6800 }, { "epoch": 1.13, "eval_label_accuracy": { "accuracy": 0.5231607629427792 }, "eval_label_f1_macro": { "f1": 0.3999224263889678 }, "eval_label_f1_micro": { "f1": 0.5231607629427792 }, "eval_loss": 0.5762615203857422, "eval_runtime": 67.2536, "eval_samples_per_second": 76.397, "eval_steps_per_second": 3.197, "step": 6900 }, { "epoch": 1.15, "learning_rate": 3.565808883789543e-05, "loss": 0.5282, "step": 7000 }, { "epoch": 1.15, "eval_label_accuracy": { "accuracy": 0.5239392759828727 }, "eval_label_f1_macro": { "f1": 0.40076468903176177 }, "eval_label_f1_micro": { "f1": 0.5239392759828727 }, "eval_loss": 0.5675185918807983, "eval_runtime": 65.953, "eval_samples_per_second": 77.904, "eval_steps_per_second": 3.26, "step": 7000 }, { "epoch": 1.16, "eval_label_accuracy": { "accuracy": 0.5291942390035033 }, "eval_label_f1_macro": { "f1": 0.3977209221647911 }, "eval_label_f1_micro": { "f1": 0.5291942390035033 }, "eval_loss": 0.578584611415863, "eval_runtime": 66.5476, "eval_samples_per_second": 77.208, "eval_steps_per_second": 3.231, "step": 7100 }, { "epoch": 1.18, "eval_label_accuracy": { "accuracy": 0.5286103542234333 }, "eval_label_f1_macro": { "f1": 0.3972971376121283 }, "eval_label_f1_micro": { "f1": 0.5286103542234333 }, "eval_loss": 0.5829901099205017, "eval_runtime": 64.4078, "eval_samples_per_second": 79.773, "eval_steps_per_second": 3.338, "step": 7200 }, { "epoch": 1.2, "eval_label_accuracy": { "accuracy": 0.5332814324639937 }, "eval_label_f1_macro": { "f1": 0.4020714398207229 }, "eval_label_f1_micro": { "f1": 0.5332814324639937 }, "eval_loss": 0.5793033242225647, "eval_runtime": 66.9158, "eval_samples_per_second": 76.783, "eval_steps_per_second": 3.213, "step": 7300 }, { "epoch": 1.21, "eval_label_accuracy": { "accuracy": 0.5251070455430128 }, "eval_label_f1_macro": { "f1": 0.40305870728099247 }, "eval_label_f1_micro": { "f1": 0.5251070455430128 }, "eval_loss": 0.5735189318656921, "eval_runtime": 64.7839, "eval_samples_per_second": 79.31, "eval_steps_per_second": 3.319, "step": 7400 }, { "epoch": 1.23, "learning_rate": 3.463366661203082e-05, "loss": 0.5098, "step": 7500 }, { "epoch": 1.23, "eval_label_accuracy": { "accuracy": 0.5309458933437136 }, "eval_label_f1_macro": { "f1": 0.40404686699618486 }, "eval_label_f1_micro": { "f1": 0.5309458933437136 }, "eval_loss": 0.5664647221565247, "eval_runtime": 65.2418, "eval_samples_per_second": 78.753, "eval_steps_per_second": 3.295, "step": 7500 }, { "epoch": 1.25, "eval_label_accuracy": { "accuracy": 0.5227715064227326 }, "eval_label_f1_macro": { "f1": 0.3988976622706162 }, "eval_label_f1_micro": { "f1": 0.5227715064227326 }, "eval_loss": 0.5650814175605774, "eval_runtime": 65.9036, "eval_samples_per_second": 77.962, "eval_steps_per_second": 3.262, "step": 7600 }, { "epoch": 1.26, "eval_label_accuracy": { "accuracy": 0.5266640716231997 }, "eval_label_f1_macro": { "f1": 0.4099005096569608 }, "eval_label_f1_micro": { "f1": 0.5266640716231997 }, "eval_loss": 0.5657761096954346, "eval_runtime": 64.8206, "eval_samples_per_second": 79.265, "eval_steps_per_second": 3.317, "step": 7700 }, { "epoch": 1.28, "eval_label_accuracy": { "accuracy": 0.5278318411833398 }, "eval_label_f1_macro": { "f1": 0.40307633583050173 }, "eval_label_f1_micro": { "f1": 0.5278318411833398 }, "eval_loss": 0.5601173043251038, "eval_runtime": 66.1454, "eval_samples_per_second": 77.677, "eval_steps_per_second": 3.25, "step": 7800 }, { "epoch": 1.29, "eval_label_accuracy": { "accuracy": 0.5313351498637602 }, "eval_label_f1_macro": { "f1": 0.40205694039231865 }, "eval_label_f1_micro": { "f1": 0.5313351498637602 }, "eval_loss": 0.5627759099006653, "eval_runtime": 64.9362, "eval_samples_per_second": 79.124, "eval_steps_per_second": 3.311, "step": 7900 }, { "epoch": 1.31, "learning_rate": 3.360924438616621e-05, "loss": 0.5085, "step": 8000 }, { "epoch": 1.31, "eval_label_accuracy": { "accuracy": 0.5293888672635266 }, "eval_label_f1_macro": { "f1": 0.4100384148965112 }, "eval_label_f1_micro": { "f1": 0.5293888672635266 }, "eval_loss": 0.5625594854354858, "eval_runtime": 65.2812, "eval_samples_per_second": 78.706, "eval_steps_per_second": 3.293, "step": 8000 }, { "epoch": 1.33, "eval_label_accuracy": { "accuracy": 0.5307512650836902 }, "eval_label_f1_macro": { "f1": 0.4056927310816246 }, "eval_label_f1_micro": { "f1": 0.5307512650836902 }, "eval_loss": 0.562485933303833, "eval_runtime": 65.2185, "eval_samples_per_second": 78.781, "eval_steps_per_second": 3.297, "step": 8100 }, { "epoch": 1.34, "eval_label_accuracy": { "accuracy": 0.5354223433242506 }, "eval_label_f1_macro": { "f1": 0.4044048936522055 }, "eval_label_f1_micro": { "f1": 0.5354223433242506 }, "eval_loss": 0.5521498918533325, "eval_runtime": 65.3339, "eval_samples_per_second": 78.642, "eval_steps_per_second": 3.291, "step": 8200 }, { "epoch": 1.36, "eval_label_accuracy": { "accuracy": 0.5330868042039704 }, "eval_label_f1_macro": { "f1": 0.40764293642642907 }, "eval_label_f1_micro": { "f1": 0.5330868042039704 }, "eval_loss": 0.5571908950805664, "eval_runtime": 64.614, "eval_samples_per_second": 79.518, "eval_steps_per_second": 3.327, "step": 8300 }, { "epoch": 1.38, "eval_label_accuracy": { "accuracy": 0.5344492020241339 }, "eval_label_f1_macro": { "f1": 0.4168299787554328 }, "eval_label_f1_micro": { "f1": 0.5344492020241339 }, "eval_loss": 0.5639694333076477, "eval_runtime": 65.7823, "eval_samples_per_second": 78.106, "eval_steps_per_second": 3.268, "step": 8400 }, { "epoch": 1.39, "learning_rate": 3.2584822160301595e-05, "loss": 0.5066, "step": 8500 }, { "epoch": 1.39, "eval_label_accuracy": { "accuracy": 0.5342545737641106 }, "eval_label_f1_macro": { "f1": 0.40719472411169355 }, "eval_label_f1_micro": { "f1": 0.5342545737641106 }, "eval_loss": 0.5575660467147827, "eval_runtime": 65.0464, "eval_samples_per_second": 78.99, "eval_steps_per_second": 3.305, "step": 8500 }, { "epoch": 1.41, "eval_label_accuracy": { "accuracy": 0.5256909303230829 }, "eval_label_f1_macro": { "f1": 0.4166295902681743 }, "eval_label_f1_micro": { "f1": 0.5256909303230829 }, "eval_loss": 0.5562366843223572, "eval_runtime": 66.5062, "eval_samples_per_second": 77.256, "eval_steps_per_second": 3.233, "step": 8600 }, { "epoch": 1.43, "eval_label_accuracy": { "accuracy": 0.5206305955624757 }, "eval_label_f1_macro": { "f1": 0.4178763398872619 }, "eval_label_f1_micro": { "f1": 0.5206305955624757 }, "eval_loss": 0.5629637837409973, "eval_runtime": 65.1119, "eval_samples_per_second": 78.91, "eval_steps_per_second": 3.302, "step": 8700 }, { "epoch": 1.44, "eval_label_accuracy": { "accuracy": 0.5194628260023355 }, "eval_label_f1_macro": { "f1": 0.4240614359972321 }, "eval_label_f1_micro": { "f1": 0.5194628260023355 }, "eval_loss": 0.5645840764045715, "eval_runtime": 65.7329, "eval_samples_per_second": 78.165, "eval_steps_per_second": 3.271, "step": 8800 }, { "epoch": 1.46, "eval_label_accuracy": { "accuracy": 0.5270533281432463 }, "eval_label_f1_macro": { "f1": 0.4232811654810201 }, "eval_label_f1_micro": { "f1": 0.5270533281432463 }, "eval_loss": 0.5628061294555664, "eval_runtime": 66.012, "eval_samples_per_second": 77.834, "eval_steps_per_second": 3.257, "step": 8900 }, { "epoch": 1.48, "learning_rate": 3.156039993443698e-05, "loss": 0.5043, "step": 9000 }, { "epoch": 1.48, "eval_label_accuracy": { "accuracy": 0.5192681977423121 }, "eval_label_f1_macro": { "f1": 0.4062380982170455 }, "eval_label_f1_micro": { "f1": 0.5192681977423121 }, "eval_loss": 0.5618172883987427, "eval_runtime": 64.6788, "eval_samples_per_second": 79.439, "eval_steps_per_second": 3.324, "step": 9000 }, { "epoch": 1.49, "eval_label_accuracy": { "accuracy": 0.5210198520825223 }, "eval_label_f1_macro": { "f1": 0.39500190437884825 }, "eval_label_f1_micro": { "f1": 0.5210198520825223 }, "eval_loss": 0.5575444102287292, "eval_runtime": 66.1613, "eval_samples_per_second": 77.659, "eval_steps_per_second": 3.25, "step": 9100 }, { "epoch": 1.51, "eval_label_accuracy": { "accuracy": 0.5253016738030362 }, "eval_label_f1_macro": { "f1": 0.43826705554260276 }, "eval_label_f1_micro": { "f1": 0.5253016738030362 }, "eval_loss": 0.55515456199646, "eval_runtime": 64.9282, "eval_samples_per_second": 79.134, "eval_steps_per_second": 3.311, "step": 9200 }, { "epoch": 1.52, "eval_label_accuracy": { "accuracy": 0.5313351498637602 }, "eval_label_f1_macro": { "f1": 0.4106274369569952 }, "eval_label_f1_micro": { "f1": 0.5313351498637602 }, "eval_loss": 0.569816529750824, "eval_runtime": 66.2885, "eval_samples_per_second": 77.51, "eval_steps_per_second": 3.243, "step": 9300 }, { "epoch": 1.54, "eval_label_accuracy": { "accuracy": 0.522966134682756 }, "eval_label_f1_macro": { "f1": 0.43811143120426327 }, "eval_label_f1_micro": { "f1": 0.522966134682756 }, "eval_loss": 0.5568819642066956, "eval_runtime": 65.3832, "eval_samples_per_second": 78.583, "eval_steps_per_second": 3.288, "step": 9400 }, { "epoch": 1.56, "learning_rate": 3.0535977708572365e-05, "loss": 0.5051, "step": 9500 }, { "epoch": 1.56, "eval_label_accuracy": { "accuracy": 0.5336706889840405 }, "eval_label_f1_macro": { "f1": 0.40680316881280776 }, "eval_label_f1_micro": { "f1": 0.5336706889840405 }, "eval_loss": 0.5624731779098511, "eval_runtime": 64.3008, "eval_samples_per_second": 79.906, "eval_steps_per_second": 3.344, "step": 9500 }, { "epoch": 1.57, "eval_label_accuracy": { "accuracy": 0.5151810042818217 }, "eval_label_f1_macro": { "f1": 0.4369801679839114 }, "eval_label_f1_micro": { "f1": 0.5151810042818217 }, "eval_loss": 0.5694777369499207, "eval_runtime": 65.4516, "eval_samples_per_second": 78.501, "eval_steps_per_second": 3.285, "step": 9600 }, { "epoch": 1.59, "eval_label_accuracy": { "accuracy": 0.5268586998832231 }, "eval_label_f1_macro": { "f1": 0.4132459851165683 }, "eval_label_f1_micro": { "f1": 0.5268586998832231 }, "eval_loss": 0.56379634141922, "eval_runtime": 64.9267, "eval_samples_per_second": 79.135, "eval_steps_per_second": 3.311, "step": 9700 }, { "epoch": 1.61, "eval_label_accuracy": { "accuracy": 0.526274815103153 }, "eval_label_f1_macro": { "f1": 0.40248592881859246 }, "eval_label_f1_micro": { "f1": 0.526274815103153 }, "eval_loss": 0.5575631856918335, "eval_runtime": 67.2319, "eval_samples_per_second": 76.422, "eval_steps_per_second": 3.198, "step": 9800 }, { "epoch": 1.62, "eval_label_accuracy": { "accuracy": 0.5258855585831063 }, "eval_label_f1_macro": { "f1": 0.4075480681122011 }, "eval_label_f1_micro": { "f1": 0.5258855585831063 }, "eval_loss": 0.5513472557067871, "eval_runtime": 66.4523, "eval_samples_per_second": 77.319, "eval_steps_per_second": 3.235, "step": 9900 }, { "epoch": 1.64, "learning_rate": 2.9511555482707753e-05, "loss": 0.4944, "step": 10000 }, { "epoch": 1.64, "eval_label_accuracy": { "accuracy": 0.5188789412222655 }, "eval_label_f1_macro": { "f1": 0.41372907313005425 }, "eval_label_f1_micro": { "f1": 0.5188789412222655 }, "eval_loss": 0.5596012473106384, "eval_runtime": 65.824, "eval_samples_per_second": 78.057, "eval_steps_per_second": 3.266, "step": 10000 }, { "epoch": 1.66, "eval_label_accuracy": { "accuracy": 0.5278318411833398 }, "eval_label_f1_macro": { "f1": 0.4203493324138542 }, "eval_label_f1_micro": { "f1": 0.5278318411833398 }, "eval_loss": 0.5546495914459229, "eval_runtime": 65.4744, "eval_samples_per_second": 78.473, "eval_steps_per_second": 3.284, "step": 10100 }, { "epoch": 1.67, "eval_label_accuracy": { "accuracy": 0.535616971584274 }, "eval_label_f1_macro": { "f1": 0.4063341022177307 }, "eval_label_f1_micro": { "f1": 0.535616971584274 }, "eval_loss": 0.5501392483711243, "eval_runtime": 66.7163, "eval_samples_per_second": 77.013, "eval_steps_per_second": 3.223, "step": 10200 }, { "epoch": 1.69, "eval_label_accuracy": { "accuracy": 0.5188789412222655 }, "eval_label_f1_macro": { "f1": 0.4212355713439969 }, "eval_label_f1_micro": { "f1": 0.5188789412222655 }, "eval_loss": 0.5635867714881897, "eval_runtime": 66.1134, "eval_samples_per_second": 77.715, "eval_steps_per_second": 3.252, "step": 10300 }, { "epoch": 1.7, "eval_label_accuracy": { "accuracy": 0.5239392759828727 }, "eval_label_f1_macro": { "f1": 0.4234908453840735 }, "eval_label_f1_micro": { "f1": 0.5239392759828727 }, "eval_loss": 0.557228147983551, "eval_runtime": 64.7253, "eval_samples_per_second": 79.382, "eval_steps_per_second": 3.322, "step": 10400 }, { "epoch": 1.72, "learning_rate": 2.848713325684314e-05, "loss": 0.4908, "step": 10500 }, { "epoch": 1.72, "eval_label_accuracy": { "accuracy": 0.5251070455430128 }, "eval_label_f1_macro": { "f1": 0.42864065587210554 }, "eval_label_f1_micro": { "f1": 0.5251070455430128 }, "eval_loss": 0.5468738675117493, "eval_runtime": 66.3227, "eval_samples_per_second": 77.47, "eval_steps_per_second": 3.242, "step": 10500 }, { "epoch": 1.74, "eval_label_accuracy": { "accuracy": 0.5239392759828727 }, "eval_label_f1_macro": { "f1": 0.41795817815383196 }, "eval_label_f1_micro": { "f1": 0.5239392759828727 }, "eval_loss": 0.5483611226081848, "eval_runtime": 65.701, "eval_samples_per_second": 78.203, "eval_steps_per_second": 3.272, "step": 10600 }, { "epoch": 1.75, "eval_label_accuracy": { "accuracy": 0.5214091086025691 }, "eval_label_f1_macro": { "f1": 0.4024775749504264 }, "eval_label_f1_micro": { "f1": 0.5214091086025691 }, "eval_loss": 0.5590547323226929, "eval_runtime": 64.5911, "eval_samples_per_second": 79.547, "eval_steps_per_second": 3.329, "step": 10700 }, { "epoch": 1.77, "eval_label_accuracy": { "accuracy": 0.5344492020241339 }, "eval_label_f1_macro": { "f1": 0.4370419038340574 }, "eval_label_f1_micro": { "f1": 0.5344492020241339 }, "eval_loss": 0.5482434630393982, "eval_runtime": 65.2112, "eval_samples_per_second": 78.79, "eval_steps_per_second": 3.297, "step": 10800 }, { "epoch": 1.79, "eval_label_accuracy": { "accuracy": 0.5260801868431296 }, "eval_label_f1_macro": { "f1": 0.4312513940073556 }, "eval_label_f1_micro": { "f1": 0.5260801868431296 }, "eval_loss": 0.5548846125602722, "eval_runtime": 66.5938, "eval_samples_per_second": 77.154, "eval_steps_per_second": 3.229, "step": 10900 }, { "epoch": 1.8, "learning_rate": 2.7462711030978526e-05, "loss": 0.4956, "step": 11000 }, { "epoch": 1.8, "eval_label_accuracy": { "accuracy": 0.523550019462826 }, "eval_label_f1_macro": { "f1": 0.41662322590714945 }, "eval_label_f1_micro": { "f1": 0.523550019462826 }, "eval_loss": 0.5459250211715698, "eval_runtime": 65.0832, "eval_samples_per_second": 78.945, "eval_steps_per_second": 3.303, "step": 11000 }, { "epoch": 1.82, "eval_label_accuracy": { "accuracy": 0.5216037368625924 }, "eval_label_f1_macro": { "f1": 0.4343169858642386 }, "eval_label_f1_micro": { "f1": 0.5216037368625924 }, "eval_loss": 0.5509054660797119, "eval_runtime": 66.0147, "eval_samples_per_second": 77.831, "eval_steps_per_second": 3.257, "step": 11100 }, { "epoch": 1.84, "eval_label_accuracy": { "accuracy": 0.520241339042429 }, "eval_label_f1_macro": { "f1": 0.4111126597526932 }, "eval_label_f1_micro": { "f1": 0.520241339042429 }, "eval_loss": 0.5683469176292419, "eval_runtime": 66.7093, "eval_samples_per_second": 77.021, "eval_steps_per_second": 3.223, "step": 11200 }, { "epoch": 1.85, "eval_label_accuracy": { "accuracy": 0.5219929933826392 }, "eval_label_f1_macro": { "f1": 0.4175089884590279 }, "eval_label_f1_micro": { "f1": 0.5219929933826392 }, "eval_loss": 0.556067168712616, "eval_runtime": 65.0023, "eval_samples_per_second": 79.043, "eval_steps_per_second": 3.308, "step": 11300 }, { "epoch": 1.87, "eval_label_accuracy": { "accuracy": 0.5325029194239004 }, "eval_label_f1_macro": { "f1": 0.4221971320619319 }, "eval_label_f1_micro": { "f1": 0.5325029194239004 }, "eval_loss": 0.5479483008384705, "eval_runtime": 66.1944, "eval_samples_per_second": 77.62, "eval_steps_per_second": 3.248, "step": 11400 }, { "epoch": 1.88, "learning_rate": 2.6438288805113915e-05, "loss": 0.491, "step": 11500 }, { "epoch": 1.88, "eval_label_accuracy": { "accuracy": 0.5173219151420786 }, "eval_label_f1_macro": { "f1": 0.41131088875014477 }, "eval_label_f1_micro": { "f1": 0.5173219151420786 }, "eval_loss": 0.5584209561347961, "eval_runtime": 65.4294, "eval_samples_per_second": 78.527, "eval_steps_per_second": 3.286, "step": 11500 }, { "epoch": 1.9, "eval_label_accuracy": { "accuracy": 0.5305566368236668 }, "eval_label_f1_macro": { "f1": 0.42505103683817463 }, "eval_label_f1_micro": { "f1": 0.5305566368236668 }, "eval_loss": 0.5507027506828308, "eval_runtime": 67.0783, "eval_samples_per_second": 76.597, "eval_steps_per_second": 3.205, "step": 11600 }, { "epoch": 1.92, "eval_label_accuracy": { "accuracy": 0.5284157259634099 }, "eval_label_f1_macro": { "f1": 0.43059965616956497 }, "eval_label_f1_micro": { "f1": 0.5284157259634099 }, "eval_loss": 0.5484103560447693, "eval_runtime": 65.412, "eval_samples_per_second": 78.548, "eval_steps_per_second": 3.287, "step": 11700 }, { "epoch": 1.93, "eval_label_accuracy": { "accuracy": 0.523550019462826 }, "eval_label_f1_macro": { "f1": 0.4283240140586777 }, "eval_label_f1_micro": { "f1": 0.523550019462826 }, "eval_loss": 0.5518194437026978, "eval_runtime": 65.4314, "eval_samples_per_second": 78.525, "eval_steps_per_second": 3.286, "step": 11800 }, { "epoch": 1.95, "eval_label_accuracy": { "accuracy": 0.5249124172829895 }, "eval_label_f1_macro": { "f1": 0.4202384595807186 }, "eval_label_f1_micro": { "f1": 0.5249124172829895 }, "eval_loss": 0.5580205917358398, "eval_runtime": 64.3287, "eval_samples_per_second": 79.871, "eval_steps_per_second": 3.342, "step": 11900 }, { "epoch": 1.97, "learning_rate": 2.5413866579249306e-05, "loss": 0.4882, "step": 12000 }, { "epoch": 1.97, "eval_label_accuracy": { "accuracy": 0.5328921759439471 }, "eval_label_f1_macro": { "f1": 0.4168927822368606 }, "eval_label_f1_micro": { "f1": 0.5328921759439471 }, "eval_loss": 0.5494405031204224, "eval_runtime": 66.1472, "eval_samples_per_second": 77.675, "eval_steps_per_second": 3.25, "step": 12000 }, { "epoch": 1.98, "eval_label_accuracy": { "accuracy": 0.5334760607240171 }, "eval_label_f1_macro": { "f1": 0.4259674463534441 }, "eval_label_f1_micro": { "f1": 0.5334760607240171 }, "eval_loss": 0.54230135679245, "eval_runtime": 65.4715, "eval_samples_per_second": 78.477, "eval_steps_per_second": 3.284, "step": 12100 }, { "epoch": 2.0, "eval_label_accuracy": { "accuracy": 0.5344492020241339 }, "eval_label_f1_macro": { "f1": 0.43189802609389794 }, "eval_label_f1_micro": { "f1": 0.5344492020241339 }, "eval_loss": 0.5440633893013, "eval_runtime": 64.7322, "eval_samples_per_second": 79.373, "eval_steps_per_second": 3.321, "step": 12200 }, { "epoch": 2.02, "eval_label_accuracy": { "accuracy": 0.5274425846632931 }, "eval_label_f1_macro": { "f1": 0.43452702916498576 }, "eval_label_f1_micro": { "f1": 0.5274425846632931 }, "eval_loss": 0.5580821633338928, "eval_runtime": 66.4297, "eval_samples_per_second": 77.345, "eval_steps_per_second": 3.237, "step": 12300 }, { "epoch": 2.03, "eval_label_accuracy": { "accuracy": 0.5219929933826392 }, "eval_label_f1_macro": { "f1": 0.42868961422727936 }, "eval_label_f1_micro": { "f1": 0.5219929933826392 }, "eval_loss": 0.5607529878616333, "eval_runtime": 64.7983, "eval_samples_per_second": 79.292, "eval_steps_per_second": 3.318, "step": 12400 }, { "epoch": 2.05, "learning_rate": 2.438944435338469e-05, "loss": 0.4764, "step": 12500 }, { "epoch": 2.05, "eval_label_accuracy": { "accuracy": 0.5309458933437136 }, "eval_label_f1_macro": { "f1": 0.4501164041177361 }, "eval_label_f1_micro": { "f1": 0.5309458933437136 }, "eval_loss": 0.5601127743721008, "eval_runtime": 66.0041, "eval_samples_per_second": 77.844, "eval_steps_per_second": 3.257, "step": 12500 }, { "epoch": 2.07, "eval_label_accuracy": { "accuracy": 0.515375632541845 }, "eval_label_f1_macro": { "f1": 0.44296253953069376 }, "eval_label_f1_micro": { "f1": 0.515375632541845 }, "eval_loss": 0.5829929113388062, "eval_runtime": 65.2358, "eval_samples_per_second": 78.76, "eval_steps_per_second": 3.296, "step": 12600 }, { "epoch": 2.08, "eval_label_accuracy": { "accuracy": 0.5241339042428961 }, "eval_label_f1_macro": { "f1": 0.4557503585498297 }, "eval_label_f1_micro": { "f1": 0.5241339042428961 }, "eval_loss": 0.5616418719291687, "eval_runtime": 64.7306, "eval_samples_per_second": 79.375, "eval_steps_per_second": 3.321, "step": 12700 }, { "epoch": 2.1, "eval_label_accuracy": { "accuracy": 0.5227715064227326 }, "eval_label_f1_macro": { "f1": 0.43939555085391224 }, "eval_label_f1_micro": { "f1": 0.5227715064227326 }, "eval_loss": 0.5720220804214478, "eval_runtime": 65.4508, "eval_samples_per_second": 78.502, "eval_steps_per_second": 3.285, "step": 12800 }, { "epoch": 2.11, "eval_label_accuracy": { "accuracy": 0.5249124172829895 }, "eval_label_f1_macro": { "f1": 0.4453192886564673 }, "eval_label_f1_micro": { "f1": 0.5249124172829895 }, "eval_loss": 0.5684590935707092, "eval_runtime": 64.1447, "eval_samples_per_second": 80.1, "eval_steps_per_second": 3.352, "step": 12900 }, { "epoch": 2.13, "learning_rate": 2.336502212752008e-05, "loss": 0.459, "step": 13000 }, { "epoch": 2.13, "eval_label_accuracy": { "accuracy": 0.5184896847022188 }, "eval_label_f1_macro": { "f1": 0.45810101040373097 }, "eval_label_f1_micro": { "f1": 0.5184896847022188 }, "eval_loss": 0.5632970333099365, "eval_runtime": 65.4112, "eval_samples_per_second": 78.549, "eval_steps_per_second": 3.287, "step": 13000 }, { "epoch": 2.15, "eval_label_accuracy": { "accuracy": 0.5301673803036201 }, "eval_label_f1_macro": { "f1": 0.41550375963718733 }, "eval_label_f1_micro": { "f1": 0.5301673803036201 }, "eval_loss": 0.5601311922073364, "eval_runtime": 65.7981, "eval_samples_per_second": 78.087, "eval_steps_per_second": 3.268, "step": 13100 }, { "epoch": 2.16, "eval_label_accuracy": { "accuracy": 0.5326975476839237 }, "eval_label_f1_macro": { "f1": 0.4152775382545546 }, "eval_label_f1_micro": { "f1": 0.5326975476839237 }, "eval_loss": 0.5632578730583191, "eval_runtime": 65.4374, "eval_samples_per_second": 78.518, "eval_steps_per_second": 3.286, "step": 13200 }, { "epoch": 2.18, "eval_label_accuracy": { "accuracy": 0.5225768781627093 }, "eval_label_f1_macro": { "f1": 0.4441168152604288 }, "eval_label_f1_micro": { "f1": 0.5225768781627093 }, "eval_loss": 0.5665469765663147, "eval_runtime": 64.3132, "eval_samples_per_second": 79.89, "eval_steps_per_second": 3.343, "step": 13300 }, { "epoch": 2.2, "eval_label_accuracy": { "accuracy": 0.5225768781627093 }, "eval_label_f1_macro": { "f1": 0.4189456259580441 }, "eval_label_f1_micro": { "f1": 0.5225768781627093 }, "eval_loss": 0.5736687779426575, "eval_runtime": 65.6996, "eval_samples_per_second": 78.204, "eval_steps_per_second": 3.272, "step": 13400 }, { "epoch": 2.21, "learning_rate": 2.2340599901655468e-05, "loss": 0.4557, "step": 13500 }, { "epoch": 2.21, "eval_label_accuracy": { "accuracy": 0.5237446477228493 }, "eval_label_f1_macro": { "f1": 0.44732903233612287 }, "eval_label_f1_micro": { "f1": 0.5237446477228493 }, "eval_loss": 0.5651576519012451, "eval_runtime": 66.1178, "eval_samples_per_second": 77.71, "eval_steps_per_second": 3.252, "step": 13500 }, { "epoch": 2.23, "eval_label_accuracy": { "accuracy": 0.5268586998832231 }, "eval_label_f1_macro": { "f1": 0.4296354631479917 }, "eval_label_f1_micro": { "f1": 0.5268586998832231 }, "eval_loss": 0.566527247428894, "eval_runtime": 65.4163, "eval_samples_per_second": 78.543, "eval_steps_per_second": 3.287, "step": 13600 }, { "epoch": 2.25, "eval_label_accuracy": { "accuracy": 0.5194628260023355 }, "eval_label_f1_macro": { "f1": 0.4523218163724315 }, "eval_label_f1_micro": { "f1": 0.5194628260023355 }, "eval_loss": 0.5746815204620361, "eval_runtime": 66.5011, "eval_samples_per_second": 77.262, "eval_steps_per_second": 3.233, "step": 13700 }, { "epoch": 2.26, "eval_label_accuracy": { "accuracy": 0.5241339042428961 }, "eval_label_f1_macro": { "f1": 0.42963157337372265 }, "eval_label_f1_micro": { "f1": 0.5241339042428961 }, "eval_loss": 0.5709651112556458, "eval_runtime": 64.9469, "eval_samples_per_second": 79.111, "eval_steps_per_second": 3.31, "step": 13800 }, { "epoch": 2.28, "eval_label_accuracy": { "accuracy": 0.52899961074348 }, "eval_label_f1_macro": { "f1": 0.4465492291635319 }, "eval_label_f1_micro": { "f1": 0.52899961074348 }, "eval_loss": 0.5652072429656982, "eval_runtime": 66.3178, "eval_samples_per_second": 77.475, "eval_steps_per_second": 3.242, "step": 13900 }, { "epoch": 2.29, "learning_rate": 2.1316177675790856e-05, "loss": 0.4539, "step": 14000 }, { "epoch": 2.29, "eval_label_accuracy": { "accuracy": 0.5276372129233164 }, "eval_label_f1_macro": { "f1": 0.4330344585002601 }, "eval_label_f1_micro": { "f1": 0.5276372129233164 }, "eval_loss": 0.5652056932449341, "eval_runtime": 64.4339, "eval_samples_per_second": 79.741, "eval_steps_per_second": 3.337, "step": 14000 }, { "epoch": 2.31, "eval_label_accuracy": { "accuracy": 0.5274425846632931 }, "eval_label_f1_macro": { "f1": 0.43869182088814185 }, "eval_label_f1_micro": { "f1": 0.5274425846632931 }, "eval_loss": 0.5703505277633667, "eval_runtime": 66.2068, "eval_samples_per_second": 77.605, "eval_steps_per_second": 3.247, "step": 14100 }, { "epoch": 2.33, "eval_label_accuracy": { "accuracy": 0.5258855585831063 }, "eval_label_f1_macro": { "f1": 0.43879780170912847 }, "eval_label_f1_micro": { "f1": 0.5258855585831063 }, "eval_loss": 0.5729069113731384, "eval_runtime": 65.0139, "eval_samples_per_second": 79.029, "eval_steps_per_second": 3.307, "step": 14200 }, { "epoch": 2.34, "eval_label_accuracy": { "accuracy": 0.5192681977423121 }, "eval_label_f1_macro": { "f1": 0.4255072429603308 }, "eval_label_f1_micro": { "f1": 0.5192681977423121 }, "eval_loss": 0.5682628154754639, "eval_runtime": 65.9744, "eval_samples_per_second": 77.879, "eval_steps_per_second": 3.259, "step": 14300 }, { "epoch": 2.36, "eval_label_accuracy": { "accuracy": 0.5264694433631764 }, "eval_label_f1_macro": { "f1": 0.43346825826001506 }, "eval_label_f1_micro": { "f1": 0.5264694433631764 }, "eval_loss": 0.5605142712593079, "eval_runtime": 65.0867, "eval_samples_per_second": 78.941, "eval_steps_per_second": 3.303, "step": 14400 }, { "epoch": 2.38, "learning_rate": 2.029175544992624e-05, "loss": 0.459, "step": 14500 }, { "epoch": 2.38, "eval_label_accuracy": { "accuracy": 0.5239392759828727 }, "eval_label_f1_macro": { "f1": 0.44786889559299115 }, "eval_label_f1_micro": { "f1": 0.5239392759828727 }, "eval_loss": 0.5726383924484253, "eval_runtime": 66.1098, "eval_samples_per_second": 77.719, "eval_steps_per_second": 3.252, "step": 14500 }, { "epoch": 2.39, "eval_label_accuracy": { "accuracy": 0.5291942390035033 }, "eval_label_f1_macro": { "f1": 0.453869134213484 }, "eval_label_f1_micro": { "f1": 0.5291942390035033 }, "eval_loss": 0.566052258014679, "eval_runtime": 65.833, "eval_samples_per_second": 78.046, "eval_steps_per_second": 3.266, "step": 14600 }, { "epoch": 2.41, "eval_label_accuracy": { "accuracy": 0.5208252238224991 }, "eval_label_f1_macro": { "f1": 0.43278533897219335 }, "eval_label_f1_micro": { "f1": 0.5208252238224991 }, "eval_loss": 0.5726243853569031, "eval_runtime": 66.8914, "eval_samples_per_second": 76.811, "eval_steps_per_second": 3.214, "step": 14700 }, { "epoch": 2.43, "eval_label_accuracy": { "accuracy": 0.52958349552355 }, "eval_label_f1_macro": { "f1": 0.43445104819328556 }, "eval_label_f1_micro": { "f1": 0.52958349552355 }, "eval_loss": 0.5641396045684814, "eval_runtime": 67.2117, "eval_samples_per_second": 76.445, "eval_steps_per_second": 3.199, "step": 14800 }, { "epoch": 2.44, "eval_label_accuracy": { "accuracy": 0.5206305955624757 }, "eval_label_f1_macro": { "f1": 0.4311693806201584 }, "eval_label_f1_micro": { "f1": 0.5206305955624757 }, "eval_loss": 0.5807725787162781, "eval_runtime": 66.5101, "eval_samples_per_second": 77.251, "eval_steps_per_second": 3.233, "step": 14900 }, { "epoch": 2.46, "learning_rate": 1.926733322406163e-05, "loss": 0.4443, "step": 15000 }, { "epoch": 2.46, "eval_label_accuracy": { "accuracy": 0.5268586998832231 }, "eval_label_f1_macro": { "f1": 0.4502312972729043 }, "eval_label_f1_micro": { "f1": 0.5268586998832231 }, "eval_loss": 0.5696139931678772, "eval_runtime": 65.9075, "eval_samples_per_second": 77.958, "eval_steps_per_second": 3.262, "step": 15000 }, { "epoch": 2.48, "eval_label_accuracy": { "accuracy": 0.5282210977033865 }, "eval_label_f1_macro": { "f1": 0.45560500346839616 }, "eval_label_f1_micro": { "f1": 0.5282210977033865 }, "eval_loss": 0.5631005167961121, "eval_runtime": 65.994, "eval_samples_per_second": 77.856, "eval_steps_per_second": 3.258, "step": 15100 }, { "epoch": 2.49, "eval_label_accuracy": { "accuracy": 0.5200467107824056 }, "eval_label_f1_macro": { "f1": 0.4500935823936061 }, "eval_label_f1_micro": { "f1": 0.5200467107824056 }, "eval_loss": 0.5676321983337402, "eval_runtime": 67.7013, "eval_samples_per_second": 75.892, "eval_steps_per_second": 3.176, "step": 15200 }, { "epoch": 2.51, "eval_label_accuracy": { "accuracy": 0.5280264694433632 }, "eval_label_f1_macro": { "f1": 0.4410401539944819 }, "eval_label_f1_micro": { "f1": 0.5280264694433632 }, "eval_loss": 0.562995195388794, "eval_runtime": 65.3674, "eval_samples_per_second": 78.602, "eval_steps_per_second": 3.289, "step": 15300 }, { "epoch": 2.52, "eval_label_accuracy": { "accuracy": 0.5247177890229662 }, "eval_label_f1_macro": { "f1": 0.43980399525374536 }, "eval_label_f1_micro": { "f1": 0.5247177890229662 }, "eval_loss": 0.5720946788787842, "eval_runtime": 66.3575, "eval_samples_per_second": 77.429, "eval_steps_per_second": 3.24, "step": 15400 }, { "epoch": 2.54, "learning_rate": 1.8242910998197017e-05, "loss": 0.4542, "step": 15500 }, { "epoch": 2.54, "eval_label_accuracy": { "accuracy": 0.5260801868431296 }, "eval_label_f1_macro": { "f1": 0.4401714578365292 }, "eval_label_f1_micro": { "f1": 0.5260801868431296 }, "eval_loss": 0.5669940114021301, "eval_runtime": 65.3675, "eval_samples_per_second": 78.602, "eval_steps_per_second": 3.289, "step": 15500 }, { "epoch": 2.56, "eval_label_accuracy": { "accuracy": 0.5278318411833398 }, "eval_label_f1_macro": { "f1": 0.43352621547332887 }, "eval_label_f1_micro": { "f1": 0.5278318411833398 }, "eval_loss": 0.5640930533409119, "eval_runtime": 65.5202, "eval_samples_per_second": 78.419, "eval_steps_per_second": 3.281, "step": 15600 }, { "epoch": 2.57, "eval_label_accuracy": { "accuracy": 0.5264694433631764 }, "eval_label_f1_macro": { "f1": 0.43784802366096 }, "eval_label_f1_micro": { "f1": 0.5264694433631764 }, "eval_loss": 0.5642226934432983, "eval_runtime": 64.9095, "eval_samples_per_second": 79.156, "eval_steps_per_second": 3.312, "step": 15700 }, { "epoch": 2.59, "eval_label_accuracy": { "accuracy": 0.5315297781237835 }, "eval_label_f1_macro": { "f1": 0.4357287377608377 }, "eval_label_f1_micro": { "f1": 0.5315297781237835 }, "eval_loss": 0.5649986863136292, "eval_runtime": 65.4545, "eval_samples_per_second": 78.497, "eval_steps_per_second": 3.285, "step": 15800 }, { "epoch": 2.61, "eval_label_accuracy": { "accuracy": 0.5253016738030362 }, "eval_label_f1_macro": { "f1": 0.45053423727995034 }, "eval_label_f1_micro": { "f1": 0.5253016738030362 }, "eval_loss": 0.5697636008262634, "eval_runtime": 66.7839, "eval_samples_per_second": 76.935, "eval_steps_per_second": 3.219, "step": 15900 }, { "epoch": 2.62, "learning_rate": 1.7218488772332405e-05, "loss": 0.451, "step": 16000 }, { "epoch": 2.62, "eval_label_accuracy": { "accuracy": 0.532308291163877 }, "eval_label_f1_macro": { "f1": 0.43318094070825297 }, "eval_label_f1_micro": { "f1": 0.532308291163877 }, "eval_loss": 0.5685227513313293, "eval_runtime": 65.281, "eval_samples_per_second": 78.706, "eval_steps_per_second": 3.293, "step": 16000 }, { "epoch": 2.64, "eval_label_accuracy": { "accuracy": 0.5346438302841573 }, "eval_label_f1_macro": { "f1": 0.44085402773126725 }, "eval_label_f1_micro": { "f1": 0.5346438302841573 }, "eval_loss": 0.5624856352806091, "eval_runtime": 65.795, "eval_samples_per_second": 78.091, "eval_steps_per_second": 3.268, "step": 16100 }, { "epoch": 2.66, "eval_label_accuracy": { "accuracy": 0.5165434021019852 }, "eval_label_f1_macro": { "f1": 0.4669507042700165 }, "eval_label_f1_micro": { "f1": 0.5165434021019852 }, "eval_loss": 0.5654544830322266, "eval_runtime": 64.6982, "eval_samples_per_second": 79.415, "eval_steps_per_second": 3.323, "step": 16200 }, { "epoch": 2.67, "eval_label_accuracy": { "accuracy": 0.5321136629038536 }, "eval_label_f1_macro": { "f1": 0.43233762699260603 }, "eval_label_f1_micro": { "f1": 0.5321136629038536 }, "eval_loss": 0.5595969557762146, "eval_runtime": 65.444, "eval_samples_per_second": 78.51, "eval_steps_per_second": 3.285, "step": 16300 }, { "epoch": 2.69, "eval_label_accuracy": { "accuracy": 0.5138186064616582 }, "eval_label_f1_macro": { "f1": 0.44383270041686657 }, "eval_label_f1_micro": { "f1": 0.5138186064616582 }, "eval_loss": 0.5683060884475708, "eval_runtime": 64.9405, "eval_samples_per_second": 79.119, "eval_steps_per_second": 3.311, "step": 16400 }, { "epoch": 2.7, "learning_rate": 1.6194066546467794e-05, "loss": 0.4526, "step": 16500 }, { "epoch": 2.7, "eval_label_accuracy": { "accuracy": 0.5217983651226158 }, "eval_label_f1_macro": { "f1": 0.44043820237213194 }, "eval_label_f1_micro": { "f1": 0.5217983651226158 }, "eval_loss": 0.5779083967208862, "eval_runtime": 66.9943, "eval_samples_per_second": 76.693, "eval_steps_per_second": 3.209, "step": 16500 }, { "epoch": 2.72, "eval_label_accuracy": { "accuracy": 0.5284157259634099 }, "eval_label_f1_macro": { "f1": 0.4418742015293833 }, "eval_label_f1_micro": { "f1": 0.5284157259634099 }, "eval_loss": 0.5650832056999207, "eval_runtime": 65.1199, "eval_samples_per_second": 78.901, "eval_steps_per_second": 3.302, "step": 16600 }, { "epoch": 2.74, "eval_label_accuracy": { "accuracy": 0.5313351498637602 }, "eval_label_f1_macro": { "f1": 0.4478480562913326 }, "eval_label_f1_micro": { "f1": 0.5313351498637602 }, "eval_loss": 0.5622133016586304, "eval_runtime": 65.2808, "eval_samples_per_second": 78.706, "eval_steps_per_second": 3.293, "step": 16700 }, { "epoch": 2.75, "eval_label_accuracy": { "accuracy": 0.5338653172440638 }, "eval_label_f1_macro": { "f1": 0.44680022458953056 }, "eval_label_f1_micro": { "f1": 0.5338653172440638 }, "eval_loss": 0.5588511228561401, "eval_runtime": 65.8894, "eval_samples_per_second": 77.979, "eval_steps_per_second": 3.263, "step": 16800 }, { "epoch": 2.77, "eval_label_accuracy": { "accuracy": 0.5305566368236668 }, "eval_label_f1_macro": { "f1": 0.4609241185016919 }, "eval_label_f1_micro": { "f1": 0.5305566368236668 }, "eval_loss": 0.5588091015815735, "eval_runtime": 66.5578, "eval_samples_per_second": 77.196, "eval_steps_per_second": 3.23, "step": 16900 }, { "epoch": 2.79, "learning_rate": 1.516964432060318e-05, "loss": 0.4489, "step": 17000 }, { "epoch": 2.79, "eval_label_accuracy": { "accuracy": 0.5328921759439471 }, "eval_label_f1_macro": { "f1": 0.4330697830804149 }, "eval_label_f1_micro": { "f1": 0.5328921759439471 }, "eval_loss": 0.5675057768821716, "eval_runtime": 64.7586, "eval_samples_per_second": 79.341, "eval_steps_per_second": 3.32, "step": 17000 }, { "epoch": 2.8, "eval_label_accuracy": { "accuracy": 0.5223822499026859 }, "eval_label_f1_macro": { "f1": 0.45172351795604215 }, "eval_label_f1_micro": { "f1": 0.5223822499026859 }, "eval_loss": 0.5728496313095093, "eval_runtime": 65.6773, "eval_samples_per_second": 78.231, "eval_steps_per_second": 3.274, "step": 17100 }, { "epoch": 2.82, "eval_label_accuracy": { "accuracy": 0.5336706889840405 }, "eval_label_f1_macro": { "f1": 0.4601728614564582 }, "eval_label_f1_micro": { "f1": 0.5336706889840405 }, "eval_loss": 0.5668734908103943, "eval_runtime": 65.4933, "eval_samples_per_second": 78.451, "eval_steps_per_second": 3.283, "step": 17200 }, { "epoch": 2.84, "eval_label_accuracy": { "accuracy": 0.5186843129622422 }, "eval_label_f1_macro": { "f1": 0.424912443220669 }, "eval_label_f1_micro": { "f1": 0.5186843129622422 }, "eval_loss": 0.5832124948501587, "eval_runtime": 66.0442, "eval_samples_per_second": 77.796, "eval_steps_per_second": 3.255, "step": 17300 }, { "epoch": 2.85, "eval_label_accuracy": { "accuracy": 0.5214091086025691 }, "eval_label_f1_macro": { "f1": 0.4560620803234329 }, "eval_label_f1_micro": { "f1": 0.5214091086025691 }, "eval_loss": 0.570393443107605, "eval_runtime": 66.6267, "eval_samples_per_second": 77.116, "eval_steps_per_second": 3.227, "step": 17400 }, { "epoch": 2.87, "learning_rate": 1.414522209473857e-05, "loss": 0.4453, "step": 17500 }, { "epoch": 2.87, "eval_label_accuracy": { "accuracy": 0.5251070455430128 }, "eval_label_f1_macro": { "f1": 0.4507490275513904 }, "eval_label_f1_micro": { "f1": 0.5251070455430128 }, "eval_loss": 0.5699160695075989, "eval_runtime": 65.6551, "eval_samples_per_second": 78.257, "eval_steps_per_second": 3.275, "step": 17500 }, { "epoch": 2.88, "eval_label_accuracy": { "accuracy": 0.5216037368625924 }, "eval_label_f1_macro": { "f1": 0.4390779841776778 }, "eval_label_f1_micro": { "f1": 0.5216037368625924 }, "eval_loss": 0.5724750757217407, "eval_runtime": 65.0855, "eval_samples_per_second": 78.942, "eval_steps_per_second": 3.303, "step": 17600 }, { "epoch": 2.9, "eval_label_accuracy": { "accuracy": 0.523550019462826 }, "eval_label_f1_macro": { "f1": 0.4348358288094709 }, "eval_label_f1_micro": { "f1": 0.523550019462826 }, "eval_loss": 0.5676676034927368, "eval_runtime": 65.9415, "eval_samples_per_second": 77.918, "eval_steps_per_second": 3.26, "step": 17700 }, { "epoch": 2.92, "eval_label_accuracy": { "accuracy": 0.52958349552355 }, "eval_label_f1_macro": { "f1": 0.4412835917222539 }, "eval_label_f1_micro": { "f1": 0.52958349552355 }, "eval_loss": 0.5665853023529053, "eval_runtime": 64.4843, "eval_samples_per_second": 79.678, "eval_steps_per_second": 3.334, "step": 17800 }, { "epoch": 2.93, "eval_label_accuracy": { "accuracy": 0.5305566368236668 }, "eval_label_f1_macro": { "f1": 0.4501012112552316 }, "eval_label_f1_micro": { "f1": 0.5305566368236668 }, "eval_loss": 0.5652035474777222, "eval_runtime": 65.7573, "eval_samples_per_second": 78.136, "eval_steps_per_second": 3.27, "step": 17900 }, { "epoch": 2.95, "learning_rate": 1.3120799868873956e-05, "loss": 0.4419, "step": 18000 }, { "epoch": 2.95, "eval_label_accuracy": { "accuracy": 0.5305566368236668 }, "eval_label_f1_macro": { "f1": 0.4349986006914519 }, "eval_label_f1_micro": { "f1": 0.5305566368236668 }, "eval_loss": 0.5659220814704895, "eval_runtime": 64.2429, "eval_samples_per_second": 79.978, "eval_steps_per_second": 3.347, "step": 18000 }, { "epoch": 2.97, "eval_label_accuracy": { "accuracy": 0.5303620085636435 }, "eval_label_f1_macro": { "f1": 0.4502960442686812 }, "eval_label_f1_micro": { "f1": 0.5303620085636435 }, "eval_loss": 0.5597621202468872, "eval_runtime": 65.7239, "eval_samples_per_second": 78.176, "eval_steps_per_second": 3.271, "step": 18100 }, { "epoch": 2.98, "eval_label_accuracy": { "accuracy": 0.5348384585441807 }, "eval_label_f1_macro": { "f1": 0.4535827940910189 }, "eval_label_f1_micro": { "f1": 0.5348384585441807 }, "eval_loss": 0.5543330311775208, "eval_runtime": 66.2534, "eval_samples_per_second": 77.551, "eval_steps_per_second": 3.245, "step": 18200 }, { "epoch": 3.0, "eval_label_accuracy": { "accuracy": 0.522966134682756 }, "eval_label_f1_macro": { "f1": 0.4555867940063284 }, "eval_label_f1_micro": { "f1": 0.522966134682756 }, "eval_loss": 0.5589267015457153, "eval_runtime": 65.6099, "eval_samples_per_second": 78.311, "eval_steps_per_second": 3.277, "step": 18300 }, { "epoch": 3.02, "eval_label_accuracy": { "accuracy": 0.5270533281432463 }, "eval_label_f1_macro": { "f1": 0.4552882900544635 }, "eval_label_f1_micro": { "f1": 0.5270533281432463 }, "eval_loss": 0.5679383873939514, "eval_runtime": 64.2272, "eval_samples_per_second": 79.997, "eval_steps_per_second": 3.347, "step": 18400 }, { "epoch": 3.03, "learning_rate": 1.2096377643009343e-05, "loss": 0.4465, "step": 18500 }, { "epoch": 3.03, "eval_label_accuracy": { "accuracy": 0.5284157259634099 }, "eval_label_f1_macro": { "f1": 0.44697535042783487 }, "eval_label_f1_micro": { "f1": 0.5284157259634099 }, "eval_loss": 0.5660849213600159, "eval_runtime": 65.7257, "eval_samples_per_second": 78.173, "eval_steps_per_second": 3.271, "step": 18500 }, { "epoch": 3.05, "eval_label_accuracy": { "accuracy": 0.526274815103153 }, "eval_label_f1_macro": { "f1": 0.45912227016242724 }, "eval_label_f1_micro": { "f1": 0.526274815103153 }, "eval_loss": 0.5786118507385254, "eval_runtime": 65.7514, "eval_samples_per_second": 78.143, "eval_steps_per_second": 3.27, "step": 18600 }, { "epoch": 3.07, "eval_label_accuracy": { "accuracy": 0.5233553912028026 }, "eval_label_f1_macro": { "f1": 0.4514329459423202 }, "eval_label_f1_micro": { "f1": 0.5233553912028026 }, "eval_loss": 0.5863333344459534, "eval_runtime": 65.1687, "eval_samples_per_second": 78.842, "eval_steps_per_second": 3.299, "step": 18700 }, { "epoch": 3.08, "eval_label_accuracy": { "accuracy": 0.5268586998832231 }, "eval_label_f1_macro": { "f1": 0.4562521468447126 }, "eval_label_f1_micro": { "f1": 0.5268586998832231 }, "eval_loss": 0.5805368423461914, "eval_runtime": 66.5366, "eval_samples_per_second": 77.221, "eval_steps_per_second": 3.231, "step": 18800 }, { "epoch": 3.1, "eval_label_accuracy": { "accuracy": 0.5325029194239004 }, "eval_label_f1_macro": { "f1": 0.45956515670564957 }, "eval_label_f1_micro": { "f1": 0.5325029194239004 }, "eval_loss": 0.5740306377410889, "eval_runtime": 64.6401, "eval_samples_per_second": 79.486, "eval_steps_per_second": 3.326, "step": 18900 }, { "epoch": 3.11, "learning_rate": 1.107195541714473e-05, "loss": 0.4239, "step": 19000 }, { "epoch": 3.11, "eval_label_accuracy": { "accuracy": 0.5282210977033865 }, "eval_label_f1_macro": { "f1": 0.45915033212245415 }, "eval_label_f1_micro": { "f1": 0.5282210977033865 }, "eval_loss": 0.5756375789642334, "eval_runtime": 68.6905, "eval_samples_per_second": 74.799, "eval_steps_per_second": 3.13, "step": 19000 }, { "epoch": 3.13, "eval_label_accuracy": { "accuracy": 0.5245231607629428 }, "eval_label_f1_macro": { "f1": 0.4461544721075864 }, "eval_label_f1_micro": { "f1": 0.5245231607629428 }, "eval_loss": 0.582427442073822, "eval_runtime": 66.4502, "eval_samples_per_second": 77.321, "eval_steps_per_second": 3.236, "step": 19100 }, { "epoch": 3.15, "eval_label_accuracy": { "accuracy": 0.5216037368625924 }, "eval_label_f1_macro": { "f1": 0.4586287753791167 }, "eval_label_f1_micro": { "f1": 0.5216037368625924 }, "eval_loss": 0.5848153233528137, "eval_runtime": 66.8407, "eval_samples_per_second": 76.869, "eval_steps_per_second": 3.217, "step": 19200 }, { "epoch": 3.16, "eval_label_accuracy": { "accuracy": 0.5243285325029194 }, "eval_label_f1_macro": { "f1": 0.44899785496859856 }, "eval_label_f1_micro": { "f1": 0.5243285325029194 }, "eval_loss": 0.5789693593978882, "eval_runtime": 64.6355, "eval_samples_per_second": 79.492, "eval_steps_per_second": 3.326, "step": 19300 }, { "epoch": 3.18, "eval_label_accuracy": { "accuracy": 0.5307512650836902 }, "eval_label_f1_macro": { "f1": 0.45259529720591607 }, "eval_label_f1_micro": { "f1": 0.5307512650836902 }, "eval_loss": 0.5765287280082703, "eval_runtime": 65.9843, "eval_samples_per_second": 77.867, "eval_steps_per_second": 3.258, "step": 19400 }, { "epoch": 3.2, "learning_rate": 1.004753319128012e-05, "loss": 0.4262, "step": 19500 }, { "epoch": 3.2, "eval_label_accuracy": { "accuracy": 0.5237446477228493 }, "eval_label_f1_macro": { "f1": 0.4596084810630643 }, "eval_label_f1_micro": { "f1": 0.5237446477228493 }, "eval_loss": 0.5859604477882385, "eval_runtime": 64.6675, "eval_samples_per_second": 79.453, "eval_steps_per_second": 3.325, "step": 19500 }, { "epoch": 3.21, "eval_label_accuracy": { "accuracy": 0.5227715064227326 }, "eval_label_f1_macro": { "f1": 0.46148605295712625 }, "eval_label_f1_micro": { "f1": 0.5227715064227326 }, "eval_loss": 0.5810762047767639, "eval_runtime": 64.6184, "eval_samples_per_second": 79.513, "eval_steps_per_second": 3.327, "step": 19600 }, { "epoch": 3.23, "eval_label_accuracy": { "accuracy": 0.5245231607629428 }, "eval_label_f1_macro": { "f1": 0.45539993700221526 }, "eval_label_f1_micro": { "f1": 0.5245231607629428 }, "eval_loss": 0.5829537510871887, "eval_runtime": 65.5451, "eval_samples_per_second": 78.389, "eval_steps_per_second": 3.28, "step": 19700 }, { "epoch": 3.25, "eval_label_accuracy": { "accuracy": 0.5256909303230829 }, "eval_label_f1_macro": { "f1": 0.4484784466484443 }, "eval_label_f1_micro": { "f1": 0.5256909303230829 }, "eval_loss": 0.5800737738609314, "eval_runtime": 65.0145, "eval_samples_per_second": 79.028, "eval_steps_per_second": 3.307, "step": 19800 }, { "epoch": 3.26, "eval_label_accuracy": { "accuracy": 0.5266640716231997 }, "eval_label_f1_macro": { "f1": 0.45752780514666963 }, "eval_label_f1_micro": { "f1": 0.5266640716231997 }, "eval_loss": 0.577346682548523, "eval_runtime": 66.4954, "eval_samples_per_second": 77.268, "eval_steps_per_second": 3.233, "step": 19900 }, { "epoch": 3.28, "learning_rate": 9.023110965415506e-06, "loss": 0.4264, "step": 20000 }, { "epoch": 3.28, "eval_label_accuracy": { "accuracy": 0.5303620085636435 }, "eval_label_f1_macro": { "f1": 0.45750719642409793 }, "eval_label_f1_micro": { "f1": 0.5303620085636435 }, "eval_loss": 0.5826326608657837, "eval_runtime": 66.7123, "eval_samples_per_second": 77.017, "eval_steps_per_second": 3.223, "step": 20000 }, { "epoch": 3.29, "eval_label_accuracy": { "accuracy": 0.5276372129233164 }, "eval_label_f1_macro": { "f1": 0.4493522486957304 }, "eval_label_f1_micro": { "f1": 0.5276372129233164 }, "eval_loss": 0.5857098698616028, "eval_runtime": 68.7242, "eval_samples_per_second": 74.763, "eval_steps_per_second": 3.128, "step": 20100 }, { "epoch": 3.31, "eval_label_accuracy": { "accuracy": 0.5293888672635266 }, "eval_label_f1_macro": { "f1": 0.4554299299346091 }, "eval_label_f1_micro": { "f1": 0.5293888672635266 }, "eval_loss": 0.582249104976654, "eval_runtime": 66.5201, "eval_samples_per_second": 77.24, "eval_steps_per_second": 3.232, "step": 20200 }, { "epoch": 3.33, "eval_label_accuracy": { "accuracy": 0.5249124172829895 }, "eval_label_f1_macro": { "f1": 0.4618692825892151 }, "eval_label_f1_micro": { "f1": 0.5249124172829895 }, "eval_loss": 0.5820056200027466, "eval_runtime": 67.0656, "eval_samples_per_second": 76.612, "eval_steps_per_second": 3.206, "step": 20300 }, { "epoch": 3.34, "eval_label_accuracy": { "accuracy": 0.5278318411833398 }, "eval_label_f1_macro": { "f1": 0.46204183748606525 }, "eval_label_f1_micro": { "f1": 0.5278318411833398 }, "eval_loss": 0.5718916654586792, "eval_runtime": 66.261, "eval_samples_per_second": 77.542, "eval_steps_per_second": 3.245, "step": 20400 }, { "epoch": 3.36, "learning_rate": 7.998688739550894e-06, "loss": 0.4208, "step": 20500 }, { "epoch": 3.36, "eval_label_accuracy": { "accuracy": 0.5241339042428961 }, "eval_label_f1_macro": { "f1": 0.45599482745719977 }, "eval_label_f1_micro": { "f1": 0.5241339042428961 }, "eval_loss": 0.5849379301071167, "eval_runtime": 66.4821, "eval_samples_per_second": 77.284, "eval_steps_per_second": 3.234, "step": 20500 }, { "epoch": 3.38, "eval_label_accuracy": { "accuracy": 0.5237446477228493 }, "eval_label_f1_macro": { "f1": 0.4555559080742948 }, "eval_label_f1_micro": { "f1": 0.5237446477228493 }, "eval_loss": 0.5843728184700012, "eval_runtime": 64.7647, "eval_samples_per_second": 79.333, "eval_steps_per_second": 3.32, "step": 20600 }, { "epoch": 3.39, "eval_label_accuracy": { "accuracy": 0.5243285325029194 }, "eval_label_f1_macro": { "f1": 0.45771013752228895 }, "eval_label_f1_micro": { "f1": 0.5243285325029194 }, "eval_loss": 0.5808063745498657, "eval_runtime": 65.6904, "eval_samples_per_second": 78.215, "eval_steps_per_second": 3.273, "step": 20700 }, { "epoch": 3.41, "eval_label_accuracy": { "accuracy": 0.5260801868431296 }, "eval_label_f1_macro": { "f1": 0.4631605032772811 }, "eval_label_f1_micro": { "f1": 0.5260801868431296 }, "eval_loss": 0.5888592004776001, "eval_runtime": 64.5737, "eval_samples_per_second": 79.568, "eval_steps_per_second": 3.33, "step": 20800 }, { "epoch": 3.43, "eval_label_accuracy": { "accuracy": 0.5266640716231997 }, "eval_label_f1_macro": { "f1": 0.44799630877745866 }, "eval_label_f1_micro": { "f1": 0.5266640716231997 }, "eval_loss": 0.588912308216095, "eval_runtime": 66.1245, "eval_samples_per_second": 77.702, "eval_steps_per_second": 3.251, "step": 20900 }, { "epoch": 3.44, "learning_rate": 6.974266513686281e-06, "loss": 0.4228, "step": 21000 }, { "epoch": 3.44, "eval_label_accuracy": { "accuracy": 0.5264694433631764 }, "eval_label_f1_macro": { "f1": 0.46067694317610275 }, "eval_label_f1_micro": { "f1": 0.5264694433631764 }, "eval_loss": 0.5871345400810242, "eval_runtime": 66.1371, "eval_samples_per_second": 77.687, "eval_steps_per_second": 3.251, "step": 21000 }, { "epoch": 3.46, "eval_label_accuracy": { "accuracy": 0.5299727520435967 }, "eval_label_f1_macro": { "f1": 0.45858742112683054 }, "eval_label_f1_micro": { "f1": 0.5299727520435967 }, "eval_loss": 0.5821002721786499, "eval_runtime": 64.6289, "eval_samples_per_second": 79.5, "eval_steps_per_second": 3.327, "step": 21100 }, { "epoch": 3.47, "eval_label_accuracy": { "accuracy": 0.5276372129233164 }, "eval_label_f1_macro": { "f1": 0.44706482899459093 }, "eval_label_f1_micro": { "f1": 0.5276372129233164 }, "eval_loss": 0.5892929434776306, "eval_runtime": 65.9065, "eval_samples_per_second": 77.959, "eval_steps_per_second": 3.262, "step": 21200 }, { "epoch": 3.49, "eval_label_accuracy": { "accuracy": 0.5278318411833398 }, "eval_label_f1_macro": { "f1": 0.4531243508380409 }, "eval_label_f1_micro": { "f1": 0.5278318411833398 }, "eval_loss": 0.5871041417121887, "eval_runtime": 64.9299, "eval_samples_per_second": 79.131, "eval_steps_per_second": 3.311, "step": 21300 }, { "epoch": 3.51, "eval_label_accuracy": { "accuracy": 0.5291942390035033 }, "eval_label_f1_macro": { "f1": 0.4544963505218613 }, "eval_label_f1_micro": { "f1": 0.5291942390035033 }, "eval_loss": 0.581632137298584, "eval_runtime": 66.5734, "eval_samples_per_second": 77.178, "eval_steps_per_second": 3.23, "step": 21400 }, { "epoch": 3.52, "learning_rate": 5.949844287821669e-06, "loss": 0.4146, "step": 21500 }, { "epoch": 3.52, "eval_label_accuracy": { "accuracy": 0.526274815103153 }, "eval_label_f1_macro": { "f1": 0.46000446310082077 }, "eval_label_f1_micro": { "f1": 0.526274815103153 }, "eval_loss": 0.5873268842697144, "eval_runtime": 66.2757, "eval_samples_per_second": 77.525, "eval_steps_per_second": 3.244, "step": 21500 }, { "epoch": 3.54, "eval_label_accuracy": { "accuracy": 0.5293888672635266 }, "eval_label_f1_macro": { "f1": 0.4466423631159167 }, "eval_label_f1_micro": { "f1": 0.5293888672635266 }, "eval_loss": 0.5862780809402466, "eval_runtime": 65.0169, "eval_samples_per_second": 79.026, "eval_steps_per_second": 3.307, "step": 21600 }, { "epoch": 3.56, "eval_label_accuracy": { "accuracy": 0.5274425846632931 }, "eval_label_f1_macro": { "f1": 0.4553183199766613 }, "eval_label_f1_micro": { "f1": 0.5274425846632931 }, "eval_loss": 0.5865354537963867, "eval_runtime": 66.6836, "eval_samples_per_second": 77.05, "eval_steps_per_second": 3.224, "step": 21700 }, { "epoch": 3.57, "eval_label_accuracy": { "accuracy": 0.526274815103153 }, "eval_label_f1_macro": { "f1": 0.4584618360909396 }, "eval_label_f1_micro": { "f1": 0.526274815103153 }, "eval_loss": 0.5862194299697876, "eval_runtime": 65.2945, "eval_samples_per_second": 78.69, "eval_steps_per_second": 3.293, "step": 21800 }, { "epoch": 3.59, "eval_label_accuracy": { "accuracy": 0.5253016738030362 }, "eval_label_f1_macro": { "f1": 0.4556988660685002 }, "eval_label_f1_micro": { "f1": 0.5253016738030362 }, "eval_loss": 0.5815604329109192, "eval_runtime": 66.8701, "eval_samples_per_second": 76.836, "eval_steps_per_second": 3.215, "step": 21900 }, { "epoch": 3.61, "learning_rate": 4.925422061957056e-06, "loss": 0.4179, "step": 22000 }, { "epoch": 3.61, "eval_label_accuracy": { "accuracy": 0.52899961074348 }, "eval_label_f1_macro": { "f1": 0.4502397089365151 }, "eval_label_f1_micro": { "f1": 0.52899961074348 }, "eval_loss": 0.5844454169273376, "eval_runtime": 64.4681, "eval_samples_per_second": 79.698, "eval_steps_per_second": 3.335, "step": 22000 }, { "epoch": 3.62, "eval_label_accuracy": { "accuracy": 0.5293888672635266 }, "eval_label_f1_macro": { "f1": 0.45103878090924954 }, "eval_label_f1_micro": { "f1": 0.5293888672635266 }, "eval_loss": 0.5837833881378174, "eval_runtime": 64.5629, "eval_samples_per_second": 79.581, "eval_steps_per_second": 3.33, "step": 22100 }, { "epoch": 3.64, "eval_label_accuracy": { "accuracy": 0.5264694433631764 }, "eval_label_f1_macro": { "f1": 0.4605815902638898 }, "eval_label_f1_micro": { "f1": 0.5264694433631764 }, "eval_loss": 0.5830559730529785, "eval_runtime": 65.8104, "eval_samples_per_second": 78.073, "eval_steps_per_second": 3.267, "step": 22200 }, { "epoch": 3.66, "eval_label_accuracy": { "accuracy": 0.52958349552355 }, "eval_label_f1_macro": { "f1": 0.45065967020468145 }, "eval_label_f1_micro": { "f1": 0.52958349552355 }, "eval_loss": 0.5816081166267395, "eval_runtime": 65.6528, "eval_samples_per_second": 78.26, "eval_steps_per_second": 3.275, "step": 22300 }, { "epoch": 3.67, "eval_label_accuracy": { "accuracy": 0.5293888672635266 }, "eval_label_f1_macro": { "f1": 0.45309891731169716 }, "eval_label_f1_micro": { "f1": 0.5293888672635266 }, "eval_loss": 0.5825657844543457, "eval_runtime": 65.9972, "eval_samples_per_second": 77.852, "eval_steps_per_second": 3.258, "step": 22400 }, { "epoch": 3.69, "learning_rate": 3.900999836092445e-06, "loss": 0.4259, "step": 22500 }, { "epoch": 3.69, "eval_label_accuracy": { "accuracy": 0.5303620085636435 }, "eval_label_f1_macro": { "f1": 0.4533884487481862 }, "eval_label_f1_micro": { "f1": 0.5303620085636435 }, "eval_loss": 0.5796229839324951, "eval_runtime": 64.285, "eval_samples_per_second": 79.925, "eval_steps_per_second": 3.344, "step": 22500 }, { "epoch": 3.7, "eval_label_accuracy": { "accuracy": 0.5305566368236668 }, "eval_label_f1_macro": { "f1": 0.4515523588029331 }, "eval_label_f1_micro": { "f1": 0.5305566368236668 }, "eval_loss": 0.5833083391189575, "eval_runtime": 65.2335, "eval_samples_per_second": 78.763, "eval_steps_per_second": 3.296, "step": 22600 }, { "epoch": 3.72, "eval_label_accuracy": { "accuracy": 0.5260801868431296 }, "eval_label_f1_macro": { "f1": 0.4593256681180433 }, "eval_label_f1_micro": { "f1": 0.5260801868431296 }, "eval_loss": 0.578184187412262, "eval_runtime": 66.0227, "eval_samples_per_second": 77.822, "eval_steps_per_second": 3.256, "step": 22700 }, { "epoch": 3.74, "eval_label_accuracy": { "accuracy": 0.5268586998832231 }, "eval_label_f1_macro": { "f1": 0.4603837774249291 }, "eval_label_f1_micro": { "f1": 0.5268586998832231 }, "eval_loss": 0.5812641382217407, "eval_runtime": 65.1177, "eval_samples_per_second": 78.903, "eval_steps_per_second": 3.302, "step": 22800 }, { "epoch": 3.75, "eval_label_accuracy": { "accuracy": 0.5319190346438303 }, "eval_label_f1_macro": { "f1": 0.45997737731014476 }, "eval_label_f1_micro": { "f1": 0.5319190346438303 }, "eval_loss": 0.5770907402038574, "eval_runtime": 65.3117, "eval_samples_per_second": 78.669, "eval_steps_per_second": 3.292, "step": 22900 }, { "epoch": 3.77, "learning_rate": 2.8765776102278315e-06, "loss": 0.4274, "step": 23000 }, { "epoch": 3.77, "eval_label_accuracy": { "accuracy": 0.5247177890229662 }, "eval_label_f1_macro": { "f1": 0.45907215035166693 }, "eval_label_f1_micro": { "f1": 0.5247177890229662 }, "eval_loss": 0.5833043456077576, "eval_runtime": 65.7031, "eval_samples_per_second": 78.2, "eval_steps_per_second": 3.272, "step": 23000 }, { "epoch": 3.79, "eval_label_accuracy": { "accuracy": 0.526274815103153 }, "eval_label_f1_macro": { "f1": 0.4541259526342647 }, "eval_label_f1_micro": { "f1": 0.526274815103153 }, "eval_loss": 0.5847244262695312, "eval_runtime": 65.8232, "eval_samples_per_second": 78.058, "eval_steps_per_second": 3.266, "step": 23100 }, { "epoch": 3.8, "eval_label_accuracy": { "accuracy": 0.5266640716231997 }, "eval_label_f1_macro": { "f1": 0.451974998095672 }, "eval_label_f1_micro": { "f1": 0.5266640716231997 }, "eval_loss": 0.5817099809646606, "eval_runtime": 64.7736, "eval_samples_per_second": 79.322, "eval_steps_per_second": 3.319, "step": 23200 }, { "epoch": 3.82, "eval_label_accuracy": { "accuracy": 0.5288049824834566 }, "eval_label_f1_macro": { "f1": 0.46024834620814586 }, "eval_label_f1_micro": { "f1": 0.5288049824834566 }, "eval_loss": 0.5799014568328857, "eval_runtime": 65.921, "eval_samples_per_second": 77.942, "eval_steps_per_second": 3.261, "step": 23300 }, { "epoch": 3.84, "eval_label_accuracy": { "accuracy": 0.5276372129233164 }, "eval_label_f1_macro": { "f1": 0.4581019267498323 }, "eval_label_f1_micro": { "f1": 0.5276372129233164 }, "eval_loss": 0.5810489058494568, "eval_runtime": 65.3442, "eval_samples_per_second": 78.63, "eval_steps_per_second": 3.29, "step": 23400 }, { "epoch": 3.85, "learning_rate": 1.8521553843632193e-06, "loss": 0.4289, "step": 23500 }, { "epoch": 3.85, "eval_label_accuracy": { "accuracy": 0.5272479564032697 }, "eval_label_f1_macro": { "f1": 0.4541508997429681 }, "eval_label_f1_micro": { "f1": 0.5272479564032697 }, "eval_loss": 0.5799488425254822, "eval_runtime": 65.7671, "eval_samples_per_second": 78.124, "eval_steps_per_second": 3.269, "step": 23500 }, { "epoch": 3.87, "eval_label_accuracy": { "accuracy": 0.5274425846632931 }, "eval_label_f1_macro": { "f1": 0.4574637003228606 }, "eval_label_f1_micro": { "f1": 0.5274425846632931 }, "eval_loss": 0.5793448686599731, "eval_runtime": 65.0934, "eval_samples_per_second": 78.933, "eval_steps_per_second": 3.303, "step": 23600 }, { "epoch": 3.88, "eval_label_accuracy": { "accuracy": 0.5276372129233164 }, "eval_label_f1_macro": { "f1": 0.45638935381995627 }, "eval_label_f1_micro": { "f1": 0.5276372129233164 }, "eval_loss": 0.5791721940040588, "eval_runtime": 64.3159, "eval_samples_per_second": 79.887, "eval_steps_per_second": 3.343, "step": 23700 }, { "epoch": 3.9, "eval_label_accuracy": { "accuracy": 0.5268586998832231 }, "eval_label_f1_macro": { "f1": 0.45473822883034853 }, "eval_label_f1_micro": { "f1": 0.5268586998832231 }, "eval_loss": 0.5804610848426819, "eval_runtime": 66.2146, "eval_samples_per_second": 77.596, "eval_steps_per_second": 3.247, "step": 23800 }, { "epoch": 3.92, "eval_label_accuracy": { "accuracy": 0.5270533281432463 }, "eval_label_f1_macro": { "f1": 0.45519902851058563 }, "eval_label_f1_micro": { "f1": 0.5270533281432463 }, "eval_loss": 0.5822835564613342, "eval_runtime": 65.6357, "eval_samples_per_second": 78.281, "eval_steps_per_second": 3.276, "step": 23900 }, { "epoch": 3.93, "learning_rate": 8.277331584986067e-07, "loss": 0.4174, "step": 24000 }, { "epoch": 3.93, "eval_label_accuracy": { "accuracy": 0.5278318411833398 }, "eval_label_f1_macro": { "f1": 0.4562786712755399 }, "eval_label_f1_micro": { "f1": 0.5278318411833398 }, "eval_loss": 0.5810161828994751, "eval_runtime": 65.4574, "eval_samples_per_second": 78.494, "eval_steps_per_second": 3.285, "step": 24000 }, { "epoch": 3.95, "eval_label_accuracy": { "accuracy": 0.5280264694433632 }, "eval_label_f1_macro": { "f1": 0.45638550607459255 }, "eval_label_f1_micro": { "f1": 0.5280264694433632 }, "eval_loss": 0.5815566182136536, "eval_runtime": 64.709, "eval_samples_per_second": 79.402, "eval_steps_per_second": 3.323, "step": 24100 }, { "epoch": 3.97, "eval_label_accuracy": { "accuracy": 0.5276372129233164 }, "eval_label_f1_macro": { "f1": 0.456888857673732 }, "eval_label_f1_micro": { "f1": 0.5276372129233164 }, "eval_loss": 0.5815967321395874, "eval_runtime": 65.864, "eval_samples_per_second": 78.009, "eval_steps_per_second": 3.264, "step": 24200 }, { "epoch": 3.98, "eval_label_accuracy": { "accuracy": 0.5274425846632931 }, "eval_label_f1_macro": { "f1": 0.456560066253126 }, "eval_label_f1_micro": { "f1": 0.5274425846632931 }, "eval_loss": 0.5813802480697632, "eval_runtime": 65.7676, "eval_samples_per_second": 78.124, "eval_steps_per_second": 3.269, "step": 24300 }, { "epoch": 4.0, "eval_label_accuracy": { "accuracy": 0.5276372129233164 }, "eval_label_f1_macro": { "f1": 0.4561525816310299 }, "eval_label_f1_micro": { "f1": 0.5276372129233164 }, "eval_loss": 0.5812935829162598, "eval_runtime": 66.0073, "eval_samples_per_second": 77.84, "eval_steps_per_second": 3.257, "step": 24400 }, { "epoch": 4.0, "step": 24404, "total_flos": 5.079375568585728e+16, "train_loss": 0.5641878431614374, "train_runtime": 33021.6046, "train_samples_per_second": 11.824, "train_steps_per_second": 0.739 } ], "logging_steps": 500, "max_steps": 24404, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 5.079375568585728e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }