diff --git "a/checkpoint-214/trainer_state.json" "b/checkpoint-214/trainer_state.json" --- "a/checkpoint-214/trainer_state.json" +++ "b/checkpoint-214/trainer_state.json" @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 1.2081863091037404, + "epoch": 1.202247191011236, "eval_steps": 27, "global_step": 214, "is_hyper_param_search": false, @@ -9,1534 +9,1534 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.016937191249117856, - "grad_norm": 34.22002029418945, - "learning_rate": 6.818181818181818e-07, - "loss": 7.2372, + "epoch": 0.016853932584269662, + "grad_norm": 2.9885776042938232, + "learning_rate": 1.4814814814814815e-06, + "loss": 0.6012, "step": 3 }, { - "epoch": 0.03387438249823571, - "grad_norm": 21.76839828491211, - "learning_rate": 1.3636363636363636e-06, - "loss": 6.855, + "epoch": 0.033707865168539325, + "grad_norm": 3.184929132461548, + "learning_rate": 2.962962962962963e-06, + "loss": 0.7573, "step": 6 }, { - "epoch": 0.05081157374735357, - "grad_norm": 21.260774612426758, - "learning_rate": 2.0454545454545457e-06, - "loss": 7.4707, + "epoch": 0.05056179775280899, + "grad_norm": 3.256159782409668, + "learning_rate": 4.444444444444444e-06, + "loss": 0.9212, "step": 9 }, { - "epoch": 0.06774876499647142, - "grad_norm": 16.885921478271484, - "learning_rate": 2.7272727272727272e-06, - "loss": 7.0187, + "epoch": 0.06741573033707865, + "grad_norm": 2.833339214324951, + "learning_rate": 5.925925925925926e-06, + "loss": 0.6117, "step": 12 }, { - "epoch": 0.08468595624558928, - "grad_norm": 19.509899139404297, - "learning_rate": 3.409090909090909e-06, - "loss": 6.6756, + "epoch": 0.08426966292134831, + "grad_norm": 3.08292818069458, + "learning_rate": 7.4074074074074075e-06, + "loss": 0.8545, "step": 15 }, { - "epoch": 0.10162314749470713, - "grad_norm": 7.9427289962768555, - "learning_rate": 4.0909090909090915e-06, - "loss": 6.0155, + "epoch": 0.10112359550561797, + "grad_norm": 2.317431688308716, + "learning_rate": 8.888888888888888e-06, + "loss": 0.6515, "step": 18 }, { - "epoch": 0.11856033874382499, - "grad_norm": 7.325345039367676, - "learning_rate": 4.772727272727273e-06, - "loss": 6.1644, + "epoch": 0.11797752808988764, + "grad_norm": 2.9611644744873047, + "learning_rate": 1.037037037037037e-05, + "loss": 0.7159, "step": 21 }, { - "epoch": 0.13549752999294284, - "grad_norm": 7.544689655303955, - "learning_rate": 5.4545454545454545e-06, - "loss": 6.2158, + "epoch": 0.1348314606741573, + "grad_norm": 2.698537826538086, + "learning_rate": 1.1851851851851852e-05, + "loss": 0.7019, "step": 24 }, { - "epoch": 0.1524347212420607, - "grad_norm": 5.141758918762207, - "learning_rate": 6.136363636363637e-06, - "loss": 6.1369, + "epoch": 0.15168539325842698, + "grad_norm": 2.222154378890991, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.4411, "step": 27 }, { - "epoch": 0.1524347212420607, + "epoch": 0.15168539325842698, "eval_NLI-v2_cosine_accuracy": 1.0, - "eval_NLI-v2_dot_accuracy": 0.109375, + "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, - "eval_VitaminC_cosine_accuracy": 0.5546875, - "eval_VitaminC_cosine_accuracy_threshold": 0.9544724822044373, - "eval_VitaminC_cosine_ap": 0.5356492030729136, - "eval_VitaminC_cosine_f1": 0.6542553191489362, - "eval_VitaminC_cosine_f1_threshold": 0.7148199081420898, - "eval_VitaminC_cosine_precision": 0.48616600790513836, + "eval_VitaminC_cosine_accuracy": 0.578125, + "eval_VitaminC_cosine_accuracy_threshold": 0.7817381620407104, + "eval_VitaminC_cosine_ap": 0.5507972943944112, + "eval_VitaminC_cosine_f1": 0.6595174262734584, + "eval_VitaminC_cosine_f1_threshold": 0.28573715686798096, + "eval_VitaminC_cosine_precision": 0.492, "eval_VitaminC_cosine_recall": 1.0, - "eval_VitaminC_dot_accuracy": 0.55078125, - "eval_VitaminC_dot_accuracy_threshold": 414.4264831542969, - "eval_VitaminC_dot_ap": 0.5108219546857565, - "eval_VitaminC_dot_f1": 0.6507936507936508, - "eval_VitaminC_dot_f1_threshold": 271.6522521972656, - "eval_VitaminC_dot_precision": 0.4823529411764706, + "eval_VitaminC_dot_accuracy": 0.5703125, + "eval_VitaminC_dot_accuracy_threshold": 316.7283020019531, + "eval_VitaminC_dot_ap": 0.5511866185449577, + "eval_VitaminC_dot_f1": 0.6577540106951871, + "eval_VitaminC_dot_f1_threshold": 106.75863647460938, + "eval_VitaminC_dot_precision": 0.4900398406374502, "eval_VitaminC_dot_recall": 1.0, - "eval_VitaminC_euclidean_accuracy": 0.55078125, - "eval_VitaminC_euclidean_accuracy_threshold": 6.519885063171387, - "eval_VitaminC_euclidean_ap": 0.5226419655984281, - "eval_VitaminC_euclidean_f1": 0.6505376344086021, - "eval_VitaminC_euclidean_f1_threshold": 15.194067001342773, - "eval_VitaminC_euclidean_precision": 0.4859437751004016, - "eval_VitaminC_euclidean_recall": 0.983739837398374, - "eval_VitaminC_manhattan_accuracy": 0.546875, - "eval_VitaminC_manhattan_accuracy_threshold": 149.20114135742188, - "eval_VitaminC_manhattan_ap": 0.5237451656134715, - "eval_VitaminC_manhattan_f1": 0.6542553191489362, - "eval_VitaminC_manhattan_f1_threshold": 259.007080078125, - "eval_VitaminC_manhattan_precision": 0.48616600790513836, + "eval_VitaminC_euclidean_accuracy": 0.578125, + "eval_VitaminC_euclidean_accuracy_threshold": 13.298419952392578, + "eval_VitaminC_euclidean_ap": 0.5476323986807207, + "eval_VitaminC_euclidean_f1": 0.6577540106951871, + "eval_VitaminC_euclidean_f1_threshold": 23.83933448791504, + "eval_VitaminC_euclidean_precision": 0.4900398406374502, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.578125, + "eval_VitaminC_manhattan_accuracy_threshold": 279.69085693359375, + "eval_VitaminC_manhattan_ap": 0.5412538781107805, + "eval_VitaminC_manhattan_f1": 0.6577540106951871, + "eval_VitaminC_manhattan_f1_threshold": 499.8836364746094, + "eval_VitaminC_manhattan_precision": 0.4900398406374502, "eval_VitaminC_manhattan_recall": 1.0, - "eval_VitaminC_max_accuracy": 0.5546875, - "eval_VitaminC_max_accuracy_threshold": 414.4264831542969, - "eval_VitaminC_max_ap": 0.5356492030729136, - "eval_VitaminC_max_f1": 0.6542553191489362, - "eval_VitaminC_max_f1_threshold": 271.6522521972656, - "eval_VitaminC_max_precision": 0.48616600790513836, + "eval_VitaminC_max_accuracy": 0.578125, + "eval_VitaminC_max_accuracy_threshold": 316.7283020019531, + "eval_VitaminC_max_ap": 0.5511866185449577, + "eval_VitaminC_max_f1": 0.6595174262734584, + "eval_VitaminC_max_f1_threshold": 499.8836364746094, + "eval_VitaminC_max_precision": 0.492, "eval_VitaminC_max_recall": 1.0, - "eval_sequential_score": 0.5356492030729136, - "eval_sts-test_pearson_cosine": 0.056062031998983373, - "eval_sts-test_pearson_dot": 0.2979259445723872, - "eval_sts-test_pearson_euclidean": 0.0498319208592713, - "eval_sts-test_pearson_manhattan": 0.07381429239121526, - "eval_sts-test_pearson_max": 0.2979259445723872, - "eval_sts-test_spearman_cosine": 0.1066788491614481, - "eval_sts-test_spearman_dot": 0.315952670306405, - "eval_sts-test_spearman_euclidean": 0.07303394554435191, - "eval_sts-test_spearman_manhattan": 0.09039525717692232, - "eval_sts-test_spearman_max": 0.315952670306405, - "eval_vitaminc-pairs_loss": 2.698580741882324, - "eval_vitaminc-pairs_runtime": 1.4747, - "eval_vitaminc-pairs_samples_per_second": 73.236, - "eval_vitaminc-pairs_steps_per_second": 1.356, + "eval_sequential_score": 0.5511866185449577, + "eval_sts-test_pearson_cosine": 0.8488243436029344, + "eval_sts-test_pearson_dot": 0.8480167969551653, + "eval_sts-test_pearson_euclidean": 0.8800283985117625, + "eval_sts-test_pearson_manhattan": 0.880588311422627, + "eval_sts-test_pearson_max": 0.880588311422627, + "eval_sts-test_spearman_cosine": 0.8905659331642088, + "eval_sts-test_spearman_dot": 0.8692084657204004, + "eval_sts-test_spearman_euclidean": 0.8809566840232712, + "eval_sts-test_spearman_manhattan": 0.883434007028195, + "eval_sts-test_spearman_max": 0.8905659331642088, + "eval_vitaminc-pairs_loss": 2.465860366821289, + "eval_vitaminc-pairs_runtime": 1.4615, + "eval_vitaminc-pairs_samples_per_second": 73.899, + "eval_vitaminc-pairs_steps_per_second": 1.368, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_negation-triplets_loss": 5.142906665802002, - "eval_negation-triplets_runtime": 0.2993, - "eval_negation-triplets_samples_per_second": 213.865, - "eval_negation-triplets_steps_per_second": 3.342, + "epoch": 0.15168539325842698, + "eval_negation-triplets_loss": 1.7310789823532104, + "eval_negation-triplets_runtime": 0.3009, + "eval_negation-triplets_samples_per_second": 212.692, + "eval_negation-triplets_steps_per_second": 3.323, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_scitail-pairs-pos_loss": 1.9216958284378052, - "eval_scitail-pairs-pos_runtime": 0.3834, - "eval_scitail-pairs-pos_samples_per_second": 140.842, - "eval_scitail-pairs-pos_steps_per_second": 2.608, + "epoch": 0.15168539325842698, + "eval_scitail-pairs-pos_loss": 0.1150394082069397, + "eval_scitail-pairs-pos_runtime": 0.3739, + "eval_scitail-pairs-pos_samples_per_second": 144.431, + "eval_scitail-pairs-pos_steps_per_second": 2.675, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_xsum-pairs_loss": 6.073049545288086, - "eval_xsum-pairs_runtime": 3.1587, - "eval_xsum-pairs_samples_per_second": 40.523, - "eval_xsum-pairs_steps_per_second": 0.633, + "epoch": 0.15168539325842698, + "eval_xsum-pairs_loss": 0.11168850213289261, + "eval_xsum-pairs_runtime": 3.1697, + "eval_xsum-pairs_samples_per_second": 40.382, + "eval_xsum-pairs_steps_per_second": 0.631, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_sciq_pairs_loss": 0.3449864387512207, - "eval_sciq_pairs_runtime": 3.3747, - "eval_sciq_pairs_samples_per_second": 37.93, - "eval_sciq_pairs_steps_per_second": 0.593, + "epoch": 0.15168539325842698, + "eval_sciq_pairs_loss": 0.03450964391231537, + "eval_sciq_pairs_runtime": 3.3283, + "eval_sciq_pairs_samples_per_second": 38.459, + "eval_sciq_pairs_steps_per_second": 0.601, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_qasc_pairs_loss": 3.2267842292785645, - "eval_qasc_pairs_runtime": 0.6576, - "eval_qasc_pairs_samples_per_second": 194.646, - "eval_qasc_pairs_steps_per_second": 3.041, + "epoch": 0.15168539325842698, + "eval_qasc_pairs_loss": 0.11095743626356125, + "eval_qasc_pairs_runtime": 0.6261, + "eval_qasc_pairs_samples_per_second": 204.45, + "eval_qasc_pairs_steps_per_second": 3.195, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_openbookqa_pairs_loss": 4.405983924865723, - "eval_openbookqa_pairs_runtime": 0.6107, - "eval_openbookqa_pairs_samples_per_second": 209.594, - "eval_openbookqa_pairs_steps_per_second": 3.275, + "epoch": 0.15168539325842698, + "eval_openbookqa_pairs_loss": 0.7092063426971436, + "eval_openbookqa_pairs_runtime": 0.5866, + "eval_openbookqa_pairs_samples_per_second": 218.19, + "eval_openbookqa_pairs_steps_per_second": 3.409, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_msmarco_pairs_loss": 6.937691688537598, - "eval_msmarco_pairs_runtime": 1.3091, - "eval_msmarco_pairs_samples_per_second": 97.779, - "eval_msmarco_pairs_steps_per_second": 1.528, + "epoch": 0.15168539325842698, + "eval_msmarco_pairs_loss": 0.3955218493938446, + "eval_msmarco_pairs_runtime": 1.2942, + "eval_msmarco_pairs_samples_per_second": 98.902, + "eval_msmarco_pairs_steps_per_second": 1.545, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_nq_pairs_loss": 6.794108867645264, - "eval_nq_pairs_runtime": 2.3968, - "eval_nq_pairs_samples_per_second": 53.404, - "eval_nq_pairs_steps_per_second": 0.834, + "epoch": 0.15168539325842698, + "eval_nq_pairs_loss": 0.42051073908805847, + "eval_nq_pairs_runtime": 2.3875, + "eval_nq_pairs_samples_per_second": 53.612, + "eval_nq_pairs_steps_per_second": 0.838, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_trivia_pairs_loss": 6.3355631828308105, - "eval_trivia_pairs_runtime": 4.4974, - "eval_trivia_pairs_samples_per_second": 28.461, - "eval_trivia_pairs_steps_per_second": 0.445, + "epoch": 0.15168539325842698, + "eval_trivia_pairs_loss": 0.93178790807724, + "eval_trivia_pairs_runtime": 4.4363, + "eval_trivia_pairs_samples_per_second": 28.853, + "eval_trivia_pairs_steps_per_second": 0.451, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_gooaq_pairs_loss": 6.405998706817627, - "eval_gooaq_pairs_runtime": 0.8745, - "eval_gooaq_pairs_samples_per_second": 146.37, - "eval_gooaq_pairs_steps_per_second": 2.287, + "epoch": 0.15168539325842698, + "eval_gooaq_pairs_loss": 0.6505913138389587, + "eval_gooaq_pairs_runtime": 0.8826, + "eval_gooaq_pairs_samples_per_second": 145.027, + "eval_gooaq_pairs_steps_per_second": 2.266, "step": 27 }, { - "epoch": 0.1524347212420607, - "eval_paws-pos_loss": 2.2308223247528076, - "eval_paws-pos_runtime": 0.6998, - "eval_paws-pos_samples_per_second": 182.908, - "eval_paws-pos_steps_per_second": 2.858, + "epoch": 0.15168539325842698, + "eval_paws-pos_loss": 0.024931101128458977, + "eval_paws-pos_runtime": 0.6852, + "eval_paws-pos_samples_per_second": 186.805, + "eval_paws-pos_steps_per_second": 2.919, "step": 27 }, { - "epoch": 0.16937191249117856, - "grad_norm": 5.885251522064209, - "learning_rate": 6.818181818181818e-06, - "loss": 5.7653, + "epoch": 0.16853932584269662, + "grad_norm": 2.826900005340576, + "learning_rate": 1.4814814814814815e-05, + "loss": 0.5125, "step": 30 }, { - "epoch": 0.1863091037402964, - "grad_norm": 7.357480049133301, - "learning_rate": 7.500000000000001e-06, - "loss": 6.1259, + "epoch": 0.1853932584269663, + "grad_norm": 2.9938910007476807, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.6885, "step": 33 }, { - "epoch": 0.20324629498941427, - "grad_norm": 7.321795463562012, - "learning_rate": 8.181818181818183e-06, - "loss": 5.7539, + "epoch": 0.20224719101123595, + "grad_norm": 3.3046395778656006, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.6435, "step": 36 }, { - "epoch": 0.22018348623853212, - "grad_norm": 4.239792346954346, - "learning_rate": 8.863636363636365e-06, - "loss": 6.0131, + "epoch": 0.21910112359550563, + "grad_norm": 2.4184651374816895, + "learning_rate": 1.925925925925926e-05, + "loss": 0.753, "step": 39 }, { - "epoch": 0.23712067748764998, - "grad_norm": 3.9554407596588135, - "learning_rate": 9.545454545454547e-06, - "loss": 6.0074, + "epoch": 0.23595505617977527, + "grad_norm": 2.9905433654785156, + "learning_rate": 2.074074074074074e-05, + "loss": 0.7427, "step": 42 }, { - "epoch": 0.25405786873676783, - "grad_norm": 4.406026840209961, - "learning_rate": 1.0227272727272729e-05, - "loss": 5.7125, + "epoch": 0.25280898876404495, + "grad_norm": 2.745820999145508, + "learning_rate": 2.2222222222222227e-05, + "loss": 0.5083, "step": 45 }, { - "epoch": 0.2709950599858857, - "grad_norm": 7.235893249511719, - "learning_rate": 1.0909090909090909e-05, - "loss": 5.5634, + "epoch": 0.2696629213483146, + "grad_norm": 2.6370577812194824, + "learning_rate": 2.3703703703703703e-05, + "loss": 0.7454, "step": 48 }, { - "epoch": 0.28793225123500354, - "grad_norm": 5.330288410186768, - "learning_rate": 1.1590909090909093e-05, - "loss": 5.2924, + "epoch": 0.28651685393258425, + "grad_norm": 3.044011116027832, + "learning_rate": 2.5185185185185187e-05, + "loss": 0.8356, "step": 51 }, { - "epoch": 0.3048694424841214, - "grad_norm": 7.216403961181641, - "learning_rate": 1.2272727272727274e-05, - "loss": 5.2286, + "epoch": 0.30337078651685395, + "grad_norm": 3.718804121017456, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.8864, "step": 54 }, { - "epoch": 0.3048694424841214, + "epoch": 0.30337078651685395, "eval_NLI-v2_cosine_accuracy": 1.0, - "eval_NLI-v2_dot_accuracy": 0.046875, + "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, - "eval_VitaminC_cosine_accuracy": 0.54296875, - "eval_VitaminC_cosine_accuracy_threshold": 0.9328227043151855, - "eval_VitaminC_cosine_ap": 0.5212059026196154, - "eval_VitaminC_cosine_f1": 0.6576819407008085, - "eval_VitaminC_cosine_f1_threshold": 0.7373804450035095, - "eval_VitaminC_cosine_precision": 0.49193548387096775, - "eval_VitaminC_cosine_recall": 0.991869918699187, - "eval_VitaminC_dot_accuracy": 0.55078125, - "eval_VitaminC_dot_accuracy_threshold": 418.2774658203125, - "eval_VitaminC_dot_ap": 0.5160594099493883, - "eval_VitaminC_dot_f1": 0.6521739130434782, - "eval_VitaminC_dot_f1_threshold": 291.5081481933594, - "eval_VitaminC_dot_precision": 0.4897959183673469, - "eval_VitaminC_dot_recall": 0.975609756097561, - "eval_VitaminC_euclidean_accuracy": 0.5390625, - "eval_VitaminC_euclidean_accuracy_threshold": 8.120429039001465, - "eval_VitaminC_euclidean_ap": 0.5224837623095228, - "eval_VitaminC_euclidean_f1": 0.6576819407008085, - "eval_VitaminC_euclidean_f1_threshold": 14.879999160766602, - "eval_VitaminC_euclidean_precision": 0.49193548387096775, - "eval_VitaminC_euclidean_recall": 0.991869918699187, - "eval_VitaminC_manhattan_accuracy": 0.53515625, - "eval_VitaminC_manhattan_accuracy_threshold": 137.40658569335938, - "eval_VitaminC_manhattan_ap": 0.5186382518671783, - "eval_VitaminC_manhattan_f1": 0.6576086956521738, - "eval_VitaminC_manhattan_f1_threshold": 263.32452392578125, - "eval_VitaminC_manhattan_precision": 0.49387755102040815, - "eval_VitaminC_manhattan_recall": 0.983739837398374, - "eval_VitaminC_max_accuracy": 0.55078125, - "eval_VitaminC_max_accuracy_threshold": 418.2774658203125, - "eval_VitaminC_max_ap": 0.5224837623095228, - "eval_VitaminC_max_f1": 0.6576819407008085, - "eval_VitaminC_max_f1_threshold": 291.5081481933594, - "eval_VitaminC_max_precision": 0.49387755102040815, - "eval_VitaminC_max_recall": 0.991869918699187, - "eval_sequential_score": 0.5224837623095228, - "eval_sts-test_pearson_cosine": 0.14377091128453176, - "eval_sts-test_pearson_dot": 0.24728387094758872, - "eval_sts-test_pearson_euclidean": 0.14604155960515372, - "eval_sts-test_pearson_manhattan": 0.1446467532231986, - "eval_sts-test_pearson_max": 0.24728387094758872, - "eval_sts-test_spearman_cosine": 0.1968510434344728, - "eval_sts-test_spearman_dot": 0.29467218283745694, - "eval_sts-test_spearman_euclidean": 0.17218164683969664, - "eval_sts-test_spearman_manhattan": 0.17741843340856742, - "eval_sts-test_spearman_max": 0.29467218283745694, - "eval_vitaminc-pairs_loss": 2.664700746536255, - "eval_vitaminc-pairs_runtime": 1.4487, - "eval_vitaminc-pairs_samples_per_second": 74.551, - "eval_vitaminc-pairs_steps_per_second": 1.381, + "eval_VitaminC_cosine_accuracy": 0.57421875, + "eval_VitaminC_cosine_accuracy_threshold": 0.7991844415664673, + "eval_VitaminC_cosine_ap": 0.5485498837322925, + "eval_VitaminC_cosine_f1": 0.6595174262734584, + "eval_VitaminC_cosine_f1_threshold": 0.3160865008831024, + "eval_VitaminC_cosine_precision": 0.492, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.578125, + "eval_VitaminC_dot_accuracy_threshold": 327.0416564941406, + "eval_VitaminC_dot_ap": 0.54993134882601, + "eval_VitaminC_dot_f1": 0.6595174262734584, + "eval_VitaminC_dot_f1_threshold": 117.44181060791016, + "eval_VitaminC_dot_precision": 0.492, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.57421875, + "eval_VitaminC_euclidean_accuracy_threshold": 13.019258499145508, + "eval_VitaminC_euclidean_ap": 0.5435066540334542, + "eval_VitaminC_euclidean_f1": 0.6577540106951871, + "eval_VitaminC_euclidean_f1_threshold": 23.688644409179688, + "eval_VitaminC_euclidean_precision": 0.4900398406374502, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.57421875, + "eval_VitaminC_manhattan_accuracy_threshold": 283.876220703125, + "eval_VitaminC_manhattan_ap": 0.5416615397828658, + "eval_VitaminC_manhattan_f1": 0.6559999999999999, + "eval_VitaminC_manhattan_f1_threshold": 514.0216064453125, + "eval_VitaminC_manhattan_precision": 0.4880952380952381, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.578125, + "eval_VitaminC_max_accuracy_threshold": 327.0416564941406, + "eval_VitaminC_max_ap": 0.54993134882601, + "eval_VitaminC_max_f1": 0.6595174262734584, + "eval_VitaminC_max_f1_threshold": 514.0216064453125, + "eval_VitaminC_max_precision": 0.492, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.54993134882601, + "eval_sts-test_pearson_cosine": 0.8452615878553369, + "eval_sts-test_pearson_dot": 0.8404858620687519, + "eval_sts-test_pearson_euclidean": 0.8780527810910925, + "eval_sts-test_pearson_manhattan": 0.878916157345712, + "eval_sts-test_pearson_max": 0.878916157345712, + "eval_sts-test_spearman_cosine": 0.8876915367075635, + "eval_sts-test_spearman_dot": 0.8608104875327304, + "eval_sts-test_spearman_euclidean": 0.8804138856889071, + "eval_sts-test_spearman_manhattan": 0.8822803815444743, + "eval_sts-test_spearman_max": 0.8876915367075635, + "eval_vitaminc-pairs_loss": 2.454524040222168, + "eval_vitaminc-pairs_runtime": 1.4583, + "eval_vitaminc-pairs_samples_per_second": 74.057, + "eval_vitaminc-pairs_steps_per_second": 1.371, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_negation-triplets_loss": 4.6218037605285645, - "eval_negation-triplets_runtime": 0.2971, - "eval_negation-triplets_samples_per_second": 215.438, - "eval_negation-triplets_steps_per_second": 3.366, + "epoch": 0.30337078651685395, + "eval_negation-triplets_loss": 1.7277792692184448, + "eval_negation-triplets_runtime": 0.3027, + "eval_negation-triplets_samples_per_second": 211.436, + "eval_negation-triplets_steps_per_second": 3.304, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_scitail-pairs-pos_loss": 1.2413936853408813, - "eval_scitail-pairs-pos_runtime": 0.372, - "eval_scitail-pairs-pos_samples_per_second": 145.175, - "eval_scitail-pairs-pos_steps_per_second": 2.688, + "epoch": 0.30337078651685395, + "eval_scitail-pairs-pos_loss": 0.11168555170297623, + "eval_scitail-pairs-pos_runtime": 0.3726, + "eval_scitail-pairs-pos_samples_per_second": 144.911, + "eval_scitail-pairs-pos_steps_per_second": 2.684, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_xsum-pairs_loss": 5.249766826629639, - "eval_xsum-pairs_runtime": 3.1506, - "eval_xsum-pairs_samples_per_second": 40.627, - "eval_xsum-pairs_steps_per_second": 0.635, + "epoch": 0.30337078651685395, + "eval_xsum-pairs_loss": 0.10087604075670242, + "eval_xsum-pairs_runtime": 3.1701, + "eval_xsum-pairs_samples_per_second": 40.377, + "eval_xsum-pairs_steps_per_second": 0.631, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_sciq_pairs_loss": 0.2961578667163849, - "eval_sciq_pairs_runtime": 3.2909, - "eval_sciq_pairs_samples_per_second": 38.895, - "eval_sciq_pairs_steps_per_second": 0.608, + "epoch": 0.30337078651685395, + "eval_sciq_pairs_loss": 0.03466618433594704, + "eval_sciq_pairs_runtime": 3.3778, + "eval_sciq_pairs_samples_per_second": 37.895, + "eval_sciq_pairs_steps_per_second": 0.592, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_qasc_pairs_loss": 2.530872344970703, - "eval_qasc_pairs_runtime": 0.6255, - "eval_qasc_pairs_samples_per_second": 204.63, - "eval_qasc_pairs_steps_per_second": 3.197, + "epoch": 0.30337078651685395, + "eval_qasc_pairs_loss": 0.10551701486110687, + "eval_qasc_pairs_runtime": 0.6271, + "eval_qasc_pairs_samples_per_second": 204.125, + "eval_qasc_pairs_steps_per_second": 3.189, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_openbookqa_pairs_loss": 3.8855104446411133, - "eval_openbookqa_pairs_runtime": 0.5742, - "eval_openbookqa_pairs_samples_per_second": 222.914, - "eval_openbookqa_pairs_steps_per_second": 3.483, + "epoch": 0.30337078651685395, + "eval_openbookqa_pairs_loss": 0.7239958643913269, + "eval_openbookqa_pairs_runtime": 0.5811, + "eval_openbookqa_pairs_samples_per_second": 220.255, + "eval_openbookqa_pairs_steps_per_second": 3.441, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_msmarco_pairs_loss": 5.246406555175781, - "eval_msmarco_pairs_runtime": 1.2872, - "eval_msmarco_pairs_samples_per_second": 99.442, - "eval_msmarco_pairs_steps_per_second": 1.554, + "epoch": 0.30337078651685395, + "eval_msmarco_pairs_loss": 0.3808779716491699, + "eval_msmarco_pairs_runtime": 1.2919, + "eval_msmarco_pairs_samples_per_second": 99.082, + "eval_msmarco_pairs_steps_per_second": 1.548, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_nq_pairs_loss": 5.332630157470703, - "eval_nq_pairs_runtime": 2.3739, - "eval_nq_pairs_samples_per_second": 53.92, - "eval_nq_pairs_steps_per_second": 0.843, + "epoch": 0.30337078651685395, + "eval_nq_pairs_loss": 0.44170400500297546, + "eval_nq_pairs_runtime": 2.3835, + "eval_nq_pairs_samples_per_second": 53.703, + "eval_nq_pairs_steps_per_second": 0.839, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_trivia_pairs_loss": 5.647429943084717, - "eval_trivia_pairs_runtime": 4.4729, - "eval_trivia_pairs_samples_per_second": 28.617, - "eval_trivia_pairs_steps_per_second": 0.447, + "epoch": 0.30337078651685395, + "eval_trivia_pairs_loss": 0.9158428907394409, + "eval_trivia_pairs_runtime": 4.4326, + "eval_trivia_pairs_samples_per_second": 28.877, + "eval_trivia_pairs_steps_per_second": 0.451, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_gooaq_pairs_loss": 5.225871562957764, - "eval_gooaq_pairs_runtime": 0.8715, - "eval_gooaq_pairs_samples_per_second": 146.868, - "eval_gooaq_pairs_steps_per_second": 2.295, + "epoch": 0.30337078651685395, + "eval_gooaq_pairs_loss": 0.6208247542381287, + "eval_gooaq_pairs_runtime": 0.8797, + "eval_gooaq_pairs_samples_per_second": 145.497, + "eval_gooaq_pairs_steps_per_second": 2.273, "step": 54 }, { - "epoch": 0.3048694424841214, - "eval_paws-pos_loss": 0.8335962891578674, - "eval_paws-pos_runtime": 0.6844, - "eval_paws-pos_samples_per_second": 187.036, - "eval_paws-pos_steps_per_second": 2.922, + "epoch": 0.30337078651685395, + "eval_paws-pos_loss": 0.02517784759402275, + "eval_paws-pos_runtime": 0.694, + "eval_paws-pos_samples_per_second": 184.442, + "eval_paws-pos_steps_per_second": 2.882, "step": 54 }, { - "epoch": 0.32180663373323926, - "grad_norm": 6.847682952880859, - "learning_rate": 1.2954545454545455e-05, - "loss": 4.4811, + "epoch": 0.3202247191011236, + "grad_norm": 2.173736572265625, + "learning_rate": 2.814814814814815e-05, + "loss": 0.6015, "step": 57 }, { - "epoch": 0.3387438249823571, - "grad_norm": 8.383002281188965, - "learning_rate": 1.3636363636363637e-05, - "loss": 4.4239, + "epoch": 0.33707865168539325, + "grad_norm": 3.8964712619781494, + "learning_rate": 2.962962962962963e-05, + "loss": 0.9482, "step": 60 }, { - "epoch": 0.35568101623147497, - "grad_norm": 7.014843463897705, - "learning_rate": 1.431818181818182e-05, - "loss": 4.0273, + "epoch": 0.3539325842696629, + "grad_norm": 2.659498691558838, + "learning_rate": 3.111111111111112e-05, + "loss": 0.5404, "step": 63 }, { - "epoch": 0.3726182074805928, - "grad_norm": 5.9739885330200195, - "learning_rate": 1.5000000000000002e-05, - "loss": 3.4508, + "epoch": 0.3707865168539326, + "grad_norm": 3.3499844074249268, + "learning_rate": 3.259259259259259e-05, + "loss": 0.805, "step": 66 }, { - "epoch": 0.3895553987297107, - "grad_norm": 11.202752113342285, - "learning_rate": 1.5681818181818182e-05, - "loss": 3.9702, + "epoch": 0.38764044943820225, + "grad_norm": 3.770142078399658, + "learning_rate": 3.4074074074074077e-05, + "loss": 0.7184, "step": 69 }, { - "epoch": 0.40649258997882853, - "grad_norm": 7.064818859100342, - "learning_rate": 1.6363636363636366e-05, - "loss": 3.5295, + "epoch": 0.4044943820224719, + "grad_norm": 3.740880012512207, + "learning_rate": 3.555555555555555e-05, + "loss": 0.8708, "step": 72 }, { - "epoch": 0.4234297812279464, - "grad_norm": 5.912719249725342, - "learning_rate": 1.7045454545454546e-05, - "loss": 3.6395, + "epoch": 0.42134831460674155, + "grad_norm": 2.981106996536255, + "learning_rate": 3.703703703703704e-05, + "loss": 0.8327, "step": 75 }, { - "epoch": 0.44036697247706424, - "grad_norm": 5.033207893371582, - "learning_rate": 1.772727272727273e-05, - "loss": 3.2398, + "epoch": 0.43820224719101125, + "grad_norm": 2.3469011783599854, + "learning_rate": 3.851851851851852e-05, + "loss": 0.5025, "step": 78 }, { - "epoch": 0.4573041637261821, - "grad_norm": 5.218384265899658, - "learning_rate": 1.840909090909091e-05, - "loss": 3.116, + "epoch": 0.4550561797752809, + "grad_norm": 3.296035051345825, + "learning_rate": 4e-05, + "loss": 0.6517, "step": 81 }, { - "epoch": 0.4573041637261821, + "epoch": 0.4550561797752809, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, - "eval_VitaminC_cosine_accuracy": 0.5546875, - "eval_VitaminC_cosine_accuracy_threshold": 0.9041332006454468, - "eval_VitaminC_cosine_ap": 0.5292859731465609, - "eval_VitaminC_cosine_f1": 0.6542553191489362, - "eval_VitaminC_cosine_f1_threshold": 0.452939510345459, - "eval_VitaminC_cosine_precision": 0.48616600790513836, + "eval_VitaminC_cosine_accuracy": 0.578125, + "eval_VitaminC_cosine_accuracy_threshold": 0.7859437465667725, + "eval_VitaminC_cosine_ap": 0.5557444337961499, + "eval_VitaminC_cosine_f1": 0.6595174262734584, + "eval_VitaminC_cosine_f1_threshold": 0.3211573362350464, + "eval_VitaminC_cosine_precision": 0.492, "eval_VitaminC_cosine_recall": 1.0, - "eval_VitaminC_dot_accuracy": 0.5546875, - "eval_VitaminC_dot_accuracy_threshold": 414.42559814453125, - "eval_VitaminC_dot_ap": 0.5222732504955002, - "eval_VitaminC_dot_f1": 0.6542553191489362, - "eval_VitaminC_dot_f1_threshold": 212.6934814453125, - "eval_VitaminC_dot_precision": 0.48616600790513836, + "eval_VitaminC_dot_accuracy": 0.578125, + "eval_VitaminC_dot_accuracy_threshold": 315.9444580078125, + "eval_VitaminC_dot_ap": 0.5539524528858992, + "eval_VitaminC_dot_f1": 0.6595174262734584, + "eval_VitaminC_dot_f1_threshold": 129.88558959960938, + "eval_VitaminC_dot_precision": 0.492, "eval_VitaminC_dot_recall": 1.0, - "eval_VitaminC_euclidean_accuracy": 0.5546875, - "eval_VitaminC_euclidean_accuracy_threshold": 9.18377685546875, - "eval_VitaminC_euclidean_ap": 0.5291787221346742, - "eval_VitaminC_euclidean_f1": 0.6542553191489362, - "eval_VitaminC_euclidean_f1_threshold": 22.683509826660156, - "eval_VitaminC_euclidean_precision": 0.48616600790513836, + "eval_VitaminC_euclidean_accuracy": 0.58203125, + "eval_VitaminC_euclidean_accuracy_threshold": 13.113249778747559, + "eval_VitaminC_euclidean_ap": 0.5510190217865811, + "eval_VitaminC_euclidean_f1": 0.6577540106951871, + "eval_VitaminC_euclidean_f1_threshold": 23.90462303161621, + "eval_VitaminC_euclidean_precision": 0.4900398406374502, "eval_VitaminC_euclidean_recall": 1.0, - "eval_VitaminC_manhattan_accuracy": 0.55859375, - "eval_VitaminC_manhattan_accuracy_threshold": 173.8212127685547, - "eval_VitaminC_manhattan_ap": 0.5305698453165033, - "eval_VitaminC_manhattan_f1": 0.6542553191489362, - "eval_VitaminC_manhattan_f1_threshold": 415.5366516113281, - "eval_VitaminC_manhattan_precision": 0.48616600790513836, - "eval_VitaminC_manhattan_recall": 1.0, - "eval_VitaminC_max_accuracy": 0.55859375, - "eval_VitaminC_max_accuracy_threshold": 414.42559814453125, - "eval_VitaminC_max_ap": 0.5305698453165033, - "eval_VitaminC_max_f1": 0.6542553191489362, - "eval_VitaminC_max_f1_threshold": 415.5366516113281, - "eval_VitaminC_max_precision": 0.48616600790513836, + "eval_VitaminC_manhattan_accuracy": 0.578125, + "eval_VitaminC_manhattan_accuracy_threshold": 276.40142822265625, + "eval_VitaminC_manhattan_ap": 0.5429240708188645, + "eval_VitaminC_manhattan_f1": 0.6576819407008085, + "eval_VitaminC_manhattan_f1_threshold": 469.7353515625, + "eval_VitaminC_manhattan_precision": 0.49193548387096775, + "eval_VitaminC_manhattan_recall": 0.991869918699187, + "eval_VitaminC_max_accuracy": 0.58203125, + "eval_VitaminC_max_accuracy_threshold": 315.9444580078125, + "eval_VitaminC_max_ap": 0.5557444337961499, + "eval_VitaminC_max_f1": 0.6595174262734584, + "eval_VitaminC_max_f1_threshold": 469.7353515625, + "eval_VitaminC_max_precision": 0.492, "eval_VitaminC_max_recall": 1.0, - "eval_sequential_score": 0.5305698453165033, - "eval_sts-test_pearson_cosine": 0.45494716382349193, - "eval_sts-test_pearson_dot": 0.44837123659858896, - "eval_sts-test_pearson_euclidean": 0.4480861256491879, - "eval_sts-test_pearson_manhattan": 0.4417008219313264, - "eval_sts-test_pearson_max": 0.45494716382349193, - "eval_sts-test_spearman_cosine": 0.48921418507251446, - "eval_sts-test_spearman_dot": 0.46707725062744593, - "eval_sts-test_spearman_euclidean": 0.4610824798409968, - "eval_sts-test_spearman_manhattan": 0.46068648052845956, - "eval_sts-test_spearman_max": 0.48921418507251446, - "eval_vitaminc-pairs_loss": 2.5043575763702393, - "eval_vitaminc-pairs_runtime": 1.4778, - "eval_vitaminc-pairs_samples_per_second": 73.079, - "eval_vitaminc-pairs_steps_per_second": 1.353, + "eval_sequential_score": 0.5557444337961499, + "eval_sts-test_pearson_cosine": 0.8483316632682467, + "eval_sts-test_pearson_dot": 0.8392403098680445, + "eval_sts-test_pearson_euclidean": 0.8814283057813619, + "eval_sts-test_pearson_manhattan": 0.8815226866327923, + "eval_sts-test_pearson_max": 0.8815226866327923, + "eval_sts-test_spearman_cosine": 0.8903503892346, + "eval_sts-test_spearman_dot": 0.857844431199042, + "eval_sts-test_spearman_euclidean": 0.8851830636663006, + "eval_sts-test_spearman_manhattan": 0.8865568876827619, + "eval_sts-test_spearman_max": 0.8903503892346, + "eval_vitaminc-pairs_loss": 2.3538782596588135, + "eval_vitaminc-pairs_runtime": 1.4618, + "eval_vitaminc-pairs_samples_per_second": 73.88, + "eval_vitaminc-pairs_steps_per_second": 1.368, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_negation-triplets_loss": 3.4229447841644287, - "eval_negation-triplets_runtime": 0.2991, - "eval_negation-triplets_samples_per_second": 213.954, - "eval_negation-triplets_steps_per_second": 3.343, + "epoch": 0.4550561797752809, + "eval_negation-triplets_loss": 1.649215579032898, + "eval_negation-triplets_runtime": 0.3081, + "eval_negation-triplets_samples_per_second": 207.723, + "eval_negation-triplets_steps_per_second": 3.246, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_scitail-pairs-pos_loss": 0.2784869372844696, - "eval_scitail-pairs-pos_runtime": 0.3633, - "eval_scitail-pairs-pos_samples_per_second": 148.649, - "eval_scitail-pairs-pos_steps_per_second": 2.753, + "epoch": 0.4550561797752809, + "eval_scitail-pairs-pos_loss": 0.11823470890522003, + "eval_scitail-pairs-pos_runtime": 0.376, + "eval_scitail-pairs-pos_samples_per_second": 143.616, + "eval_scitail-pairs-pos_steps_per_second": 2.66, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_xsum-pairs_loss": 2.428964614868164, - "eval_xsum-pairs_runtime": 3.1548, - "eval_xsum-pairs_samples_per_second": 40.573, - "eval_xsum-pairs_steps_per_second": 0.634, + "epoch": 0.4550561797752809, + "eval_xsum-pairs_loss": 0.08420603722333908, + "eval_xsum-pairs_runtime": 3.1576, + "eval_xsum-pairs_samples_per_second": 40.538, + "eval_xsum-pairs_steps_per_second": 0.633, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_sciq_pairs_loss": 0.15256048738956451, - "eval_sciq_pairs_runtime": 3.2432, - "eval_sciq_pairs_samples_per_second": 39.467, - "eval_sciq_pairs_steps_per_second": 0.617, + "epoch": 0.4550561797752809, + "eval_sciq_pairs_loss": 0.034781794995069504, + "eval_sciq_pairs_runtime": 3.2597, + "eval_sciq_pairs_samples_per_second": 39.267, + "eval_sciq_pairs_steps_per_second": 0.614, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_qasc_pairs_loss": 1.2902077436447144, - "eval_qasc_pairs_runtime": 0.6211, - "eval_qasc_pairs_samples_per_second": 206.085, - "eval_qasc_pairs_steps_per_second": 3.22, + "epoch": 0.4550561797752809, + "eval_qasc_pairs_loss": 0.10597346723079681, + "eval_qasc_pairs_runtime": 0.6245, + "eval_qasc_pairs_samples_per_second": 204.979, + "eval_qasc_pairs_steps_per_second": 3.203, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_openbookqa_pairs_loss": 2.4784862995147705, - "eval_openbookqa_pairs_runtime": 0.5758, - "eval_openbookqa_pairs_samples_per_second": 222.308, - "eval_openbookqa_pairs_steps_per_second": 3.474, + "epoch": 0.4550561797752809, + "eval_openbookqa_pairs_loss": 0.7160983681678772, + "eval_openbookqa_pairs_runtime": 0.5767, + "eval_openbookqa_pairs_samples_per_second": 221.961, + "eval_openbookqa_pairs_steps_per_second": 3.468, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_msmarco_pairs_loss": 2.967724084854126, - "eval_msmarco_pairs_runtime": 1.2944, - "eval_msmarco_pairs_samples_per_second": 98.885, - "eval_msmarco_pairs_steps_per_second": 1.545, + "epoch": 0.4550561797752809, + "eval_msmarco_pairs_loss": 0.3454173803329468, + "eval_msmarco_pairs_runtime": 1.2912, + "eval_msmarco_pairs_samples_per_second": 99.134, + "eval_msmarco_pairs_steps_per_second": 1.549, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_nq_pairs_loss": 3.358661413192749, - "eval_nq_pairs_runtime": 2.3827, - "eval_nq_pairs_samples_per_second": 53.722, - "eval_nq_pairs_steps_per_second": 0.839, + "epoch": 0.4550561797752809, + "eval_nq_pairs_loss": 0.4442503750324249, + "eval_nq_pairs_runtime": 2.3854, + "eval_nq_pairs_samples_per_second": 53.659, + "eval_nq_pairs_steps_per_second": 0.838, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_trivia_pairs_loss": 3.1391680240631104, - "eval_trivia_pairs_runtime": 4.4155, - "eval_trivia_pairs_samples_per_second": 28.989, - "eval_trivia_pairs_steps_per_second": 0.453, + "epoch": 0.4550561797752809, + "eval_trivia_pairs_loss": 0.9324482679367065, + "eval_trivia_pairs_runtime": 4.4251, + "eval_trivia_pairs_samples_per_second": 28.926, + "eval_trivia_pairs_steps_per_second": 0.452, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_gooaq_pairs_loss": 2.8774912357330322, - "eval_gooaq_pairs_runtime": 0.8746, - "eval_gooaq_pairs_samples_per_second": 146.346, - "eval_gooaq_pairs_steps_per_second": 2.287, + "epoch": 0.4550561797752809, + "eval_gooaq_pairs_loss": 0.6094165444374084, + "eval_gooaq_pairs_runtime": 0.8751, + "eval_gooaq_pairs_samples_per_second": 146.261, + "eval_gooaq_pairs_steps_per_second": 2.285, "step": 81 }, { - "epoch": 0.4573041637261821, - "eval_paws-pos_loss": 0.19754411280155182, - "eval_paws-pos_runtime": 0.684, - "eval_paws-pos_samples_per_second": 187.141, - "eval_paws-pos_steps_per_second": 2.924, + "epoch": 0.4550561797752809, + "eval_paws-pos_loss": 0.024421451613307, + "eval_paws-pos_runtime": 0.6865, + "eval_paws-pos_samples_per_second": 186.444, + "eval_paws-pos_steps_per_second": 2.913, "step": 81 }, { - "epoch": 0.47424135497529996, - "grad_norm": 5.149569988250732, - "learning_rate": 1.9090909090909094e-05, - "loss": 2.6049, + "epoch": 0.47191011235955055, + "grad_norm": 3.1395561695098877, + "learning_rate": 3.999675367909485e-05, + "loss": 0.5801, "step": 84 }, { - "epoch": 0.4911785462244178, - "grad_norm": 5.012928009033203, - "learning_rate": 1.9772727272727274e-05, - "loss": 2.7738, + "epoch": 0.4887640449438202, + "grad_norm": 2.7977917194366455, + "learning_rate": 3.998701612152597e-05, + "loss": 0.791, "step": 87 }, { - "epoch": 0.5081157374735357, - "grad_norm": 4.880725383758545, - "learning_rate": 2.0454545454545457e-05, - "loss": 2.5416, + "epoch": 0.5056179775280899, + "grad_norm": 2.3682048320770264, + "learning_rate": 3.997079154212493e-05, + "loss": 0.6042, "step": 90 }, { - "epoch": 0.5250529287226535, - "grad_norm": 5.618528366088867, - "learning_rate": 2.113636363636364e-05, - "loss": 2.3913, + "epoch": 0.5224719101123596, + "grad_norm": 2.843482255935669, + "learning_rate": 3.99480869635839e-05, + "loss": 0.7559, "step": 93 }, { - "epoch": 0.5419901199717714, - "grad_norm": 5.020515441894531, - "learning_rate": 2.1818181818181818e-05, - "loss": 2.3144, + "epoch": 0.5393258426966292, + "grad_norm": 2.7346785068511963, + "learning_rate": 3.9918912213415936e-05, + "loss": 0.6258, "step": 96 }, { - "epoch": 0.5589273112208892, - "grad_norm": 4.818451404571533, - "learning_rate": 2.25e-05, - "loss": 2.1857, + "epoch": 0.5561797752808989, + "grad_norm": 3.149007558822632, + "learning_rate": 3.9883279919701226e-05, + "loss": 0.8853, "step": 99 }, { - "epoch": 0.5758645024700071, - "grad_norm": 5.094771385192871, - "learning_rate": 2.3181818181818185e-05, - "loss": 1.8881, + "epoch": 0.5730337078651685, + "grad_norm": 3.3424761295318604, + "learning_rate": 3.9841205505621106e-05, + "loss": 0.5947, "step": 102 }, { - "epoch": 0.592801693719125, - "grad_norm": 3.795962333679199, - "learning_rate": 2.3863636363636365e-05, - "loss": 2.2699, + "epoch": 0.5898876404494382, + "grad_norm": 2.6377146244049072, + "learning_rate": 3.979270718278224e-05, + "loss": 0.644, "step": 105 }, { - "epoch": 0.6097388849682428, - "grad_norm": 4.46245813369751, - "learning_rate": 2.454545454545455e-05, - "loss": 2.1425, + "epoch": 0.6067415730337079, + "grad_norm": 1.3963145017623901, + "learning_rate": 3.973780594333386e-05, + "loss": 0.5682, "step": 108 }, { - "epoch": 0.6097388849682428, + "epoch": 0.6067415730337079, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, - "eval_VitaminC_cosine_accuracy": 0.5546875, - "eval_VitaminC_cosine_accuracy_threshold": 0.8830112218856812, - "eval_VitaminC_cosine_ap": 0.5302172957740995, - "eval_VitaminC_cosine_f1": 0.6558265582655827, - "eval_VitaminC_cosine_f1_threshold": 0.5253933668136597, - "eval_VitaminC_cosine_precision": 0.491869918699187, - "eval_VitaminC_cosine_recall": 0.983739837398374, - "eval_VitaminC_dot_accuracy": 0.5390625, - "eval_VitaminC_dot_accuracy_threshold": 427.5576171875, - "eval_VitaminC_dot_ap": 0.517120157327104, - "eval_VitaminC_dot_f1": 0.6542553191489362, - "eval_VitaminC_dot_f1_threshold": 175.80963134765625, - "eval_VitaminC_dot_precision": 0.48616600790513836, + "eval_VitaminC_cosine_accuracy": 0.58984375, + "eval_VitaminC_cosine_accuracy_threshold": 0.7784540057182312, + "eval_VitaminC_cosine_ap": 0.556890553952148, + "eval_VitaminC_cosine_f1": 0.6595174262734584, + "eval_VitaminC_cosine_f1_threshold": 0.3015836775302887, + "eval_VitaminC_cosine_precision": 0.492, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.57421875, + "eval_VitaminC_dot_accuracy_threshold": 310.07818603515625, + "eval_VitaminC_dot_ap": 0.5486679382699982, + "eval_VitaminC_dot_f1": 0.6595174262734584, + "eval_VitaminC_dot_f1_threshold": 110.96945190429688, + "eval_VitaminC_dot_precision": 0.492, "eval_VitaminC_dot_recall": 1.0, - "eval_VitaminC_euclidean_accuracy": 0.5625, - "eval_VitaminC_euclidean_accuracy_threshold": 10.817148208618164, - "eval_VitaminC_euclidean_ap": 0.532255112376416, - "eval_VitaminC_euclidean_f1": 0.6558265582655827, - "eval_VitaminC_euclidean_f1_threshold": 21.10729217529297, - "eval_VitaminC_euclidean_precision": 0.491869918699187, - "eval_VitaminC_euclidean_recall": 0.983739837398374, - "eval_VitaminC_manhattan_accuracy": 0.5546875, - "eval_VitaminC_manhattan_accuracy_threshold": 224.70416259765625, - "eval_VitaminC_manhattan_ap": 0.5298930718604624, - "eval_VitaminC_manhattan_f1": 0.6558265582655827, - "eval_VitaminC_manhattan_f1_threshold": 415.3311767578125, - "eval_VitaminC_manhattan_precision": 0.491869918699187, + "eval_VitaminC_euclidean_accuracy": 0.578125, + "eval_VitaminC_euclidean_accuracy_threshold": 13.410951614379883, + "eval_VitaminC_euclidean_ap": 0.5551150763619972, + "eval_VitaminC_euclidean_f1": 0.6577540106951871, + "eval_VitaminC_euclidean_f1_threshold": 23.933565139770508, + "eval_VitaminC_euclidean_precision": 0.4900398406374502, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.58203125, + "eval_VitaminC_manhattan_accuracy_threshold": 236.87246704101562, + "eval_VitaminC_manhattan_ap": 0.5465417933692003, + "eval_VitaminC_manhattan_f1": 0.6576086956521738, + "eval_VitaminC_manhattan_f1_threshold": 479.8819580078125, + "eval_VitaminC_manhattan_precision": 0.49387755102040815, "eval_VitaminC_manhattan_recall": 0.983739837398374, - "eval_VitaminC_max_accuracy": 0.5625, - "eval_VitaminC_max_accuracy_threshold": 427.5576171875, - "eval_VitaminC_max_ap": 0.532255112376416, - "eval_VitaminC_max_f1": 0.6558265582655827, - "eval_VitaminC_max_f1_threshold": 415.3311767578125, - "eval_VitaminC_max_precision": 0.491869918699187, + "eval_VitaminC_max_accuracy": 0.58984375, + "eval_VitaminC_max_accuracy_threshold": 310.07818603515625, + "eval_VitaminC_max_ap": 0.556890553952148, + "eval_VitaminC_max_f1": 0.6595174262734584, + "eval_VitaminC_max_f1_threshold": 479.8819580078125, + "eval_VitaminC_max_precision": 0.49387755102040815, "eval_VitaminC_max_recall": 1.0, - "eval_sequential_score": 0.532255112376416, - "eval_sts-test_pearson_cosine": 0.755963151531783, - "eval_sts-test_pearson_dot": 0.7384823091540473, - "eval_sts-test_pearson_euclidean": 0.764089555623164, - "eval_sts-test_pearson_manhattan": 0.7670467479701304, - "eval_sts-test_pearson_max": 0.7670467479701304, - "eval_sts-test_spearman_cosine": 0.7806331583677342, - "eval_sts-test_spearman_dot": 0.7442842883778696, - "eval_sts-test_spearman_euclidean": 0.7674205303105437, - "eval_sts-test_spearman_manhattan": 0.7664974867050092, - "eval_sts-test_spearman_max": 0.7806331583677342, - "eval_vitaminc-pairs_loss": 2.721674919128418, - "eval_vitaminc-pairs_runtime": 1.4468, - "eval_vitaminc-pairs_samples_per_second": 74.65, - "eval_vitaminc-pairs_steps_per_second": 1.382, + "eval_sequential_score": 0.556890553952148, + "eval_sts-test_pearson_cosine": 0.8554011700695058, + "eval_sts-test_pearson_dot": 0.8545651085608208, + "eval_sts-test_pearson_euclidean": 0.8842988585732054, + "eval_sts-test_pearson_manhattan": 0.8850809337540164, + "eval_sts-test_pearson_max": 0.8850809337540164, + "eval_sts-test_spearman_cosine": 0.8959079853731212, + "eval_sts-test_spearman_dot": 0.876135947365041, + "eval_sts-test_spearman_euclidean": 0.8856381406339634, + "eval_sts-test_spearman_manhattan": 0.8868948834793577, + "eval_sts-test_spearman_max": 0.8959079853731212, + "eval_vitaminc-pairs_loss": 2.4271271228790283, + "eval_vitaminc-pairs_runtime": 1.4672, + "eval_vitaminc-pairs_samples_per_second": 73.61, + "eval_vitaminc-pairs_steps_per_second": 1.363, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_negation-triplets_loss": 2.338909387588501, - "eval_negation-triplets_runtime": 0.3017, - "eval_negation-triplets_samples_per_second": 212.101, - "eval_negation-triplets_steps_per_second": 3.314, + "epoch": 0.6067415730337079, + "eval_negation-triplets_loss": 1.6174229383468628, + "eval_negation-triplets_runtime": 0.3094, + "eval_negation-triplets_samples_per_second": 206.869, + "eval_negation-triplets_steps_per_second": 3.232, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_scitail-pairs-pos_loss": 0.23291125893592834, - "eval_scitail-pairs-pos_runtime": 0.3664, - "eval_scitail-pairs-pos_samples_per_second": 147.385, - "eval_scitail-pairs-pos_steps_per_second": 2.729, + "epoch": 0.6067415730337079, + "eval_scitail-pairs-pos_loss": 0.10586681962013245, + "eval_scitail-pairs-pos_runtime": 0.4307, + "eval_scitail-pairs-pos_samples_per_second": 125.374, + "eval_scitail-pairs-pos_steps_per_second": 2.322, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_xsum-pairs_loss": 1.2065516710281372, - "eval_xsum-pairs_runtime": 3.1488, - "eval_xsum-pairs_samples_per_second": 40.65, - "eval_xsum-pairs_steps_per_second": 0.635, + "epoch": 0.6067415730337079, + "eval_xsum-pairs_loss": 0.1041470319032669, + "eval_xsum-pairs_runtime": 3.2912, + "eval_xsum-pairs_samples_per_second": 38.891, + "eval_xsum-pairs_steps_per_second": 0.608, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_sciq_pairs_loss": 0.09487833082675934, - "eval_sciq_pairs_runtime": 3.2618, - "eval_sciq_pairs_samples_per_second": 39.242, - "eval_sciq_pairs_steps_per_second": 0.613, + "epoch": 0.6067415730337079, + "eval_sciq_pairs_loss": 0.03364330902695656, + "eval_sciq_pairs_runtime": 3.3617, + "eval_sciq_pairs_samples_per_second": 38.076, + "eval_sciq_pairs_steps_per_second": 0.595, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_qasc_pairs_loss": 0.8461999297142029, - "eval_qasc_pairs_runtime": 0.6246, - "eval_qasc_pairs_samples_per_second": 204.93, - "eval_qasc_pairs_steps_per_second": 3.202, + "epoch": 0.6067415730337079, + "eval_qasc_pairs_loss": 0.10827122628688812, + "eval_qasc_pairs_runtime": 0.6238, + "eval_qasc_pairs_samples_per_second": 205.206, + "eval_qasc_pairs_steps_per_second": 3.206, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_openbookqa_pairs_loss": 1.5739191770553589, - "eval_openbookqa_pairs_runtime": 0.5751, - "eval_openbookqa_pairs_samples_per_second": 222.568, - "eval_openbookqa_pairs_steps_per_second": 3.478, + "epoch": 0.6067415730337079, + "eval_openbookqa_pairs_loss": 0.7513518929481506, + "eval_openbookqa_pairs_runtime": 0.5882, + "eval_openbookqa_pairs_samples_per_second": 217.628, + "eval_openbookqa_pairs_steps_per_second": 3.4, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_msmarco_pairs_loss": 1.6446179151535034, - "eval_msmarco_pairs_runtime": 1.2828, - "eval_msmarco_pairs_samples_per_second": 99.784, - "eval_msmarco_pairs_steps_per_second": 1.559, + "epoch": 0.6067415730337079, + "eval_msmarco_pairs_loss": 0.3447520434856415, + "eval_msmarco_pairs_runtime": 1.2968, + "eval_msmarco_pairs_samples_per_second": 98.705, + "eval_msmarco_pairs_steps_per_second": 1.542, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_nq_pairs_loss": 2.364896535873413, - "eval_nq_pairs_runtime": 2.3802, - "eval_nq_pairs_samples_per_second": 53.777, - "eval_nq_pairs_steps_per_second": 0.84, + "epoch": 0.6067415730337079, + "eval_nq_pairs_loss": 0.4307234287261963, + "eval_nq_pairs_runtime": 2.4164, + "eval_nq_pairs_samples_per_second": 52.971, + "eval_nq_pairs_steps_per_second": 0.828, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_trivia_pairs_loss": 1.7080069780349731, - "eval_trivia_pairs_runtime": 4.4372, - "eval_trivia_pairs_samples_per_second": 28.847, - "eval_trivia_pairs_steps_per_second": 0.451, + "epoch": 0.6067415730337079, + "eval_trivia_pairs_loss": 0.9260168075561523, + "eval_trivia_pairs_runtime": 4.4644, + "eval_trivia_pairs_samples_per_second": 28.671, + "eval_trivia_pairs_steps_per_second": 0.448, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_gooaq_pairs_loss": 1.7924479246139526, - "eval_gooaq_pairs_runtime": 0.8761, - "eval_gooaq_pairs_samples_per_second": 146.094, - "eval_gooaq_pairs_steps_per_second": 2.283, + "epoch": 0.6067415730337079, + "eval_gooaq_pairs_loss": 0.6301646828651428, + "eval_gooaq_pairs_runtime": 0.9092, + "eval_gooaq_pairs_samples_per_second": 140.777, + "eval_gooaq_pairs_steps_per_second": 2.2, "step": 108 }, { - "epoch": 0.6097388849682428, - "eval_paws-pos_loss": 0.08000019192695618, - "eval_paws-pos_runtime": 0.6839, - "eval_paws-pos_samples_per_second": 187.168, - "eval_paws-pos_steps_per_second": 2.924, + "epoch": 0.6067415730337079, + "eval_paws-pos_loss": 0.024637963622808456, + "eval_paws-pos_runtime": 0.696, + "eval_paws-pos_samples_per_second": 183.9, + "eval_paws-pos_steps_per_second": 2.873, "step": 108 }, { - "epoch": 0.6266760762173607, - "grad_norm": 4.418070316314697, - "learning_rate": 2.5227272727272732e-05, - "loss": 2.1276, + "epoch": 0.6235955056179775, + "grad_norm": 3.0150091648101807, + "learning_rate": 3.9676525550881484e-05, + "loss": 0.5974, "step": 111 }, { - "epoch": 0.6436132674664785, - "grad_norm": 4.3495259284973145, - "learning_rate": 2.590909090909091e-05, - "loss": 1.7531, + "epoch": 0.6404494382022472, + "grad_norm": 2.7985854148864746, + "learning_rate": 3.9608892530200996e-05, + "loss": 0.649, "step": 114 }, { - "epoch": 0.6605504587155964, - "grad_norm": 4.294332027435303, - "learning_rate": 2.6590909090909093e-05, - "loss": 2.0179, + "epoch": 0.6573033707865169, + "grad_norm": 3.00435471534729, + "learning_rate": 3.953493615575757e-05, + "loss": 0.6966, "step": 117 }, { - "epoch": 0.6774876499647142, - "grad_norm": 3.4215610027313232, - "learning_rate": 2.7272727272727273e-05, - "loss": 1.5305, + "epoch": 0.6741573033707865, + "grad_norm": 3.276264190673828, + "learning_rate": 3.945468843903448e-05, + "loss": 0.542, "step": 120 }, { - "epoch": 0.6944248412138321, - "grad_norm": 4.37844181060791, - "learning_rate": 2.7954545454545457e-05, - "loss": 1.6925, + "epoch": 0.6910112359550562, + "grad_norm": 3.144037961959839, + "learning_rate": 3.936818411467709e-05, + "loss": 0.8583, "step": 123 }, { - "epoch": 0.7113620324629499, - "grad_norm": 4.019878387451172, - "learning_rate": 2.863636363636364e-05, - "loss": 1.5248, + "epoch": 0.7078651685393258, + "grad_norm": 2.498800754547119, + "learning_rate": 3.9275460625458294e-05, + "loss": 0.6416, "step": 126 }, { - "epoch": 0.7282992237120678, - "grad_norm": 4.662445068359375, - "learning_rate": 2.931818181818182e-05, - "loss": 1.523, + "epoch": 0.7247191011235955, + "grad_norm": 3.01488995552063, + "learning_rate": 3.917655810607162e-05, + "loss": 0.6273, "step": 129 }, { - "epoch": 0.7452364149611856, - "grad_norm": 4.6323161125183105, - "learning_rate": 3.0000000000000004e-05, - "loss": 1.5474, + "epoch": 0.7415730337078652, + "grad_norm": 3.439838171005249, + "learning_rate": 3.907151936575922e-05, + "loss": 0.8621, "step": 132 }, { - "epoch": 0.7621736062103035, - "grad_norm": 4.586575984954834, - "learning_rate": 3.068181818181819e-05, - "loss": 1.7221, + "epoch": 0.7584269662921348, + "grad_norm": 3.0246922969818115, + "learning_rate": 3.896038986978224e-05, + "loss": 0.7221, "step": 135 }, { - "epoch": 0.7621736062103035, + "epoch": 0.7584269662921348, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, - "eval_VitaminC_cosine_accuracy": 0.56640625, - "eval_VitaminC_cosine_accuracy_threshold": 0.8478574156761169, - "eval_VitaminC_cosine_ap": 0.5325579595957614, - "eval_VitaminC_cosine_f1": 0.6559999999999999, - "eval_VitaminC_cosine_f1_threshold": 0.35839784145355225, - "eval_VitaminC_cosine_precision": 0.4880952380952381, + "eval_VitaminC_cosine_accuracy": 0.57421875, + "eval_VitaminC_cosine_accuracy_threshold": 0.8462294936180115, + "eval_VitaminC_cosine_ap": 0.5610960529859609, + "eval_VitaminC_cosine_f1": 0.6595174262734584, + "eval_VitaminC_cosine_f1_threshold": 0.3176865577697754, + "eval_VitaminC_cosine_precision": 0.492, "eval_VitaminC_cosine_recall": 1.0, - "eval_VitaminC_dot_accuracy": 0.5625, - "eval_VitaminC_dot_accuracy_threshold": 366.9839172363281, - "eval_VitaminC_dot_ap": 0.5326813797607027, - "eval_VitaminC_dot_f1": 0.6559999999999999, - "eval_VitaminC_dot_f1_threshold": 157.35829162597656, - "eval_VitaminC_dot_precision": 0.4880952380952381, + "eval_VitaminC_dot_accuracy": 0.5859375, + "eval_VitaminC_dot_accuracy_threshold": 319.20416259765625, + "eval_VitaminC_dot_ap": 0.5540963620104548, + "eval_VitaminC_dot_f1": 0.6595174262734584, + "eval_VitaminC_dot_f1_threshold": 121.18156433105469, + "eval_VitaminC_dot_precision": 0.492, "eval_VitaminC_dot_recall": 1.0, - "eval_VitaminC_euclidean_accuracy": 0.5625, - "eval_VitaminC_euclidean_accuracy_threshold": 12.044445037841797, - "eval_VitaminC_euclidean_ap": 0.5304103559932005, - "eval_VitaminC_euclidean_f1": 0.6542553191489362, - "eval_VitaminC_euclidean_f1_threshold": 24.461441040039062, - "eval_VitaminC_euclidean_precision": 0.48616600790513836, + "eval_VitaminC_euclidean_accuracy": 0.58203125, + "eval_VitaminC_euclidean_accuracy_threshold": 15.303094863891602, + "eval_VitaminC_euclidean_ap": 0.5590109366458975, + "eval_VitaminC_euclidean_f1": 0.6577540106951871, + "eval_VitaminC_euclidean_f1_threshold": 23.605255126953125, + "eval_VitaminC_euclidean_precision": 0.4900398406374502, "eval_VitaminC_euclidean_recall": 1.0, - "eval_VitaminC_manhattan_accuracy": 0.5625, - "eval_VitaminC_manhattan_accuracy_threshold": 239.24815368652344, - "eval_VitaminC_manhattan_ap": 0.5314780667834758, - "eval_VitaminC_manhattan_f1": 0.6575342465753424, - "eval_VitaminC_manhattan_f1_threshold": 400.6834716796875, - "eval_VitaminC_manhattan_precision": 0.49586776859504134, - "eval_VitaminC_manhattan_recall": 0.975609756097561, - "eval_VitaminC_max_accuracy": 0.56640625, - "eval_VitaminC_max_accuracy_threshold": 366.9839172363281, - "eval_VitaminC_max_ap": 0.5326813797607027, - "eval_VitaminC_max_f1": 0.6575342465753424, - "eval_VitaminC_max_f1_threshold": 400.6834716796875, - "eval_VitaminC_max_precision": 0.49586776859504134, + "eval_VitaminC_manhattan_accuracy": 0.578125, + "eval_VitaminC_manhattan_accuracy_threshold": 240.15660095214844, + "eval_VitaminC_manhattan_ap": 0.5523438512618317, + "eval_VitaminC_manhattan_f1": 0.6577540106951871, + "eval_VitaminC_manhattan_f1_threshold": 503.4666748046875, + "eval_VitaminC_manhattan_precision": 0.4900398406374502, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.5859375, + "eval_VitaminC_max_accuracy_threshold": 319.20416259765625, + "eval_VitaminC_max_ap": 0.5610960529859609, + "eval_VitaminC_max_f1": 0.6595174262734584, + "eval_VitaminC_max_f1_threshold": 503.4666748046875, + "eval_VitaminC_max_precision": 0.492, "eval_VitaminC_max_recall": 1.0, - "eval_sequential_score": 0.5326813797607027, - "eval_sts-test_pearson_cosine": 0.7919597804368175, - "eval_sts-test_pearson_dot": 0.7994867531185785, - "eval_sts-test_pearson_euclidean": 0.8117960113303863, - "eval_sts-test_pearson_manhattan": 0.8144714466358016, - "eval_sts-test_pearson_max": 0.8144714466358016, - "eval_sts-test_spearman_cosine": 0.831478610786181, - "eval_sts-test_spearman_dot": 0.8192534746855707, - "eval_sts-test_spearman_euclidean": 0.8185577905406703, - "eval_sts-test_spearman_manhattan": 0.8154771593606782, - "eval_sts-test_spearman_max": 0.831478610786181, - "eval_vitaminc-pairs_loss": 2.852091073989868, - "eval_vitaminc-pairs_runtime": 1.4427, - "eval_vitaminc-pairs_samples_per_second": 74.858, - "eval_vitaminc-pairs_steps_per_second": 1.386, + "eval_sequential_score": 0.5610960529859609, + "eval_sts-test_pearson_cosine": 0.8558158452024208, + "eval_sts-test_pearson_dot": 0.8576514422982167, + "eval_sts-test_pearson_euclidean": 0.8865043101846597, + "eval_sts-test_pearson_manhattan": 0.8866527906896583, + "eval_sts-test_pearson_max": 0.8866527906896583, + "eval_sts-test_spearman_cosine": 0.8964919590428757, + "eval_sts-test_spearman_dot": 0.8785377225806833, + "eval_sts-test_spearman_euclidean": 0.8874999818863996, + "eval_sts-test_spearman_manhattan": 0.8881272791699125, + "eval_sts-test_spearman_max": 0.8964919590428757, + "eval_vitaminc-pairs_loss": 2.336690902709961, + "eval_vitaminc-pairs_runtime": 1.4561, + "eval_vitaminc-pairs_samples_per_second": 74.172, + "eval_vitaminc-pairs_steps_per_second": 1.374, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_negation-triplets_loss": 2.074247121810913, - "eval_negation-triplets_runtime": 0.3, - "eval_negation-triplets_samples_per_second": 213.353, - "eval_negation-triplets_steps_per_second": 3.334, + "epoch": 0.7584269662921348, + "eval_negation-triplets_loss": 1.5712968111038208, + "eval_negation-triplets_runtime": 0.2973, + "eval_negation-triplets_samples_per_second": 215.304, + "eval_negation-triplets_steps_per_second": 3.364, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_scitail-pairs-pos_loss": 0.2149849385023117, - "eval_scitail-pairs-pos_runtime": 0.3744, - "eval_scitail-pairs-pos_samples_per_second": 144.219, - "eval_scitail-pairs-pos_steps_per_second": 2.671, + "epoch": 0.7584269662921348, + "eval_scitail-pairs-pos_loss": 0.1093834936618805, + "eval_scitail-pairs-pos_runtime": 0.3709, + "eval_scitail-pairs-pos_samples_per_second": 145.598, + "eval_scitail-pairs-pos_steps_per_second": 2.696, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_xsum-pairs_loss": 0.7706837058067322, - "eval_xsum-pairs_runtime": 3.1609, - "eval_xsum-pairs_samples_per_second": 40.495, - "eval_xsum-pairs_steps_per_second": 0.633, + "epoch": 0.7584269662921348, + "eval_xsum-pairs_loss": 0.09297582507133484, + "eval_xsum-pairs_runtime": 3.1496, + "eval_xsum-pairs_samples_per_second": 40.64, + "eval_xsum-pairs_steps_per_second": 0.635, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_sciq_pairs_loss": 0.07513368874788284, - "eval_sciq_pairs_runtime": 3.2949, - "eval_sciq_pairs_samples_per_second": 38.848, - "eval_sciq_pairs_steps_per_second": 0.607, + "epoch": 0.7584269662921348, + "eval_sciq_pairs_loss": 0.031587302684783936, + "eval_sciq_pairs_runtime": 3.2666, + "eval_sciq_pairs_samples_per_second": 39.185, + "eval_sciq_pairs_steps_per_second": 0.612, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_qasc_pairs_loss": 0.6355602741241455, - "eval_qasc_pairs_runtime": 0.6392, - "eval_qasc_pairs_samples_per_second": 200.246, - "eval_qasc_pairs_steps_per_second": 3.129, + "epoch": 0.7584269662921348, + "eval_qasc_pairs_loss": 0.10210572183132172, + "eval_qasc_pairs_runtime": 0.6258, + "eval_qasc_pairs_samples_per_second": 204.538, + "eval_qasc_pairs_steps_per_second": 3.196, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_openbookqa_pairs_loss": 1.4014525413513184, - "eval_openbookqa_pairs_runtime": 0.622, - "eval_openbookqa_pairs_samples_per_second": 205.786, - "eval_openbookqa_pairs_steps_per_second": 3.215, + "epoch": 0.7584269662921348, + "eval_openbookqa_pairs_loss": 0.7485109567642212, + "eval_openbookqa_pairs_runtime": 0.5836, + "eval_openbookqa_pairs_samples_per_second": 219.317, + "eval_openbookqa_pairs_steps_per_second": 3.427, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_msmarco_pairs_loss": 1.1524099111557007, - "eval_msmarco_pairs_runtime": 1.31, - "eval_msmarco_pairs_samples_per_second": 97.709, - "eval_msmarco_pairs_steps_per_second": 1.527, + "epoch": 0.7584269662921348, + "eval_msmarco_pairs_loss": 0.34248754382133484, + "eval_msmarco_pairs_runtime": 1.286, + "eval_msmarco_pairs_samples_per_second": 99.53, + "eval_msmarco_pairs_steps_per_second": 1.555, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_nq_pairs_loss": 1.7768574953079224, - "eval_nq_pairs_runtime": 2.3979, - "eval_nq_pairs_samples_per_second": 53.379, - "eval_nq_pairs_steps_per_second": 0.834, + "epoch": 0.7584269662921348, + "eval_nq_pairs_loss": 0.41257673501968384, + "eval_nq_pairs_runtime": 2.3755, + "eval_nq_pairs_samples_per_second": 53.883, + "eval_nq_pairs_steps_per_second": 0.842, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_trivia_pairs_loss": 1.4495295286178589, - "eval_trivia_pairs_runtime": 4.4194, - "eval_trivia_pairs_samples_per_second": 28.964, + "epoch": 0.7584269662921348, + "eval_trivia_pairs_loss": 0.9274640083312988, + "eval_trivia_pairs_runtime": 4.4185, + "eval_trivia_pairs_samples_per_second": 28.969, "eval_trivia_pairs_steps_per_second": 0.453, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_gooaq_pairs_loss": 1.3955378532409668, - "eval_gooaq_pairs_runtime": 0.8788, - "eval_gooaq_pairs_samples_per_second": 145.649, - "eval_gooaq_pairs_steps_per_second": 2.276, + "epoch": 0.7584269662921348, + "eval_gooaq_pairs_loss": 0.5840359330177307, + "eval_gooaq_pairs_runtime": 0.8784, + "eval_gooaq_pairs_samples_per_second": 145.726, + "eval_gooaq_pairs_steps_per_second": 2.277, "step": 135 }, { - "epoch": 0.7621736062103035, - "eval_paws-pos_loss": 0.06006813049316406, - "eval_paws-pos_runtime": 0.6896, - "eval_paws-pos_samples_per_second": 185.603, - "eval_paws-pos_steps_per_second": 2.9, + "epoch": 0.7584269662921348, + "eval_paws-pos_loss": 0.024575484916567802, + "eval_paws-pos_runtime": 0.6869, + "eval_paws-pos_samples_per_second": 186.355, + "eval_paws-pos_steps_per_second": 2.912, "step": 135 }, { - "epoch": 0.7791107974594214, - "grad_norm": 3.864208936691284, - "learning_rate": 3.1363636363636365e-05, - "loss": 1.5366, + "epoch": 0.7752808988764045, + "grad_norm": 3.189115285873413, + "learning_rate": 3.884321771974146e-05, + "loss": 0.9421, "step": 138 }, { - "epoch": 0.7960479887085392, - "grad_norm": 3.837550640106201, - "learning_rate": 3.204545454545455e-05, - "loss": 1.3045, + "epoch": 0.7921348314606742, + "grad_norm": 1.8421012163162231, + "learning_rate": 3.872005363275693e-05, + "loss": 0.6845, "step": 141 }, { - "epoch": 0.8129851799576571, - "grad_norm": 3.5258102416992188, - "learning_rate": 3.272727272727273e-05, - "loss": 1.1999, + "epoch": 0.8089887640449438, + "grad_norm": 2.2710273265838623, + "learning_rate": 3.859095091951534e-05, + "loss": 0.5464, "step": 144 }, { - "epoch": 0.8299223712067749, - "grad_norm": 3.4431183338165283, - "learning_rate": 3.340909090909091e-05, - "loss": 1.3483, + "epoch": 0.8258426966292135, + "grad_norm": 2.98201060295105, + "learning_rate": 3.845596546119496e-05, + "loss": 0.6338, "step": 147 }, { - "epoch": 0.8468595624558928, - "grad_norm": 3.6455864906311035, - "learning_rate": 3.409090909090909e-05, - "loss": 1.2009, + "epoch": 0.8426966292134831, + "grad_norm": 2.9222068786621094, + "learning_rate": 3.831515568527782e-05, + "loss": 0.4993, "step": 150 }, { - "epoch": 0.8637967537050106, - "grad_norm": 4.508525371551514, - "learning_rate": 3.4772727272727276e-05, - "loss": 1.4495, + "epoch": 0.8595505617977528, + "grad_norm": 3.6596696376800537, + "learning_rate": 3.81685825402598e-05, + "loss": 0.6939, "step": 153 }, { - "epoch": 0.8807339449541285, - "grad_norm": 3.0432400703430176, - "learning_rate": 3.545454545454546e-05, - "loss": 1.2329, + "epoch": 0.8764044943820225, + "grad_norm": 3.5363073348999023, + "learning_rate": 3.801630946926956e-05, + "loss": 0.5791, "step": 156 }, { - "epoch": 0.8976711362032463, - "grad_norm": 3.0190365314483643, - "learning_rate": 3.613636363636364e-05, - "loss": 1.1905, + "epoch": 0.8932584269662921, + "grad_norm": 3.6599326133728027, + "learning_rate": 3.785840238260758e-05, + "loss": 0.9226, "step": 159 }, { - "epoch": 0.9146083274523642, - "grad_norm": 3.74668288230896, - "learning_rate": 3.681818181818182e-05, - "loss": 1.277, + "epoch": 0.9101123595505618, + "grad_norm": 2.6967382431030273, + "learning_rate": 3.7694929629217385e-05, + "loss": 0.6336, "step": 162 }, { - "epoch": 0.9146083274523642, + "epoch": 0.9101123595505618, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, - "eval_VitaminC_cosine_accuracy": 0.57421875, - "eval_VitaminC_cosine_accuracy_threshold": 0.8101799488067627, - "eval_VitaminC_cosine_ap": 0.5298515171639175, - "eval_VitaminC_cosine_f1": 0.6542553191489362, - "eval_VitaminC_cosine_f1_threshold": 0.345889687538147, - "eval_VitaminC_cosine_precision": 0.48616600790513836, + "eval_VitaminC_cosine_accuracy": 0.578125, + "eval_VitaminC_cosine_accuracy_threshold": 0.8077100515365601, + "eval_VitaminC_cosine_ap": 0.560345569715395, + "eval_VitaminC_cosine_f1": 0.6577540106951871, + "eval_VitaminC_cosine_f1_threshold": 0.25425243377685547, + "eval_VitaminC_cosine_precision": 0.4900398406374502, "eval_VitaminC_cosine_recall": 1.0, - "eval_VitaminC_dot_accuracy": 0.55078125, - "eval_VitaminC_dot_accuracy_threshold": 373.5804443359375, - "eval_VitaminC_dot_ap": 0.5310954683437364, - "eval_VitaminC_dot_f1": 0.6542553191489362, - "eval_VitaminC_dot_f1_threshold": 155.41326904296875, - "eval_VitaminC_dot_precision": 0.48616600790513836, - "eval_VitaminC_dot_recall": 1.0, - "eval_VitaminC_euclidean_accuracy": 0.57421875, - "eval_VitaminC_euclidean_accuracy_threshold": 13.60124683380127, - "eval_VitaminC_euclidean_ap": 0.5286057955992807, - "eval_VitaminC_euclidean_f1": 0.6577540106951871, - "eval_VitaminC_euclidean_f1_threshold": 22.904512405395508, - "eval_VitaminC_euclidean_precision": 0.4900398406374502, + "eval_VitaminC_dot_accuracy": 0.578125, + "eval_VitaminC_dot_accuracy_threshold": 331.0595703125, + "eval_VitaminC_dot_ap": 0.5499174718252662, + "eval_VitaminC_dot_f1": 0.6594594594594595, + "eval_VitaminC_dot_f1_threshold": 121.5512924194336, + "eval_VitaminC_dot_precision": 0.4939271255060729, + "eval_VitaminC_dot_recall": 0.991869918699187, + "eval_VitaminC_euclidean_accuracy": 0.58203125, + "eval_VitaminC_euclidean_accuracy_threshold": 13.087348937988281, + "eval_VitaminC_euclidean_ap": 0.5563988051869968, + "eval_VitaminC_euclidean_f1": 0.6595174262734584, + "eval_VitaminC_euclidean_f1_threshold": 24.085674285888672, + "eval_VitaminC_euclidean_precision": 0.492, "eval_VitaminC_euclidean_recall": 1.0, - "eval_VitaminC_manhattan_accuracy": 0.57421875, - "eval_VitaminC_manhattan_accuracy_threshold": 262.37322998046875, - "eval_VitaminC_manhattan_ap": 0.5253560845853567, - "eval_VitaminC_manhattan_f1": 0.6559999999999999, - "eval_VitaminC_manhattan_f1_threshold": 465.94549560546875, - "eval_VitaminC_manhattan_precision": 0.4880952380952381, + "eval_VitaminC_manhattan_accuracy": 0.58203125, + "eval_VitaminC_manhattan_accuracy_threshold": 339.80157470703125, + "eval_VitaminC_manhattan_ap": 0.5563552623581395, + "eval_VitaminC_manhattan_f1": 0.6595174262734584, + "eval_VitaminC_manhattan_f1_threshold": 513.6148681640625, + "eval_VitaminC_manhattan_precision": 0.492, "eval_VitaminC_manhattan_recall": 1.0, - "eval_VitaminC_max_accuracy": 0.57421875, - "eval_VitaminC_max_accuracy_threshold": 373.5804443359375, - "eval_VitaminC_max_ap": 0.5310954683437364, - "eval_VitaminC_max_f1": 0.6577540106951871, - "eval_VitaminC_max_f1_threshold": 465.94549560546875, - "eval_VitaminC_max_precision": 0.4900398406374502, + "eval_VitaminC_max_accuracy": 0.58203125, + "eval_VitaminC_max_accuracy_threshold": 339.80157470703125, + "eval_VitaminC_max_ap": 0.560345569715395, + "eval_VitaminC_max_f1": 0.6595174262734584, + "eval_VitaminC_max_f1_threshold": 513.6148681640625, + "eval_VitaminC_max_precision": 0.4939271255060729, "eval_VitaminC_max_recall": 1.0, - "eval_sequential_score": 0.5310954683437364, - "eval_sts-test_pearson_cosine": 0.8067612938723231, - "eval_sts-test_pearson_dot": 0.8217874837658639, - "eval_sts-test_pearson_euclidean": 0.827948115812785, - "eval_sts-test_pearson_manhattan": 0.8261527694953693, - "eval_sts-test_pearson_max": 0.827948115812785, - "eval_sts-test_spearman_cosine": 0.8547777638284432, - "eval_sts-test_spearman_dot": 0.8498786150097738, - "eval_sts-test_spearman_euclidean": 0.8373845860667446, - "eval_sts-test_spearman_manhattan": 0.8324507067477893, - "eval_sts-test_spearman_max": 0.8547777638284432, - "eval_vitaminc-pairs_loss": 2.776399612426758, - "eval_vitaminc-pairs_runtime": 1.4503, - "eval_vitaminc-pairs_samples_per_second": 74.467, - "eval_vitaminc-pairs_steps_per_second": 1.379, + "eval_sequential_score": 0.560345569715395, + "eval_sts-test_pearson_cosine": 0.8572374144718338, + "eval_sts-test_pearson_dot": 0.8574922304214377, + "eval_sts-test_pearson_euclidean": 0.8855365860444931, + "eval_sts-test_pearson_manhattan": 0.886788554614621, + "eval_sts-test_pearson_max": 0.886788554614621, + "eval_sts-test_spearman_cosine": 0.8965259734503722, + "eval_sts-test_spearman_dot": 0.8784832995286892, + "eval_sts-test_spearman_euclidean": 0.8855371715506584, + "eval_sts-test_spearman_manhattan": 0.8887295132057965, + "eval_sts-test_spearman_max": 0.8965259734503722, + "eval_vitaminc-pairs_loss": 2.376066207885742, + "eval_vitaminc-pairs_runtime": 1.4378, + "eval_vitaminc-pairs_samples_per_second": 75.117, + "eval_vitaminc-pairs_steps_per_second": 1.391, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_negation-triplets_loss": 2.005451202392578, - "eval_negation-triplets_runtime": 0.2981, - "eval_negation-triplets_samples_per_second": 214.709, - "eval_negation-triplets_steps_per_second": 3.355, + "epoch": 0.9101123595505618, + "eval_negation-triplets_loss": 1.63050377368927, + "eval_negation-triplets_runtime": 0.3004, + "eval_negation-triplets_samples_per_second": 213.075, + "eval_negation-triplets_steps_per_second": 3.329, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_scitail-pairs-pos_loss": 0.19877880811691284, - "eval_scitail-pairs-pos_runtime": 0.3623, - "eval_scitail-pairs-pos_samples_per_second": 149.043, - "eval_scitail-pairs-pos_steps_per_second": 2.76, + "epoch": 0.9101123595505618, + "eval_scitail-pairs-pos_loss": 0.10006655752658844, + "eval_scitail-pairs-pos_runtime": 0.365, + "eval_scitail-pairs-pos_samples_per_second": 147.952, + "eval_scitail-pairs-pos_steps_per_second": 2.74, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_xsum-pairs_loss": 0.5586928725242615, - "eval_xsum-pairs_runtime": 3.1466, - "eval_xsum-pairs_samples_per_second": 40.679, - "eval_xsum-pairs_steps_per_second": 0.636, + "epoch": 0.9101123595505618, + "eval_xsum-pairs_loss": 0.0762382224202156, + "eval_xsum-pairs_runtime": 3.1586, + "eval_xsum-pairs_samples_per_second": 40.525, + "eval_xsum-pairs_steps_per_second": 0.633, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_sciq_pairs_loss": 0.06038254499435425, - "eval_sciq_pairs_runtime": 3.4092, - "eval_sciq_pairs_samples_per_second": 37.545, - "eval_sciq_pairs_steps_per_second": 0.587, + "epoch": 0.9101123595505618, + "eval_sciq_pairs_loss": 0.032084282487630844, + "eval_sciq_pairs_runtime": 3.2586, + "eval_sciq_pairs_samples_per_second": 39.28, + "eval_sciq_pairs_steps_per_second": 0.614, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_qasc_pairs_loss": 0.49434012174606323, - "eval_qasc_pairs_runtime": 0.6342, - "eval_qasc_pairs_samples_per_second": 201.832, - "eval_qasc_pairs_steps_per_second": 3.154, + "epoch": 0.9101123595505618, + "eval_qasc_pairs_loss": 0.10310036689043045, + "eval_qasc_pairs_runtime": 0.6197, + "eval_qasc_pairs_samples_per_second": 206.545, + "eval_qasc_pairs_steps_per_second": 3.227, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_openbookqa_pairs_loss": 1.1903400421142578, - "eval_openbookqa_pairs_runtime": 0.5754, - "eval_openbookqa_pairs_samples_per_second": 222.449, - "eval_openbookqa_pairs_steps_per_second": 3.476, + "epoch": 0.9101123595505618, + "eval_openbookqa_pairs_loss": 0.6995278000831604, + "eval_openbookqa_pairs_runtime": 0.575, + "eval_openbookqa_pairs_samples_per_second": 222.624, + "eval_openbookqa_pairs_steps_per_second": 3.478, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_msmarco_pairs_loss": 0.8656420707702637, - "eval_msmarco_pairs_runtime": 1.2858, - "eval_msmarco_pairs_samples_per_second": 99.547, + "epoch": 0.9101123595505618, + "eval_msmarco_pairs_loss": 0.37089064717292786, + "eval_msmarco_pairs_runtime": 1.2866, + "eval_msmarco_pairs_samples_per_second": 99.49, "eval_msmarco_pairs_steps_per_second": 1.555, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_nq_pairs_loss": 1.1553651094436646, - "eval_nq_pairs_runtime": 2.3754, - "eval_nq_pairs_samples_per_second": 53.885, - "eval_nq_pairs_steps_per_second": 0.842, + "epoch": 0.9101123595505618, + "eval_nq_pairs_loss": 0.39078566431999207, + "eval_nq_pairs_runtime": 2.3928, + "eval_nq_pairs_samples_per_second": 53.493, + "eval_nq_pairs_steps_per_second": 0.836, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_trivia_pairs_loss": 1.2928619384765625, - "eval_trivia_pairs_runtime": 4.4084, - "eval_trivia_pairs_samples_per_second": 29.035, - "eval_trivia_pairs_steps_per_second": 0.454, + "epoch": 0.9101123595505618, + "eval_trivia_pairs_loss": 0.9003691077232361, + "eval_trivia_pairs_runtime": 4.479, + "eval_trivia_pairs_samples_per_second": 28.578, + "eval_trivia_pairs_steps_per_second": 0.447, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_gooaq_pairs_loss": 1.1580811738967896, - "eval_gooaq_pairs_runtime": 0.8731, - "eval_gooaq_pairs_samples_per_second": 146.607, - "eval_gooaq_pairs_steps_per_second": 2.291, + "epoch": 0.9101123595505618, + "eval_gooaq_pairs_loss": 0.5496390461921692, + "eval_gooaq_pairs_runtime": 0.8735, + "eval_gooaq_pairs_samples_per_second": 146.532, + "eval_gooaq_pairs_steps_per_second": 2.29, "step": 162 }, { - "epoch": 0.9146083274523642, - "eval_paws-pos_loss": 0.052534349262714386, - "eval_paws-pos_runtime": 0.6835, - "eval_paws-pos_samples_per_second": 187.258, + "epoch": 0.9101123595505618, + "eval_paws-pos_loss": 0.024540428072214127, + "eval_paws-pos_runtime": 0.6836, + "eval_paws-pos_samples_per_second": 187.246, "eval_paws-pos_steps_per_second": 2.926, "step": 162 }, { - "epoch": 0.9315455187014821, - "grad_norm": 4.7817864418029785, - "learning_rate": 3.7500000000000003e-05, - "loss": 1.339, + "epoch": 0.9269662921348315, + "grad_norm": 3.270443916320801, + "learning_rate": 3.7525961967101216e-05, + "loss": 0.5395, "step": 165 }, { - "epoch": 0.9484827099505999, - "grad_norm": 4.000570774078369, - "learning_rate": 3.818181818181819e-05, - "loss": 1.1535, + "epoch": 0.9438202247191011, + "grad_norm": 2.608531951904297, + "learning_rate": 3.7351572532692915e-05, + "loss": 0.6874, "step": 168 }, { - "epoch": 0.9654199011997178, - "grad_norm": 3.5971670150756836, - "learning_rate": 3.8863636363636364e-05, - "loss": 1.1643, + "epoch": 0.9606741573033708, + "grad_norm": 2.9995415210723877, + "learning_rate": 3.717183680920136e-05, + "loss": 0.5614, "step": 171 }, { - "epoch": 0.9823570924488356, - "grad_norm": 3.6582131385803223, - "learning_rate": 3.954545454545455e-05, - "loss": 1.2221, + "epoch": 0.9775280898876404, + "grad_norm": 3.42486572265625, + "learning_rate": 3.698683259393809e-05, + "loss": 0.5812, "step": 174 }, { - "epoch": 0.9992942836979535, - "grad_norm": 4.0953898429870605, - "learning_rate": 3.9999477905707075e-05, - "loss": 1.0974, + "epoch": 0.9943820224719101, + "grad_norm": 1.8073548078536987, + "learning_rate": 3.679663996464331e-05, + "loss": 0.427, "step": 177 }, { - "epoch": 1.0162314749470713, - "grad_norm": 4.092026233673096, - "learning_rate": 3.999164730903481e-05, - "loss": 1.0984, + "epoch": 1.0112359550561798, + "grad_norm": 3.2385106086730957, + "learning_rate": 3.660134124482482e-05, + "loss": 0.4603, "step": 180 }, { - "epoch": 1.0331686661961892, - "grad_norm": 3.6480906009674072, - "learning_rate": 3.997442539262898e-05, - "loss": 1.0543, + "epoch": 1.0280898876404494, + "grad_norm": 2.8062069416046143, + "learning_rate": 3.640102096812488e-05, + "loss": 0.6493, "step": 183 }, { - "epoch": 1.050105857445307, - "grad_norm": 3.433056592941284, - "learning_rate": 3.99478242943326e-05, - "loss": 1.0994, + "epoch": 1.0449438202247192, + "grad_norm": 2.6892104148864746, + "learning_rate": 3.619576584173041e-05, + "loss": 0.6646, "step": 186 }, { - "epoch": 1.067043048694425, - "grad_norm": 3.507981777191162, - "learning_rate": 3.991186276234698e-05, - "loss": 1.0621, + "epoch": 1.0617977528089888, + "grad_norm": 2.6583099365234375, + "learning_rate": 3.598566470884244e-05, + "loss": 0.7239, "step": 189 }, { - "epoch": 1.067043048694425, + "epoch": 1.0617977528089888, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, - "eval_VitaminC_cosine_accuracy": 0.578125, - "eval_VitaminC_cosine_accuracy_threshold": 0.7840081453323364, - "eval_VitaminC_cosine_ap": 0.5400770399437144, - "eval_VitaminC_cosine_f1": 0.6577540106951871, - "eval_VitaminC_cosine_f1_threshold": 0.39448243379592896, - "eval_VitaminC_cosine_precision": 0.4900398406374502, + "eval_VitaminC_cosine_accuracy": 0.58984375, + "eval_VitaminC_cosine_accuracy_threshold": 0.8360881209373474, + "eval_VitaminC_cosine_ap": 0.5601848253252508, + "eval_VitaminC_cosine_f1": 0.6559999999999999, + "eval_VitaminC_cosine_f1_threshold": 0.26484909653663635, + "eval_VitaminC_cosine_precision": 0.4880952380952381, "eval_VitaminC_cosine_recall": 1.0, - "eval_VitaminC_dot_accuracy": 0.5625, - "eval_VitaminC_dot_accuracy_threshold": 323.20281982421875, - "eval_VitaminC_dot_ap": 0.5420016101916201, - "eval_VitaminC_dot_f1": 0.6575342465753424, - "eval_VitaminC_dot_f1_threshold": 198.04354858398438, - "eval_VitaminC_dot_precision": 0.49586776859504134, - "eval_VitaminC_dot_recall": 0.975609756097561, - "eval_VitaminC_euclidean_accuracy": 0.5859375, - "eval_VitaminC_euclidean_accuracy_threshold": 13.84214973449707, - "eval_VitaminC_euclidean_ap": 0.5392157650683609, + "eval_VitaminC_dot_accuracy": 0.58203125, + "eval_VitaminC_dot_accuracy_threshold": 314.279052734375, + "eval_VitaminC_dot_ap": 0.5513292673695236, + "eval_VitaminC_dot_f1": 0.6558265582655827, + "eval_VitaminC_dot_f1_threshold": 126.1304931640625, + "eval_VitaminC_dot_precision": 0.491869918699187, + "eval_VitaminC_dot_recall": 0.983739837398374, + "eval_VitaminC_euclidean_accuracy": 0.578125, + "eval_VitaminC_euclidean_accuracy_threshold": 15.01893424987793, + "eval_VitaminC_euclidean_ap": 0.5549132214851141, "eval_VitaminC_euclidean_f1": 0.6577540106951871, - "eval_VitaminC_euclidean_f1_threshold": 22.595678329467773, + "eval_VitaminC_euclidean_f1_threshold": 23.76571273803711, "eval_VitaminC_euclidean_precision": 0.4900398406374502, "eval_VitaminC_euclidean_recall": 1.0, - "eval_VitaminC_manhattan_accuracy": 0.5703125, - "eval_VitaminC_manhattan_accuracy_threshold": 275.1253356933594, - "eval_VitaminC_manhattan_ap": 0.5341380380767263, - "eval_VitaminC_manhattan_f1": 0.6576819407008085, - "eval_VitaminC_manhattan_f1_threshold": 457.04986572265625, - "eval_VitaminC_manhattan_precision": 0.49193548387096775, - "eval_VitaminC_manhattan_recall": 0.991869918699187, - "eval_VitaminC_max_accuracy": 0.5859375, - "eval_VitaminC_max_accuracy_threshold": 323.20281982421875, - "eval_VitaminC_max_ap": 0.5420016101916201, + "eval_VitaminC_manhattan_accuracy": 0.57421875, + "eval_VitaminC_manhattan_accuracy_threshold": 244.02972412109375, + "eval_VitaminC_manhattan_ap": 0.5562338006363409, + "eval_VitaminC_manhattan_f1": 0.6577540106951871, + "eval_VitaminC_manhattan_f1_threshold": 498.5762634277344, + "eval_VitaminC_manhattan_precision": 0.4900398406374502, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.58984375, + "eval_VitaminC_max_accuracy_threshold": 314.279052734375, + "eval_VitaminC_max_ap": 0.5601848253252508, "eval_VitaminC_max_f1": 0.6577540106951871, - "eval_VitaminC_max_f1_threshold": 457.04986572265625, - "eval_VitaminC_max_precision": 0.49586776859504134, + "eval_VitaminC_max_f1_threshold": 498.5762634277344, + "eval_VitaminC_max_precision": 0.491869918699187, "eval_VitaminC_max_recall": 1.0, - "eval_sequential_score": 0.5420016101916201, - "eval_sts-test_pearson_cosine": 0.8193410747427454, - "eval_sts-test_pearson_dot": 0.8275444476338831, - "eval_sts-test_pearson_euclidean": 0.8464528142983967, - "eval_sts-test_pearson_manhattan": 0.8440476980962159, - "eval_sts-test_pearson_max": 0.8464528142983967, - "eval_sts-test_spearman_cosine": 0.8680272706642878, - "eval_sts-test_spearman_dot": 0.8555529342729671, - "eval_sts-test_spearman_euclidean": 0.8542457068859202, - "eval_sts-test_spearman_manhattan": 0.8510265117511795, - "eval_sts-test_spearman_max": 0.8680272706642878, - "eval_vitaminc-pairs_loss": 2.6755428314208984, - "eval_vitaminc-pairs_runtime": 1.4509, - "eval_vitaminc-pairs_samples_per_second": 74.437, - "eval_vitaminc-pairs_steps_per_second": 1.378, + "eval_sequential_score": 0.5601848253252508, + "eval_sts-test_pearson_cosine": 0.854968805652805, + "eval_sts-test_pearson_dot": 0.8534110565503882, + "eval_sts-test_pearson_euclidean": 0.8853384519331917, + "eval_sts-test_pearson_manhattan": 0.8864271118397893, + "eval_sts-test_pearson_max": 0.8864271118397893, + "eval_sts-test_spearman_cosine": 0.8956917253507228, + "eval_sts-test_spearman_dot": 0.877726389450295, + "eval_sts-test_spearman_euclidean": 0.8875533307992096, + "eval_sts-test_spearman_manhattan": 0.890112288382125, + "eval_sts-test_spearman_max": 0.8956917253507228, + "eval_vitaminc-pairs_loss": 2.3751792907714844, + "eval_vitaminc-pairs_runtime": 1.4524, + "eval_vitaminc-pairs_samples_per_second": 74.358, + "eval_vitaminc-pairs_steps_per_second": 1.377, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_negation-triplets_loss": 1.9071491956710815, - "eval_negation-triplets_runtime": 0.3051, - "eval_negation-triplets_samples_per_second": 209.756, - "eval_negation-triplets_steps_per_second": 3.277, + "epoch": 1.0617977528089888, + "eval_negation-triplets_loss": 1.580323338508606, + "eval_negation-triplets_runtime": 0.301, + "eval_negation-triplets_samples_per_second": 212.649, + "eval_negation-triplets_steps_per_second": 3.323, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_scitail-pairs-pos_loss": 0.18539850413799286, - "eval_scitail-pairs-pos_runtime": 0.4199, - "eval_scitail-pairs-pos_samples_per_second": 128.604, - "eval_scitail-pairs-pos_steps_per_second": 2.382, + "epoch": 1.0617977528089888, + "eval_scitail-pairs-pos_loss": 0.10438331216573715, + "eval_scitail-pairs-pos_runtime": 0.363, + "eval_scitail-pairs-pos_samples_per_second": 148.774, + "eval_scitail-pairs-pos_steps_per_second": 2.755, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_xsum-pairs_loss": 0.38365328311920166, - "eval_xsum-pairs_runtime": 3.1907, - "eval_xsum-pairs_samples_per_second": 40.116, - "eval_xsum-pairs_steps_per_second": 0.627, + "epoch": 1.0617977528089888, + "eval_xsum-pairs_loss": 0.06599828600883484, + "eval_xsum-pairs_runtime": 3.1538, + "eval_xsum-pairs_samples_per_second": 40.586, + "eval_xsum-pairs_steps_per_second": 0.634, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_sciq_pairs_loss": 0.05558515340089798, - "eval_sciq_pairs_runtime": 3.2891, - "eval_sciq_pairs_samples_per_second": 38.917, - "eval_sciq_pairs_steps_per_second": 0.608, + "epoch": 1.0617977528089888, + "eval_sciq_pairs_loss": 0.033071305602788925, + "eval_sciq_pairs_runtime": 3.2631, + "eval_sciq_pairs_samples_per_second": 39.227, + "eval_sciq_pairs_steps_per_second": 0.613, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_qasc_pairs_loss": 0.40469691157341003, - "eval_qasc_pairs_runtime": 0.6267, - "eval_qasc_pairs_samples_per_second": 204.245, - "eval_qasc_pairs_steps_per_second": 3.191, + "epoch": 1.0617977528089888, + "eval_qasc_pairs_loss": 0.10076003521680832, + "eval_qasc_pairs_runtime": 0.6385, + "eval_qasc_pairs_samples_per_second": 200.46, + "eval_qasc_pairs_steps_per_second": 3.132, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_openbookqa_pairs_loss": 1.0837312936782837, - "eval_openbookqa_pairs_runtime": 0.5765, - "eval_openbookqa_pairs_samples_per_second": 222.02, - "eval_openbookqa_pairs_steps_per_second": 3.469, + "epoch": 1.0617977528089888, + "eval_openbookqa_pairs_loss": 0.6888580918312073, + "eval_openbookqa_pairs_runtime": 0.6067, + "eval_openbookqa_pairs_samples_per_second": 210.99, + "eval_openbookqa_pairs_steps_per_second": 3.297, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_msmarco_pairs_loss": 0.6897398233413696, - "eval_msmarco_pairs_runtime": 1.2918, - "eval_msmarco_pairs_samples_per_second": 99.089, - "eval_msmarco_pairs_steps_per_second": 1.548, + "epoch": 1.0617977528089888, + "eval_msmarco_pairs_loss": 0.335863322019577, + "eval_msmarco_pairs_runtime": 1.3206, + "eval_msmarco_pairs_samples_per_second": 96.922, + "eval_msmarco_pairs_steps_per_second": 1.514, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_nq_pairs_loss": 0.9603796601295471, - "eval_nq_pairs_runtime": 2.3975, - "eval_nq_pairs_samples_per_second": 53.39, + "epoch": 1.0617977528089888, + "eval_nq_pairs_loss": 0.3690747916698456, + "eval_nq_pairs_runtime": 2.3983, + "eval_nq_pairs_samples_per_second": 53.371, "eval_nq_pairs_steps_per_second": 0.834, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_trivia_pairs_loss": 1.200446605682373, - "eval_trivia_pairs_runtime": 4.4582, - "eval_trivia_pairs_samples_per_second": 28.711, - "eval_trivia_pairs_steps_per_second": 0.449, + "epoch": 1.0617977528089888, + "eval_trivia_pairs_loss": 0.881881594657898, + "eval_trivia_pairs_runtime": 4.4261, + "eval_trivia_pairs_samples_per_second": 28.919, + "eval_trivia_pairs_steps_per_second": 0.452, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_gooaq_pairs_loss": 1.0353316068649292, - "eval_gooaq_pairs_runtime": 0.8765, - "eval_gooaq_pairs_samples_per_second": 146.042, - "eval_gooaq_pairs_steps_per_second": 2.282, + "epoch": 1.0617977528089888, + "eval_gooaq_pairs_loss": 0.5453925728797913, + "eval_gooaq_pairs_runtime": 0.8775, + "eval_gooaq_pairs_samples_per_second": 145.871, + "eval_gooaq_pairs_steps_per_second": 2.279, "step": 189 }, { - "epoch": 1.067043048694425, - "eval_paws-pos_loss": 0.042069558054208755, - "eval_paws-pos_runtime": 0.6909, - "eval_paws-pos_samples_per_second": 185.263, - "eval_paws-pos_steps_per_second": 2.895, + "epoch": 1.0617977528089888, + "eval_paws-pos_loss": 0.024841103702783585, + "eval_paws-pos_runtime": 0.6851, + "eval_paws-pos_samples_per_second": 186.83, + "eval_paws-pos_steps_per_second": 2.919, "step": 189 }, { - "epoch": 1.0839802399435428, - "grad_norm": 2.979419469833374, - "learning_rate": 3.986656614201813e-05, - "loss": 0.8724, + "epoch": 1.0786516853932584, + "grad_norm": 2.942894220352173, + "learning_rate": 3.5770808510220957e-05, + "loss": 0.7593, "step": 192 }, { - "epoch": 1.1009174311926606, - "grad_norm": 2.835219144821167, - "learning_rate": 3.981196635797361e-05, - "loss": 0.9381, + "epoch": 1.095505617977528, + "grad_norm": 2.636993169784546, + "learning_rate": 3.5551290244821855e-05, + "loss": 0.6877, "step": 195 }, { - "epoch": 1.1178546224417785, - "grad_norm": 3.6650869846343994, - "learning_rate": 3.974810189162238e-05, - "loss": 0.9617, + "epoch": 1.1123595505617978, + "grad_norm": 2.7987968921661377, + "learning_rate": 3.5327204929543e-05, + "loss": 0.5482, "step": 198 }, { - "epoch": 1.1347918136908963, - "grad_norm": 4.188896656036377, - "learning_rate": 3.967501775403343e-05, - "loss": 1.0139, + "epoch": 1.1292134831460674, + "grad_norm": 2.3775548934936523, + "learning_rate": 3.509864955809687e-05, + "loss": 0.6047, "step": 201 }, { - "epoch": 1.1517290049400142, - "grad_norm": 3.1624915599823, - "learning_rate": 3.959276545421244e-05, - "loss": 1.1073, + "epoch": 1.146067415730337, + "grad_norm": 1.982017993927002, + "learning_rate": 3.4865723059027493e-05, + "loss": 0.4358, "step": 204 }, { - "epoch": 1.168666196189132, - "grad_norm": 3.245002508163452, - "learning_rate": 3.950140296279871e-05, - "loss": 0.8365, + "epoch": 1.1629213483146068, + "grad_norm": 1.5380574464797974, + "learning_rate": 3.462852625288999e-05, + "loss": 0.3343, "step": 207 }, { - "epoch": 1.18560338743825, - "grad_norm": 4.376185894012451, - "learning_rate": 3.9400994671208e-05, - "loss": 1.1012, + "epoch": 1.1797752808988764, + "grad_norm": 3.152486562728882, + "learning_rate": 3.438716180861106e-05, + "loss": 0.5624, "step": 210 }, { - "epoch": 1.2025405786873677, - "grad_norm": 3.236583948135376, - "learning_rate": 3.9291611346250066e-05, - "loss": 1.0016, + "epoch": 1.196629213483146, + "grad_norm": 2.410505533218384, + "learning_rate": 3.414173419904956e-05, + "loss": 0.4578, "step": 213 } ], "logging_steps": 3, - "max_steps": 531, + "max_steps": 534, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 107, @@ -1553,7 +1553,7 @@ } }, "total_flos": 0.0, - "train_batch_size": 160, + "train_batch_size": 320, "trial_name": null, "trial_params": null }