|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.601123595505618, |
|
"eval_steps": 27, |
|
"global_step": 107, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016853932584269662, |
|
"grad_norm": 2.9885776042938232, |
|
"learning_rate": 1.4814814814814815e-06, |
|
"loss": 0.6012, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.033707865168539325, |
|
"grad_norm": 3.184929132461548, |
|
"learning_rate": 2.962962962962963e-06, |
|
"loss": 0.7573, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.05056179775280899, |
|
"grad_norm": 3.256159782409668, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.9212, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06741573033707865, |
|
"grad_norm": 2.833339214324951, |
|
"learning_rate": 5.925925925925926e-06, |
|
"loss": 0.6117, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08426966292134831, |
|
"grad_norm": 3.08292818069458, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.8545, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"grad_norm": 2.317431688308716, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.6515, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.11797752808988764, |
|
"grad_norm": 2.9611644744873047, |
|
"learning_rate": 1.037037037037037e-05, |
|
"loss": 0.7159, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1348314606741573, |
|
"grad_norm": 2.698537826538086, |
|
"learning_rate": 1.1851851851851852e-05, |
|
"loss": 0.7019, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"grad_norm": 2.222154378890991, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.4411, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.578125, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7817381620407104, |
|
"eval_VitaminC_cosine_ap": 0.5507972943944112, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.28573715686798096, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5703125, |
|
"eval_VitaminC_dot_accuracy_threshold": 316.7283020019531, |
|
"eval_VitaminC_dot_ap": 0.5511866185449577, |
|
"eval_VitaminC_dot_f1": 0.6577540106951871, |
|
"eval_VitaminC_dot_f1_threshold": 106.75863647460938, |
|
"eval_VitaminC_dot_precision": 0.4900398406374502, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.578125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.298419952392578, |
|
"eval_VitaminC_euclidean_ap": 0.5476323986807207, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.83933448791504, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.578125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 279.69085693359375, |
|
"eval_VitaminC_manhattan_ap": 0.5412538781107805, |
|
"eval_VitaminC_manhattan_f1": 0.6577540106951871, |
|
"eval_VitaminC_manhattan_f1_threshold": 499.8836364746094, |
|
"eval_VitaminC_manhattan_precision": 0.4900398406374502, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.578125, |
|
"eval_VitaminC_max_accuracy_threshold": 316.7283020019531, |
|
"eval_VitaminC_max_ap": 0.5511866185449577, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 499.8836364746094, |
|
"eval_VitaminC_max_precision": 0.492, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5511866185449577, |
|
"eval_sts-test_pearson_cosine": 0.8488243436029344, |
|
"eval_sts-test_pearson_dot": 0.8480167969551653, |
|
"eval_sts-test_pearson_euclidean": 0.8800283985117625, |
|
"eval_sts-test_pearson_manhattan": 0.880588311422627, |
|
"eval_sts-test_pearson_max": 0.880588311422627, |
|
"eval_sts-test_spearman_cosine": 0.8905659331642088, |
|
"eval_sts-test_spearman_dot": 0.8692084657204004, |
|
"eval_sts-test_spearman_euclidean": 0.8809566840232712, |
|
"eval_sts-test_spearman_manhattan": 0.883434007028195, |
|
"eval_sts-test_spearman_max": 0.8905659331642088, |
|
"eval_vitaminc-pairs_loss": 2.465860366821289, |
|
"eval_vitaminc-pairs_runtime": 1.4615, |
|
"eval_vitaminc-pairs_samples_per_second": 73.899, |
|
"eval_vitaminc-pairs_steps_per_second": 1.368, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_negation-triplets_loss": 1.7310789823532104, |
|
"eval_negation-triplets_runtime": 0.3009, |
|
"eval_negation-triplets_samples_per_second": 212.692, |
|
"eval_negation-triplets_steps_per_second": 3.323, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_scitail-pairs-pos_loss": 0.1150394082069397, |
|
"eval_scitail-pairs-pos_runtime": 0.3739, |
|
"eval_scitail-pairs-pos_samples_per_second": 144.431, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.675, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_xsum-pairs_loss": 0.11168850213289261, |
|
"eval_xsum-pairs_runtime": 3.1697, |
|
"eval_xsum-pairs_samples_per_second": 40.382, |
|
"eval_xsum-pairs_steps_per_second": 0.631, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_sciq_pairs_loss": 0.03450964391231537, |
|
"eval_sciq_pairs_runtime": 3.3283, |
|
"eval_sciq_pairs_samples_per_second": 38.459, |
|
"eval_sciq_pairs_steps_per_second": 0.601, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_qasc_pairs_loss": 0.11095743626356125, |
|
"eval_qasc_pairs_runtime": 0.6261, |
|
"eval_qasc_pairs_samples_per_second": 204.45, |
|
"eval_qasc_pairs_steps_per_second": 3.195, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_openbookqa_pairs_loss": 0.7092063426971436, |
|
"eval_openbookqa_pairs_runtime": 0.5866, |
|
"eval_openbookqa_pairs_samples_per_second": 218.19, |
|
"eval_openbookqa_pairs_steps_per_second": 3.409, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_msmarco_pairs_loss": 0.3955218493938446, |
|
"eval_msmarco_pairs_runtime": 1.2942, |
|
"eval_msmarco_pairs_samples_per_second": 98.902, |
|
"eval_msmarco_pairs_steps_per_second": 1.545, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_nq_pairs_loss": 0.42051073908805847, |
|
"eval_nq_pairs_runtime": 2.3875, |
|
"eval_nq_pairs_samples_per_second": 53.612, |
|
"eval_nq_pairs_steps_per_second": 0.838, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_trivia_pairs_loss": 0.93178790807724, |
|
"eval_trivia_pairs_runtime": 4.4363, |
|
"eval_trivia_pairs_samples_per_second": 28.853, |
|
"eval_trivia_pairs_steps_per_second": 0.451, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_gooaq_pairs_loss": 0.6505913138389587, |
|
"eval_gooaq_pairs_runtime": 0.8826, |
|
"eval_gooaq_pairs_samples_per_second": 145.027, |
|
"eval_gooaq_pairs_steps_per_second": 2.266, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_paws-pos_loss": 0.024931101128458977, |
|
"eval_paws-pos_runtime": 0.6852, |
|
"eval_paws-pos_samples_per_second": 186.805, |
|
"eval_paws-pos_steps_per_second": 2.919, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.16853932584269662, |
|
"grad_norm": 2.826900005340576, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.5125, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1853932584269663, |
|
"grad_norm": 2.9938910007476807, |
|
"learning_rate": 1.6296296296296297e-05, |
|
"loss": 0.6885, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"grad_norm": 3.3046395778656006, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.6435, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.21910112359550563, |
|
"grad_norm": 2.4184651374816895, |
|
"learning_rate": 1.925925925925926e-05, |
|
"loss": 0.753, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.23595505617977527, |
|
"grad_norm": 2.9905433654785156, |
|
"learning_rate": 2.074074074074074e-05, |
|
"loss": 0.7427, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.25280898876404495, |
|
"grad_norm": 2.745820999145508, |
|
"learning_rate": 2.2222222222222227e-05, |
|
"loss": 0.5083, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2696629213483146, |
|
"grad_norm": 2.6370577812194824, |
|
"learning_rate": 2.3703703703703703e-05, |
|
"loss": 0.7454, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.28651685393258425, |
|
"grad_norm": 3.044011116027832, |
|
"learning_rate": 2.5185185185185187e-05, |
|
"loss": 0.8356, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"grad_norm": 3.718804121017456, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.8864, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.57421875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7991844415664673, |
|
"eval_VitaminC_cosine_ap": 0.5485498837322925, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3160865008831024, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.578125, |
|
"eval_VitaminC_dot_accuracy_threshold": 327.0416564941406, |
|
"eval_VitaminC_dot_ap": 0.54993134882601, |
|
"eval_VitaminC_dot_f1": 0.6595174262734584, |
|
"eval_VitaminC_dot_f1_threshold": 117.44181060791016, |
|
"eval_VitaminC_dot_precision": 0.492, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.57421875, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.019258499145508, |
|
"eval_VitaminC_euclidean_ap": 0.5435066540334542, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.688644409179688, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.57421875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 283.876220703125, |
|
"eval_VitaminC_manhattan_ap": 0.5416615397828658, |
|
"eval_VitaminC_manhattan_f1": 0.6559999999999999, |
|
"eval_VitaminC_manhattan_f1_threshold": 514.0216064453125, |
|
"eval_VitaminC_manhattan_precision": 0.4880952380952381, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.578125, |
|
"eval_VitaminC_max_accuracy_threshold": 327.0416564941406, |
|
"eval_VitaminC_max_ap": 0.54993134882601, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 514.0216064453125, |
|
"eval_VitaminC_max_precision": 0.492, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.54993134882601, |
|
"eval_sts-test_pearson_cosine": 0.8452615878553369, |
|
"eval_sts-test_pearson_dot": 0.8404858620687519, |
|
"eval_sts-test_pearson_euclidean": 0.8780527810910925, |
|
"eval_sts-test_pearson_manhattan": 0.878916157345712, |
|
"eval_sts-test_pearson_max": 0.878916157345712, |
|
"eval_sts-test_spearman_cosine": 0.8876915367075635, |
|
"eval_sts-test_spearman_dot": 0.8608104875327304, |
|
"eval_sts-test_spearman_euclidean": 0.8804138856889071, |
|
"eval_sts-test_spearman_manhattan": 0.8822803815444743, |
|
"eval_sts-test_spearman_max": 0.8876915367075635, |
|
"eval_vitaminc-pairs_loss": 2.454524040222168, |
|
"eval_vitaminc-pairs_runtime": 1.4583, |
|
"eval_vitaminc-pairs_samples_per_second": 74.057, |
|
"eval_vitaminc-pairs_steps_per_second": 1.371, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_negation-triplets_loss": 1.7277792692184448, |
|
"eval_negation-triplets_runtime": 0.3027, |
|
"eval_negation-triplets_samples_per_second": 211.436, |
|
"eval_negation-triplets_steps_per_second": 3.304, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_scitail-pairs-pos_loss": 0.11168555170297623, |
|
"eval_scitail-pairs-pos_runtime": 0.3726, |
|
"eval_scitail-pairs-pos_samples_per_second": 144.911, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.684, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_xsum-pairs_loss": 0.10087604075670242, |
|
"eval_xsum-pairs_runtime": 3.1701, |
|
"eval_xsum-pairs_samples_per_second": 40.377, |
|
"eval_xsum-pairs_steps_per_second": 0.631, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_sciq_pairs_loss": 0.03466618433594704, |
|
"eval_sciq_pairs_runtime": 3.3778, |
|
"eval_sciq_pairs_samples_per_second": 37.895, |
|
"eval_sciq_pairs_steps_per_second": 0.592, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_qasc_pairs_loss": 0.10551701486110687, |
|
"eval_qasc_pairs_runtime": 0.6271, |
|
"eval_qasc_pairs_samples_per_second": 204.125, |
|
"eval_qasc_pairs_steps_per_second": 3.189, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_openbookqa_pairs_loss": 0.7239958643913269, |
|
"eval_openbookqa_pairs_runtime": 0.5811, |
|
"eval_openbookqa_pairs_samples_per_second": 220.255, |
|
"eval_openbookqa_pairs_steps_per_second": 3.441, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_msmarco_pairs_loss": 0.3808779716491699, |
|
"eval_msmarco_pairs_runtime": 1.2919, |
|
"eval_msmarco_pairs_samples_per_second": 99.082, |
|
"eval_msmarco_pairs_steps_per_second": 1.548, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_nq_pairs_loss": 0.44170400500297546, |
|
"eval_nq_pairs_runtime": 2.3835, |
|
"eval_nq_pairs_samples_per_second": 53.703, |
|
"eval_nq_pairs_steps_per_second": 0.839, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_trivia_pairs_loss": 0.9158428907394409, |
|
"eval_trivia_pairs_runtime": 4.4326, |
|
"eval_trivia_pairs_samples_per_second": 28.877, |
|
"eval_trivia_pairs_steps_per_second": 0.451, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_gooaq_pairs_loss": 0.6208247542381287, |
|
"eval_gooaq_pairs_runtime": 0.8797, |
|
"eval_gooaq_pairs_samples_per_second": 145.497, |
|
"eval_gooaq_pairs_steps_per_second": 2.273, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_paws-pos_loss": 0.02517784759402275, |
|
"eval_paws-pos_runtime": 0.694, |
|
"eval_paws-pos_samples_per_second": 184.442, |
|
"eval_paws-pos_steps_per_second": 2.882, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3202247191011236, |
|
"grad_norm": 2.173736572265625, |
|
"learning_rate": 2.814814814814815e-05, |
|
"loss": 0.6015, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.33707865168539325, |
|
"grad_norm": 3.8964712619781494, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.9482, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3539325842696629, |
|
"grad_norm": 2.659498691558838, |
|
"learning_rate": 3.111111111111112e-05, |
|
"loss": 0.5404, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.3707865168539326, |
|
"grad_norm": 3.3499844074249268, |
|
"learning_rate": 3.259259259259259e-05, |
|
"loss": 0.805, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.38764044943820225, |
|
"grad_norm": 3.770142078399658, |
|
"learning_rate": 3.4074074074074077e-05, |
|
"loss": 0.7184, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"grad_norm": 3.740880012512207, |
|
"learning_rate": 3.555555555555555e-05, |
|
"loss": 0.8708, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.42134831460674155, |
|
"grad_norm": 2.981106996536255, |
|
"learning_rate": 3.703703703703704e-05, |
|
"loss": 0.8327, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.43820224719101125, |
|
"grad_norm": 2.3469011783599854, |
|
"learning_rate": 3.851851851851852e-05, |
|
"loss": 0.5025, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"grad_norm": 3.296035051345825, |
|
"learning_rate": 4e-05, |
|
"loss": 0.6517, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.578125, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7859437465667725, |
|
"eval_VitaminC_cosine_ap": 0.5557444337961499, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3211573362350464, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.578125, |
|
"eval_VitaminC_dot_accuracy_threshold": 315.9444580078125, |
|
"eval_VitaminC_dot_ap": 0.5539524528858992, |
|
"eval_VitaminC_dot_f1": 0.6595174262734584, |
|
"eval_VitaminC_dot_f1_threshold": 129.88558959960938, |
|
"eval_VitaminC_dot_precision": 0.492, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.58203125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.113249778747559, |
|
"eval_VitaminC_euclidean_ap": 0.5510190217865811, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.90462303161621, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.578125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 276.40142822265625, |
|
"eval_VitaminC_manhattan_ap": 0.5429240708188645, |
|
"eval_VitaminC_manhattan_f1": 0.6576819407008085, |
|
"eval_VitaminC_manhattan_f1_threshold": 469.7353515625, |
|
"eval_VitaminC_manhattan_precision": 0.49193548387096775, |
|
"eval_VitaminC_manhattan_recall": 0.991869918699187, |
|
"eval_VitaminC_max_accuracy": 0.58203125, |
|
"eval_VitaminC_max_accuracy_threshold": 315.9444580078125, |
|
"eval_VitaminC_max_ap": 0.5557444337961499, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 469.7353515625, |
|
"eval_VitaminC_max_precision": 0.492, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5557444337961499, |
|
"eval_sts-test_pearson_cosine": 0.8483316632682467, |
|
"eval_sts-test_pearson_dot": 0.8392403098680445, |
|
"eval_sts-test_pearson_euclidean": 0.8814283057813619, |
|
"eval_sts-test_pearson_manhattan": 0.8815226866327923, |
|
"eval_sts-test_pearson_max": 0.8815226866327923, |
|
"eval_sts-test_spearman_cosine": 0.8903503892346, |
|
"eval_sts-test_spearman_dot": 0.857844431199042, |
|
"eval_sts-test_spearman_euclidean": 0.8851830636663006, |
|
"eval_sts-test_spearman_manhattan": 0.8865568876827619, |
|
"eval_sts-test_spearman_max": 0.8903503892346, |
|
"eval_vitaminc-pairs_loss": 2.3538782596588135, |
|
"eval_vitaminc-pairs_runtime": 1.4618, |
|
"eval_vitaminc-pairs_samples_per_second": 73.88, |
|
"eval_vitaminc-pairs_steps_per_second": 1.368, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_negation-triplets_loss": 1.649215579032898, |
|
"eval_negation-triplets_runtime": 0.3081, |
|
"eval_negation-triplets_samples_per_second": 207.723, |
|
"eval_negation-triplets_steps_per_second": 3.246, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_scitail-pairs-pos_loss": 0.11823470890522003, |
|
"eval_scitail-pairs-pos_runtime": 0.376, |
|
"eval_scitail-pairs-pos_samples_per_second": 143.616, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.66, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_xsum-pairs_loss": 0.08420603722333908, |
|
"eval_xsum-pairs_runtime": 3.1576, |
|
"eval_xsum-pairs_samples_per_second": 40.538, |
|
"eval_xsum-pairs_steps_per_second": 0.633, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_sciq_pairs_loss": 0.034781794995069504, |
|
"eval_sciq_pairs_runtime": 3.2597, |
|
"eval_sciq_pairs_samples_per_second": 39.267, |
|
"eval_sciq_pairs_steps_per_second": 0.614, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_qasc_pairs_loss": 0.10597346723079681, |
|
"eval_qasc_pairs_runtime": 0.6245, |
|
"eval_qasc_pairs_samples_per_second": 204.979, |
|
"eval_qasc_pairs_steps_per_second": 3.203, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_openbookqa_pairs_loss": 0.7160983681678772, |
|
"eval_openbookqa_pairs_runtime": 0.5767, |
|
"eval_openbookqa_pairs_samples_per_second": 221.961, |
|
"eval_openbookqa_pairs_steps_per_second": 3.468, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_msmarco_pairs_loss": 0.3454173803329468, |
|
"eval_msmarco_pairs_runtime": 1.2912, |
|
"eval_msmarco_pairs_samples_per_second": 99.134, |
|
"eval_msmarco_pairs_steps_per_second": 1.549, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_nq_pairs_loss": 0.4442503750324249, |
|
"eval_nq_pairs_runtime": 2.3854, |
|
"eval_nq_pairs_samples_per_second": 53.659, |
|
"eval_nq_pairs_steps_per_second": 0.838, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_trivia_pairs_loss": 0.9324482679367065, |
|
"eval_trivia_pairs_runtime": 4.4251, |
|
"eval_trivia_pairs_samples_per_second": 28.926, |
|
"eval_trivia_pairs_steps_per_second": 0.452, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_gooaq_pairs_loss": 0.6094165444374084, |
|
"eval_gooaq_pairs_runtime": 0.8751, |
|
"eval_gooaq_pairs_samples_per_second": 146.261, |
|
"eval_gooaq_pairs_steps_per_second": 2.285, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_paws-pos_loss": 0.024421451613307, |
|
"eval_paws-pos_runtime": 0.6865, |
|
"eval_paws-pos_samples_per_second": 186.444, |
|
"eval_paws-pos_steps_per_second": 2.913, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.47191011235955055, |
|
"grad_norm": 3.1395561695098877, |
|
"learning_rate": 3.999675367909485e-05, |
|
"loss": 0.5801, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.4887640449438202, |
|
"grad_norm": 2.7977917194366455, |
|
"learning_rate": 3.998701612152597e-05, |
|
"loss": 0.791, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"grad_norm": 2.3682048320770264, |
|
"learning_rate": 3.997079154212493e-05, |
|
"loss": 0.6042, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5224719101123596, |
|
"grad_norm": 2.843482255935669, |
|
"learning_rate": 3.99480869635839e-05, |
|
"loss": 0.7559, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5393258426966292, |
|
"grad_norm": 2.7346785068511963, |
|
"learning_rate": 3.9918912213415936e-05, |
|
"loss": 0.6258, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5561797752808989, |
|
"grad_norm": 3.149007558822632, |
|
"learning_rate": 3.9883279919701226e-05, |
|
"loss": 0.8853, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5730337078651685, |
|
"grad_norm": 3.3424761295318604, |
|
"learning_rate": 3.9841205505621106e-05, |
|
"loss": 0.5947, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5898876404494382, |
|
"grad_norm": 2.6377146244049072, |
|
"learning_rate": 3.979270718278224e-05, |
|
"loss": 0.644, |
|
"step": 105 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 534, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 107, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 320, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|