|
{ |
|
"best_metric": 0.8359799918026352, |
|
"best_model_checkpoint": "result/simcse-celectra-amlp-dmlp-bs128-lr2e-6-mask0.40-elew0.01-roberta-base", |
|
"epoch": 1.0, |
|
"global_step": 7813, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"electra_acc": 0.2448, |
|
"electra_fix_acc": 0.006, |
|
"electra_rep_acc": 0.9912, |
|
"epoch": 0.0, |
|
"learning_rate": 1.9997440163829513e-06, |
|
"loss": 8.6888, |
|
"neg_sim": 0.2041, |
|
"pos_sim": 0.3651, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_avg_sts": 0.7368351609735859, |
|
"eval_sickr_spearman": 0.6964049570574558, |
|
"eval_stsb_spearman": 0.7772653648897162, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_avg_sts": 0.7530561409181189, |
|
"eval_sickr_spearman": 0.707297882521504, |
|
"eval_stsb_spearman": 0.7988143993147339, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_avg_sts": 0.7605110871922394, |
|
"eval_sickr_spearman": 0.7124740982612865, |
|
"eval_stsb_spearman": 0.8085480761231921, |
|
"step": 375 |
|
}, |
|
{ |
|
"electra_acc": 0.7243, |
|
"electra_fix_acc": 0.8981, |
|
"electra_rep_acc": 0.2064, |
|
"epoch": 0.06, |
|
"learning_rate": 1.8720081914757456e-06, |
|
"loss": 0.6062, |
|
"neg_sim": 0.0055, |
|
"pos_sim": 0.6593, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_avg_sts": 0.763939631028596, |
|
"eval_sickr_spearman": 0.7155249377624325, |
|
"eval_stsb_spearman": 0.8123543242947595, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_avg_sts": 0.7670128200581756, |
|
"eval_sickr_spearman": 0.7176028112417467, |
|
"eval_stsb_spearman": 0.8164228288746044, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_avg_sts": 0.7683647856842559, |
|
"eval_sickr_spearman": 0.7182528161375087, |
|
"eval_stsb_spearman": 0.8184767552310033, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_avg_sts": 0.7703405632893722, |
|
"eval_sickr_spearman": 0.7201300637061159, |
|
"eval_stsb_spearman": 0.8205510628726285, |
|
"step": 875 |
|
}, |
|
{ |
|
"electra_acc": 0.7954, |
|
"electra_fix_acc": 0.9411, |
|
"electra_rep_acc": 0.3621, |
|
"epoch": 0.13, |
|
"learning_rate": 1.744016382951491e-06, |
|
"loss": 0.0043, |
|
"neg_sim": -0.0066, |
|
"pos_sim": 0.7176, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_avg_sts": 0.7716371834988085, |
|
"eval_sickr_spearman": 0.7207027865596639, |
|
"eval_stsb_spearman": 0.8225715804379532, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_avg_sts": 0.7719569589534554, |
|
"eval_sickr_spearman": 0.7195236710504597, |
|
"eval_stsb_spearman": 0.8243902468564511, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_avg_sts": 0.7725905596083784, |
|
"eval_sickr_spearman": 0.7204897205936844, |
|
"eval_stsb_spearman": 0.8246913986230725, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_avg_sts": 0.7740128875580021, |
|
"eval_sickr_spearman": 0.7209413090094056, |
|
"eval_stsb_spearman": 0.8270844661065986, |
|
"step": 1375 |
|
}, |
|
{ |
|
"electra_acc": 0.806, |
|
"electra_fix_acc": 0.9446, |
|
"electra_rep_acc": 0.3921, |
|
"epoch": 0.19, |
|
"learning_rate": 1.6160245744272365e-06, |
|
"loss": 0.0039, |
|
"neg_sim": -0.0067, |
|
"pos_sim": 0.7288, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_avg_sts": 0.7747104419219604, |
|
"eval_sickr_spearman": 0.721527768757965, |
|
"eval_stsb_spearman": 0.8278931150859556, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_avg_sts": 0.7750669451240817, |
|
"eval_sickr_spearman": 0.722242135329654, |
|
"eval_stsb_spearman": 0.8278917549185095, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_avg_sts": 0.7751917311744843, |
|
"eval_sickr_spearman": 0.7214598527805315, |
|
"eval_stsb_spearman": 0.8289236095684369, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_avg_sts": 0.775110379872699, |
|
"eval_sickr_spearman": 0.7211095139266419, |
|
"eval_stsb_spearman": 0.8291112458187563, |
|
"step": 1875 |
|
}, |
|
{ |
|
"electra_acc": 0.8111, |
|
"electra_fix_acc": 0.9455, |
|
"electra_rep_acc": 0.4077, |
|
"epoch": 0.26, |
|
"learning_rate": 1.4880327659029822e-06, |
|
"loss": 0.0037, |
|
"neg_sim": -0.0067, |
|
"pos_sim": 0.7361, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_avg_sts": 0.7752389174925427, |
|
"eval_sickr_spearman": 0.7207011535022151, |
|
"eval_stsb_spearman": 0.8297766814828704, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_avg_sts": 0.7759291537103485, |
|
"eval_sickr_spearman": 0.7218097593545081, |
|
"eval_stsb_spearman": 0.8300485480661889, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_avg_sts": 0.7759100192724719, |
|
"eval_sickr_spearman": 0.7214468843831432, |
|
"eval_stsb_spearman": 0.8303731541618006, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_avg_sts": 0.77580052054564, |
|
"eval_sickr_spearman": 0.7208077825474077, |
|
"eval_stsb_spearman": 0.8307932585438723, |
|
"step": 2375 |
|
}, |
|
{ |
|
"electra_acc": 0.8137, |
|
"electra_fix_acc": 0.9463, |
|
"electra_rep_acc": 0.4184, |
|
"epoch": 0.32, |
|
"learning_rate": 1.3600409573787276e-06, |
|
"loss": 0.0036, |
|
"neg_sim": -0.0067, |
|
"pos_sim": 0.7422, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_avg_sts": 0.7764057696201115, |
|
"eval_sickr_spearman": 0.7207564372999703, |
|
"eval_stsb_spearman": 0.8320551019402528, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_avg_sts": 0.7771167451325478, |
|
"eval_sickr_spearman": 0.7216867036726237, |
|
"eval_stsb_spearman": 0.8325467865924719, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_avg_sts": 0.777409082133846, |
|
"eval_sickr_spearman": 0.7218441496231378, |
|
"eval_stsb_spearman": 0.8329740146445542, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_avg_sts": 0.7776325118235485, |
|
"eval_sickr_spearman": 0.722264277667417, |
|
"eval_stsb_spearman": 0.8330007459796801, |
|
"step": 2875 |
|
}, |
|
{ |
|
"electra_acc": 0.8163, |
|
"electra_fix_acc": 0.9464, |
|
"electra_rep_acc": 0.4284, |
|
"epoch": 0.38, |
|
"learning_rate": 1.2320491488544733e-06, |
|
"loss": 0.0034, |
|
"neg_sim": -0.0068, |
|
"pos_sim": 0.7471, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_avg_sts": 0.7777138423021386, |
|
"eval_sickr_spearman": 0.7227461256770441, |
|
"eval_stsb_spearman": 0.832681558927233, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_avg_sts": 0.7781702830872564, |
|
"eval_sickr_spearman": 0.7235760070476919, |
|
"eval_stsb_spearman": 0.832764559126821, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_avg_sts": 0.7788742472264717, |
|
"eval_sickr_spearman": 0.7235008864050426, |
|
"eval_stsb_spearman": 0.8342476080479008, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_avg_sts": 0.7786540883853343, |
|
"eval_sickr_spearman": 0.7228896906392426, |
|
"eval_stsb_spearman": 0.8344184861314262, |
|
"step": 3375 |
|
}, |
|
{ |
|
"electra_acc": 0.8185, |
|
"electra_fix_acc": 0.9468, |
|
"electra_rep_acc": 0.4365, |
|
"epoch": 0.45, |
|
"learning_rate": 1.104057340330219e-06, |
|
"loss": 0.0034, |
|
"neg_sim": -0.0068, |
|
"pos_sim": 0.7518, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_avg_sts": 0.7785390972296262, |
|
"eval_sickr_spearman": 0.7228380572051966, |
|
"eval_stsb_spearman": 0.8342401372540559, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_avg_sts": 0.778413022922875, |
|
"eval_sickr_spearman": 0.7219714320419488, |
|
"eval_stsb_spearman": 0.8348546138038011, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_avg_sts": 0.778801601085773, |
|
"eval_sickr_spearman": 0.722748959512029, |
|
"eval_stsb_spearman": 0.8348542426595171, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_avg_sts": 0.7782529644249387, |
|
"eval_sickr_spearman": 0.7217000082877221, |
|
"eval_stsb_spearman": 0.8348059205621555, |
|
"step": 3875 |
|
}, |
|
{ |
|
"electra_acc": 0.8202, |
|
"electra_fix_acc": 0.9468, |
|
"electra_rep_acc": 0.4426, |
|
"epoch": 0.51, |
|
"learning_rate": 9.760655318059644e-07, |
|
"loss": 0.0033, |
|
"neg_sim": -0.0068, |
|
"pos_sim": 0.7559, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_avg_sts": 0.7782394657347366, |
|
"eval_sickr_spearman": 0.7217301237883238, |
|
"eval_stsb_spearman": 0.8347488076811493, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_avg_sts": 0.7782895079814026, |
|
"eval_sickr_spearman": 0.7220120663537655, |
|
"eval_stsb_spearman": 0.8345669496090398, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_avg_sts": 0.778080439519264, |
|
"eval_sickr_spearman": 0.7216004398144409, |
|
"eval_stsb_spearman": 0.834560439224087, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_avg_sts": 0.7782284276808015, |
|
"eval_sickr_spearman": 0.7216616794687745, |
|
"eval_stsb_spearman": 0.8347951758928285, |
|
"step": 4375 |
|
}, |
|
{ |
|
"electra_acc": 0.8217, |
|
"electra_fix_acc": 0.9477, |
|
"electra_rep_acc": 0.4458, |
|
"epoch": 0.58, |
|
"learning_rate": 8.480737232817099e-07, |
|
"loss": 0.0032, |
|
"neg_sim": -0.0068, |
|
"pos_sim": 0.7596, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_avg_sts": 0.7781400153641456, |
|
"eval_sickr_spearman": 0.7214844927355691, |
|
"eval_stsb_spearman": 0.8347955379927221, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_avg_sts": 0.7783651714396398, |
|
"eval_sickr_spearman": 0.721677865949959, |
|
"eval_stsb_spearman": 0.8350524769293207, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_avg_sts": 0.7779810461862591, |
|
"eval_sickr_spearman": 0.7215376151337598, |
|
"eval_stsb_spearman": 0.8344244772387583, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_avg_sts": 0.7774863632137369, |
|
"eval_sickr_spearman": 0.7212506225972007, |
|
"eval_stsb_spearman": 0.833722103830273, |
|
"step": 4875 |
|
}, |
|
{ |
|
"electra_acc": 0.8222, |
|
"electra_fix_acc": 0.9476, |
|
"electra_rep_acc": 0.4478, |
|
"epoch": 0.64, |
|
"learning_rate": 7.200819147574555e-07, |
|
"loss": 0.0033, |
|
"neg_sim": -0.0068, |
|
"pos_sim": 0.7614, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_avg_sts": 0.7780441928626822, |
|
"eval_sickr_spearman": 0.7222703776172996, |
|
"eval_stsb_spearman": 0.833818008108065, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_avg_sts": 0.7772624437857663, |
|
"eval_sickr_spearman": 0.7211198886445525, |
|
"eval_stsb_spearman": 0.8334049989269801, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_avg_sts": 0.777433289264948, |
|
"eval_sickr_spearman": 0.7208792047952461, |
|
"eval_stsb_spearman": 0.83398737373465, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_avg_sts": 0.778471716959233, |
|
"eval_sickr_spearman": 0.7219667730251094, |
|
"eval_stsb_spearman": 0.8349766608933566, |
|
"step": 5375 |
|
}, |
|
{ |
|
"electra_acc": 0.824, |
|
"electra_fix_acc": 0.9479, |
|
"electra_rep_acc": 0.4538, |
|
"epoch": 0.7, |
|
"learning_rate": 5.920901062332011e-07, |
|
"loss": 0.0033, |
|
"neg_sim": -0.0068, |
|
"pos_sim": 0.7634, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_avg_sts": 0.7783835165868276, |
|
"eval_sickr_spearman": 0.7220774366828226, |
|
"eval_stsb_spearman": 0.8346895964908326, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_avg_sts": 0.7777215174417764, |
|
"eval_sickr_spearman": 0.7211491395853283, |
|
"eval_stsb_spearman": 0.8342938952982245, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_avg_sts": 0.7783006770868415, |
|
"eval_sickr_spearman": 0.7218946783418506, |
|
"eval_stsb_spearman": 0.8347066758318323, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_avg_sts": 0.7786525912567952, |
|
"eval_sickr_spearman": 0.7218885303608666, |
|
"eval_stsb_spearman": 0.8354166521527239, |
|
"step": 5875 |
|
}, |
|
{ |
|
"electra_acc": 0.8245, |
|
"electra_fix_acc": 0.9479, |
|
"electra_rep_acc": 0.4557, |
|
"epoch": 0.77, |
|
"learning_rate": 4.640982977089466e-07, |
|
"loss": 0.0033, |
|
"neg_sim": -0.0068, |
|
"pos_sim": 0.7639, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_avg_sts": 0.7786598191346372, |
|
"eval_sickr_spearman": 0.7214591323140099, |
|
"eval_stsb_spearman": 0.8358605059552645, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_avg_sts": 0.7785391387872032, |
|
"eval_sickr_spearman": 0.7212359798167284, |
|
"eval_stsb_spearman": 0.8358422977576779, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_avg_sts": 0.7780962443354171, |
|
"eval_sickr_spearman": 0.7207366965172793, |
|
"eval_stsb_spearman": 0.835455792153555, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_avg_sts": 0.7787170644410477, |
|
"eval_sickr_spearman": 0.7214541370794603, |
|
"eval_stsb_spearman": 0.8359799918026352, |
|
"step": 6375 |
|
}, |
|
{ |
|
"electra_acc": 0.8249, |
|
"electra_fix_acc": 0.9477, |
|
"electra_rep_acc": 0.458, |
|
"epoch": 0.83, |
|
"learning_rate": 3.361064891846921e-07, |
|
"loss": 0.0034, |
|
"neg_sim": -0.0068, |
|
"pos_sim": 0.765, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_avg_sts": 0.7787359930993909, |
|
"eval_sickr_spearman": 0.7215321395881958, |
|
"eval_stsb_spearman": 0.835939846610586, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_avg_sts": 0.778662230635321, |
|
"eval_sickr_spearman": 0.7217551960232744, |
|
"eval_stsb_spearman": 0.8355692652473675, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_avg_sts": 0.7788844288300487, |
|
"eval_sickr_spearman": 0.7221871330355174, |
|
"eval_stsb_spearman": 0.8355817246245801, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_avg_sts": 0.7789522767655956, |
|
"eval_sickr_spearman": 0.722305728507958, |
|
"eval_stsb_spearman": 0.8355988250232333, |
|
"step": 6875 |
|
}, |
|
{ |
|
"electra_acc": 0.8251, |
|
"electra_fix_acc": 0.9476, |
|
"electra_rep_acc": 0.4589, |
|
"epoch": 0.9, |
|
"learning_rate": 2.0811468066043772e-07, |
|
"loss": 0.0034, |
|
"neg_sim": -0.0069, |
|
"pos_sim": 0.7655, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_avg_sts": 0.7787617595098588, |
|
"eval_sickr_spearman": 0.721966628931805, |
|
"eval_stsb_spearman": 0.8355568900879128, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_avg_sts": 0.7786478565950568, |
|
"eval_sickr_spearman": 0.7216480386359659, |
|
"eval_stsb_spearman": 0.8356476745541477, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_avg_sts": 0.7787981847358625, |
|
"eval_sickr_spearman": 0.7219965523080009, |
|
"eval_stsb_spearman": 0.8355998171637242, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_avg_sts": 0.7788565954562792, |
|
"eval_sickr_spearman": 0.7221892050558691, |
|
"eval_stsb_spearman": 0.8355239858566892, |
|
"step": 7375 |
|
}, |
|
{ |
|
"electra_acc": 0.8254, |
|
"electra_fix_acc": 0.9477, |
|
"electra_rep_acc": 0.4607, |
|
"epoch": 0.96, |
|
"learning_rate": 8.012287213618328e-08, |
|
"loss": 0.0032, |
|
"neg_sim": -0.0069, |
|
"pos_sim": 0.7665, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_avg_sts": 0.7788488501171182, |
|
"eval_sickr_spearman": 0.7221623076390639, |
|
"eval_stsb_spearman": 0.8355353925951726, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_avg_sts": 0.7788989946596425, |
|
"eval_sickr_spearman": 0.7222944411991201, |
|
"eval_stsb_spearman": 0.8355035481201649, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_avg_sts": 0.7788451325075824, |
|
"eval_sickr_spearman": 0.7221886767137534, |
|
"eval_stsb_spearman": 0.8355015883014113, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 7813, |
|
"train_runtime": 7033.0789, |
|
"train_samples_per_second": 1.111 |
|
} |
|
], |
|
"max_steps": 7813, |
|
"num_train_epochs": 1, |
|
"total_flos": 285611882411596800, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|