|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994666666666666, |
|
"eval_steps": 500, |
|
"global_step": 937, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.319148936170213e-08, |
|
"logits/chosen": 0.015347272157669067, |
|
"logits/rejected": -0.12729741632938385, |
|
"logps/chosen": -550.8414916992188, |
|
"logps/rejected": -492.32574462890625, |
|
"loss": 0.2285, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": 0.08696222305297852, |
|
"logits/rejected": 0.14597061276435852, |
|
"logps/chosen": -464.8576965332031, |
|
"logps/rejected": -456.1559753417969, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.3611111044883728, |
|
"rewards/chosen": 0.000442314165411517, |
|
"rewards/margins": -2.51087640208425e-05, |
|
"rewards/rejected": 0.00046742294216528535, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": 0.053336001932621, |
|
"logits/rejected": 0.027396252378821373, |
|
"logps/chosen": -463.5480041503906, |
|
"logps/rejected": -488.28094482421875, |
|
"loss": 0.2026, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.0002625098277349025, |
|
"rewards/margins": -8.491716289427131e-05, |
|
"rewards/rejected": 0.00034742700518108904, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": 0.08084158599376678, |
|
"logits/rejected": 0.09464940428733826, |
|
"logps/chosen": -506.58782958984375, |
|
"logps/rejected": -511.69696044921875, |
|
"loss": 0.2155, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0002810861624311656, |
|
"rewards/margins": 8.317727770190686e-05, |
|
"rewards/rejected": -0.00036426345468498766, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": 0.13661757111549377, |
|
"logits/rejected": 0.13296189904212952, |
|
"logps/chosen": -555.6744995117188, |
|
"logps/rejected": -553.1989135742188, |
|
"loss": 0.2092, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.000944433850236237, |
|
"rewards/margins": 0.00023557464010082185, |
|
"rewards/rejected": -0.0011800084030255675, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": 0.11821017414331436, |
|
"logits/rejected": 0.09106048941612244, |
|
"logps/chosen": -486.61151123046875, |
|
"logps/rejected": -502.881591796875, |
|
"loss": 0.2104, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.0019579054787755013, |
|
"rewards/margins": 0.00025602790992707014, |
|
"rewards/rejected": -0.002213933737948537, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": 0.02751517854630947, |
|
"logits/rejected": 0.14413723349571228, |
|
"logps/chosen": -522.82177734375, |
|
"logps/rejected": -522.5377197265625, |
|
"loss": 0.2126, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.0038118392694741488, |
|
"rewards/margins": 0.0003993002756033093, |
|
"rewards/rejected": -0.004211139865219593, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": 0.1060861125588417, |
|
"logits/rejected": 0.10927315801382065, |
|
"logps/chosen": -507.0030822753906, |
|
"logps/rejected": -500.82489013671875, |
|
"loss": 0.2142, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.004309800453484058, |
|
"rewards/margins": 0.0013471845304593444, |
|
"rewards/rejected": -0.005656985100358725, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": 0.10468300431966782, |
|
"logits/rejected": 0.06107153370976448, |
|
"logps/chosen": -479.45989990234375, |
|
"logps/rejected": -488.6297302246094, |
|
"loss": 0.2109, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.004880805965512991, |
|
"rewards/margins": 0.0016875596484169364, |
|
"rewards/rejected": -0.0065683661960065365, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": 0.031617164611816406, |
|
"logits/rejected": 0.03786861151456833, |
|
"logps/chosen": -457.5731506347656, |
|
"logps/rejected": -467.76806640625, |
|
"loss": 0.2108, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.006191219203174114, |
|
"rewards/margins": 0.003313812892884016, |
|
"rewards/rejected": -0.009505031630396843, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999375059004058e-06, |
|
"logits/chosen": 0.0778881087899208, |
|
"logits/rejected": -0.01506942231208086, |
|
"logps/chosen": -491.6114196777344, |
|
"logps/rejected": -491.8241271972656, |
|
"loss": 0.1988, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.010530633851885796, |
|
"rewards/margins": 0.003181540174409747, |
|
"rewards/rejected": -0.013712175190448761, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9955571065548795e-06, |
|
"logits/chosen": -0.0275646410882473, |
|
"logits/rejected": -0.05159539729356766, |
|
"logps/chosen": -512.9607543945312, |
|
"logps/rejected": -539.0792846679688, |
|
"loss": 0.2013, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.01575169898569584, |
|
"rewards/margins": 0.009278899058699608, |
|
"rewards/rejected": -0.025030598044395447, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9882736864879e-06, |
|
"logits/chosen": -0.037609659135341644, |
|
"logits/rejected": -0.007756671402603388, |
|
"logps/chosen": -528.1864624023438, |
|
"logps/rejected": -521.6083984375, |
|
"loss": 0.2006, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.016717851161956787, |
|
"rewards/margins": 0.00735442852601409, |
|
"rewards/rejected": -0.024072280153632164, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977534912960124e-06, |
|
"logits/chosen": 0.020100779831409454, |
|
"logits/rejected": 8.137784607242793e-05, |
|
"logps/chosen": -486.9697265625, |
|
"logps/rejected": -506.7543029785156, |
|
"loss": 0.1979, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.024340303614735603, |
|
"rewards/margins": 0.011229689233005047, |
|
"rewards/rejected": -0.03556998819112778, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963355698422092e-06, |
|
"logits/chosen": -0.014675384387373924, |
|
"logits/rejected": -0.04730897396802902, |
|
"logps/chosen": -488.8623046875, |
|
"logps/rejected": -493.58087158203125, |
|
"loss": 0.198, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.018940208479762077, |
|
"rewards/margins": 0.01975865848362446, |
|
"rewards/rejected": -0.038698866963386536, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945755732909625e-06, |
|
"logits/chosen": -0.11544609069824219, |
|
"logits/rejected": -0.014225010760128498, |
|
"logps/chosen": -516.4327392578125, |
|
"logps/rejected": -552.8570556640625, |
|
"loss": 0.1998, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.026035359129309654, |
|
"rewards/margins": 0.02031863108277321, |
|
"rewards/rejected": -0.04635399580001831, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924759456701167e-06, |
|
"logits/chosen": -0.06522537022829056, |
|
"logits/rejected": -0.0837598517537117, |
|
"logps/chosen": -467.21722412109375, |
|
"logps/rejected": -517.3309326171875, |
|
"loss": 0.2005, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.02084728702902794, |
|
"rewards/margins": 0.025486458092927933, |
|
"rewards/rejected": -0.04633374512195587, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.900396026378671e-06, |
|
"logits/chosen": -0.18057578802108765, |
|
"logits/rejected": -0.12668588757514954, |
|
"logps/chosen": -534.2403564453125, |
|
"logps/rejected": -615.2245483398438, |
|
"loss": 0.1981, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.017506513744592667, |
|
"rewards/margins": 0.03957374021410942, |
|
"rewards/rejected": -0.057080257683992386, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872699274339169e-06, |
|
"logits/chosen": -0.1534993201494217, |
|
"logits/rejected": -0.03731600195169449, |
|
"logps/chosen": -496.35247802734375, |
|
"logps/rejected": -547.9083862304688, |
|
"loss": 0.2, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.014407465234398842, |
|
"rewards/margins": 0.03549625352025032, |
|
"rewards/rejected": -0.049903713166713715, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8417076618132434e-06, |
|
"logits/chosen": -0.0889858677983284, |
|
"logits/rejected": -0.18024688959121704, |
|
"logps/chosen": -534.732421875, |
|
"logps/rejected": -549.1685791015625, |
|
"loss": 0.1997, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.02401977963745594, |
|
"rewards/margins": 0.026353713124990463, |
|
"rewards/rejected": -0.05037349462509155, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807464225455655e-06, |
|
"logits/chosen": -0.13999292254447937, |
|
"logits/rejected": -0.20869961380958557, |
|
"logps/chosen": -501.455810546875, |
|
"logps/rejected": -529.4840087890625, |
|
"loss": 0.1994, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.037735745310783386, |
|
"rewards/margins": 0.02034623734652996, |
|
"rewards/rejected": -0.0580819770693779, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770016517582283e-06, |
|
"logits/chosen": -0.16457560658454895, |
|
"logits/rejected": -0.16379991173744202, |
|
"logps/chosen": -577.7967529296875, |
|
"logps/rejected": -628.288818359375, |
|
"loss": 0.1953, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.05010377615690231, |
|
"rewards/margins": 0.03183088079094887, |
|
"rewards/rejected": -0.08193466067314148, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7294165401363616e-06, |
|
"logits/chosen": -0.19137120246887207, |
|
"logits/rejected": -0.0707189291715622, |
|
"logps/chosen": -590.2723999023438, |
|
"logps/rejected": -624.1236572265625, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0555083341896534, |
|
"rewards/margins": 0.029821401461958885, |
|
"rewards/rejected": -0.08532973378896713, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68572067247573e-06, |
|
"logits/chosen": -0.19102320075035095, |
|
"logits/rejected": -0.18087339401245117, |
|
"logps/chosen": -493.77886962890625, |
|
"logps/rejected": -550.4117431640625, |
|
"loss": 0.2015, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.06666065752506256, |
|
"rewards/margins": 0.018180230632424355, |
|
"rewards/rejected": -0.08484089374542236, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638989593081364e-06, |
|
"logits/chosen": -0.14345607161521912, |
|
"logits/rejected": -0.14746864140033722, |
|
"logps/chosen": -489.48248291015625, |
|
"logps/rejected": -535.217041015625, |
|
"loss": 0.1916, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.04723743349313736, |
|
"rewards/margins": 0.03517382964491844, |
|
"rewards/rejected": -0.0824112594127655, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5892881952959015e-06, |
|
"logits/chosen": -0.11015196144580841, |
|
"logits/rejected": -0.07484224438667297, |
|
"logps/chosen": -516.731689453125, |
|
"logps/rejected": -559.3098754882812, |
|
"loss": 0.187, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.03826197609305382, |
|
"rewards/margins": 0.031822480261325836, |
|
"rewards/rejected": -0.07008445262908936, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536685497209182e-06, |
|
"logits/chosen": -0.11327183246612549, |
|
"logits/rejected": -0.16368862986564636, |
|
"logps/chosen": -511.823974609375, |
|
"logps/rejected": -565.7365112304688, |
|
"loss": 0.1924, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.04240923374891281, |
|
"rewards/margins": 0.04024555906653404, |
|
"rewards/rejected": -0.08265479654073715, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481254545815943e-06, |
|
"logits/chosen": -0.11171416938304901, |
|
"logits/rejected": -0.09737102687358856, |
|
"logps/chosen": -594.6912231445312, |
|
"logps/rejected": -623.1627807617188, |
|
"loss": 0.2014, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.04938292130827904, |
|
"rewards/margins": 0.02359354868531227, |
|
"rewards/rejected": -0.07297646999359131, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.42307231557875e-06, |
|
"logits/chosen": -0.02691875956952572, |
|
"logits/rejected": -0.15112504363059998, |
|
"logps/chosen": -557.1254272460938, |
|
"logps/rejected": -591.5187377929688, |
|
"loss": 0.1912, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.03798414021730423, |
|
"rewards/margins": 0.02616792358458042, |
|
"rewards/rejected": -0.0641520619392395, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3622196015370305e-06, |
|
"logits/chosen": -0.06971077620983124, |
|
"logits/rejected": -0.1268663853406906, |
|
"logps/chosen": -498.1439514160156, |
|
"logps/rejected": -574.2821044921875, |
|
"loss": 0.195, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.044213201850652695, |
|
"rewards/margins": 0.02420051395893097, |
|
"rewards/rejected": -0.06841371208429337, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.298780907110648e-06, |
|
"logits/chosen": -0.03687559440732002, |
|
"logits/rejected": -0.08016426861286163, |
|
"logps/chosen": -526.9334716796875, |
|
"logps/rejected": -581.825439453125, |
|
"loss": 0.1907, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.046329911798238754, |
|
"rewards/margins": 0.03305746242403984, |
|
"rewards/rejected": -0.079387366771698, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.23284432675381e-06, |
|
"logits/chosen": -0.10335829108953476, |
|
"logits/rejected": -0.12378430366516113, |
|
"logps/chosen": -504.7950134277344, |
|
"logps/rejected": -553.68115234375, |
|
"loss": 0.1905, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.053347356617450714, |
|
"rewards/margins": 0.03879328817129135, |
|
"rewards/rejected": -0.09214064478874207, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.164501423622277e-06, |
|
"logits/chosen": -0.1206481009721756, |
|
"logits/rejected": -0.052752863615751266, |
|
"logps/chosen": -516.8829345703125, |
|
"logps/rejected": -491.26849365234375, |
|
"loss": 0.2077, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0364055298268795, |
|
"rewards/margins": 0.00497065857052803, |
|
"rewards/rejected": -0.04137618839740753, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0938471024237355e-06, |
|
"logits/chosen": -0.058399152010679245, |
|
"logits/rejected": -0.004985150881111622, |
|
"logps/chosen": -496.40020751953125, |
|
"logps/rejected": -557.3847045898438, |
|
"loss": 0.1882, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.013055197894573212, |
|
"rewards/margins": 0.04152555763721466, |
|
"rewards/rejected": -0.05458075553178787, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020979477627907e-06, |
|
"logits/chosen": -0.00046800225391052663, |
|
"logits/rejected": -0.04824506491422653, |
|
"logps/chosen": -511.21728515625, |
|
"logps/rejected": -525.9166259765625, |
|
"loss": 0.196, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.004664528649300337, |
|
"rewards/margins": 0.02710866369307041, |
|
"rewards/rejected": -0.03177319094538689, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9459997372194105e-06, |
|
"logits/chosen": -0.01971629448235035, |
|
"logits/rejected": 0.02092793397605419, |
|
"logps/chosen": -464.59808349609375, |
|
"logps/rejected": -518.8754272460938, |
|
"loss": 0.196, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.011982052586972713, |
|
"rewards/margins": 0.03715554624795914, |
|
"rewards/rejected": -0.049137599766254425, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869012002182573e-06, |
|
"logits/chosen": -0.13931025564670563, |
|
"logits/rejected": -0.10487548261880875, |
|
"logps/chosen": -488.48443603515625, |
|
"logps/rejected": -540.0478515625, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.007292003836482763, |
|
"rewards/margins": 0.028780395165085793, |
|
"rewards/rejected": -0.03607239946722984, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7901231819133104e-06, |
|
"logits/chosen": -0.0676584541797638, |
|
"logits/rejected": 0.000223781171371229, |
|
"logps/chosen": -515.3255615234375, |
|
"logps/rejected": -534.4259643554688, |
|
"loss": 0.1928, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.003454460995271802, |
|
"rewards/margins": 0.024057697504758835, |
|
"rewards/rejected": -0.02751215733587742, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.709442825758875e-06, |
|
"logits/chosen": -0.13946941494941711, |
|
"logits/rejected": -0.10190458595752716, |
|
"logps/chosen": -506.9981994628906, |
|
"logps/rejected": -556.169921875, |
|
"loss": 0.1991, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.011531559750437737, |
|
"rewards/margins": 0.044275928288698196, |
|
"rewards/rejected": -0.055807482451200485, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6270829708916113e-06, |
|
"logits/chosen": -0.19370055198669434, |
|
"logits/rejected": -0.13238494098186493, |
|
"logps/chosen": -493.8172302246094, |
|
"logps/rejected": -526.5756225585938, |
|
"loss": 0.1965, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.01078014075756073, |
|
"rewards/margins": 0.024004068225622177, |
|
"rewards/rejected": -0.03478420898318291, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543157986727991e-06, |
|
"logits/chosen": -0.07623752951622009, |
|
"logits/rejected": -0.13485901057720184, |
|
"logps/chosen": -493.17303466796875, |
|
"logps/rejected": -547.02099609375, |
|
"loss": 0.2009, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.013095049187541008, |
|
"rewards/margins": 0.04162532463669777, |
|
"rewards/rejected": -0.05472037196159363, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4577844161089614e-06, |
|
"logits/chosen": 0.006638138089329004, |
|
"logits/rejected": -0.16132843494415283, |
|
"logps/chosen": -505.0772399902344, |
|
"logps/rejected": -525.45849609375, |
|
"loss": 0.2036, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -9.345449507236481e-05, |
|
"rewards/margins": 0.02224581316113472, |
|
"rewards/rejected": -0.022339265793561935, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3710808134621577e-06, |
|
"logits/chosen": -0.07844109833240509, |
|
"logits/rejected": -0.09653671830892563, |
|
"logps/chosen": -464.03411865234375, |
|
"logps/rejected": -497.51910400390625, |
|
"loss": 0.2023, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0025765378959476948, |
|
"rewards/margins": 0.03173336759209633, |
|
"rewards/rejected": -0.02915683016180992, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2831675801707126e-06, |
|
"logits/chosen": -0.12853233516216278, |
|
"logits/rejected": -0.06648223102092743, |
|
"logps/chosen": -487.64410400390625, |
|
"logps/rejected": -543.6026611328125, |
|
"loss": 0.1898, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0037212104070931673, |
|
"rewards/margins": 0.044964469969272614, |
|
"rewards/rejected": -0.041243262588977814, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.194166797377289e-06, |
|
"logits/chosen": -0.13361698389053345, |
|
"logits/rejected": -0.0924164280295372, |
|
"logps/chosen": -488.75225830078125, |
|
"logps/rejected": -554.2010498046875, |
|
"loss": 0.1913, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.007956433109939098, |
|
"rewards/margins": 0.04044501110911369, |
|
"rewards/rejected": -0.03248857706785202, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.104202056455501e-06, |
|
"logits/chosen": -0.05866111442446709, |
|
"logits/rejected": -0.11707846075296402, |
|
"logps/chosen": -475.849609375, |
|
"logps/rejected": -527.5254516601562, |
|
"loss": 0.1889, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0031708565074950457, |
|
"rewards/margins": 0.04169551655650139, |
|
"rewards/rejected": -0.03852466121315956, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.013398287384144e-06, |
|
"logits/chosen": -0.03628942370414734, |
|
"logits/rejected": -0.05839689448475838, |
|
"logps/chosen": -450.5067443847656, |
|
"logps/rejected": -480.0916442871094, |
|
"loss": 0.1959, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.007704668678343296, |
|
"rewards/margins": 0.020786713808774948, |
|
"rewards/rejected": -0.013082042336463928, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9218815852625717e-06, |
|
"logits/chosen": -0.13268591463565826, |
|
"logits/rejected": -0.039528947323560715, |
|
"logps/chosen": -506.5321350097656, |
|
"logps/rejected": -551.9951782226562, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.0014046819414943457, |
|
"rewards/margins": 0.021756207570433617, |
|
"rewards/rejected": -0.020351527258753777, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.829779035208113e-06, |
|
"logits/chosen": -0.022976398468017578, |
|
"logits/rejected": -0.12432871758937836, |
|
"logps/chosen": -508.6607360839844, |
|
"logps/rejected": -550.5410766601562, |
|
"loss": 0.1858, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.002696055918931961, |
|
"rewards/margins": 0.03165871649980545, |
|
"rewards/rejected": -0.03435477241873741, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.737218535878705e-06, |
|
"logits/chosen": -0.011603012681007385, |
|
"logits/rejected": -0.056485723704099655, |
|
"logps/chosen": -468.14764404296875, |
|
"logps/rejected": -571.5040283203125, |
|
"loss": 0.1853, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005876007489860058, |
|
"rewards/margins": 0.05560145527124405, |
|
"rewards/rejected": -0.049725450575351715, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.64432862186579e-06, |
|
"logits/chosen": -0.09491895884275436, |
|
"logits/rejected": -0.0038899630308151245, |
|
"logps/chosen": -498.3922424316406, |
|
"logps/rejected": -521.9002075195312, |
|
"loss": 0.1883, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0021348330192267895, |
|
"rewards/margins": 0.0486973337829113, |
|
"rewards/rejected": -0.050832170993089676, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.551238285204126e-06, |
|
"logits/chosen": -0.15483462810516357, |
|
"logits/rejected": 0.049260228872299194, |
|
"logps/chosen": -476.8915100097656, |
|
"logps/rejected": -518.4007568359375, |
|
"loss": 0.187, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.006897159852087498, |
|
"rewards/margins": 0.04596192017197609, |
|
"rewards/rejected": -0.05285907909274101, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4580767962463688e-06, |
|
"logits/chosen": -0.11445822566747665, |
|
"logits/rejected": -0.12616074085235596, |
|
"logps/chosen": -445.810302734375, |
|
"logps/rejected": -483.44439697265625, |
|
"loss": 0.1997, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.011665165424346924, |
|
"rewards/margins": 0.030944203957915306, |
|
"rewards/rejected": -0.04260937124490738, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3649735241511546e-06, |
|
"logits/chosen": -0.08440948277711868, |
|
"logits/rejected": -0.15135635435581207, |
|
"logps/chosen": -458.70513916015625, |
|
"logps/rejected": -531.6619262695312, |
|
"loss": 0.1857, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.007908688858151436, |
|
"rewards/margins": 0.04392402246594429, |
|
"rewards/rejected": -0.051832713186740875, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2720577572339914e-06, |
|
"logits/chosen": -0.11203843355178833, |
|
"logits/rejected": -0.14960184693336487, |
|
"logps/chosen": -450.1597595214844, |
|
"logps/rejected": -564.0913696289062, |
|
"loss": 0.1827, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.019688036292791367, |
|
"rewards/margins": 0.04803295060992241, |
|
"rewards/rejected": -0.06772098690271378, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1794585234303995e-06, |
|
"logits/chosen": -0.1444821059703827, |
|
"logits/rejected": -0.093144990503788, |
|
"logps/chosen": -526.9356689453125, |
|
"logps/rejected": -576.2337646484375, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.0206059031188488, |
|
"rewards/margins": 0.031863369047641754, |
|
"rewards/rejected": -0.05246926471590996, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0873044111206407e-06, |
|
"logits/chosen": -0.02479735016822815, |
|
"logits/rejected": -0.17415586113929749, |
|
"logps/chosen": -460.03057861328125, |
|
"logps/rejected": -571.5958251953125, |
|
"loss": 0.19, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.021069908514618874, |
|
"rewards/margins": 0.052955545485019684, |
|
"rewards/rejected": -0.07402545213699341, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9957233905648293e-06, |
|
"logits/chosen": -0.1578993797302246, |
|
"logits/rejected": -0.2234770804643631, |
|
"logps/chosen": -494.98492431640625, |
|
"logps/rejected": -540.7052001953125, |
|
"loss": 0.1871, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.012826258316636086, |
|
"rewards/margins": 0.03663797304034233, |
|
"rewards/rejected": -0.049464233219623566, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.904842636196402e-06, |
|
"logits/chosen": -0.13111546635627747, |
|
"logits/rejected": -0.05531447380781174, |
|
"logps/chosen": -508.1211853027344, |
|
"logps/rejected": -585.5679931640625, |
|
"loss": 0.1889, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0070688240230083466, |
|
"rewards/margins": 0.04199820011854172, |
|
"rewards/rejected": -0.04906702786684036, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.814788350020726e-06, |
|
"logits/chosen": -0.05076460912823677, |
|
"logits/rejected": -0.09190233051776886, |
|
"logps/chosen": -486.60882568359375, |
|
"logps/rejected": -594.5232543945312, |
|
"loss": 0.1931, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.008244060911238194, |
|
"rewards/margins": 0.04668049514293671, |
|
"rewards/rejected": -0.054924555122852325, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.725685586364051e-06, |
|
"logits/chosen": -0.09060011804103851, |
|
"logits/rejected": -0.03806861490011215, |
|
"logps/chosen": -504.2727966308594, |
|
"logps/rejected": -519.7486572265625, |
|
"loss": 0.1944, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.011549371294677258, |
|
"rewards/margins": 0.027325350791215897, |
|
"rewards/rejected": -0.03887472301721573, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6376580782162172e-06, |
|
"logits/chosen": -0.11662360280752182, |
|
"logits/rejected": -0.13336268067359924, |
|
"logps/chosen": -536.3695678710938, |
|
"logps/rejected": -606.1983032226562, |
|
"loss": 0.1908, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.020137406885623932, |
|
"rewards/margins": 0.03434290364384651, |
|
"rewards/rejected": -0.054480306804180145, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.550828065408227e-06, |
|
"logits/chosen": -0.14947877824306488, |
|
"logits/rejected": -0.03148087114095688, |
|
"logps/chosen": -485.51934814453125, |
|
"logps/rejected": -529.2047119140625, |
|
"loss": 0.1913, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.014703591354191303, |
|
"rewards/margins": 0.03080858290195465, |
|
"rewards/rejected": -0.04551216959953308, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4653161248633053e-06, |
|
"logits/chosen": -0.10968085378408432, |
|
"logits/rejected": -0.01607862487435341, |
|
"logps/chosen": -470.38006591796875, |
|
"logps/rejected": -511.1072692871094, |
|
"loss": 0.1858, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.014205960556864738, |
|
"rewards/margins": 0.03790050745010376, |
|
"rewards/rejected": -0.05210646986961365, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.381241003157162e-06, |
|
"logits/chosen": -0.10932781547307968, |
|
"logits/rejected": -0.12227501720190048, |
|
"logps/chosen": -479.05859375, |
|
"logps/rejected": -543.1046752929688, |
|
"loss": 0.1972, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.024690503254532814, |
|
"rewards/margins": 0.03049684688448906, |
|
"rewards/rejected": -0.05518735572695732, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.298719451619979e-06, |
|
"logits/chosen": -0.04668913036584854, |
|
"logits/rejected": -0.13933394849300385, |
|
"logps/chosen": -487.51971435546875, |
|
"logps/rejected": -560.5028076171875, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.009937921538949013, |
|
"rewards/margins": 0.03267529979348183, |
|
"rewards/rejected": -0.04261321574449539, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2178660642091036e-06, |
|
"logits/chosen": -0.09352131187915802, |
|
"logits/rejected": -0.10257798433303833, |
|
"logps/chosen": -509.4551696777344, |
|
"logps/rejected": -567.7073364257812, |
|
"loss": 0.1825, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.012187512591481209, |
|
"rewards/margins": 0.05223285034298897, |
|
"rewards/rejected": -0.06442036479711533, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1387931183775821e-06, |
|
"logits/chosen": -0.07275299727916718, |
|
"logits/rejected": -0.16395212709903717, |
|
"logps/chosen": -499.53277587890625, |
|
"logps/rejected": -567.4371337890625, |
|
"loss": 0.1844, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.008052601478993893, |
|
"rewards/margins": 0.05070211738348007, |
|
"rewards/rejected": -0.05875472351908684, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.061610419159532e-06, |
|
"logits/chosen": -0.03610026463866234, |
|
"logits/rejected": -0.07686237245798111, |
|
"logps/chosen": -508.24285888671875, |
|
"logps/rejected": -545.7764892578125, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.011298848316073418, |
|
"rewards/margins": 0.04110453277826309, |
|
"rewards/rejected": -0.05240337923169136, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.864251466888364e-07, |
|
"logits/chosen": -0.14515052735805511, |
|
"logits/rejected": -0.05630829930305481, |
|
"logps/chosen": -514.5619506835938, |
|
"logps/rejected": -557.375732421875, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.015279670245945454, |
|
"rewards/margins": 0.037416163831949234, |
|
"rewards/rejected": -0.052695829421281815, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.133417073629288e-07, |
|
"logits/chosen": -0.13727428019046783, |
|
"logits/rejected": -0.10859493911266327, |
|
"logps/chosen": -461.16961669921875, |
|
"logps/rejected": -528.3992309570312, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.01616724021732807, |
|
"rewards/margins": 0.049376003444194794, |
|
"rewards/rejected": -0.06554324924945831, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.424615888583332e-07, |
|
"logits/chosen": -0.13368161022663116, |
|
"logits/rejected": -0.07306365668773651, |
|
"logps/chosen": -490.7369689941406, |
|
"logps/rejected": -558.0841674804688, |
|
"loss": 0.1911, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.014125635847449303, |
|
"rewards/margins": 0.04241669178009033, |
|
"rewards/rejected": -0.056542325764894485, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.738832191993092e-07, |
|
"logits/chosen": -0.10794935375452042, |
|
"logits/rejected": -0.10061631351709366, |
|
"logps/chosen": -505.11273193359375, |
|
"logps/rejected": -576.4747314453125, |
|
"loss": 0.1859, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.016501661390066147, |
|
"rewards/margins": 0.03631531447172165, |
|
"rewards/rejected": -0.052816975861787796, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.077018300752917e-07, |
|
"logits/chosen": -0.19430354237556458, |
|
"logits/rejected": -0.09076061099767685, |
|
"logps/chosen": -483.529052734375, |
|
"logps/rejected": -520.256103515625, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.01530246902257204, |
|
"rewards/margins": 0.03641772270202637, |
|
"rewards/rejected": -0.05172019079327583, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.440093245969342e-07, |
|
"logits/chosen": -0.2117353230714798, |
|
"logits/rejected": -0.00878197606652975, |
|
"logps/chosen": -498.167724609375, |
|
"logps/rejected": -512.6229248046875, |
|
"loss": 0.1946, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.028363991528749466, |
|
"rewards/margins": 0.02800668217241764, |
|
"rewards/rejected": -0.05637066811323166, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.828941496744075e-07, |
|
"logits/chosen": -0.1706351935863495, |
|
"logits/rejected": -0.09497860074043274, |
|
"logps/chosen": -449.25640869140625, |
|
"logps/rejected": -503.36346435546875, |
|
"loss": 0.1914, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.021301377564668655, |
|
"rewards/margins": 0.03773494437336922, |
|
"rewards/rejected": -0.05903632566332817, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.244411731951671e-07, |
|
"logits/chosen": -0.16339917480945587, |
|
"logits/rejected": -0.09793440252542496, |
|
"logps/chosen": -485.3291931152344, |
|
"logps/rejected": -541.0707397460938, |
|
"loss": 0.1932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.015980413183569908, |
|
"rewards/margins": 0.036792390048503876, |
|
"rewards/rejected": -0.052772797644138336, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6873156617173594e-07, |
|
"logits/chosen": -0.16512815654277802, |
|
"logits/rejected": -0.04607289656996727, |
|
"logps/chosen": -482.732421875, |
|
"logps/rejected": -537.2395629882812, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.017042802646756172, |
|
"rewards/margins": 0.030301541090011597, |
|
"rewards/rejected": -0.04734434187412262, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1584269002318653e-07, |
|
"logits/chosen": -0.06346157193183899, |
|
"logits/rejected": -0.17497694492340088, |
|
"logps/chosen": -448.33721923828125, |
|
"logps/rejected": -536.0673217773438, |
|
"loss": 0.1874, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.021191086620092392, |
|
"rewards/margins": 0.04755834862589836, |
|
"rewards/rejected": -0.06874943524599075, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.658479891468258e-07, |
|
"logits/chosen": -0.17253567278385162, |
|
"logits/rejected": -0.11398845911026001, |
|
"logps/chosen": -448.2945861816406, |
|
"logps/rejected": -549.4234008789062, |
|
"loss": 0.1922, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.014737410470843315, |
|
"rewards/margins": 0.061607301235198975, |
|
"rewards/rejected": -0.07634472101926804, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.18816888929272e-07, |
|
"logits/chosen": -0.12382335960865021, |
|
"logits/rejected": -0.1283954679965973, |
|
"logps/chosen": -486.486572265625, |
|
"logps/rejected": -524.1173706054688, |
|
"loss": 0.1883, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.01756083406507969, |
|
"rewards/margins": 0.035057444125413895, |
|
"rewards/rejected": -0.052618276327848434, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.748146993385484e-07, |
|
"logits/chosen": -0.09039425849914551, |
|
"logits/rejected": -0.14174523949623108, |
|
"logps/chosen": -496.9384765625, |
|
"logps/rejected": -547.9468994140625, |
|
"loss": 0.1865, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.02103576436638832, |
|
"rewards/margins": 0.04283936321735382, |
|
"rewards/rejected": -0.06387512385845184, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3390252423108077e-07, |
|
"logits/chosen": -0.10462252050638199, |
|
"logits/rejected": -0.11660999059677124, |
|
"logps/chosen": -477.86163330078125, |
|
"logps/rejected": -558.4508056640625, |
|
"loss": 0.1865, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.01919230818748474, |
|
"rewards/margins": 0.046876341104507446, |
|
"rewards/rejected": -0.06606864929199219, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.961371764995243e-07, |
|
"logits/chosen": -0.10605908930301666, |
|
"logits/rejected": -0.1372717022895813, |
|
"logps/chosen": -453.75067138671875, |
|
"logps/rejected": -494.4928283691406, |
|
"loss": 0.1788, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.013546602800488472, |
|
"rewards/margins": 0.03696237877011299, |
|
"rewards/rejected": -0.05050898343324661, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.61571099179261e-07, |
|
"logits/chosen": -0.12508642673492432, |
|
"logits/rejected": -0.13528813421726227, |
|
"logps/chosen": -527.0031127929688, |
|
"logps/rejected": -544.302001953125, |
|
"loss": 0.1908, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.02414495311677456, |
|
"rewards/margins": 0.02833731472492218, |
|
"rewards/rejected": -0.05248226970434189, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3025229262312367e-07, |
|
"logits/chosen": -0.11270849406719208, |
|
"logits/rejected": -0.13684390485286713, |
|
"logps/chosen": -481.3531799316406, |
|
"logps/rejected": -518.9368896484375, |
|
"loss": 0.1929, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.02105192095041275, |
|
"rewards/margins": 0.031041234731674194, |
|
"rewards/rejected": -0.05209314823150635, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0222424784546853e-07, |
|
"logits/chosen": -0.1346197873353958, |
|
"logits/rejected": -0.07176433503627777, |
|
"logps/chosen": -523.1410522460938, |
|
"logps/rejected": -589.2576904296875, |
|
"loss": 0.1997, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.025201961398124695, |
|
"rewards/margins": 0.04333069920539856, |
|
"rewards/rejected": -0.06853266805410385, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.752588612816553e-08, |
|
"logits/chosen": -0.10653670132160187, |
|
"logits/rejected": -0.09153258800506592, |
|
"logps/chosen": -498.6484375, |
|
"logps/rejected": -578.7772216796875, |
|
"loss": 0.19, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.022535882890224457, |
|
"rewards/margins": 0.05137655884027481, |
|
"rewards/rejected": -0.07391244173049927, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.619150497236991e-08, |
|
"logits/chosen": -0.06052190810441971, |
|
"logits/rejected": -0.1323552131652832, |
|
"logps/chosen": -515.4520263671875, |
|
"logps/rejected": -532.12353515625, |
|
"loss": 0.184, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.018582861870527267, |
|
"rewards/margins": 0.03698521479964256, |
|
"rewards/rejected": -0.05556807667016983, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.825073047112743e-08, |
|
"logits/chosen": -0.0825682058930397, |
|
"logits/rejected": -0.08825576305389404, |
|
"logps/chosen": -547.6183471679688, |
|
"logps/rejected": -580.6526489257812, |
|
"loss": 0.2039, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.02423805184662342, |
|
"rewards/margins": 0.04648369550704956, |
|
"rewards/rejected": -0.07072174549102783, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.372847616895685e-08, |
|
"logits/chosen": -0.24079068005084991, |
|
"logits/rejected": -0.1403379738330841, |
|
"logps/chosen": -467.0528869628906, |
|
"logps/rejected": -511.5205078125, |
|
"loss": 0.1853, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.023756807669997215, |
|
"rewards/margins": 0.03743208199739456, |
|
"rewards/rejected": -0.061188895255327225, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.264490846553279e-08, |
|
"logits/chosen": -0.15193654596805573, |
|
"logits/rejected": -0.08774205297231674, |
|
"logps/chosen": -493.47113037109375, |
|
"logps/rejected": -540.6048583984375, |
|
"loss": 0.1914, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.019282512366771698, |
|
"rewards/margins": 0.04396004602313042, |
|
"rewards/rejected": -0.06324255466461182, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.015418611516165e-09, |
|
"logits/chosen": -0.05882133170962334, |
|
"logits/rejected": -0.18861576914787292, |
|
"logps/chosen": -478.2850646972656, |
|
"logps/rejected": -563.0004272460938, |
|
"loss": 0.1882, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.012600253336131573, |
|
"rewards/margins": 0.05376917123794556, |
|
"rewards/rejected": -0.0663694217801094, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.506013354186993e-10, |
|
"logits/chosen": -0.08989032357931137, |
|
"logits/rejected": -0.17091888189315796, |
|
"logps/chosen": -487.1897888183594, |
|
"logps/rejected": -533.8787231445312, |
|
"loss": 0.1867, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.01984177902340889, |
|
"rewards/margins": 0.038224250078201294, |
|
"rewards/rejected": -0.058066029101610184, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 937, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1943182722290654, |
|
"train_runtime": 7996.3001, |
|
"train_samples_per_second": 3.752, |
|
"train_steps_per_second": 0.117 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 937, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|