|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994666666666666, |
|
"eval_steps": 500, |
|
"global_step": 937, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.319148936170213e-08, |
|
"logits/chosen": -0.31276124715805054, |
|
"logits/rejected": -0.11341337859630585, |
|
"logps/chosen": -559.525146484375, |
|
"logps/rejected": -486.2456970214844, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": -0.20243170857429504, |
|
"logits/rejected": -0.07215167582035065, |
|
"logps/chosen": -473.5186767578125, |
|
"logps/rejected": -507.1302185058594, |
|
"loss": 0.2065, |
|
"rewards/accuracies": 0.3541666567325592, |
|
"rewards/chosen": -7.249015470733866e-05, |
|
"rewards/margins": 0.00014273211127147079, |
|
"rewards/rejected": -0.0002152222878066823, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": -0.18446393311023712, |
|
"logits/rejected": -0.09755989164113998, |
|
"logps/chosen": -501.7010803222656, |
|
"logps/rejected": -487.3160705566406, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -5.829105430166237e-05, |
|
"rewards/margins": 7.958527567097917e-05, |
|
"rewards/rejected": -0.0001378763117827475, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": -0.15609130263328552, |
|
"logits/rejected": -0.04423709958791733, |
|
"logps/chosen": -560.1486206054688, |
|
"logps/rejected": -544.0206298828125, |
|
"loss": 0.2048, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0003287494764663279, |
|
"rewards/margins": 0.00016076143947429955, |
|
"rewards/rejected": -0.0004895109450444579, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": -0.2074490785598755, |
|
"logits/rejected": -0.14103737473487854, |
|
"logps/chosen": -507.80450439453125, |
|
"logps/rejected": -515.2080078125, |
|
"loss": 0.214, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0010981714585795999, |
|
"rewards/margins": 0.00048262160271406174, |
|
"rewards/rejected": -0.0015807930612936616, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": -0.12519846856594086, |
|
"logits/rejected": -0.1412961781024933, |
|
"logps/chosen": -461.9590759277344, |
|
"logps/rejected": -499.2351989746094, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0024143296759575605, |
|
"rewards/margins": 0.0007537025958299637, |
|
"rewards/rejected": -0.0031680327374488115, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": -0.173623189330101, |
|
"logits/rejected": -0.03094838559627533, |
|
"logps/chosen": -551.9820556640625, |
|
"logps/rejected": -527.4284057617188, |
|
"loss": 0.2003, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.00582545343786478, |
|
"rewards/margins": 0.0019644282292574644, |
|
"rewards/rejected": -0.007789881434291601, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": -0.161810502409935, |
|
"logits/rejected": -0.10678007453680038, |
|
"logps/chosen": -567.8081665039062, |
|
"logps/rejected": -562.3734130859375, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.012994857504963875, |
|
"rewards/margins": 0.003251770045608282, |
|
"rewards/rejected": -0.016246628016233444, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": -0.15964026749134064, |
|
"logits/rejected": -0.27652230858802795, |
|
"logps/chosen": -562.570556640625, |
|
"logps/rejected": -621.7036743164062, |
|
"loss": 0.2037, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.026814639568328857, |
|
"rewards/margins": 0.0097076166421175, |
|
"rewards/rejected": -0.03652225807309151, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": -0.2600744664669037, |
|
"logits/rejected": -0.20050808787345886, |
|
"logps/chosen": -609.1525268554688, |
|
"logps/rejected": -612.4235229492188, |
|
"loss": 0.2067, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.059279996901750565, |
|
"rewards/margins": 0.004630334675312042, |
|
"rewards/rejected": -0.0639103353023529, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999375059004058e-06, |
|
"logits/chosen": -0.2565140724182129, |
|
"logits/rejected": -0.22637882828712463, |
|
"logps/chosen": -574.8885498046875, |
|
"logps/rejected": -590.8546142578125, |
|
"loss": 0.1998, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.07415835559368134, |
|
"rewards/margins": 0.01800454594194889, |
|
"rewards/rejected": -0.09216289967298508, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9955571065548795e-06, |
|
"logits/chosen": -0.1685013473033905, |
|
"logits/rejected": -0.2401442974805832, |
|
"logps/chosen": -557.1212158203125, |
|
"logps/rejected": -602.7764892578125, |
|
"loss": 0.196, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09011422097682953, |
|
"rewards/margins": 0.019372332841157913, |
|
"rewards/rejected": -0.10948655754327774, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9882736864879e-06, |
|
"logits/chosen": -0.2641439139842987, |
|
"logits/rejected": -0.2980344891548157, |
|
"logps/chosen": -588.050537109375, |
|
"logps/rejected": -627.3956298828125, |
|
"loss": 0.2053, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.10959631204605103, |
|
"rewards/margins": 0.014565527439117432, |
|
"rewards/rejected": -0.12416181713342667, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977534912960124e-06, |
|
"logits/chosen": -0.2924054265022278, |
|
"logits/rejected": -0.08088915795087814, |
|
"logps/chosen": -576.1680297851562, |
|
"logps/rejected": -614.0890502929688, |
|
"loss": 0.1901, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.09112486243247986, |
|
"rewards/margins": 0.025440961122512817, |
|
"rewards/rejected": -0.11656580865383148, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963355698422092e-06, |
|
"logits/chosen": -0.10601979494094849, |
|
"logits/rejected": -0.1950257569551468, |
|
"logps/chosen": -595.1011352539062, |
|
"logps/rejected": -659.9929809570312, |
|
"loss": 0.2058, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.1052999347448349, |
|
"rewards/margins": 0.02551344595849514, |
|
"rewards/rejected": -0.1308133900165558, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945755732909625e-06, |
|
"logits/chosen": -0.2408047914505005, |
|
"logits/rejected": -0.2040824145078659, |
|
"logps/chosen": -551.7179565429688, |
|
"logps/rejected": -606.5433959960938, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.07721008360385895, |
|
"rewards/margins": 0.026318836957216263, |
|
"rewards/rejected": -0.10352891683578491, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924759456701167e-06, |
|
"logits/chosen": -0.21895582973957062, |
|
"logits/rejected": -0.2554505467414856, |
|
"logps/chosen": -608.0427856445312, |
|
"logps/rejected": -679.7128295898438, |
|
"loss": 0.2025, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.10357453674077988, |
|
"rewards/margins": 0.022874176502227783, |
|
"rewards/rejected": -0.12644873559474945, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.900396026378671e-06, |
|
"logits/chosen": -0.25241002440452576, |
|
"logits/rejected": -0.2686356008052826, |
|
"logps/chosen": -576.2278442382812, |
|
"logps/rejected": -611.9133911132812, |
|
"loss": 0.2044, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.1014503687620163, |
|
"rewards/margins": 0.020282840356230736, |
|
"rewards/rejected": -0.12173320353031158, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872699274339169e-06, |
|
"logits/chosen": -0.24474278092384338, |
|
"logits/rejected": -0.19586482644081116, |
|
"logps/chosen": -570.9044189453125, |
|
"logps/rejected": -617.5431518554688, |
|
"loss": 0.1944, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.09906121343374252, |
|
"rewards/margins": 0.01674678549170494, |
|
"rewards/rejected": -0.11580799520015717, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8417076618132434e-06, |
|
"logits/chosen": -0.2917916774749756, |
|
"logits/rejected": -0.20423956215381622, |
|
"logps/chosen": -567.7699584960938, |
|
"logps/rejected": -593.5147705078125, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.08719009160995483, |
|
"rewards/margins": 0.013276703655719757, |
|
"rewards/rejected": -0.10046680271625519, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807464225455655e-06, |
|
"logits/chosen": -0.14698217809200287, |
|
"logits/rejected": -0.23266562819480896, |
|
"logps/chosen": -531.8690185546875, |
|
"logps/rejected": -583.5828857421875, |
|
"loss": 0.1964, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.07782838493585587, |
|
"rewards/margins": 0.0252009816467762, |
|
"rewards/rejected": -0.10302937030792236, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770016517582283e-06, |
|
"logits/chosen": -0.21580150723457336, |
|
"logits/rejected": -0.18905040621757507, |
|
"logps/chosen": -626.87744140625, |
|
"logps/rejected": -649.6925659179688, |
|
"loss": 0.1977, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.104043148458004, |
|
"rewards/margins": 0.021797046065330505, |
|
"rewards/rejected": -0.1258401870727539, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7294165401363616e-06, |
|
"logits/chosen": -0.12353191524744034, |
|
"logits/rejected": -0.2215413749217987, |
|
"logps/chosen": -633.0154418945312, |
|
"logps/rejected": -633.0941162109375, |
|
"loss": 0.2058, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.10003998130559921, |
|
"rewards/margins": 0.009050301276147366, |
|
"rewards/rejected": -0.10909029096364975, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68572067247573e-06, |
|
"logits/chosen": -0.16852374374866486, |
|
"logits/rejected": -0.21371085941791534, |
|
"logps/chosen": -614.1183471679688, |
|
"logps/rejected": -670.2012939453125, |
|
"loss": 0.2077, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.08841963112354279, |
|
"rewards/margins": 0.02279593050479889, |
|
"rewards/rejected": -0.11121556907892227, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638989593081364e-06, |
|
"logits/chosen": -0.1663983315229416, |
|
"logits/rejected": -0.21970775723457336, |
|
"logps/chosen": -602.5869750976562, |
|
"logps/rejected": -618.7034912109375, |
|
"loss": 0.2061, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.07862231880426407, |
|
"rewards/margins": 0.021257968619465828, |
|
"rewards/rejected": -0.09988027811050415, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5892881952959015e-06, |
|
"logits/chosen": -0.21088270843029022, |
|
"logits/rejected": -0.14775848388671875, |
|
"logps/chosen": -577.7684326171875, |
|
"logps/rejected": -632.3033447265625, |
|
"loss": 0.2054, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0773148387670517, |
|
"rewards/margins": 0.026050010696053505, |
|
"rewards/rejected": -0.10336484014987946, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536685497209182e-06, |
|
"logits/chosen": -0.1055503636598587, |
|
"logits/rejected": -0.06379745155572891, |
|
"logps/chosen": -522.751708984375, |
|
"logps/rejected": -602.4344482421875, |
|
"loss": 0.2001, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.06098253279924393, |
|
"rewards/margins": 0.030480870977044106, |
|
"rewards/rejected": -0.09146340191364288, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481254545815943e-06, |
|
"logits/chosen": -0.15926873683929443, |
|
"logits/rejected": -0.04976898431777954, |
|
"logps/chosen": -529.4932250976562, |
|
"logps/rejected": -549.9386596679688, |
|
"loss": 0.1973, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.06077051907777786, |
|
"rewards/margins": 0.01582062616944313, |
|
"rewards/rejected": -0.0765911340713501, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.42307231557875e-06, |
|
"logits/chosen": -0.07944826781749725, |
|
"logits/rejected": -0.05855567380785942, |
|
"logps/chosen": -512.50439453125, |
|
"logps/rejected": -543.458984375, |
|
"loss": 0.1986, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.06550983339548111, |
|
"rewards/margins": 0.023027174174785614, |
|
"rewards/rejected": -0.08853700011968613, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3622196015370305e-06, |
|
"logits/chosen": -0.12430046498775482, |
|
"logits/rejected": -0.06956211477518082, |
|
"logps/chosen": -550.2479248046875, |
|
"logps/rejected": -614.044189453125, |
|
"loss": 0.1944, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.056610800325870514, |
|
"rewards/margins": 0.029858995229005814, |
|
"rewards/rejected": -0.08646979182958603, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.298780907110648e-06, |
|
"logits/chosen": -0.09455857425928116, |
|
"logits/rejected": -0.07383386790752411, |
|
"logps/chosen": -598.065185546875, |
|
"logps/rejected": -647.9603271484375, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.06337399780750275, |
|
"rewards/margins": 0.026696253567934036, |
|
"rewards/rejected": -0.09007024019956589, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.23284432675381e-06, |
|
"logits/chosen": -0.19348487257957458, |
|
"logits/rejected": -0.1443384736776352, |
|
"logps/chosen": -539.6243896484375, |
|
"logps/rejected": -612.7183837890625, |
|
"loss": 0.1963, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.05517622083425522, |
|
"rewards/margins": 0.02591213583946228, |
|
"rewards/rejected": -0.0810883566737175, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.164501423622277e-06, |
|
"logits/chosen": -0.19629542529582977, |
|
"logits/rejected": -0.13960464298725128, |
|
"logps/chosen": -516.0609130859375, |
|
"logps/rejected": -658.4205932617188, |
|
"loss": 0.1915, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.05958019569516182, |
|
"rewards/margins": 0.06007415056228638, |
|
"rewards/rejected": -0.1196543425321579, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0938471024237355e-06, |
|
"logits/chosen": -0.1600683629512787, |
|
"logits/rejected": -0.10378336906433105, |
|
"logps/chosen": -590.7578125, |
|
"logps/rejected": -621.64697265625, |
|
"loss": 0.2007, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.08227936178445816, |
|
"rewards/margins": 0.01520558726042509, |
|
"rewards/rejected": -0.09748493880033493, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020979477627907e-06, |
|
"logits/chosen": -0.19418606162071228, |
|
"logits/rejected": -0.1177397221326828, |
|
"logps/chosen": -586.6962890625, |
|
"logps/rejected": -654.0504150390625, |
|
"loss": 0.1894, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.07023846358060837, |
|
"rewards/margins": 0.03478557616472244, |
|
"rewards/rejected": -0.10502403974533081, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9459997372194105e-06, |
|
"logits/chosen": -0.1304813176393509, |
|
"logits/rejected": -0.04862945154309273, |
|
"logps/chosen": -594.4133911132812, |
|
"logps/rejected": -617.715087890625, |
|
"loss": 0.192, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08139745891094208, |
|
"rewards/margins": 0.026553615927696228, |
|
"rewards/rejected": -0.10795106738805771, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869012002182573e-06, |
|
"logits/chosen": -0.21274884045124054, |
|
"logits/rejected": -0.03855857998132706, |
|
"logps/chosen": -557.4656982421875, |
|
"logps/rejected": -637.321044921875, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.07546891272068024, |
|
"rewards/margins": 0.03727220743894577, |
|
"rewards/rejected": -0.1127411276102066, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7901231819133104e-06, |
|
"logits/chosen": -0.10762195289134979, |
|
"logits/rejected": -0.10060106217861176, |
|
"logps/chosen": -599.8753051757812, |
|
"logps/rejected": -646.8792724609375, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.0741112157702446, |
|
"rewards/margins": 0.03268015384674072, |
|
"rewards/rejected": -0.10679137706756592, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.709442825758875e-06, |
|
"logits/chosen": -0.12406639009714127, |
|
"logits/rejected": -0.053130537271499634, |
|
"logps/chosen": -587.0034790039062, |
|
"logps/rejected": -618.0760498046875, |
|
"loss": 0.19, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.07897321879863739, |
|
"rewards/margins": 0.025586843490600586, |
|
"rewards/rejected": -0.10456006228923798, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6270829708916113e-06, |
|
"logits/chosen": -0.11101411283016205, |
|
"logits/rejected": -0.08626400679349899, |
|
"logps/chosen": -569.6163330078125, |
|
"logps/rejected": -620.4082641601562, |
|
"loss": 0.1913, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.06503543257713318, |
|
"rewards/margins": 0.037478551268577576, |
|
"rewards/rejected": -0.10251398384571075, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543157986727991e-06, |
|
"logits/chosen": -0.11596628278493881, |
|
"logits/rejected": -0.09326865524053574, |
|
"logps/chosen": -569.7626342773438, |
|
"logps/rejected": -647.47119140625, |
|
"loss": 0.1913, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0574682354927063, |
|
"rewards/margins": 0.03390919789671898, |
|
"rewards/rejected": -0.09137743711471558, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4577844161089614e-06, |
|
"logits/chosen": -0.1688176691532135, |
|
"logits/rejected": -0.1762055903673172, |
|
"logps/chosen": -548.4512939453125, |
|
"logps/rejected": -596.2463989257812, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.054659001529216766, |
|
"rewards/margins": 0.025764942169189453, |
|
"rewards/rejected": -0.08042393624782562, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3710808134621577e-06, |
|
"logits/chosen": -0.12280504405498505, |
|
"logits/rejected": -0.018482182174921036, |
|
"logps/chosen": -567.9172973632812, |
|
"logps/rejected": -593.0560302734375, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0538947694003582, |
|
"rewards/margins": 0.02232169173657894, |
|
"rewards/rejected": -0.07621645927429199, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2831675801707126e-06, |
|
"logits/chosen": -0.04735702648758888, |
|
"logits/rejected": -0.10849102586507797, |
|
"logps/chosen": -590.4489135742188, |
|
"logps/rejected": -649.82568359375, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04551684111356735, |
|
"rewards/margins": 0.026576777920126915, |
|
"rewards/rejected": -0.07209362089633942, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.194166797377289e-06, |
|
"logits/chosen": -0.08134131878614426, |
|
"logits/rejected": -0.1677294671535492, |
|
"logps/chosen": -574.8263549804688, |
|
"logps/rejected": -607.7601318359375, |
|
"loss": 0.1893, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.04221652075648308, |
|
"rewards/margins": 0.030459443107247353, |
|
"rewards/rejected": -0.07267596572637558, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.104202056455501e-06, |
|
"logits/chosen": -0.0588027760386467, |
|
"logits/rejected": -0.1330319195985794, |
|
"logps/chosen": -547.6630249023438, |
|
"logps/rejected": -580.7600708007812, |
|
"loss": 0.1985, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.04689568281173706, |
|
"rewards/margins": 0.024683769792318344, |
|
"rewards/rejected": -0.07157944142818451, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.013398287384144e-06, |
|
"logits/chosen": -0.0910586565732956, |
|
"logits/rejected": -0.13333860039710999, |
|
"logps/chosen": -520.99267578125, |
|
"logps/rejected": -608.8109130859375, |
|
"loss": 0.1948, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04666762426495552, |
|
"rewards/margins": 0.04471370577812195, |
|
"rewards/rejected": -0.09138132631778717, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9218815852625717e-06, |
|
"logits/chosen": -0.09454444795846939, |
|
"logits/rejected": -0.04375922679901123, |
|
"logps/chosen": -620.7197265625, |
|
"logps/rejected": -636.3668212890625, |
|
"loss": 0.201, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.06732948869466782, |
|
"rewards/margins": 0.026028599590063095, |
|
"rewards/rejected": -0.09335808455944061, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.829779035208113e-06, |
|
"logits/chosen": -0.09432949125766754, |
|
"logits/rejected": -0.08926217257976532, |
|
"logps/chosen": -597.0772705078125, |
|
"logps/rejected": -639.5493774414062, |
|
"loss": 0.1909, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.040321771055459976, |
|
"rewards/margins": 0.03370783478021622, |
|
"rewards/rejected": -0.07402960956096649, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.737218535878705e-06, |
|
"logits/chosen": -0.1773318350315094, |
|
"logits/rejected": -0.07903443276882172, |
|
"logps/chosen": -552.8883666992188, |
|
"logps/rejected": -618.2833251953125, |
|
"loss": 0.2029, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04510737583041191, |
|
"rewards/margins": 0.028245270252227783, |
|
"rewards/rejected": -0.07335264980792999, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.64432862186579e-06, |
|
"logits/chosen": -0.07201124727725983, |
|
"logits/rejected": -0.04144411161541939, |
|
"logps/chosen": -526.00634765625, |
|
"logps/rejected": -577.3812255859375, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.03259889408946037, |
|
"rewards/margins": 0.028664156794548035, |
|
"rewards/rejected": -0.06126304715871811, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.551238285204126e-06, |
|
"logits/chosen": -0.13225743174552917, |
|
"logits/rejected": -0.03518156707286835, |
|
"logps/chosen": -558.69970703125, |
|
"logps/rejected": -633.7002563476562, |
|
"loss": 0.1987, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.034947603940963745, |
|
"rewards/margins": 0.041034139692783356, |
|
"rewards/rejected": -0.0759817361831665, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4580767962463688e-06, |
|
"logits/chosen": -0.03775392845273018, |
|
"logits/rejected": -0.06259463727474213, |
|
"logps/chosen": -564.3277587890625, |
|
"logps/rejected": -616.877685546875, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.041550230234861374, |
|
"rewards/margins": 0.04528028517961502, |
|
"rewards/rejected": -0.0868305116891861, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3649735241511546e-06, |
|
"logits/chosen": -0.11865083128213882, |
|
"logits/rejected": -0.14535991847515106, |
|
"logps/chosen": -539.8975219726562, |
|
"logps/rejected": -628.8270263671875, |
|
"loss": 0.1988, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.06274162977933884, |
|
"rewards/margins": 0.050676118582487106, |
|
"rewards/rejected": -0.11341774463653564, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2720577572339914e-06, |
|
"logits/chosen": -0.1661374866962433, |
|
"logits/rejected": -0.10748039186000824, |
|
"logps/chosen": -546.2053833007812, |
|
"logps/rejected": -584.2305908203125, |
|
"loss": 0.1901, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.05626441910862923, |
|
"rewards/margins": 0.02776341699063778, |
|
"rewards/rejected": -0.08402784168720245, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1794585234303995e-06, |
|
"logits/chosen": -0.10749207437038422, |
|
"logits/rejected": -0.13697417080402374, |
|
"logps/chosen": -517.0869140625, |
|
"logps/rejected": -581.8153686523438, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.052382372319698334, |
|
"rewards/margins": 0.035972487181425095, |
|
"rewards/rejected": -0.08835486322641373, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0873044111206407e-06, |
|
"logits/chosen": -0.1282195746898651, |
|
"logits/rejected": -0.1339006870985031, |
|
"logps/chosen": -576.3350830078125, |
|
"logps/rejected": -666.8603515625, |
|
"loss": 0.1907, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04062817618250847, |
|
"rewards/margins": 0.03738432377576828, |
|
"rewards/rejected": -0.07801250368356705, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9957233905648293e-06, |
|
"logits/chosen": -0.10549817234277725, |
|
"logits/rejected": -0.11278073489665985, |
|
"logps/chosen": -566.6007080078125, |
|
"logps/rejected": -636.8270263671875, |
|
"loss": 0.1877, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.048470962792634964, |
|
"rewards/margins": 0.04373977333307266, |
|
"rewards/rejected": -0.09221073240041733, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.904842636196402e-06, |
|
"logits/chosen": -0.0554957278072834, |
|
"logits/rejected": -0.13037823140621185, |
|
"logps/chosen": -597.04150390625, |
|
"logps/rejected": -615.6434326171875, |
|
"loss": 0.1909, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0562109649181366, |
|
"rewards/margins": 0.028234709054231644, |
|
"rewards/rejected": -0.08444567024707794, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.814788350020726e-06, |
|
"logits/chosen": -0.0553332157433033, |
|
"logits/rejected": -0.14984294772148132, |
|
"logps/chosen": -511.7176818847656, |
|
"logps/rejected": -577.5421752929688, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.05183824896812439, |
|
"rewards/margins": 0.0338759571313858, |
|
"rewards/rejected": -0.08571420609951019, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.725685586364051e-06, |
|
"logits/chosen": -0.1068972796201706, |
|
"logits/rejected": -0.13699831068515778, |
|
"logps/chosen": -547.6019897460938, |
|
"logps/rejected": -624.2053833007812, |
|
"loss": 0.1908, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04226940870285034, |
|
"rewards/margins": 0.04575734585523605, |
|
"rewards/rejected": -0.08802676200866699, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6376580782162172e-06, |
|
"logits/chosen": -0.12253417819738388, |
|
"logits/rejected": -0.09159277379512787, |
|
"logps/chosen": -534.8265380859375, |
|
"logps/rejected": -639.2476806640625, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.038840554654598236, |
|
"rewards/margins": 0.04929639771580696, |
|
"rewards/rejected": -0.0881369560956955, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.550828065408227e-06, |
|
"logits/chosen": -0.11153294146060944, |
|
"logits/rejected": -0.0631122812628746, |
|
"logps/chosen": -581.9796142578125, |
|
"logps/rejected": -639.3689575195312, |
|
"loss": 0.1738, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.04295315593481064, |
|
"rewards/margins": 0.037230443209409714, |
|
"rewards/rejected": -0.08018360286951065, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4653161248633053e-06, |
|
"logits/chosen": -0.10305066406726837, |
|
"logits/rejected": -0.13783864676952362, |
|
"logps/chosen": -582.2150268554688, |
|
"logps/rejected": -607.2169799804688, |
|
"loss": 0.1865, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.05051354691386223, |
|
"rewards/margins": 0.02962956391274929, |
|
"rewards/rejected": -0.08014310896396637, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.381241003157162e-06, |
|
"logits/chosen": -0.09553556144237518, |
|
"logits/rejected": -0.1049310564994812, |
|
"logps/chosen": -561.0845947265625, |
|
"logps/rejected": -615.9722900390625, |
|
"loss": 0.19, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.046824414283037186, |
|
"rewards/margins": 0.03598689287900925, |
|
"rewards/rejected": -0.08281131088733673, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.298719451619979e-06, |
|
"logits/chosen": -0.1247280016541481, |
|
"logits/rejected": -0.0659816786646843, |
|
"logps/chosen": -560.4979858398438, |
|
"logps/rejected": -620.7578735351562, |
|
"loss": 0.2002, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.04652046412229538, |
|
"rewards/margins": 0.041216202080249786, |
|
"rewards/rejected": -0.08773668110370636, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2178660642091036e-06, |
|
"logits/chosen": -0.03698350116610527, |
|
"logits/rejected": -0.2196667492389679, |
|
"logps/chosen": -521.7525634765625, |
|
"logps/rejected": -626.46435546875, |
|
"loss": 0.1979, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.05702243372797966, |
|
"rewards/margins": 0.041262269020080566, |
|
"rewards/rejected": -0.09828470647335052, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1387931183775821e-06, |
|
"logits/chosen": -0.1309659779071808, |
|
"logits/rejected": -0.126008078455925, |
|
"logps/chosen": -526.6151123046875, |
|
"logps/rejected": -586.6326293945312, |
|
"loss": 0.1836, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0365142747759819, |
|
"rewards/margins": 0.039250634610652924, |
|
"rewards/rejected": -0.07576490938663483, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.061610419159532e-06, |
|
"logits/chosen": -0.06580721586942673, |
|
"logits/rejected": -0.11697240173816681, |
|
"logps/chosen": -545.3971557617188, |
|
"logps/rejected": -590.3699340820312, |
|
"loss": 0.186, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.040514297783374786, |
|
"rewards/margins": 0.041993193328380585, |
|
"rewards/rejected": -0.08250749111175537, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.864251466888364e-07, |
|
"logits/chosen": 0.015632059425115585, |
|
"logits/rejected": -0.14370284974575043, |
|
"logps/chosen": -527.1017456054688, |
|
"logps/rejected": -602.5015869140625, |
|
"loss": 0.1872, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.03584219887852669, |
|
"rewards/margins": 0.0341840498149395, |
|
"rewards/rejected": -0.07002625614404678, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.133417073629288e-07, |
|
"logits/chosen": -0.1096029132604599, |
|
"logits/rejected": -0.09382790327072144, |
|
"logps/chosen": -552.9088745117188, |
|
"logps/rejected": -619.2091674804688, |
|
"loss": 0.1929, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04123011603951454, |
|
"rewards/margins": 0.03130009397864342, |
|
"rewards/rejected": -0.07253019511699677, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.424615888583332e-07, |
|
"logits/chosen": -0.1330350786447525, |
|
"logits/rejected": -0.07537052035331726, |
|
"logps/chosen": -521.3177490234375, |
|
"logps/rejected": -601.4888305664062, |
|
"loss": 0.1829, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.037754353135824203, |
|
"rewards/margins": 0.041079822927713394, |
|
"rewards/rejected": -0.0788341760635376, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.738832191993092e-07, |
|
"logits/chosen": -0.13393089175224304, |
|
"logits/rejected": -0.07735292613506317, |
|
"logps/chosen": -589.1104736328125, |
|
"logps/rejected": -623.0423583984375, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04533671587705612, |
|
"rewards/margins": 0.03662148863077164, |
|
"rewards/rejected": -0.08195820450782776, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.077018300752917e-07, |
|
"logits/chosen": -0.09014391899108887, |
|
"logits/rejected": -0.02712271548807621, |
|
"logps/chosen": -550.0320434570312, |
|
"logps/rejected": -605.1174926757812, |
|
"loss": 0.1961, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.05133052542805672, |
|
"rewards/margins": 0.041539210826158524, |
|
"rewards/rejected": -0.09286972880363464, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.440093245969342e-07, |
|
"logits/chosen": -0.08313737064599991, |
|
"logits/rejected": -0.1943168193101883, |
|
"logps/chosen": -516.8920288085938, |
|
"logps/rejected": -601.4186401367188, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.04221433773636818, |
|
"rewards/margins": 0.0475175604224205, |
|
"rewards/rejected": -0.08973188698291779, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.828941496744075e-07, |
|
"logits/chosen": -0.11161942780017853, |
|
"logits/rejected": -0.0919300764799118, |
|
"logps/chosen": -563.8603515625, |
|
"logps/rejected": -619.1151733398438, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.04418020322918892, |
|
"rewards/margins": 0.03953651711344719, |
|
"rewards/rejected": -0.08371671289205551, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.244411731951671e-07, |
|
"logits/chosen": -0.13506890833377838, |
|
"logits/rejected": -0.033810555934906006, |
|
"logps/chosen": -605.5892944335938, |
|
"logps/rejected": -609.83544921875, |
|
"loss": 0.1878, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.03747162967920303, |
|
"rewards/margins": 0.02192925289273262, |
|
"rewards/rejected": -0.059400878846645355, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6873156617173594e-07, |
|
"logits/chosen": -0.07261113822460175, |
|
"logits/rejected": -0.16117814183235168, |
|
"logps/chosen": -553.5911254882812, |
|
"logps/rejected": -624.5232543945312, |
|
"loss": 0.1921, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04296105355024338, |
|
"rewards/margins": 0.0388905294239521, |
|
"rewards/rejected": -0.08185158669948578, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1584269002318653e-07, |
|
"logits/chosen": -0.07403261959552765, |
|
"logits/rejected": -0.054157156497240067, |
|
"logps/chosen": -535.3461303710938, |
|
"logps/rejected": -585.4727783203125, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0406302735209465, |
|
"rewards/margins": 0.03608276695013046, |
|
"rewards/rejected": -0.07671303302049637, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.658479891468258e-07, |
|
"logits/chosen": -0.1717700958251953, |
|
"logits/rejected": -0.08853835612535477, |
|
"logps/chosen": -527.3263549804688, |
|
"logps/rejected": -540.2444458007812, |
|
"loss": 0.1778, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.04036609083414078, |
|
"rewards/margins": 0.03141506761312485, |
|
"rewards/rejected": -0.07178115844726562, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.18816888929272e-07, |
|
"logits/chosen": -0.09848084300756454, |
|
"logits/rejected": -0.06764743477106094, |
|
"logps/chosen": -563.3206787109375, |
|
"logps/rejected": -668.9093017578125, |
|
"loss": 0.2002, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.046812716871500015, |
|
"rewards/margins": 0.054834604263305664, |
|
"rewards/rejected": -0.10164730250835419, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.748146993385484e-07, |
|
"logits/chosen": -0.09693370759487152, |
|
"logits/rejected": -0.07278673350811005, |
|
"logps/chosen": -522.9954833984375, |
|
"logps/rejected": -612.6608276367188, |
|
"loss": 0.1854, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04407941550016403, |
|
"rewards/margins": 0.05026249960064888, |
|
"rewards/rejected": -0.09434191882610321, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3390252423108077e-07, |
|
"logits/chosen": -0.07084161043167114, |
|
"logits/rejected": -0.18225322663784027, |
|
"logps/chosen": -488.76483154296875, |
|
"logps/rejected": -558.3425903320312, |
|
"loss": 0.1939, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.035873524844646454, |
|
"rewards/margins": 0.037640780210494995, |
|
"rewards/rejected": -0.07351429760456085, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.961371764995243e-07, |
|
"logits/chosen": -0.11218070983886719, |
|
"logits/rejected": -0.143798828125, |
|
"logps/chosen": -548.5975341796875, |
|
"logps/rejected": -618.435302734375, |
|
"loss": 0.2009, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.03908687084913254, |
|
"rewards/margins": 0.042751066386699677, |
|
"rewards/rejected": -0.08183793723583221, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.61571099179261e-07, |
|
"logits/chosen": -0.0712205171585083, |
|
"logits/rejected": -0.06110917776823044, |
|
"logps/chosen": -584.1240234375, |
|
"logps/rejected": -650.0173950195312, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.04009150713682175, |
|
"rewards/margins": 0.030330544337630272, |
|
"rewards/rejected": -0.07042204588651657, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3025229262312367e-07, |
|
"logits/chosen": -0.0935712531208992, |
|
"logits/rejected": -0.05454383045434952, |
|
"logps/chosen": -496.932861328125, |
|
"logps/rejected": -605.6661987304688, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.042653247714042664, |
|
"rewards/margins": 0.048957787454128265, |
|
"rewards/rejected": -0.09161103516817093, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0222424784546853e-07, |
|
"logits/chosen": -0.08921684324741364, |
|
"logits/rejected": -0.15163610875606537, |
|
"logps/chosen": -579.2117919921875, |
|
"logps/rejected": -619.4464111328125, |
|
"loss": 0.1904, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04733709245920181, |
|
"rewards/margins": 0.03301934152841568, |
|
"rewards/rejected": -0.08035643398761749, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.752588612816553e-08, |
|
"logits/chosen": -0.04686546325683594, |
|
"logits/rejected": -0.15816907584667206, |
|
"logps/chosen": -509.0023498535156, |
|
"logps/rejected": -572.1159057617188, |
|
"loss": 0.1754, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.042182981967926025, |
|
"rewards/margins": 0.04292844608426094, |
|
"rewards/rejected": -0.08511142432689667, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.619150497236991e-08, |
|
"logits/chosen": -0.07643123716115952, |
|
"logits/rejected": -0.16245657205581665, |
|
"logps/chosen": -535.0369873046875, |
|
"logps/rejected": -608.0992431640625, |
|
"loss": 0.192, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.04792182892560959, |
|
"rewards/margins": 0.03496783226728439, |
|
"rewards/rejected": -0.08288966119289398, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.825073047112743e-08, |
|
"logits/chosen": -0.13168227672576904, |
|
"logits/rejected": -0.046010442078113556, |
|
"logps/chosen": -579.3240356445312, |
|
"logps/rejected": -674.3414306640625, |
|
"loss": 0.1964, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04349333792924881, |
|
"rewards/margins": 0.047455307096242905, |
|
"rewards/rejected": -0.09094865620136261, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.372847616895685e-08, |
|
"logits/chosen": -0.04904794320464134, |
|
"logits/rejected": -0.019006099551916122, |
|
"logps/chosen": -542.4931640625, |
|
"logps/rejected": -638.1673583984375, |
|
"loss": 0.1889, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04928978905081749, |
|
"rewards/margins": 0.03806794807314873, |
|
"rewards/rejected": -0.08735774457454681, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.264490846553279e-08, |
|
"logits/chosen": -0.12707039713859558, |
|
"logits/rejected": -0.10833065211772919, |
|
"logps/chosen": -579.73681640625, |
|
"logps/rejected": -622.3654174804688, |
|
"loss": 0.1897, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.046609390527009964, |
|
"rewards/margins": 0.03541853651404381, |
|
"rewards/rejected": -0.08202792704105377, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.015418611516165e-09, |
|
"logits/chosen": -0.0854305848479271, |
|
"logits/rejected": -0.11656080186367035, |
|
"logps/chosen": -616.4360961914062, |
|
"logps/rejected": -670.5054931640625, |
|
"loss": 0.1907, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04680439084768295, |
|
"rewards/margins": 0.05593379586935043, |
|
"rewards/rejected": -0.10273818671703339, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.506013354186993e-10, |
|
"logits/chosen": -0.11298644542694092, |
|
"logits/rejected": -0.03937912359833717, |
|
"logps/chosen": -532.8866577148438, |
|
"logps/rejected": -597.7803344726562, |
|
"loss": 0.2033, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.043054092675447464, |
|
"rewards/margins": 0.037277717143297195, |
|
"rewards/rejected": -0.08033180981874466, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 937, |
|
"total_flos": 0.0, |
|
"train_loss": 0.19462941225971966, |
|
"train_runtime": 7972.3934, |
|
"train_samples_per_second": 3.763, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 937, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|