phi-2-irepo-chatml-v1-i2 / trainer_state.json
lole25's picture
Model save
84e4e18 verified
raw
history blame
46 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9994666666666666,
"eval_steps": 500,
"global_step": 937,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.319148936170213e-08,
"logits/chosen": -0.31276124715805054,
"logits/rejected": -0.11341337859630585,
"logps/chosen": -559.525146484375,
"logps/rejected": -486.2456970214844,
"loss": 0.21,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 5.319148936170213e-07,
"logits/chosen": -0.20243170857429504,
"logits/rejected": -0.07215167582035065,
"logps/chosen": -473.5186767578125,
"logps/rejected": -507.1302185058594,
"loss": 0.2065,
"rewards/accuracies": 0.3541666567325592,
"rewards/chosen": -7.249015470733866e-05,
"rewards/margins": 0.00014273211127147079,
"rewards/rejected": -0.0002152222878066823,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 1.0638297872340427e-06,
"logits/chosen": -0.18446393311023712,
"logits/rejected": -0.09755989164113998,
"logps/chosen": -501.7010803222656,
"logps/rejected": -487.3160705566406,
"loss": 0.2124,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -5.829105430166237e-05,
"rewards/margins": 7.958527567097917e-05,
"rewards/rejected": -0.0001378763117827475,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 1.595744680851064e-06,
"logits/chosen": -0.15609130263328552,
"logits/rejected": -0.04423709958791733,
"logps/chosen": -560.1486206054688,
"logps/rejected": -544.0206298828125,
"loss": 0.2048,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.0003287494764663279,
"rewards/margins": 0.00016076143947429955,
"rewards/rejected": -0.0004895109450444579,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 2.1276595744680853e-06,
"logits/chosen": -0.2074490785598755,
"logits/rejected": -0.14103737473487854,
"logps/chosen": -507.80450439453125,
"logps/rejected": -515.2080078125,
"loss": 0.214,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.0010981714585795999,
"rewards/margins": 0.00048262160271406174,
"rewards/rejected": -0.0015807930612936616,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 2.6595744680851065e-06,
"logits/chosen": -0.12519846856594086,
"logits/rejected": -0.1412961781024933,
"logps/chosen": -461.9590759277344,
"logps/rejected": -499.2351989746094,
"loss": 0.2124,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.0024143296759575605,
"rewards/margins": 0.0007537025958299637,
"rewards/rejected": -0.0031680327374488115,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 3.191489361702128e-06,
"logits/chosen": -0.173623189330101,
"logits/rejected": -0.03094838559627533,
"logps/chosen": -551.9820556640625,
"logps/rejected": -527.4284057617188,
"loss": 0.2003,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.00582545343786478,
"rewards/margins": 0.0019644282292574644,
"rewards/rejected": -0.007789881434291601,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 3.723404255319149e-06,
"logits/chosen": -0.161810502409935,
"logits/rejected": -0.10678007453680038,
"logps/chosen": -567.8081665039062,
"logps/rejected": -562.3734130859375,
"loss": 0.2098,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.012994857504963875,
"rewards/margins": 0.003251770045608282,
"rewards/rejected": -0.016246628016233444,
"step": 70
},
{
"epoch": 0.09,
"learning_rate": 4.255319148936171e-06,
"logits/chosen": -0.15964026749134064,
"logits/rejected": -0.27652230858802795,
"logps/chosen": -562.570556640625,
"logps/rejected": -621.7036743164062,
"loss": 0.2037,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.026814639568328857,
"rewards/margins": 0.0097076166421175,
"rewards/rejected": -0.03652225807309151,
"step": 80
},
{
"epoch": 0.1,
"learning_rate": 4.787234042553192e-06,
"logits/chosen": -0.2600744664669037,
"logits/rejected": -0.20050808787345886,
"logps/chosen": -609.1525268554688,
"logps/rejected": -612.4235229492188,
"loss": 0.2067,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.059279996901750565,
"rewards/margins": 0.004630334675312042,
"rewards/rejected": -0.0639103353023529,
"step": 90
},
{
"epoch": 0.11,
"learning_rate": 4.999375059004058e-06,
"logits/chosen": -0.2565140724182129,
"logits/rejected": -0.22637882828712463,
"logps/chosen": -574.8885498046875,
"logps/rejected": -590.8546142578125,
"loss": 0.1998,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.07415835559368134,
"rewards/margins": 0.01800454594194889,
"rewards/rejected": -0.09216289967298508,
"step": 100
},
{
"epoch": 0.12,
"learning_rate": 4.9955571065548795e-06,
"logits/chosen": -0.1685013473033905,
"logits/rejected": -0.2401442974805832,
"logps/chosen": -557.1212158203125,
"logps/rejected": -602.7764892578125,
"loss": 0.196,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.09011422097682953,
"rewards/margins": 0.019372332841157913,
"rewards/rejected": -0.10948655754327774,
"step": 110
},
{
"epoch": 0.13,
"learning_rate": 4.9882736864879e-06,
"logits/chosen": -0.2641439139842987,
"logits/rejected": -0.2980344891548157,
"logps/chosen": -588.050537109375,
"logps/rejected": -627.3956298828125,
"loss": 0.2053,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.10959631204605103,
"rewards/margins": 0.014565527439117432,
"rewards/rejected": -0.12416181713342667,
"step": 120
},
{
"epoch": 0.14,
"learning_rate": 4.977534912960124e-06,
"logits/chosen": -0.2924054265022278,
"logits/rejected": -0.08088915795087814,
"logps/chosen": -576.1680297851562,
"logps/rejected": -614.0890502929688,
"loss": 0.1901,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -0.09112486243247986,
"rewards/margins": 0.025440961122512817,
"rewards/rejected": -0.11656580865383148,
"step": 130
},
{
"epoch": 0.15,
"learning_rate": 4.963355698422092e-06,
"logits/chosen": -0.10601979494094849,
"logits/rejected": -0.1950257569551468,
"logps/chosen": -595.1011352539062,
"logps/rejected": -659.9929809570312,
"loss": 0.2058,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.1052999347448349,
"rewards/margins": 0.02551344595849514,
"rewards/rejected": -0.1308133900165558,
"step": 140
},
{
"epoch": 0.16,
"learning_rate": 4.945755732909625e-06,
"logits/chosen": -0.2408047914505005,
"logits/rejected": -0.2040824145078659,
"logps/chosen": -551.7179565429688,
"logps/rejected": -606.5433959960938,
"loss": 0.1955,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.07721008360385895,
"rewards/margins": 0.026318836957216263,
"rewards/rejected": -0.10352891683578491,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 4.924759456701167e-06,
"logits/chosen": -0.21895582973957062,
"logits/rejected": -0.2554505467414856,
"logps/chosen": -608.0427856445312,
"logps/rejected": -679.7128295898438,
"loss": 0.2025,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.10357453674077988,
"rewards/margins": 0.022874176502227783,
"rewards/rejected": -0.12644873559474945,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 4.900396026378671e-06,
"logits/chosen": -0.25241002440452576,
"logits/rejected": -0.2686356008052826,
"logps/chosen": -576.2278442382812,
"logps/rejected": -611.9133911132812,
"loss": 0.2044,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.1014503687620163,
"rewards/margins": 0.020282840356230736,
"rewards/rejected": -0.12173320353031158,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 4.872699274339169e-06,
"logits/chosen": -0.24474278092384338,
"logits/rejected": -0.19586482644081116,
"logps/chosen": -570.9044189453125,
"logps/rejected": -617.5431518554688,
"loss": 0.1944,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.09906121343374252,
"rewards/margins": 0.01674678549170494,
"rewards/rejected": -0.11580799520015717,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 4.8417076618132434e-06,
"logits/chosen": -0.2917916774749756,
"logits/rejected": -0.20423956215381622,
"logps/chosen": -567.7699584960938,
"logps/rejected": -593.5147705078125,
"loss": 0.2046,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.08719009160995483,
"rewards/margins": 0.013276703655719757,
"rewards/rejected": -0.10046680271625519,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 4.807464225455655e-06,
"logits/chosen": -0.14698217809200287,
"logits/rejected": -0.23266562819480896,
"logps/chosen": -531.8690185546875,
"logps/rejected": -583.5828857421875,
"loss": 0.1964,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.07782838493585587,
"rewards/margins": 0.0252009816467762,
"rewards/rejected": -0.10302937030792236,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 4.770016517582283e-06,
"logits/chosen": -0.21580150723457336,
"logits/rejected": -0.18905040621757507,
"logps/chosen": -626.87744140625,
"logps/rejected": -649.6925659179688,
"loss": 0.1977,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.104043148458004,
"rewards/margins": 0.021797046065330505,
"rewards/rejected": -0.1258401870727539,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 4.7294165401363616e-06,
"logits/chosen": -0.12353191524744034,
"logits/rejected": -0.2215413749217987,
"logps/chosen": -633.0154418945312,
"logps/rejected": -633.0941162109375,
"loss": 0.2058,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.10003998130559921,
"rewards/margins": 0.009050301276147366,
"rewards/rejected": -0.10909029096364975,
"step": 220
},
{
"epoch": 0.25,
"learning_rate": 4.68572067247573e-06,
"logits/chosen": -0.16852374374866486,
"logits/rejected": -0.21371085941791534,
"logps/chosen": -614.1183471679688,
"logps/rejected": -670.2012939453125,
"loss": 0.2077,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.08841963112354279,
"rewards/margins": 0.02279593050479889,
"rewards/rejected": -0.11121556907892227,
"step": 230
},
{
"epoch": 0.26,
"learning_rate": 4.638989593081364e-06,
"logits/chosen": -0.1663983315229416,
"logits/rejected": -0.21970775723457336,
"logps/chosen": -602.5869750976562,
"logps/rejected": -618.7034912109375,
"loss": 0.2061,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.07862231880426407,
"rewards/margins": 0.021257968619465828,
"rewards/rejected": -0.09988027811050415,
"step": 240
},
{
"epoch": 0.27,
"learning_rate": 4.5892881952959015e-06,
"logits/chosen": -0.21088270843029022,
"logits/rejected": -0.14775848388671875,
"logps/chosen": -577.7684326171875,
"logps/rejected": -632.3033447265625,
"loss": 0.2054,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.0773148387670517,
"rewards/margins": 0.026050010696053505,
"rewards/rejected": -0.10336484014987946,
"step": 250
},
{
"epoch": 0.28,
"learning_rate": 4.536685497209182e-06,
"logits/chosen": -0.1055503636598587,
"logits/rejected": -0.06379745155572891,
"logps/chosen": -522.751708984375,
"logps/rejected": -602.4344482421875,
"loss": 0.2001,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.06098253279924393,
"rewards/margins": 0.030480870977044106,
"rewards/rejected": -0.09146340191364288,
"step": 260
},
{
"epoch": 0.29,
"learning_rate": 4.481254545815943e-06,
"logits/chosen": -0.15926873683929443,
"logits/rejected": -0.04976898431777954,
"logps/chosen": -529.4932250976562,
"logps/rejected": -549.9386596679688,
"loss": 0.1973,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.06077051907777786,
"rewards/margins": 0.01582062616944313,
"rewards/rejected": -0.0765911340713501,
"step": 270
},
{
"epoch": 0.3,
"learning_rate": 4.42307231557875e-06,
"logits/chosen": -0.07944826781749725,
"logits/rejected": -0.05855567380785942,
"logps/chosen": -512.50439453125,
"logps/rejected": -543.458984375,
"loss": 0.1986,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -0.06550983339548111,
"rewards/margins": 0.023027174174785614,
"rewards/rejected": -0.08853700011968613,
"step": 280
},
{
"epoch": 0.31,
"learning_rate": 4.3622196015370305e-06,
"logits/chosen": -0.12430046498775482,
"logits/rejected": -0.06956211477518082,
"logps/chosen": -550.2479248046875,
"logps/rejected": -614.044189453125,
"loss": 0.1944,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.056610800325870514,
"rewards/margins": 0.029858995229005814,
"rewards/rejected": -0.08646979182958603,
"step": 290
},
{
"epoch": 0.32,
"learning_rate": 4.298780907110648e-06,
"logits/chosen": -0.09455857425928116,
"logits/rejected": -0.07383386790752411,
"logps/chosen": -598.065185546875,
"logps/rejected": -647.9603271484375,
"loss": 0.1876,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.06337399780750275,
"rewards/margins": 0.026696253567934036,
"rewards/rejected": -0.09007024019956589,
"step": 300
},
{
"epoch": 0.33,
"learning_rate": 4.23284432675381e-06,
"logits/chosen": -0.19348487257957458,
"logits/rejected": -0.1443384736776352,
"logps/chosen": -539.6243896484375,
"logps/rejected": -612.7183837890625,
"loss": 0.1963,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.05517622083425522,
"rewards/margins": 0.02591213583946228,
"rewards/rejected": -0.0810883566737175,
"step": 310
},
{
"epoch": 0.34,
"learning_rate": 4.164501423622277e-06,
"logits/chosen": -0.19629542529582977,
"logits/rejected": -0.13960464298725128,
"logps/chosen": -516.0609130859375,
"logps/rejected": -658.4205932617188,
"loss": 0.1915,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.05958019569516182,
"rewards/margins": 0.06007415056228638,
"rewards/rejected": -0.1196543425321579,
"step": 320
},
{
"epoch": 0.35,
"learning_rate": 4.0938471024237355e-06,
"logits/chosen": -0.1600683629512787,
"logits/rejected": -0.10378336906433105,
"logps/chosen": -590.7578125,
"logps/rejected": -621.64697265625,
"loss": 0.2007,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.08227936178445816,
"rewards/margins": 0.01520558726042509,
"rewards/rejected": -0.09748493880033493,
"step": 330
},
{
"epoch": 0.36,
"learning_rate": 4.020979477627907e-06,
"logits/chosen": -0.19418606162071228,
"logits/rejected": -0.1177397221326828,
"logps/chosen": -586.6962890625,
"logps/rejected": -654.0504150390625,
"loss": 0.1894,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.07023846358060837,
"rewards/margins": 0.03478557616472244,
"rewards/rejected": -0.10502403974533081,
"step": 340
},
{
"epoch": 0.37,
"learning_rate": 3.9459997372194105e-06,
"logits/chosen": -0.1304813176393509,
"logits/rejected": -0.04862945154309273,
"logps/chosen": -594.4133911132812,
"logps/rejected": -617.715087890625,
"loss": 0.192,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08139745891094208,
"rewards/margins": 0.026553615927696228,
"rewards/rejected": -0.10795106738805771,
"step": 350
},
{
"epoch": 0.38,
"learning_rate": 3.869012002182573e-06,
"logits/chosen": -0.21274884045124054,
"logits/rejected": -0.03855857998132706,
"logps/chosen": -557.4656982421875,
"logps/rejected": -637.321044921875,
"loss": 0.1848,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.07546891272068024,
"rewards/margins": 0.03727220743894577,
"rewards/rejected": -0.1127411276102066,
"step": 360
},
{
"epoch": 0.39,
"learning_rate": 3.7901231819133104e-06,
"logits/chosen": -0.10762195289134979,
"logits/rejected": -0.10060106217861176,
"logps/chosen": -599.8753051757812,
"logps/rejected": -646.8792724609375,
"loss": 0.1955,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.0741112157702446,
"rewards/margins": 0.03268015384674072,
"rewards/rejected": -0.10679137706756592,
"step": 370
},
{
"epoch": 0.41,
"learning_rate": 3.709442825758875e-06,
"logits/chosen": -0.12406639009714127,
"logits/rejected": -0.053130537271499634,
"logps/chosen": -587.0034790039062,
"logps/rejected": -618.0760498046875,
"loss": 0.19,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.07897321879863739,
"rewards/margins": 0.025586843490600586,
"rewards/rejected": -0.10456006228923798,
"step": 380
},
{
"epoch": 0.42,
"learning_rate": 3.6270829708916113e-06,
"logits/chosen": -0.11101411283016205,
"logits/rejected": -0.08626400679349899,
"logps/chosen": -569.6163330078125,
"logps/rejected": -620.4082641601562,
"loss": 0.1913,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.06503543257713318,
"rewards/margins": 0.037478551268577576,
"rewards/rejected": -0.10251398384571075,
"step": 390
},
{
"epoch": 0.43,
"learning_rate": 3.543157986727991e-06,
"logits/chosen": -0.11596628278493881,
"logits/rejected": -0.09326865524053574,
"logps/chosen": -569.7626342773438,
"logps/rejected": -647.47119140625,
"loss": 0.1913,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.0574682354927063,
"rewards/margins": 0.03390919789671898,
"rewards/rejected": -0.09137743711471558,
"step": 400
},
{
"epoch": 0.44,
"learning_rate": 3.4577844161089614e-06,
"logits/chosen": -0.1688176691532135,
"logits/rejected": -0.1762055903673172,
"logps/chosen": -548.4512939453125,
"logps/rejected": -596.2463989257812,
"loss": 0.1879,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.054659001529216766,
"rewards/margins": 0.025764942169189453,
"rewards/rejected": -0.08042393624782562,
"step": 410
},
{
"epoch": 0.45,
"learning_rate": 3.3710808134621577e-06,
"logits/chosen": -0.12280504405498505,
"logits/rejected": -0.018482182174921036,
"logps/chosen": -567.9172973632812,
"logps/rejected": -593.0560302734375,
"loss": 0.189,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.0538947694003582,
"rewards/margins": 0.02232169173657894,
"rewards/rejected": -0.07621645927429199,
"step": 420
},
{
"epoch": 0.46,
"learning_rate": 3.2831675801707126e-06,
"logits/chosen": -0.04735702648758888,
"logits/rejected": -0.10849102586507797,
"logps/chosen": -590.4489135742188,
"logps/rejected": -649.82568359375,
"loss": 0.1887,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.04551684111356735,
"rewards/margins": 0.026576777920126915,
"rewards/rejected": -0.07209362089633942,
"step": 430
},
{
"epoch": 0.47,
"learning_rate": 3.194166797377289e-06,
"logits/chosen": -0.08134131878614426,
"logits/rejected": -0.1677294671535492,
"logps/chosen": -574.8263549804688,
"logps/rejected": -607.7601318359375,
"loss": 0.1893,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.04221652075648308,
"rewards/margins": 0.030459443107247353,
"rewards/rejected": -0.07267596572637558,
"step": 440
},
{
"epoch": 0.48,
"learning_rate": 3.104202056455501e-06,
"logits/chosen": -0.0588027760386467,
"logits/rejected": -0.1330319195985794,
"logps/chosen": -547.6630249023438,
"logps/rejected": -580.7600708007812,
"loss": 0.1985,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.04689568281173706,
"rewards/margins": 0.024683769792318344,
"rewards/rejected": -0.07157944142818451,
"step": 450
},
{
"epoch": 0.49,
"learning_rate": 3.013398287384144e-06,
"logits/chosen": -0.0910586565732956,
"logits/rejected": -0.13333860039710999,
"logps/chosen": -520.99267578125,
"logps/rejected": -608.8109130859375,
"loss": 0.1948,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.04666762426495552,
"rewards/margins": 0.04471370577812195,
"rewards/rejected": -0.09138132631778717,
"step": 460
},
{
"epoch": 0.5,
"learning_rate": 2.9218815852625717e-06,
"logits/chosen": -0.09454444795846939,
"logits/rejected": -0.04375922679901123,
"logps/chosen": -620.7197265625,
"logps/rejected": -636.3668212890625,
"loss": 0.201,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.06732948869466782,
"rewards/margins": 0.026028599590063095,
"rewards/rejected": -0.09335808455944061,
"step": 470
},
{
"epoch": 0.51,
"learning_rate": 2.829779035208113e-06,
"logits/chosen": -0.09432949125766754,
"logits/rejected": -0.08926217257976532,
"logps/chosen": -597.0772705078125,
"logps/rejected": -639.5493774414062,
"loss": 0.1909,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.040321771055459976,
"rewards/margins": 0.03370783478021622,
"rewards/rejected": -0.07402960956096649,
"step": 480
},
{
"epoch": 0.52,
"learning_rate": 2.737218535878705e-06,
"logits/chosen": -0.1773318350315094,
"logits/rejected": -0.07903443276882172,
"logps/chosen": -552.8883666992188,
"logps/rejected": -618.2833251953125,
"loss": 0.2029,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.04510737583041191,
"rewards/margins": 0.028245270252227783,
"rewards/rejected": -0.07335264980792999,
"step": 490
},
{
"epoch": 0.53,
"learning_rate": 2.64432862186579e-06,
"logits/chosen": -0.07201124727725983,
"logits/rejected": -0.04144411161541939,
"logps/chosen": -526.00634765625,
"logps/rejected": -577.3812255859375,
"loss": 0.1891,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.03259889408946037,
"rewards/margins": 0.028664156794548035,
"rewards/rejected": -0.06126304715871811,
"step": 500
},
{
"epoch": 0.54,
"learning_rate": 2.551238285204126e-06,
"logits/chosen": -0.13225743174552917,
"logits/rejected": -0.03518156707286835,
"logps/chosen": -558.69970703125,
"logps/rejected": -633.7002563476562,
"loss": 0.1987,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.034947603940963745,
"rewards/margins": 0.041034139692783356,
"rewards/rejected": -0.0759817361831665,
"step": 510
},
{
"epoch": 0.55,
"learning_rate": 2.4580767962463688e-06,
"logits/chosen": -0.03775392845273018,
"logits/rejected": -0.06259463727474213,
"logps/chosen": -564.3277587890625,
"logps/rejected": -616.877685546875,
"loss": 0.1935,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.041550230234861374,
"rewards/margins": 0.04528028517961502,
"rewards/rejected": -0.0868305116891861,
"step": 520
},
{
"epoch": 0.57,
"learning_rate": 2.3649735241511546e-06,
"logits/chosen": -0.11865083128213882,
"logits/rejected": -0.14535991847515106,
"logps/chosen": -539.8975219726562,
"logps/rejected": -628.8270263671875,
"loss": 0.1988,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.06274162977933884,
"rewards/margins": 0.050676118582487106,
"rewards/rejected": -0.11341774463653564,
"step": 530
},
{
"epoch": 0.58,
"learning_rate": 2.2720577572339914e-06,
"logits/chosen": -0.1661374866962433,
"logits/rejected": -0.10748039186000824,
"logps/chosen": -546.2053833007812,
"logps/rejected": -584.2305908203125,
"loss": 0.1901,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.05626441910862923,
"rewards/margins": 0.02776341699063778,
"rewards/rejected": -0.08402784168720245,
"step": 540
},
{
"epoch": 0.59,
"learning_rate": 2.1794585234303995e-06,
"logits/chosen": -0.10749207437038422,
"logits/rejected": -0.13697417080402374,
"logps/chosen": -517.0869140625,
"logps/rejected": -581.8153686523438,
"loss": 0.1866,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.052382372319698334,
"rewards/margins": 0.035972487181425095,
"rewards/rejected": -0.08835486322641373,
"step": 550
},
{
"epoch": 0.6,
"learning_rate": 2.0873044111206407e-06,
"logits/chosen": -0.1282195746898651,
"logits/rejected": -0.1339006870985031,
"logps/chosen": -576.3350830078125,
"logps/rejected": -666.8603515625,
"loss": 0.1907,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.04062817618250847,
"rewards/margins": 0.03738432377576828,
"rewards/rejected": -0.07801250368356705,
"step": 560
},
{
"epoch": 0.61,
"learning_rate": 1.9957233905648293e-06,
"logits/chosen": -0.10549817234277725,
"logits/rejected": -0.11278073489665985,
"logps/chosen": -566.6007080078125,
"logps/rejected": -636.8270263671875,
"loss": 0.1877,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.048470962792634964,
"rewards/margins": 0.04373977333307266,
"rewards/rejected": -0.09221073240041733,
"step": 570
},
{
"epoch": 0.62,
"learning_rate": 1.904842636196402e-06,
"logits/chosen": -0.0554957278072834,
"logits/rejected": -0.13037823140621185,
"logps/chosen": -597.04150390625,
"logps/rejected": -615.6434326171875,
"loss": 0.1909,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0562109649181366,
"rewards/margins": 0.028234709054231644,
"rewards/rejected": -0.08444567024707794,
"step": 580
},
{
"epoch": 0.63,
"learning_rate": 1.814788350020726e-06,
"logits/chosen": -0.0553332157433033,
"logits/rejected": -0.14984294772148132,
"logps/chosen": -511.7176818847656,
"logps/rejected": -577.5421752929688,
"loss": 0.1891,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.05183824896812439,
"rewards/margins": 0.0338759571313858,
"rewards/rejected": -0.08571420609951019,
"step": 590
},
{
"epoch": 0.64,
"learning_rate": 1.725685586364051e-06,
"logits/chosen": -0.1068972796201706,
"logits/rejected": -0.13699831068515778,
"logps/chosen": -547.6019897460938,
"logps/rejected": -624.2053833007812,
"loss": 0.1908,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.04226940870285034,
"rewards/margins": 0.04575734585523605,
"rewards/rejected": -0.08802676200866699,
"step": 600
},
{
"epoch": 0.65,
"learning_rate": 1.6376580782162172e-06,
"logits/chosen": -0.12253417819738388,
"logits/rejected": -0.09159277379512787,
"logps/chosen": -534.8265380859375,
"logps/rejected": -639.2476806640625,
"loss": 0.1866,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.038840554654598236,
"rewards/margins": 0.04929639771580696,
"rewards/rejected": -0.0881369560956955,
"step": 610
},
{
"epoch": 0.66,
"learning_rate": 1.550828065408227e-06,
"logits/chosen": -0.11153294146060944,
"logits/rejected": -0.0631122812628746,
"logps/chosen": -581.9796142578125,
"logps/rejected": -639.3689575195312,
"loss": 0.1738,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.04295315593481064,
"rewards/margins": 0.037230443209409714,
"rewards/rejected": -0.08018360286951065,
"step": 620
},
{
"epoch": 0.67,
"learning_rate": 1.4653161248633053e-06,
"logits/chosen": -0.10305066406726837,
"logits/rejected": -0.13783864676952362,
"logps/chosen": -582.2150268554688,
"logps/rejected": -607.2169799804688,
"loss": 0.1865,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.05051354691386223,
"rewards/margins": 0.02962956391274929,
"rewards/rejected": -0.08014310896396637,
"step": 630
},
{
"epoch": 0.68,
"learning_rate": 1.381241003157162e-06,
"logits/chosen": -0.09553556144237518,
"logits/rejected": -0.1049310564994812,
"logps/chosen": -561.0845947265625,
"logps/rejected": -615.9722900390625,
"loss": 0.19,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.046824414283037186,
"rewards/margins": 0.03598689287900925,
"rewards/rejected": -0.08281131088733673,
"step": 640
},
{
"epoch": 0.69,
"learning_rate": 1.298719451619979e-06,
"logits/chosen": -0.1247280016541481,
"logits/rejected": -0.0659816786646843,
"logps/chosen": -560.4979858398438,
"logps/rejected": -620.7578735351562,
"loss": 0.2002,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.04652046412229538,
"rewards/margins": 0.041216202080249786,
"rewards/rejected": -0.08773668110370636,
"step": 650
},
{
"epoch": 0.7,
"learning_rate": 1.2178660642091036e-06,
"logits/chosen": -0.03698350116610527,
"logits/rejected": -0.2196667492389679,
"logps/chosen": -521.7525634765625,
"logps/rejected": -626.46435546875,
"loss": 0.1979,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.05702243372797966,
"rewards/margins": 0.041262269020080566,
"rewards/rejected": -0.09828470647335052,
"step": 660
},
{
"epoch": 0.71,
"learning_rate": 1.1387931183775821e-06,
"logits/chosen": -0.1309659779071808,
"logits/rejected": -0.126008078455925,
"logps/chosen": -526.6151123046875,
"logps/rejected": -586.6326293945312,
"loss": 0.1836,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0365142747759819,
"rewards/margins": 0.039250634610652924,
"rewards/rejected": -0.07576490938663483,
"step": 670
},
{
"epoch": 0.73,
"learning_rate": 1.061610419159532e-06,
"logits/chosen": -0.06580721586942673,
"logits/rejected": -0.11697240173816681,
"logps/chosen": -545.3971557617188,
"logps/rejected": -590.3699340820312,
"loss": 0.186,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.040514297783374786,
"rewards/margins": 0.041993193328380585,
"rewards/rejected": -0.08250749111175537,
"step": 680
},
{
"epoch": 0.74,
"learning_rate": 9.864251466888364e-07,
"logits/chosen": 0.015632059425115585,
"logits/rejected": -0.14370284974575043,
"logps/chosen": -527.1017456054688,
"logps/rejected": -602.5015869140625,
"loss": 0.1872,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.03584219887852669,
"rewards/margins": 0.0341840498149395,
"rewards/rejected": -0.07002625614404678,
"step": 690
},
{
"epoch": 0.75,
"learning_rate": 9.133417073629288e-07,
"logits/chosen": -0.1096029132604599,
"logits/rejected": -0.09382790327072144,
"logps/chosen": -552.9088745117188,
"logps/rejected": -619.2091674804688,
"loss": 0.1929,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.04123011603951454,
"rewards/margins": 0.03130009397864342,
"rewards/rejected": -0.07253019511699677,
"step": 700
},
{
"epoch": 0.76,
"learning_rate": 8.424615888583332e-07,
"logits/chosen": -0.1330350786447525,
"logits/rejected": -0.07537052035331726,
"logps/chosen": -521.3177490234375,
"logps/rejected": -601.4888305664062,
"loss": 0.1829,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.037754353135824203,
"rewards/margins": 0.041079822927713394,
"rewards/rejected": -0.0788341760635376,
"step": 710
},
{
"epoch": 0.77,
"learning_rate": 7.738832191993092e-07,
"logits/chosen": -0.13393089175224304,
"logits/rejected": -0.07735292613506317,
"logps/chosen": -589.1104736328125,
"logps/rejected": -623.0423583984375,
"loss": 0.1937,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.04533671587705612,
"rewards/margins": 0.03662148863077164,
"rewards/rejected": -0.08195820450782776,
"step": 720
},
{
"epoch": 0.78,
"learning_rate": 7.077018300752917e-07,
"logits/chosen": -0.09014391899108887,
"logits/rejected": -0.02712271548807621,
"logps/chosen": -550.0320434570312,
"logps/rejected": -605.1174926757812,
"loss": 0.1961,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.05133052542805672,
"rewards/margins": 0.041539210826158524,
"rewards/rejected": -0.09286972880363464,
"step": 730
},
{
"epoch": 0.79,
"learning_rate": 6.440093245969342e-07,
"logits/chosen": -0.08313737064599991,
"logits/rejected": -0.1943168193101883,
"logps/chosen": -516.8920288085938,
"logps/rejected": -601.4186401367188,
"loss": 0.1848,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.04221433773636818,
"rewards/margins": 0.0475175604224205,
"rewards/rejected": -0.08973188698291779,
"step": 740
},
{
"epoch": 0.8,
"learning_rate": 5.828941496744075e-07,
"logits/chosen": -0.11161942780017853,
"logits/rejected": -0.0919300764799118,
"logps/chosen": -563.8603515625,
"logps/rejected": -619.1151733398438,
"loss": 0.1903,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.04418020322918892,
"rewards/margins": 0.03953651711344719,
"rewards/rejected": -0.08371671289205551,
"step": 750
},
{
"epoch": 0.81,
"learning_rate": 5.244411731951671e-07,
"logits/chosen": -0.13506890833377838,
"logits/rejected": -0.033810555934906006,
"logps/chosen": -605.5892944335938,
"logps/rejected": -609.83544921875,
"loss": 0.1878,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.03747162967920303,
"rewards/margins": 0.02192925289273262,
"rewards/rejected": -0.059400878846645355,
"step": 760
},
{
"epoch": 0.82,
"learning_rate": 4.6873156617173594e-07,
"logits/chosen": -0.07261113822460175,
"logits/rejected": -0.16117814183235168,
"logps/chosen": -553.5911254882812,
"logps/rejected": -624.5232543945312,
"loss": 0.1921,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.04296105355024338,
"rewards/margins": 0.0388905294239521,
"rewards/rejected": -0.08185158669948578,
"step": 770
},
{
"epoch": 0.83,
"learning_rate": 4.1584269002318653e-07,
"logits/chosen": -0.07403261959552765,
"logits/rejected": -0.054157156497240067,
"logps/chosen": -535.3461303710938,
"logps/rejected": -585.4727783203125,
"loss": 0.1828,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.0406302735209465,
"rewards/margins": 0.03608276695013046,
"rewards/rejected": -0.07671303302049637,
"step": 780
},
{
"epoch": 0.84,
"learning_rate": 3.658479891468258e-07,
"logits/chosen": -0.1717700958251953,
"logits/rejected": -0.08853835612535477,
"logps/chosen": -527.3263549804688,
"logps/rejected": -540.2444458007812,
"loss": 0.1778,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.04036609083414078,
"rewards/margins": 0.03141506761312485,
"rewards/rejected": -0.07178115844726562,
"step": 790
},
{
"epoch": 0.85,
"learning_rate": 3.18816888929272e-07,
"logits/chosen": -0.09848084300756454,
"logits/rejected": -0.06764743477106094,
"logps/chosen": -563.3206787109375,
"logps/rejected": -668.9093017578125,
"loss": 0.2002,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.046812716871500015,
"rewards/margins": 0.054834604263305664,
"rewards/rejected": -0.10164730250835419,
"step": 800
},
{
"epoch": 0.86,
"learning_rate": 2.748146993385484e-07,
"logits/chosen": -0.09693370759487152,
"logits/rejected": -0.07278673350811005,
"logps/chosen": -522.9954833984375,
"logps/rejected": -612.6608276367188,
"loss": 0.1854,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.04407941550016403,
"rewards/margins": 0.05026249960064888,
"rewards/rejected": -0.09434191882610321,
"step": 810
},
{
"epoch": 0.87,
"learning_rate": 2.3390252423108077e-07,
"logits/chosen": -0.07084161043167114,
"logits/rejected": -0.18225322663784027,
"logps/chosen": -488.76483154296875,
"logps/rejected": -558.3425903320312,
"loss": 0.1939,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.035873524844646454,
"rewards/margins": 0.037640780210494995,
"rewards/rejected": -0.07351429760456085,
"step": 820
},
{
"epoch": 0.89,
"learning_rate": 1.961371764995243e-07,
"logits/chosen": -0.11218070983886719,
"logits/rejected": -0.143798828125,
"logps/chosen": -548.5975341796875,
"logps/rejected": -618.435302734375,
"loss": 0.2009,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.03908687084913254,
"rewards/margins": 0.042751066386699677,
"rewards/rejected": -0.08183793723583221,
"step": 830
},
{
"epoch": 0.9,
"learning_rate": 1.61571099179261e-07,
"logits/chosen": -0.0712205171585083,
"logits/rejected": -0.06110917776823044,
"logps/chosen": -584.1240234375,
"logps/rejected": -650.0173950195312,
"loss": 0.1955,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.04009150713682175,
"rewards/margins": 0.030330544337630272,
"rewards/rejected": -0.07042204588651657,
"step": 840
},
{
"epoch": 0.91,
"learning_rate": 1.3025229262312367e-07,
"logits/chosen": -0.0935712531208992,
"logits/rejected": -0.05454383045434952,
"logps/chosen": -496.932861328125,
"logps/rejected": -605.6661987304688,
"loss": 0.1884,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.042653247714042664,
"rewards/margins": 0.048957787454128265,
"rewards/rejected": -0.09161103516817093,
"step": 850
},
{
"epoch": 0.92,
"learning_rate": 1.0222424784546853e-07,
"logits/chosen": -0.08921684324741364,
"logits/rejected": -0.15163610875606537,
"logps/chosen": -579.2117919921875,
"logps/rejected": -619.4464111328125,
"loss": 0.1904,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.04733709245920181,
"rewards/margins": 0.03301934152841568,
"rewards/rejected": -0.08035643398761749,
"step": 860
},
{
"epoch": 0.93,
"learning_rate": 7.752588612816553e-08,
"logits/chosen": -0.04686546325683594,
"logits/rejected": -0.15816907584667206,
"logps/chosen": -509.0023498535156,
"logps/rejected": -572.1159057617188,
"loss": 0.1754,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.042182981967926025,
"rewards/margins": 0.04292844608426094,
"rewards/rejected": -0.08511142432689667,
"step": 870
},
{
"epoch": 0.94,
"learning_rate": 5.619150497236991e-08,
"logits/chosen": -0.07643123716115952,
"logits/rejected": -0.16245657205581665,
"logps/chosen": -535.0369873046875,
"logps/rejected": -608.0992431640625,
"loss": 0.192,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.04792182892560959,
"rewards/margins": 0.03496783226728439,
"rewards/rejected": -0.08288966119289398,
"step": 880
},
{
"epoch": 0.95,
"learning_rate": 3.825073047112743e-08,
"logits/chosen": -0.13168227672576904,
"logits/rejected": -0.046010442078113556,
"logps/chosen": -579.3240356445312,
"logps/rejected": -674.3414306640625,
"loss": 0.1964,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.04349333792924881,
"rewards/margins": 0.047455307096242905,
"rewards/rejected": -0.09094865620136261,
"step": 890
},
{
"epoch": 0.96,
"learning_rate": 2.372847616895685e-08,
"logits/chosen": -0.04904794320464134,
"logits/rejected": -0.019006099551916122,
"logps/chosen": -542.4931640625,
"logps/rejected": -638.1673583984375,
"loss": 0.1889,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.04928978905081749,
"rewards/margins": 0.03806794807314873,
"rewards/rejected": -0.08735774457454681,
"step": 900
},
{
"epoch": 0.97,
"learning_rate": 1.264490846553279e-08,
"logits/chosen": -0.12707039713859558,
"logits/rejected": -0.10833065211772919,
"logps/chosen": -579.73681640625,
"logps/rejected": -622.3654174804688,
"loss": 0.1897,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.046609390527009964,
"rewards/margins": 0.03541853651404381,
"rewards/rejected": -0.08202792704105377,
"step": 910
},
{
"epoch": 0.98,
"learning_rate": 5.015418611516165e-09,
"logits/chosen": -0.0854305848479271,
"logits/rejected": -0.11656080186367035,
"logps/chosen": -616.4360961914062,
"logps/rejected": -670.5054931640625,
"loss": 0.1907,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.04680439084768295,
"rewards/margins": 0.05593379586935043,
"rewards/rejected": -0.10273818671703339,
"step": 920
},
{
"epoch": 0.99,
"learning_rate": 8.506013354186993e-10,
"logits/chosen": -0.11298644542694092,
"logits/rejected": -0.03937912359833717,
"logps/chosen": -532.8866577148438,
"logps/rejected": -597.7803344726562,
"loss": 0.2033,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.043054092675447464,
"rewards/margins": 0.037277717143297195,
"rewards/rejected": -0.08033180981874466,
"step": 930
},
{
"epoch": 1.0,
"step": 937,
"total_flos": 0.0,
"train_loss": 0.19462941225971966,
"train_runtime": 7972.3934,
"train_samples_per_second": 3.763,
"train_steps_per_second": 0.118
}
],
"logging_steps": 10,
"max_steps": 937,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}